I tried to modify the code in order to handle stereo, the LoadWAV function:
u32 SampleCount = SampleDataSize / (sizeof(s16)*ChannelCount); s16 *SourceData = SampleData; if (ChannelCount == 1) { Result.Samples[0] = SampleData; Result.Samples[1] = 0; } else if (ChannelCount == 2) { Result.Samples[0] = PushArray(Arena, SampleCount, s16, {0,16}); Result.Samples[1] = PushArray(Arena, SampleCount, s16, {0,16}); for (u32 SampleIndex = 0; SampleIndex < SampleCount; ++SampleIndex) { Result.Samples[0][SampleIndex] = SourceData[2*SampleIndex]; Result.Samples[1][SampleIndex] = SourceData[1+2*SampleIndex]; } }
In the mixer code, inside the loop which copies the samples from the playing sound to the audio buffer I had to modify the code slightly so that it fills the right channel with the correct data:
for (u32 LoopIndex = 0; LoopIndex < ChunksToMix; ++LoopIndex) { f32 SampleP = BeginSampleP + LoopIndexC*(f32)LoopIndex; __m128 SamplePos = _mm_setr_ps(SampleP + 0.0f*dSample, SampleP + 1.0f*dSample, SampleP + 2.0f*dSample, SampleP + 3.0f*dSample); __m128i SampleIndex = _mm_cvttps_epi32(SamplePos); __m128 Frac = _mm_sub_ps(SamplePos, _mm_cvtepi32_ps(SampleIndex)); __m128 SampleValueF0 = _mm_set_ps(Sound->Samples[0][((s32 *)&SampleIndex)[0]], Sound->Samples[0][((s32 *)&SampleIndex)[1]], Sound->Samples[0][((s32 *)&SampleIndex)[2]], Sound->Samples[0][((s32 *)&SampleIndex)[3]]); __m128 SampleValueC0 = _mm_set_ps(Sound->Samples[0][((s32 *)&SampleIndex)[0] + 1], Sound->Samples[0][((s32 *)&SampleIndex)[1] + 1], Sound->Samples[0][((s32 *)&SampleIndex)[2] + 1], Sound->Samples[0][((s32 *)&SampleIndex)[3] + 1]); __m128 SampleValue0 = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, Frac), SampleValueF0), _mm_mul_ps(Frac, SampleValueC0)); __m128 SampleValueF1 = _mm_set_ps(Sound->Samples[1][((s32 *)&SampleIndex)[0]], Sound->Samples[1][((s32 *)&SampleIndex)[1]], Sound->Samples[1][((s32 *)&SampleIndex)[2]], Sound->Samples[1][((s32 *)&SampleIndex)[3]]); __m128 SampleValueC1 = _mm_set_ps(Sound->Samples[1][((s32 *)&SampleIndex)[0] + 1], Sound->Samples[1][((s32 *)&SampleIndex)[1] + 1], Sound->Samples[1][((s32 *)&SampleIndex)[2] + 1], Sound->Samples[1][((s32 *)&SampleIndex)[3] + 1]); __m128 SampleValue1 = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, Frac), SampleValueF1), _mm_mul_ps(Frac, SampleValueC1)); __m128 D0 = _mm_load_ps((f32 *)&Dest0[0]); __m128 D1 = _mm_load_ps((f32 *)&Dest1[0]); D0 = _mm_add_ps(D0, _mm_mul_ps(_mm_mul_ps(MasterVolume0, Volume0), SampleValue0)); D1 = _mm_add_ps(D1, _mm_mul_ps(_mm_mul_ps(MasterVolume1, Volume1), SampleValue1)); _mm_store_ps((f32 *)&Dest0[0], D0); _mm_store_ps((f32 *)&Dest1[0], D1); ++Dest0; ++Dest1; Volume0 = _mm_add_ps(Volume0, dVolumeChunk0); Volume1 = _mm_add_ps(Volume1, dVolumeChunk1); }
However that did not work, I kept getting garbage on the right channel. After some extensive testing I was sure that the data was in fact all correct and the only possible place for mistake was the actual reading code, so I tested and got it working as follows:
s16 *RightChannelSamples = (s16 *)((u08 *)Sound->Samples + Sound->SampleCount*sizeof(s16)); __m128 SampleValueF1 = _mm_set_ps(RightChannelSamples[((s32 *)&SampleIndex)[0]], RightChannelSamples[((s32 *)&SampleIndex)[1]], RightChannelSamples[((s32 *)&SampleIndex)[2]], RightChannelSamples[((s32 *)&SampleIndex)[3]]); __m128 SampleValueC1 = _mm_set_ps(RightChannelSamples[((s32 *)&SampleIndex)[0] + 1], RightChannelSamples[((s32 *)&SampleIndex)[1] + 1], RightChannelSamples[((s32 *)&SampleIndex)[2] + 1], RightChannelSamples[((s32 *)&SampleIndex)[3] + 1]); __m128 SampleValue1 = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(One, Frac), SampleValueF1), _mm_mul_ps(Frac, SampleValueC1));
Sound is a loaded_sound:
struct loaded_sound { // NOTE: Sample count is divided by 8 s16 *Samples[2]; u32 SampleCount; u32 ChannelCount; };
It is written out like this in the asset packer:
for (u32 ChannelIndex = 0; ChannelIndex < WAV.ChannelCount; ++ChannelIndex) { fwrite(WAV.Samples[ChannelIndex], Dest->Sound.SampleCount*sizeof(s16), 1, Out); }
So the layout is [LEFT LEFT LEFT LEFT ...][RIGHT RIGHT RIGHT RIGHT ...]
I thought that Sound->Samples[1] would advance the entire block, but maybe C doesn't know how much to advance? I'm confused. Does anyone make sense of it all?
You showed code that writes samples out to asset file. But how are they loaded back in (from asset file, not wav)?
Only way your fixed code works is if in memory all the samples are together - after all left channel samples there are right channel samples. But that is not what Samples member declares. That member declares two pointers that can point anywhere in memory. So the question is - how exactly is it loaded/allocated?
That member declares two pointers that can point anywhere in memory. So the question is - how exactly is it loaded/allocated?
The sound is flat loaded by a single call to ReadDataFromFile:
struct load_asset_work { task_with_memory *Task; asset *Asset; platform_file_handle *Handle; u64 Offset; u64 Size; void *Dest; u32 FinalState; finalize_asset_operation FinalizeOperation; }; internal void LoadAssetWorkDirectly(load_asset_work *Work) { TIMED_BLOCK(); Platform.ReadDataFromFile(Work->Handle, Work->Offset, Work->Size, Work->Dest); if (PlatformNoFileErrors(Work->Handle)) { switch (Work->FinalizeOperation) { case FinalizeAsset_None: { // NOTE: Nothing to do } break; case FinalizeAsset_Font: { // ... } break; } } ENGINE_WRITE_BARRIER; if (!PlatformNoFileErrors(Work->Handle)) { ZeroSize(Work->Size, Work->Dest); } Work->Asset->State = Work->FinalState; }
ReadDataFromFile file:
internal PLATFORM_READ_DATA_FROM_FILE(Win64ReadDataFromFile) { if (PlatformNoFileErrors(Source)) { win64_platform_file_handle *Handle = (win64_platform_file_handle *)Source->Platform; OVERLAPPED Overlapped = {}; Overlapped.Offset = (u32)((Offset >> 0) & 0xFFFFFFFF); Overlapped.OffsetHigh = (u32)((Offset >> 32) & 0xFFFFFFFF); u32 FileSize32 = SafeTruncateToU32(Size); DWORD BytesRead; if (ReadFile(Handle->Win64Handle, Dest, FileSize32, &BytesRead, &Overlapped) && (BytesRead == FileSize32)) { } else { WinFileError(Source, "Read file failed."); } } }
It seems to me that it is exactly as you said, since the samples are written out to asset file first left channels then right channels, that is the layout they get loaded to memory as well.
Any way to use the Sound->Samples[1] notation? (I heard may times that "pointers are just arrays" but I'm starting to think they are not so similar).
This is not the problem with "pointers are arrays". It is problem with how you allocate and read into them. You need to prepare memory the same way how you will access it.
You did not show how you allocate it.
If you want it be properly allocated and read into it with one read, then you need to do this:
loaded_sound* Sound = ...; s16* SampleData = PushArray(Arena, Sound->SampleCount * Sound->ChannelCount, s16); for (int i=0; i<ChannelCount; i++) { Sound->SampleData[i] = SampleData + i*Sound->SampleCount; } // now issue Read of "SampleCount*SampleCount*sizeof(s16)" size into "SampleData" pointed location
Sorry about that. The allocation happens here:
hha_sound *Info = &Asset->HHA.Sound; asset_memory_size Size = {}; Size.Section = Info->SampleCount*sizeof(s16); Size.Data = Info->ChannelCount*Size.Section; Size.Total = Size.Data + sizeof(asset_memory_header); Asset->Header = AcquireAssetMemory(Assets, Size.Total, ID.Value); loaded_sound *Sound = &Asset->Header->Sound; Sound->SampleCount = Info->SampleCount; Sound->ChannelCount = Info->ChannelCount; u32 ChannelSize = Size.Section; void *Memory = (Asset->Header + 1); s16 *SampleAt = (s16 *)Memory; for (u32 ChannelIndex = 0; ChannelIndex < Sound->ChannelCount; ++ChannelIndex) { Sound->Samples[ChannelIndex] = SampleAt; SampleAt += ChannelSize; } load_asset_work *Work = PushStruct(&Task->Arena, load_asset_work); Work->Task = Task; Work->Asset = Assets->Assets + ID.Value; Work->Handle = GetFileHandleFor(Assets, Asset->FileIndex); Work->Offset = Asset->HHA.DataOffset; Work->Size = Size.Data; Work->Dest = Memory; Work->FinalState = AssetState_Loaded; Work->FinalizeOperation = FinalizeAsset_None; Platform.AddEntry(Assets->TranState->LowPriorityQueue, LoadAssetWork, Work);
I believe AcquireAssetMemory returns a contiguous block of memory.
Seems to me that is the code responsible for the layout:
void *Memory = (Asset->Header + 1); s16 *SampleAt = (s16 *)Memory; for (u32 ChannelIndex = 0; ChannelIndex < Sound->ChannelCount; ++ChannelIndex) { Sound->Samples[ChannelIndex] = SampleAt; SampleAt += ChannelSize; }
Now that I look at it, my reading code does not account for sizeof(asset_memory_header) so I might me reading it slightly wrong still.
So that part of code assumes samples will be interleaved - LRLRLRLR... That is the part you need to fix.
Oh no, I misread the code. The ChannelSize
variable is confusing. What does it contain? And why it is being added to SampleAt
for each channel when you're setting up Sound->Samples. You should be adding SampleCount
there instead.
The ChannelSize variable is confusing. What does it contain? And why it is being added to SampleAt for each channel when you're setting up Sound->Samples.
I Agree the name is a bit confusing, but it seems to me that it is doing roughly what you said:
asset_memory_size Size = {}; Size.Section = Info->SampleCount*sizeof(s16); Size.Data = Info->ChannelCount*Size.Section; Size.Total = Size.Data + sizeof(asset_memory_header); ... u32 ChannelSize = Size.Section;
ChannelSize contains Info->SampleCount*sizeof(s16), in other words, it is the size of the entire block which contains the samples for a specific channel.
Ah, I missed that assignment to Size.Section above.
In that case the problem is very clear. You were advancing pointer of each channel by amount of bytes per channel, not amount of samples per channel.
You were advancing pointer of each channel by amount of bytes per channel, not amount of samples per channel.
Are you refering to this part of the code: (?)
__m128 SampleValueF1 = _mm_set_ps(Sound->Samples[1][((s32 *)&SampleIndex)[0]], Sound->Samples[1][((s32 *)&SampleIndex)[1]], Sound->Samples[1][((s32 *)&SampleIndex)[2]], Sound->Samples[1][((s32 *)&SampleIndex)[3]]);
I'm not really sure what you mean by "advancing pointer of each channel".
(Side note: how do I make things bold in the text like you did? Can it be done inside a code block?)
No, I'm referring to your allocation code. That is only place where the error is. Usage code is fine.
You need to fix your loop that assigns Sound->Samples pointers. To this:
for (u32 ChannelIndex = 0; ChannelIndex < Sound->ChannelCount; ++ChannelIndex) { Sound->Samples[ChannelIndex] = SampleAt; SampleAt += Sound->SampleCount; // <--- this place !!!! }
You have memory block with SampleCount*ChannelCount*2
bytes. You want first pointer point at beginning (offset 0), and second pointer point in middle (at byte offset SampleCount*2).
If you do what you do with SampleAt += Size.Section;
that is same as SampleAt += Info->SampleCount*sizeof(s16)
which will advance SampleAt for right channel to the end of this memory block - it will point one byte after the whole memory block. Which will not contain any data of sound at all when you start using it.
Check the values in debugger, literally put Samples[0], Samples[1], SamplesAt, etc.. values in watch window and check the values to verify that you have exactly what you expect instead of that wrong Samples[1] pointer.