diff --git a/src/raudio.c b/src/raudio.c
index 75547d285..8322f1baf 100644
--- a/src/raudio.c
+++ b/src/raudio.c
@@ -295,6 +295,10 @@ typedef struct tagBITMAPINFOHEADER {
     #define MAX_AUDIO_BUFFER_POOL_CHANNELS    16    // Audio pool channels
 #endif
 
+#ifndef AUDIO_BUFFER_RESIDUAL_CAPACITY
+    #define AUDIO_BUFFER_RESIDUAL_CAPACITY    8     // In PCM frames. For resampling and pitch shifting.
+#endif
+
 //----------------------------------------------------------------------------------
 // Types and Structures Definition
 //----------------------------------------------------------------------------------
@@ -337,6 +341,8 @@ typedef enum {
 // Audio buffer struct
 struct rAudioBuffer {
     ma_data_converter converter;            // Audio data converter
+    unsigned char* converterResidual;       // Cached residual input frames for use by the converter
+    unsigned int converterResidualCount;    // The number of valid frames sitting in converterResidual
 
     AudioCallback callback;                 // Audio buffer callback for buffer filling on audio threads
     rAudioProcessor *processor;             // Audio processor
@@ -586,6 +592,15 @@ AudioBuffer *LoadAudioBuffer(ma_format format, ma_uint32 channels, ma_uint32 sam
         return NULL;
     }
 
+    // A cache for use by the converter is necessary when resampling because
+    // when generating output frames a different number of input frames will
+    // be consumed. Any residual input frames need to be kept track of to
+    // ensure there are no discontinuities. Since raylib supports pitch
+    // shifting, which is done through resampling, a cache will always be
+    // required. This will be kept relatively small to avoid too much wastage.
+    audioBuffer->converterResidualCount = 0;
+    audioBuffer->converterResidual = (unsigned char*)RL_CALLOC(AUDIO_BUFFER_RESIDUAL_CAPACITY*ma_get_bytes_per_frame(format, channels), 1);
+
     // Init audio buffer values
     audioBuffer->volume = 1.0f;
     audioBuffer->pitch = 1.0f;
@@ -621,6 +636,7 @@ void UnloadAudioBuffer(AudioBuffer *buffer)
     {
         UntrackAudioBuffer(buffer);
         ma_data_converter_uninit(&buffer->converter, NULL);
+        RL_FREE(buffer->converterResidual);
         RL_FREE(buffer->data);
         RL_FREE(buffer);
     }
@@ -705,7 +721,7 @@ void SetAudioBufferPitch(AudioBuffer *buffer, float pitch)
         // Note that this changes the duration of the sound:
         //  - higher pitches will make the sound faster
         //  - lower pitches make it slower
-        ma_uint32 outputSampleRate = (ma_uint32)((float)buffer->converter.sampleRateOut/pitch);
+        ma_uint32 outputSampleRate = (ma_uint32)((float)AUDIO.System.device.sampleRate/pitch);
         ma_data_converter_set_rate(&buffer->converter, buffer->converter.sampleRateIn, outputSampleRate);
 
         buffer->pitch = pitch;
@@ -2456,38 +2472,78 @@ static ma_uint32 ReadAudioBufferFramesInMixingFormat(AudioBuffer *audioBuffer, f
     // NOTE: Continuously converting data from the AudioBuffer's internal format to the mixing format,
     // which should be defined by the output format of the data converter.
     // This is done until frameCount frames have been output.
-    // The important detail to remember is that more data than required should neeveer be read,
-    // for the specified number of output frames.
-    // This can be achieved with ma_data_converter_get_required_input_frame_count()
+    ma_uint32 bpf = ma_get_bytes_per_frame(audioBuffer->converter.formatIn, audioBuffer->converter.channelsIn);
     ma_uint8 inputBuffer[4096] = { 0 };
-    ma_uint32 inputBufferFrameCap = sizeof(inputBuffer)/ma_get_bytes_per_frame(audioBuffer->converter.formatIn, audioBuffer->converter.channelsIn);
-
+    ma_uint32 inputBufferFrameCap = sizeof(inputBuffer)/bpf;
+
     ma_uint32 totalOutputFramesProcessed = 0;
     while (totalOutputFramesProcessed < frameCount)
     {
+        float *runningFramesOut = framesOut + (totalOutputFramesProcessed*audioBuffer->converter.channelsOut);
+
         ma_uint64 outputFramesToProcessThisIteration = frameCount - totalOutputFramesProcessed;
         ma_uint64 inputFramesToProcessThisIteration = 0;
-
-        (void)ma_data_converter_get_required_input_frame_count(&audioBuffer->converter, outputFramesToProcessThisIteration, &inputFramesToProcessThisIteration);
-        if (inputFramesToProcessThisIteration > inputBufferFrameCap)
+
+        // Process any residual input frames from the previous read first.
+        if (audioBuffer->converterResidualCount > 0)
         {
-            inputFramesToProcessThisIteration = inputBufferFrameCap;
+            ma_uint64 inputFramesProcessedThisIteration = audioBuffer->converterResidualCount;
+            ma_uint64 outputFramesProcessedThisIteration = outputFramesToProcessThisIteration;
+            ma_data_converter_process_pcm_frames(&audioBuffer->converter, audioBuffer->converterResidual, &inputFramesProcessedThisIteration, runningFramesOut, &outputFramesProcessedThisIteration);
+
+            // Make sure the data in the cache is consumed. This can be optimized to use a cursor instead of a memmove().
+            memmove(audioBuffer->converterResidual, audioBuffer->converterResidual + inputFramesProcessedThisIteration*bpf, (size_t)(AUDIO_BUFFER_RESIDUAL_CAPACITY - inputFramesProcessedThisIteration)*bpf);
+            audioBuffer->converterResidualCount -= (ma_uint32)inputFramesProcessedThisIteration;    // Safe cast
+
+            totalOutputFramesProcessed += (ma_uint32)outputFramesProcessedThisIteration;    // Safe cast
         }
+        else
+        {
+            // Getting here means there are no residual frames from the previous read. Fresh data can now be
+            // pulled from the AudioBuffer and processed.
+            //
+            // A best guess needs to be made to determine how many input frames to pull from the
+            // buffer. There are three possible outcomes: 1) exact; 2) underestimated; 3) overestimated.
+            //
+            // When the guess is exactly correct or underestimated there is nothing special to handle - it'll be
+            // handled naturally by the loop.
+            //
+            // When the guess is overestimated, that's when it gets more complicated. In this case, any overflow
+            // needs to be stored in a buffer for later processing by the next read.
+            ma_uint32 estimatedInputFrameCount = (ma_uint32)(((float)audioBuffer->converter.resampler.sampleRateIn/audioBuffer->converter.resampler.sampleRateOut)*outputFramesToProcessThisIteration);
+            if (estimatedInputFrameCount == 0)
+            {
+                estimatedInputFrameCount = 1;   // Make sure at least one input frame is read.
+            }
 
-        float *runningFramesOut = framesOut + (totalOutputFramesProcessed*audioBuffer->converter.channelsOut);
+            if (estimatedInputFrameCount > inputBufferFrameCap)
+            {
+                estimatedInputFrameCount = inputBufferFrameCap;
+            }
 
-        // At this point we can convert the data to our mixing format
-        ma_uint64 inputFramesProcessedThisIteration = ReadAudioBufferFramesInInternalFormat(audioBuffer, inputBuffer, (ma_uint32)inputFramesToProcessThisIteration);
-        ma_uint64 outputFramesProcessedThisIteration = outputFramesToProcessThisIteration;
-        ma_data_converter_process_pcm_frames(&audioBuffer->converter, inputBuffer, &inputFramesProcessedThisIteration, runningFramesOut, &outputFramesProcessedThisIteration);
+            estimatedInputFrameCount = ReadAudioBufferFramesInInternalFormat(audioBuffer, inputBuffer, estimatedInputFrameCount);
 
-        totalOutputFramesProcessed += (ma_uint32)outputFramesProcessedThisIteration; // Safe cast
+            ma_uint64 inputFramesProcessedThisIteration = estimatedInputFrameCount;
+            ma_uint64 outputFramesProcessedThisIteration = outputFramesToProcessThisIteration;
+            ma_data_converter_process_pcm_frames(&audioBuffer->converter, inputBuffer, &inputFramesProcessedThisIteration, runningFramesOut, &outputFramesProcessedThisIteration);
 
-        if (inputFramesProcessedThisIteration < inputFramesToProcessThisIteration) break; // Ran out of input data
+            if (estimatedInputFrameCount > inputFramesProcessedThisIteration)
+            {
+                // Getting here means the estimated input frame count was overestimated. The residual needs
+                // to be stored for later use.
+                ma_uint64 residualFrameCount = estimatedInputFrameCount - inputFramesProcessedThisIteration;
 
-        // This should never be hit, but added here for safety
-        // Ensures we get out of the loop when no input nor output frames are processed
-        if ((inputFramesProcessedThisIteration == 0) && (outputFramesProcessedThisIteration == 0)) break;
+                // A safety check to make sure the capacity of the residual cache is not exceeded.
+                if (residualFrameCount > AUDIO_BUFFER_RESIDUAL_CAPACITY)
+                {
+                    residualFrameCount = AUDIO_BUFFER_RESIDUAL_CAPACITY;
+                }
+
+                memcpy(audioBuffer->converterResidual, inputBuffer + inputFramesProcessedThisIteration*bpf, (size_t)(residualFrameCount*bpf));
+                audioBuffer->converterResidualCount = residualFrameCount;
+            }
+
+            totalOutputFramesProcessed += (ma_uint32)outputFramesProcessedThisIteration;
+        }
     }
 
     return totalOutputFramesProcessed;
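For reference, a rough worked example of the two formulas this patch leans on: SetAudioBufferPitch() derives the converter's output rate as deviceRate/pitch, and ReadAudioBufferFramesInMixingFormat() estimates the input frames to pull as outputFrames*sampleRateIn/sampleRateOut. The sample rates, pitch values and frame count below are assumptions chosen only for illustration; nothing beyond the arithmetic is taken from raudio.c.

```c
// Minimal sketch of the pitch/resample arithmetic used by the patch (standalone, not raylib code).
#include <stdio.h>

int main(void)
{
    unsigned int sampleRateIn = 44100;        // Source (AudioBuffer) sample rate -- assumed value
    unsigned int deviceSampleRate = 48000;    // Device/mixing sample rate -- assumed value
    unsigned int outputFrames = 512;          // Output frames requested by the mixer -- assumed value

    float pitches[3] = { 0.5f, 1.0f, 2.0f };

    for (int i = 0; i < 3; i++)
    {
        // SetAudioBufferPitch(): outputSampleRate = deviceRate/pitch (higher pitch -> faster playback)
        unsigned int sampleRateOut = (unsigned int)((float)deviceSampleRate/pitches[i]);

        // ReadAudioBufferFramesInMixingFormat(): best-guess input frames for the requested output frames
        unsigned int estimatedInput = (unsigned int)(((float)sampleRateIn/sampleRateOut)*outputFrames);
        if (estimatedInput == 0) estimatedInput = 1;    // Always read at least one input frame

        printf("pitch %.1f -> rateOut %u Hz, ~%u input frames for %u output frames\n",
               pitches[i], sampleRateOut, estimatedInput, outputFrames);
    }

    return 0;
}
```

Whenever the converter consumes fewer input frames than this estimate, the leftover frames are parked in converterResidual (clamped to AUDIO_BUFFER_RESIDUAL_CAPACITY, 8 frames) and drained first on the next call, which is what keeps source frames from being dropped and causing discontinuities when resampling or pitch shifting.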