1 | /* |
2 | * Copyright (C) 2010, Google Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY |
14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
15 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
16 | * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
17 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
18 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
19 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
20 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
21 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
22 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
23 | */ |
24 | |
25 | #include "config.h" |
26 | |
27 | #if ENABLE(WEB_AUDIO) |
28 | |
29 | #include "HRTFPanner.h" |
30 | |
31 | #include "AudioBus.h" |
32 | #include "FFTConvolver.h" |
33 | #include "HRTFDatabase.h" |
34 | #include "HRTFDatabaseLoader.h" |
35 | #include <algorithm> |
36 | #include <wtf/MathExtras.h> |
37 | |
38 | namespace WebCore { |
39 | |
// Upper bound, in seconds, on the inter-aural delay the delay lines can apply (2 milliseconds).
// It is meant to exceed the largest delay found in any HRTFKernel of the default HRTFDatabase.
// NOTE(review): the figure previously quoted here for that largest delay was "0.0136 seconds", which is
// larger than 2 ms; it was presumably meant to be 0.00136 seconds -- confirm against the database.
// pan() ASSERTs the per-kernel delays against this value.
constexpr double MaxDelayTimeSeconds = 0.002;

// Sentinel held by m_azimuthIndex1 / m_azimuthIndex2 until pan() has seen its first azimuth.
constexpr int UninitializedAzimuth = -1;

// Number of sample-frames pan() handles per segment; also the size of the crossfade scratch buffers.
constexpr unsigned RenderingQuantum = 128;
46 | |
// Constructs an HRTF panner rendering at |sampleRate|, using |databaseLoader| as the source of the HRTF database.
// |databaseLoader| is ASSERTed to be non-null and is used to initialize m_databaseLoader.
HRTFPanner::HRTFPanner(float sampleRate, HRTFDatabaseLoader* databaseLoader)
    : Panner(PanningModelType::HRTF)
    , m_databaseLoader(databaseLoader)
    , m_sampleRate(sampleRate)
    // Start out rendering with kernel set 1 and no crossfade in progress (position 0, increment 0).
    , m_crossfadeSelection(CrossfadeSelection1)
    // Both azimuth slots stay unset until the first pan() call snaps them to the requested direction.
    , m_azimuthIndex1(UninitializedAzimuth)
    , m_elevation1(0)
    , m_azimuthIndex2(UninitializedAzimuth)
    , m_elevation2(0)
    , m_crossfadeX(0)
    , m_crossfadeIncr(0)
    // One convolver per ear for each of the two kernel sets, all sized from the sample rate.
    , m_convolverL1(fftSizeForSampleRate(sampleRate))
    , m_convolverR1(fftSizeForSampleRate(sampleRate))
    , m_convolverL2(fftSizeForSampleRate(sampleRate))
    , m_convolverR2(fftSizeForSampleRate(sampleRate))
    // One delay line per ear, constructed with MaxDelayTimeSeconds as its maximum delay argument.
    , m_delayLineL(MaxDelayTimeSeconds, sampleRate)
    , m_delayLineR(MaxDelayTimeSeconds, sampleRate)
    // Scratch buffers of one rendering quantum each; pan() uses them as convolver outputs while crossfading.
    , m_tempL1(RenderingQuantum)
    , m_tempR1(RenderingQuantum)
    , m_tempL2(RenderingQuantum)
    , m_tempR2(RenderingQuantum)
{
    ASSERT(databaseLoader);
}
71 | |
// No explicit cleanup is performed here; the compiler-generated destructor destroys the members.
HRTFPanner::~HRTFPanner() = default;
73 | |
74 | size_t HRTFPanner::fftSizeForSampleRate(float sampleRate) |
75 | { |
76 | // The HRTF impulse responses (loaded as audio resources) are 512 sample-frames @44.1KHz. |
77 | // Currently, we truncate the impulse responses to half this size, but an FFT-size of twice impulse response size is needed (for convolution). |
78 | // So for sample rates around 44.1KHz an FFT size of 512 is good. We double the FFT-size only for sample rates at least double this. |
79 | ASSERT(sampleRate >= 44100 && sampleRate <= 96000.0); |
80 | return (sampleRate < 88200.0) ? 512 : 1024; |
81 | } |
82 | |
// Resets the four convolvers and the two inter-aural delay lines.
// The crossfade state and the stored azimuth/elevation values are not modified here.
void HRTFPanner::reset()
{
    // Convolvers for kernel set 1 and kernel set 2 (left and right ear each).
    m_convolverL1.reset();
    m_convolverR1.reset();
    m_convolverL2.reset();
    m_convolverR2.reset();
    // Delay lines for the left and right ear.
    m_delayLineL.reset();
    m_delayLineR.reset();
}
92 | |
93 | int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend) |
94 | { |
95 | // Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360. |
96 | // The azimuth index may then be calculated from this positive value. |
97 | if (azimuth < 0) |
98 | azimuth += 360.0; |
99 | |
100 | HRTFDatabase* database = m_databaseLoader->database(); |
101 | ASSERT(database); |
102 | |
103 | int numberOfAzimuths = database->numberOfAzimuths(); |
104 | const double angleBetweenAzimuths = 360.0 / numberOfAzimuths; |
105 | |
106 | // Calculate the azimuth index and the blend (0 -> 1) for interpolation. |
107 | double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths; |
108 | int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat); |
109 | azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex); |
110 | |
111 | // We don't immediately start using this azimuth index, but instead approach this index from the last index we rendered at. |
112 | // This minimizes the clicks and graininess for moving sources which occur otherwise. |
113 | desiredAzimuthIndex = std::max(0, desiredAzimuthIndex); |
114 | desiredAzimuthIndex = std::min(numberOfAzimuths - 1, desiredAzimuthIndex); |
115 | return desiredAzimuthIndex; |
116 | } |
117 | |
118 | void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBus* inputBus, AudioBus* outputBus, size_t framesToProcess) |
119 | { |
120 | unsigned numInputChannels = inputBus ? inputBus->numberOfChannels() : 0; |
121 | |
122 | bool isInputGood = inputBus && numInputChannels >= 1 && numInputChannels <= 2; |
123 | ASSERT(isInputGood); |
124 | |
125 | bool isOutputGood = outputBus && outputBus->numberOfChannels() == 2 && framesToProcess <= outputBus->length(); |
126 | ASSERT(isOutputGood); |
127 | |
128 | if (!isInputGood || !isOutputGood) { |
129 | if (outputBus) |
130 | outputBus->zero(); |
131 | return; |
132 | } |
133 | |
134 | // This code only runs as long as the context is alive and after database has been loaded. |
135 | HRTFDatabase* database = m_databaseLoader->database(); |
136 | ASSERT(database); |
137 | if (!database) { |
138 | outputBus->zero(); |
139 | return; |
140 | } |
141 | |
142 | // IRCAM HRTF azimuths values from the loaded database is reversed from the panner's notion of azimuth. |
143 | double azimuth = -desiredAzimuth; |
144 | |
145 | bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0; |
146 | ASSERT(isAzimuthGood); |
147 | if (!isAzimuthGood) { |
148 | outputBus->zero(); |
149 | return; |
150 | } |
151 | |
152 | // Normally, we'll just be dealing with mono sources. |
153 | // If we have a stereo input, implement stereo panning with left source processed by left HRTF, and right source by right HRTF. |
154 | const AudioChannel* inputChannelL = inputBus->channelByType(AudioBus::ChannelLeft); |
155 | const AudioChannel* inputChannelR = numInputChannels > 1 ? inputBus->channelByType(AudioBus::ChannelRight) : 0; |
156 | |
157 | // Get source and destination pointers. |
158 | const float* sourceL = inputChannelL->data(); |
159 | const float* sourceR = numInputChannels > 1 ? inputChannelR->data() : sourceL; |
160 | float* destinationL = outputBus->channelByType(AudioBus::ChannelLeft)->mutableData(); |
161 | float* destinationR = outputBus->channelByType(AudioBus::ChannelRight)->mutableData(); |
162 | |
163 | double azimuthBlend; |
164 | int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend); |
165 | |
166 | // Initially snap azimuth and elevation values to first values encountered. |
167 | if (m_azimuthIndex1 == UninitializedAzimuth) { |
168 | m_azimuthIndex1 = desiredAzimuthIndex; |
169 | m_elevation1 = elevation; |
170 | } |
171 | if (m_azimuthIndex2 == UninitializedAzimuth) { |
172 | m_azimuthIndex2 = desiredAzimuthIndex; |
173 | m_elevation2 = elevation; |
174 | } |
175 | |
176 | // Cross-fade / transition over a period of around 45 milliseconds. |
177 | // This is an empirical value tuned to be a reasonable trade-off between |
178 | // smoothness and speed. |
179 | const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096; |
180 | |
181 | // Check for azimuth and elevation changes, initiating a cross-fade if needed. |
182 | if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) { |
183 | if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) { |
184 | // Cross-fade from 1 -> 2 |
185 | m_crossfadeIncr = 1 / fadeFrames; |
186 | m_azimuthIndex2 = desiredAzimuthIndex; |
187 | m_elevation2 = elevation; |
188 | } |
189 | } |
190 | if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) { |
191 | if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) { |
192 | // Cross-fade from 2 -> 1 |
193 | m_crossfadeIncr = -1 / fadeFrames; |
194 | m_azimuthIndex1 = desiredAzimuthIndex; |
195 | m_elevation1 = elevation; |
196 | } |
197 | } |
198 | |
199 | // This algorithm currently requires that we process in power-of-two size chunks at least RenderingQuantum. |
200 | ASSERT(1UL << static_cast<int>(log2(framesToProcess)) == framesToProcess); |
201 | ASSERT(framesToProcess >= RenderingQuantum); |
202 | |
203 | const unsigned framesPerSegment = RenderingQuantum; |
204 | const unsigned numberOfSegments = framesToProcess / framesPerSegment; |
205 | |
206 | for (unsigned segment = 0; segment < numberOfSegments; ++segment) { |
207 | // Get the HRTFKernels and interpolated delays. |
208 | HRTFKernel* kernelL1; |
209 | HRTFKernel* kernelR1; |
210 | HRTFKernel* kernelL2; |
211 | HRTFKernel* kernelR2; |
212 | double frameDelayL1; |
213 | double frameDelayR1; |
214 | double frameDelayL2; |
215 | double frameDelayR2; |
216 | database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1); |
217 | database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2); |
218 | |
219 | bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2; |
220 | ASSERT(areKernelsGood); |
221 | if (!areKernelsGood) { |
222 | outputBus->zero(); |
223 | return; |
224 | } |
225 | |
226 | ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds); |
227 | ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds); |
228 | |
229 | // Crossfade inter-aural delays based on transitions. |
230 | double frameDelayL = (1 - m_crossfadeX) * frameDelayL1 + m_crossfadeX * frameDelayL2; |
231 | double frameDelayR = (1 - m_crossfadeX) * frameDelayR1 + m_crossfadeX * frameDelayR2; |
232 | |
233 | // Calculate the source and destination pointers for the current segment. |
234 | unsigned offset = segment * framesPerSegment; |
235 | const float* segmentSourceL = sourceL + offset; |
236 | const float* segmentSourceR = sourceR + offset; |
237 | float* segmentDestinationL = destinationL + offset; |
238 | float* segmentDestinationR = destinationR + offset; |
239 | |
240 | // First run through delay lines for inter-aural time difference. |
241 | m_delayLineL.setDelayFrames(frameDelayL); |
242 | m_delayLineR.setDelayFrames(frameDelayR); |
243 | m_delayLineL.process(segmentSourceL, segmentDestinationL, framesPerSegment); |
244 | m_delayLineR.process(segmentSourceR, segmentDestinationR, framesPerSegment); |
245 | |
246 | bool needsCrossfading = m_crossfadeIncr; |
247 | |
248 | // Have the convolvers render directly to the final destination if we're not cross-fading. |
249 | float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.data() : segmentDestinationL; |
250 | float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.data() : segmentDestinationR; |
251 | float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.data() : segmentDestinationL; |
252 | float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.data() : segmentDestinationR; |
253 | |
254 | // Now do the convolutions. |
255 | // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading. |
256 | |
257 | if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) { |
258 | m_convolverL1.process(kernelL1->fftFrame(), segmentDestinationL, convolutionDestinationL1, framesPerSegment); |
259 | m_convolverR1.process(kernelR1->fftFrame(), segmentDestinationR, convolutionDestinationR1, framesPerSegment); |
260 | } |
261 | |
262 | if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) { |
263 | m_convolverL2.process(kernelL2->fftFrame(), segmentDestinationL, convolutionDestinationL2, framesPerSegment); |
264 | m_convolverR2.process(kernelR2->fftFrame(), segmentDestinationR, convolutionDestinationR2, framesPerSegment); |
265 | } |
266 | |
267 | if (needsCrossfading) { |
268 | // Apply linear cross-fade. |
269 | float x = m_crossfadeX; |
270 | float incr = m_crossfadeIncr; |
271 | for (unsigned i = 0; i < framesPerSegment; ++i) { |
272 | segmentDestinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i]; |
273 | segmentDestinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i]; |
274 | x += incr; |
275 | } |
276 | // Update cross-fade value from local. |
277 | m_crossfadeX = x; |
278 | |
279 | if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) { |
280 | // We've fully made the crossfade transition from 1 -> 2. |
281 | m_crossfadeSelection = CrossfadeSelection2; |
282 | m_crossfadeX = 1; |
283 | m_crossfadeIncr = 0; |
284 | } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) { |
285 | // We've fully made the crossfade transition from 2 -> 1. |
286 | m_crossfadeSelection = CrossfadeSelection1; |
287 | m_crossfadeX = 0; |
288 | m_crossfadeIncr = 0; |
289 | } |
290 | } |
291 | } |
292 | } |
293 | |
294 | double HRTFPanner::tailTime() const |
295 | { |
296 | // Because HRTFPanner is implemented with a DelayKernel and a FFTConvolver, the tailTime of the HRTFPanner |
297 | // is the sum of the tailTime of the DelayKernel and the tailTime of the FFTConvolver, which is MaxDelayTimeSeconds |
298 | // and fftSize() / 2, respectively. |
299 | return MaxDelayTimeSeconds + (fftSize() / 2) / static_cast<double>(sampleRate()); |
300 | } |
301 | |
302 | double HRTFPanner::latencyTime() const |
303 | { |
304 | // The latency of a FFTConvolver is also fftSize() / 2, and is in addition to its tailTime of the |
305 | // same value. |
306 | return (fftSize() / 2) / static_cast<double>(sampleRate()); |
307 | } |
308 | |
309 | } // namespace WebCore |
310 | |
311 | #endif // ENABLE(WEB_AUDIO) |
312 | |