/*
 * Copyright (C) 2010, Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#if ENABLE(WEB_AUDIO)

#include "VectorMath.h"

#if USE(ACCELERATE)
#include <Accelerate/Accelerate.h>
#endif

#if CPU(X86_SSE2)
#include <emmintrin.h>
#endif

#if HAVE(ARM_NEON_INTRINSICS)
#include <arm_neon.h>
#endif

#include <algorithm>
#include <math.h>

namespace WebCore {

namespace VectorMath {

#if USE(ACCELERATE)
// On the Mac we use the highly optimized versions in Accelerate.framework.

void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
{
    vDSP_vsmul(sourceP, sourceStride, scale, destP, destStride, framesToProcess);
}

void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
{
    vDSP_vadd(source1P, sourceStride1, source2P, sourceStride2, destP, destStride, framesToProcess);
}

void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
{
    vDSP_vmul(source1P, sourceStride1, source2P, sourceStride2, destP, destStride, framesToProcess);
}

void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
{
    DSPSplitComplex sc1;
    DSPSplitComplex sc2;
    DSPSplitComplex dest;
    sc1.realp = const_cast<float*>(real1P);
    sc1.imagp = const_cast<float*>(imag1P);
    sc2.realp = const_cast<float*>(real2P);
    sc2.imagp = const_cast<float*>(imag2P);
    dest.realp = realDestP;
    dest.imagp = imagDestP;
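    // The final argument (1) selects normal (non-conjugate) complex multiplication.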
    vDSP_zvmul(&sc1, 1, &sc2, 1, &dest, 1, framesToProcess, 1);
}

void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
{
    vDSP_vsma(sourceP, sourceStride, scale, destP, destStride, destP, destStride, framesToProcess);
}

void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess)
{
    vDSP_maxmgv(sourceP, sourceStride, maxP, framesToProcess);
}

void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess)
{
    vDSP_svesq(const_cast<float*>(sourceP), sourceStride, sumP, framesToProcess);
}

void vclip(const float* sourceP, int sourceStride, const float* lowThresholdP, const float* highThresholdP, float* destP, int destStride, size_t framesToProcess)
{
    vDSP_vclip(const_cast<float*>(sourceP), sourceStride, const_cast<float*>(lowThresholdP), const_cast<float*>(highThresholdP), destP, destStride, framesToProcess);
}
#else

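// Fallback implementations with hand-written SSE2 / NEON fast paths.
// The general pattern: when strides are 1, scalar code handles any frames before
// the primary source pointer reaches a 16-byte boundary, a SIMD loop then
// processes four floats per iteration, and a scalar loop handles the remaining
// tail (and the strided case).

// vsma: destP[i] += sourceP[i] * *scale.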
void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
{
    int n = framesToProcess;

#if CPU(X86_SSE2)
    if ((sourceStride == 1) && (destStride == 1)) {
        float k = *scale;

        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
        while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
            *destP += k * *sourceP;
            sourceP++;
            destP++;
            n--;
        }

        // Now the sourceP is aligned, use SSE.
        int tailFrames = n % 4;
        const float* endP = destP + n - tailFrames;

        __m128 pSource;
        __m128 dest;
        __m128 temp;
        __m128 mScale = _mm_set_ps1(k);

        bool destAligned = !(reinterpret_cast<uintptr_t>(destP) & 0x0F);

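// SSE2_MULT_ADD generates the multiply-add loop body; sourceP is known to be
// 16-byte aligned at this point, so only the destination load/store switches
// between the aligned (load/store) and unaligned (loadu/storeu) forms.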
#define SSE2_MULT_ADD(loadInstr, storeInstr) \
            while (destP < endP) \
            { \
                pSource = _mm_load_ps(sourceP); \
                temp = _mm_mul_ps(pSource, mScale); \
                dest = _mm_##loadInstr##_ps(destP); \
                dest = _mm_add_ps(dest, temp); \
                _mm_##storeInstr##_ps(destP, dest); \
                sourceP += 4; \
                destP += 4; \
            }

        if (destAligned)
            SSE2_MULT_ADD(load, store)
        else
            SSE2_MULT_ADD(loadu, storeu)

        n = tailFrames;
    }
#elif HAVE(ARM_NEON_INTRINSICS)
    if ((sourceStride == 1) && (destStride == 1)) {
        int tailFrames = n % 4;
        const float* endP = destP + n - tailFrames;

        float32x4_t k = vdupq_n_f32(*scale);
        while (destP < endP) {
            float32x4_t source = vld1q_f32(sourceP);
            float32x4_t dest = vld1q_f32(destP);

            dest = vmlaq_f32(dest, source, k);
            vst1q_f32(destP, dest);

            sourceP += 4;
            destP += 4;
        }
        n = tailFrames;
    }
#endif
    while (n) {
        *destP += *sourceP * *scale;
        sourceP += sourceStride;
        destP += destStride;
        n--;
    }
}

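// vsmul: destP[i] = sourceP[i] * *scale.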
void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
{
    int n = framesToProcess;

#if CPU(X86_SSE2)
    if ((sourceStride == 1) && (destStride == 1)) {
        float k = *scale;

        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
        while ((reinterpret_cast<size_t>(sourceP) & 0x0F) && n) {
            *destP = k * *sourceP;
            sourceP++;
            destP++;
            n--;
        }

        // Now the sourceP address is aligned; start to apply SSE.
        int group = n / 4;
        __m128 mScale = _mm_set_ps1(k);
        __m128* pSource;
        __m128* pDest;
        __m128 dest;

        if (reinterpret_cast<size_t>(destP) & 0x0F) {
            while (group--) {
                pSource = reinterpret_cast<__m128*>(const_cast<float*>(sourceP));
                dest = _mm_mul_ps(*pSource, mScale);
                _mm_storeu_ps(destP, dest);

                sourceP += 4;
                destP += 4;
            }
        } else {
            while (group--) {
                pSource = reinterpret_cast<__m128*>(const_cast<float*>(sourceP));
                pDest = reinterpret_cast<__m128*>(destP);
                *pDest = _mm_mul_ps(*pSource, mScale);

                sourceP += 4;
                destP += 4;
            }
        }

        // Non-SSE handling for the remaining frames, which are fewer than 4.
        n %= 4;
        while (n) {
            *destP = k * *sourceP;
            sourceP++;
            destP++;
            n--;
        }
    } else { // If strides are not 1, fall back to the scalar algorithm.
#elif HAVE(ARM_NEON_INTRINSICS)
    if ((sourceStride == 1) && (destStride == 1)) {
        float k = *scale;
        int tailFrames = n % 4;
        const float* endP = destP + n - tailFrames;

        while (destP < endP) {
            float32x4_t source = vld1q_f32(sourceP);
            vst1q_f32(destP, vmulq_n_f32(source, k));

            sourceP += 4;
            destP += 4;
        }
        n = tailFrames;
    }
#endif
    float k = *scale;
    while (n--) {
        *destP = k * *sourceP;
        sourceP += sourceStride;
        destP += destStride;
    }
#if CPU(X86_SSE2)
    }
#endif
}

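// vadd: destP[i] = source1P[i] + source2P[i].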
void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
{
    int n = framesToProcess;

#if CPU(X86_SSE2)
    if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
        // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
        while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) {
            *destP = *source1P + *source2P;
            source1P++;
            source2P++;
            destP++;
            n--;
        }

        // Now the source1P address is aligned; start to apply SSE.
        int group = n / 4;
        __m128* pSource1;
        __m128* pSource2;
        __m128* pDest;
        __m128 source2;
        __m128 dest;

        bool source2Aligned = !(reinterpret_cast<size_t>(source2P) & 0x0F);
        bool destAligned = !(reinterpret_cast<size_t>(destP) & 0x0F);

        if (source2Aligned && destAligned) { // All aligned.
            while (group--) {
                pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
                pSource2 = reinterpret_cast<__m128*>(const_cast<float*>(source2P));
                pDest = reinterpret_cast<__m128*>(destP);
                *pDest = _mm_add_ps(*pSource1, *pSource2);

                source1P += 4;
                source2P += 4;
                destP += 4;
            }

        } else if (source2Aligned && !destAligned) { // source2 aligned but dest not aligned.
            while (group--) {
                pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
                pSource2 = reinterpret_cast<__m128*>(const_cast<float*>(source2P));
                dest = _mm_add_ps(*pSource1, *pSource2);
                _mm_storeu_ps(destP, dest);

                source1P += 4;
                source2P += 4;
                destP += 4;
            }

        } else if (!source2Aligned && destAligned) { // source2 not aligned but dest aligned.
            while (group--) {
                pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
                source2 = _mm_loadu_ps(source2P);
                pDest = reinterpret_cast<__m128*>(destP);
                *pDest = _mm_add_ps(*pSource1, source2);

                source1P += 4;
                source2P += 4;
                destP += 4;
            }
        } else if (!source2Aligned && !destAligned) { // Neither source2 nor dest aligned.
            while (group--) {
                pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
                source2 = _mm_loadu_ps(source2P);
                dest = _mm_add_ps(*pSource1, source2);
                _mm_storeu_ps(destP, dest);

                source1P += 4;
                source2P += 4;
                destP += 4;
            }
        }

        // Non-SSE handling for the remaining frames, which are fewer than 4.
        n %= 4;
        while (n) {
            *destP = *source1P + *source2P;
            source1P++;
            source2P++;
            destP++;
            n--;
        }
    } else { // If strides are not 1, fall back to the scalar algorithm.
#elif HAVE(ARM_NEON_INTRINSICS)
    if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
        int tailFrames = n % 4;
        const float* endP = destP + n - tailFrames;

        while (destP < endP) {
            float32x4_t source1 = vld1q_f32(source1P);
            float32x4_t source2 = vld1q_f32(source2P);
            vst1q_f32(destP, vaddq_f32(source1, source2));

            source1P += 4;
            source2P += 4;
            destP += 4;
        }
        n = tailFrames;
    }
#endif
    while (n--) {
        *destP = *source1P + *source2P;
        source1P += sourceStride1;
        source2P += sourceStride2;
        destP += destStride;
    }
#if CPU(X86_SSE2)
    }
#endif
}

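// vmul: destP[i] = source1P[i] * source2P[i].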
void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
{
    int n = framesToProcess;

#if CPU(X86_SSE2)
    if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
        // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
        while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) {
            *destP = *source1P * *source2P;
            source1P++;
            source2P++;
            destP++;
            n--;
        }

        // Now the source1P address is aligned; start to apply SSE.
        int tailFrames = n % 4;
        const float* endP = destP + n - tailFrames;
        __m128 pSource1;
        __m128 pSource2;
        __m128 dest;

        bool source2Aligned = !(reinterpret_cast<uintptr_t>(source2P) & 0x0F);
        bool destAligned = !(reinterpret_cast<uintptr_t>(destP) & 0x0F);

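// SSE2_MULT generates the multiply loop body; source1P is known to be 16-byte
// aligned here, so only the source2 load and the destination store switch
// between the aligned and unaligned forms.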
#define SSE2_MULT(loadInstr, storeInstr) \
            while (destP < endP) \
            { \
                pSource1 = _mm_load_ps(source1P); \
                pSource2 = _mm_##loadInstr##_ps(source2P); \
                dest = _mm_mul_ps(pSource1, pSource2); \
                _mm_##storeInstr##_ps(destP, dest); \
                source1P += 4; \
                source2P += 4; \
                destP += 4; \
            }

        if (source2Aligned && destAligned) // Both aligned.
            SSE2_MULT(load, store)
        else if (source2Aligned && !destAligned) // Source2 is aligned but dest not.
            SSE2_MULT(load, storeu)
        else if (!source2Aligned && destAligned) // Dest is aligned but source2 not.
            SSE2_MULT(loadu, store)
        else // Neither aligned.
            SSE2_MULT(loadu, storeu)

        n = tailFrames;
    }
#elif HAVE(ARM_NEON_INTRINSICS)
    if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
        int tailFrames = n % 4;
        const float* endP = destP + n - tailFrames;

        while (destP < endP) {
            float32x4_t source1 = vld1q_f32(source1P);
            float32x4_t source2 = vld1q_f32(source2P);
            vst1q_f32(destP, vmulq_f32(source1, source2));

            source1P += 4;
            source2P += 4;
            destP += 4;
        }
        n = tailFrames;
    }
#endif
    while (n) {
        *destP = *source1P * *source2P;
        source1P += sourceStride1;
        source2P += sourceStride2;
        destP += destStride;
        n--;
    }
}

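// zvmul: element-wise multiplication of two split-complex vectors:
//     realDest[i] = real1[i] * real2[i] - imag1[i] * imag2[i]
//     imagDest[i] = real1[i] * imag2[i] + imag1[i] * real2[i]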
void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
{
    unsigned i = 0;
#if CPU(X86_SSE2)
    // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned.
    // Otherwise, fall through to the scalar code below.
    if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) {

        unsigned endSize = framesToProcess - framesToProcess % 4;
        while (i < endSize) {
            __m128 real1 = _mm_load_ps(real1P + i);
            __m128 real2 = _mm_load_ps(real2P + i);
            __m128 imag1 = _mm_load_ps(imag1P + i);
            __m128 imag2 = _mm_load_ps(imag2P + i);
            __m128 real = _mm_mul_ps(real1, real2);
            real = _mm_sub_ps(real, _mm_mul_ps(imag1, imag2));
            __m128 imag = _mm_mul_ps(real1, imag2);
            imag = _mm_add_ps(imag, _mm_mul_ps(imag1, real2));
            _mm_store_ps(realDestP + i, real);
            _mm_store_ps(imagDestP + i, imag);
            i += 4;
        }
    }
#elif HAVE(ARM_NEON_INTRINSICS)
    unsigned endSize = framesToProcess - framesToProcess % 4;
    while (i < endSize) {
        float32x4_t real1 = vld1q_f32(real1P + i);
        float32x4_t real2 = vld1q_f32(real2P + i);
        float32x4_t imag1 = vld1q_f32(imag1P + i);
        float32x4_t imag2 = vld1q_f32(imag2P + i);

        float32x4_t realResult = vmlsq_f32(vmulq_f32(real1, real2), imag1, imag2);
        float32x4_t imagResult = vmlaq_f32(vmulq_f32(real1, imag2), imag1, real2);

        vst1q_f32(realDestP + i, realResult);
        vst1q_f32(imagDestP + i, imagResult);

        i += 4;
    }
#endif
    for (; i < framesToProcess; ++i) {
        // Read and compute the results before storing them, in case the
        // destination is the same as one of the sources.
        float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];
        float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];

        realDestP[i] = realResult;
        imagDestP[i] = imagResult;
    }
}

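// vsvesq: *sumP = sum of the squared values in sourceP.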
void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess)
{
    int n = framesToProcess;
    float sum = 0;

#if CPU(X86_SSE2)
    if (sourceStride == 1) {
        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
        while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
            float sample = *sourceP;
            sum += sample * sample;
            sourceP++;
            n--;
        }

        // Now the sourceP is aligned, use SSE.
        int tailFrames = n % 4;
        const float* endP = sourceP + n - tailFrames;
        __m128 source;
        __m128 mSum = _mm_setzero_ps();

        while (sourceP < endP) {
            source = _mm_load_ps(sourceP);
            source = _mm_mul_ps(source, source);
            mSum = _mm_add_ps(mSum, source);
            sourceP += 4;
        }

        // Sum the four partial sums held in the SSE register.
        const float* groupSumP = reinterpret_cast<float*>(&mSum);
        sum += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];

        n = tailFrames;
    }
#elif HAVE(ARM_NEON_INTRINSICS)
    if (sourceStride == 1) {
        int tailFrames = n % 4;
        const float* endP = sourceP + n - tailFrames;

        float32x4_t fourSum = vdupq_n_f32(0);
        while (sourceP < endP) {
            float32x4_t source = vld1q_f32(sourceP);
            fourSum = vmlaq_f32(fourSum, source, source);
            sourceP += 4;
        }
        float32x2_t twoSum = vadd_f32(vget_low_f32(fourSum), vget_high_f32(fourSum));

        float groupSum[2];
        vst1_f32(groupSum, twoSum);
        sum += groupSum[0] + groupSum[1];

        n = tailFrames;
    }
#endif

    while (n--) {
        float sample = *sourceP;
        sum += sample * sample;
        sourceP += sourceStride;
    }

    ASSERT(sumP);
    *sumP = sum;
}

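// vmaxmgv: *maxP = maximum absolute value (magnitude) in sourceP.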
void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess)
{
    int n = framesToProcess;
    float max = 0;

#if CPU(X86_SSE2)
    if (sourceStride == 1) {
        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
        while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
            max = std::max(max, fabsf(*sourceP));
            sourceP++;
            n--;
        }

        // Now the sourceP is aligned, use SSE.
        int tailFrames = n % 4;
        const float* endP = sourceP + n - tailFrames;
        __m128 source;
        __m128 mMax = _mm_setzero_ps();
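        // 0x7FFFFFFF keeps every bit of an IEEE-754 float except the sign bit;
        // broadcasting it lets the loop below compute fabs() with a bitwise AND.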
        int mask = 0x7FFFFFFF;
        __m128 mMask = _mm_set1_ps(*reinterpret_cast<float*>(&mask));

        while (sourceP < endP) {
            source = _mm_load_ps(sourceP);
            // Calculate the absolute value by ANDing source with the mask, which clears the sign bit.
            source = _mm_and_ps(source, mMask);
            mMax = _mm_max_ps(mMax, source);
            sourceP += 4;
        }

        // Get max from the SSE results.
        const float* groupMaxP = reinterpret_cast<float*>(&mMax);
        max = std::max(max, groupMaxP[0]);
        max = std::max(max, groupMaxP[1]);
        max = std::max(max, groupMaxP[2]);
        max = std::max(max, groupMaxP[3]);

        n = tailFrames;
    }
#elif HAVE(ARM_NEON_INTRINSICS)
    if (sourceStride == 1) {
        int tailFrames = n % 4;
        const float* endP = sourceP + n - tailFrames;

        float32x4_t fourMax = vdupq_n_f32(0);
        while (sourceP < endP) {
            float32x4_t source = vld1q_f32(sourceP);
            fourMax = vmaxq_f32(fourMax, vabsq_f32(source));
            sourceP += 4;
        }
        float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(fourMax));

        float groupMax[2];
        vst1_f32(groupMax, twoMax);
        max = std::max(groupMax[0], groupMax[1]);

        n = tailFrames;
    }
#endif

    while (n--) {
        max = std::max(max, fabsf(*sourceP));
        sourceP += sourceStride;
    }

    ASSERT(maxP);
    *maxP = max;
}

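// vclip: destP[i] = sourceP[i] clamped to the range [*lowThresholdP, *highThresholdP].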
void vclip(const float* sourceP, int sourceStride, const float* lowThresholdP, const float* highThresholdP, float* destP, int destStride, size_t framesToProcess)
{
    int n = framesToProcess;
    float lowThreshold = *lowThresholdP;
    float highThreshold = *highThresholdP;

    // FIXME: Optimize for SSE2.
#if HAVE(ARM_NEON_INTRINSICS)
    if ((sourceStride == 1) && (destStride == 1)) {
        int tailFrames = n % 4;
        const float* endP = destP + n - tailFrames;

        float32x4_t low = vdupq_n_f32(lowThreshold);
        float32x4_t high = vdupq_n_f32(highThreshold);
        while (destP < endP) {
            float32x4_t source = vld1q_f32(sourceP);
            vst1q_f32(destP, vmaxq_f32(vminq_f32(source, high), low));
            sourceP += 4;
            destP += 4;
        }
        n = tailFrames;
    }
#endif
    while (n--) {
        *destP = std::max(std::min(*sourceP, highThreshold), lowThreshold);
        sourceP += sourceStride;
        destP += destStride;
    }
}

#endif // USE(ACCELERATE)

} // namespace VectorMath

} // namespace WebCore

#endif // ENABLE(WEB_AUDIO)