Audio plugin host https://kx.studio/carla
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

775 lines
32KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  19. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  20. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  21. #if JUCE_USE_SSE_INTRINSICS
  22. static bool sse2Present = false;
  23. static bool isSSE2Available() noexcept
  24. {
  25. if (sse2Present)
  26. return true;
  27. sse2Present = SystemStats::hasSSE2();
  28. return sse2Present;
  29. }
  30. inline static bool isAligned (const void* p) noexcept
  31. {
  32. return (((pointer_sized_int) p) & 15) == 0;
  33. }
  34. struct BasicOps32
  35. {
  36. typedef float Type;
  37. typedef __m128 ParallelType;
  38. enum { numParallel = 4 };
  39. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  40. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  41. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  42. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  43. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  44. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  45. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  46. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  47. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  48. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  49. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  50. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  51. };
  52. struct BasicOps64
  53. {
  54. typedef double Type;
  55. typedef __m128d ParallelType;
  56. enum { numParallel = 2 };
  57. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  58. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  59. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  60. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  61. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  62. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  63. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  64. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  65. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  66. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  67. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  68. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  69. };
  70. #define JUCE_BEGIN_VEC_OP \
  71. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  72. if (FloatVectorHelpers::isSSE2Available()) \
  73. { \
  74. const int numLongOps = num / Mode::numParallel;
  75. #define JUCE_FINISH_VEC_OP(normalOp) \
  76. num &= (Mode::numParallel - 1); \
  77. if (num == 0) return; \
  78. } \
  79. for (int i = 0; i < num; ++i) normalOp;
  80. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  81. JUCE_BEGIN_VEC_OP \
  82. setupOp \
  83. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  84. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  85. JUCE_FINISH_VEC_OP (normalOp)
  86. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  87. JUCE_BEGIN_VEC_OP \
  88. setupOp \
  89. if (FloatVectorHelpers::isAligned (dest)) \
  90. { \
  91. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  92. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  93. }\
  94. else \
  95. { \
  96. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  97. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  98. } \
  99. JUCE_FINISH_VEC_OP (normalOp)
  100. //==============================================================================
  101. #elif JUCE_USE_ARM_NEON
  102. struct BasicOps32
  103. {
  104. typedef float Type;
  105. typedef float32x4_t ParallelType;
  106. enum { numParallel = 4 };
  107. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  108. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  109. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  110. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  111. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  112. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  113. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  114. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  115. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  116. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  117. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  118. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  119. };
  120. struct BasicOps64
  121. {
  122. typedef double Type;
  123. typedef double ParallelType;
  124. enum { numParallel = 1 };
  125. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  126. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  127. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  128. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  129. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  130. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  131. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  132. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  133. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  134. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  135. static forcedinline Type max (ParallelType a) noexcept { return a; }
  136. static forcedinline Type min (ParallelType a) noexcept { return a; }
  137. };
  138. #define JUCE_BEGIN_VEC_OP \
  139. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  140. if (Mode::numParallel > 1) \
  141. { \
  142. const int numLongOps = num / Mode::numParallel;
  143. #define JUCE_FINISH_VEC_OP(normalOp) \
  144. num &= (Mode::numParallel - 1); \
  145. if (num == 0) return; \
  146. } \
  147. for (int i = 0; i < num; ++i) normalOp;
  148. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  149. JUCE_BEGIN_VEC_OP \
  150. setupOp \
  151. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  152. JUCE_FINISH_VEC_OP (normalOp)
  153. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  154. JUCE_BEGIN_VEC_OP \
  155. setupOp \
  156. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  157. JUCE_FINISH_VEC_OP (normalOp)
  158. //==============================================================================
  159. #else
  160. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  161. for (int i = 0; i < num; ++i) normalOp;
  162. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  163. for (int i = 0; i < num; ++i) normalOp;
  164. #endif
  165. //==============================================================================
  166. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  167. for (int i = 0; i < numLongOps; ++i) \
  168. { \
  169. locals (srcLoad, dstLoad); \
  170. dstStore (dest, vecOp); \
  171. increment; \
  172. }
  173. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  174. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  175. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  176. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  177. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  178. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  179. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  180. template <typename Mode>
  181. struct MinMax
  182. {
  183. typedef typename Mode::Type Type;
  184. typedef typename Mode::ParallelType ParallelType;
  185. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  186. {
  187. int numLongOps = num / Mode::numParallel;
  188. #if JUCE_USE_SSE_INTRINSICS
  189. if (numLongOps > 1 && isSSE2Available())
  190. #else
  191. if (numLongOps > 1)
  192. #endif
  193. {
  194. ParallelType val;
  195. #if ! JUCE_USE_ARM_NEON
  196. if (isAligned (src))
  197. {
  198. val = Mode::loadA (src);
  199. if (isMinimum)
  200. {
  201. while (--numLongOps > 0)
  202. {
  203. src += Mode::numParallel;
  204. val = Mode::min (val, Mode::loadA (src));
  205. }
  206. }
  207. else
  208. {
  209. while (--numLongOps > 0)
  210. {
  211. src += Mode::numParallel;
  212. val = Mode::max (val, Mode::loadA (src));
  213. }
  214. }
  215. }
  216. else
  217. #endif
  218. {
  219. val = Mode::loadU (src);
  220. if (isMinimum)
  221. {
  222. while (--numLongOps > 0)
  223. {
  224. src += Mode::numParallel;
  225. val = Mode::min (val, Mode::loadU (src));
  226. }
  227. }
  228. else
  229. {
  230. while (--numLongOps > 0)
  231. {
  232. src += Mode::numParallel;
  233. val = Mode::max (val, Mode::loadU (src));
  234. }
  235. }
  236. }
  237. Type result = isMinimum ? Mode::min (val)
  238. : Mode::max (val);
  239. num &= (Mode::numParallel - 1);
  240. src += Mode::numParallel;
  241. for (int i = 0; i < num; ++i)
  242. result = isMinimum ? jmin (result, src[i])
  243. : jmax (result, src[i]);
  244. return result;
  245. }
  246. return isMinimum ? juce::findMinimum (src, num)
  247. : juce::findMaximum (src, num);
  248. }
  249. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  250. {
  251. int numLongOps = num / Mode::numParallel;
  252. #if JUCE_USE_SSE_INTRINSICS
  253. if (numLongOps > 1 && isSSE2Available())
  254. #else
  255. if (numLongOps > 1)
  256. #endif
  257. {
  258. ParallelType mn, mx;
  259. #if ! JUCE_USE_ARM_NEON
  260. if (isAligned (src))
  261. {
  262. mn = Mode::loadA (src);
  263. mx = mn;
  264. while (--numLongOps > 0)
  265. {
  266. src += Mode::numParallel;
  267. const ParallelType v = Mode::loadA (src);
  268. mn = Mode::min (mn, v);
  269. mx = Mode::max (mx, v);
  270. }
  271. }
  272. else
  273. #endif
  274. {
  275. mn = Mode::loadU (src);
  276. mx = mn;
  277. while (--numLongOps > 0)
  278. {
  279. src += Mode::numParallel;
  280. const ParallelType v = Mode::loadU (src);
  281. mn = Mode::min (mn, v);
  282. mx = Mode::max (mx, v);
  283. }
  284. }
  285. Range<Type> result (Mode::min (mn),
  286. Mode::max (mx));
  287. num &= (Mode::numParallel - 1);
  288. src += Mode::numParallel;
  289. for (int i = 0; i < num; ++i)
  290. result = result.getUnionWith (src[i]);
  291. return result;
  292. }
  293. return Range<Type>::findMinAndMax (src, num);
  294. }
  295. };
  296. #endif
  297. }
  298. //==============================================================================
  299. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  300. {
  301. #if JUCE_USE_VDSP_FRAMEWORK
  302. vDSP_vclr (dest, 1, (size_t) num);
  303. #else
  304. zeromem (dest, num * sizeof (float));
  305. #endif
  306. }
  307. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  308. {
  309. #if JUCE_USE_VDSP_FRAMEWORK
  310. vDSP_vclrD (dest, 1, (size_t) num);
  311. #else
  312. zeromem (dest, num * sizeof (double));
  313. #endif
  314. }
  315. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  316. {
  317. #if JUCE_USE_VDSP_FRAMEWORK
  318. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  319. #else
  320. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  321. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  322. #endif
  323. }
  324. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  325. {
  326. #if JUCE_USE_VDSP_FRAMEWORK
  327. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  328. #else
  329. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  330. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  331. #endif
  332. }
  333. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  334. {
  335. memcpy (dest, src, (size_t) num * sizeof (float));
  336. }
  337. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  338. {
  339. memcpy (dest, src, (size_t) num * sizeof (double));
  340. }
  341. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  342. {
  343. #if JUCE_USE_VDSP_FRAMEWORK
  344. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  345. #else
  346. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  347. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  348. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  349. #endif
  350. }
  351. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  352. {
  353. #if JUCE_USE_VDSP_FRAMEWORK
  354. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  355. #else
  356. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  357. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  358. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  359. #endif
  360. }
  361. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  362. {
  363. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  364. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  365. }
  366. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  367. {
  368. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  369. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  370. }
  371. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  372. {
  373. #if JUCE_USE_VDSP_FRAMEWORK
  374. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  375. #else
  376. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  377. #endif
  378. }
  379. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  380. {
  381. #if JUCE_USE_VDSP_FRAMEWORK
  382. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  383. #else
  384. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  385. #endif
  386. }
  387. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  388. {
  389. #if JUCE_USE_VDSP_FRAMEWORK
  390. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  391. #else
  392. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  393. #endif
  394. }
  395. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  396. {
  397. #if JUCE_USE_VDSP_FRAMEWORK
  398. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  399. #else
  400. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  401. #endif
  402. }
  403. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  404. {
  405. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  406. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  407. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  408. }
  409. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  410. {
  411. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  412. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  413. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  414. }
  415. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  416. {
  417. #if JUCE_USE_VDSP_FRAMEWORK
  418. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  419. #else
  420. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  421. #endif
  422. }
  423. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  424. {
  425. #if JUCE_USE_VDSP_FRAMEWORK
  426. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  427. #else
  428. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  429. #endif
  430. }
  431. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  432. {
  433. #if JUCE_USE_VDSP_FRAMEWORK
  434. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  435. #else
  436. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  437. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  438. #endif
  439. }
  440. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  441. {
  442. #if JUCE_USE_VDSP_FRAMEWORK
  443. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  444. #else
  445. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  446. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  447. #endif
  448. }
  449. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  450. {
  451. #if JUCE_USE_VDSP_FRAMEWORK
  452. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  453. #else
  454. copyWithMultiply (dest, src, -1.0f, num);
  455. #endif
  456. }
  457. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  458. {
  459. #if JUCE_USE_VDSP_FRAMEWORK
  460. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  461. #else
  462. copyWithMultiply (dest, src, -1.0f, num);
  463. #endif
  464. }
  465. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  466. {
  467. #if JUCE_USE_ARM_NEON
  468. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  469. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  470. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  471. #else
  472. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  473. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  474. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  475. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  476. #endif
  477. }
  478. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  479. {
  480. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  481. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  482. #else
  483. return Range<float>::findMinAndMax (src, num);
  484. #endif
  485. }
  486. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  487. {
  488. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  489. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  490. #else
  491. return Range<double>::findMinAndMax (src, num);
  492. #endif
  493. }
  494. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  495. {
  496. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  497. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  498. #else
  499. return juce::findMinimum (src, num);
  500. #endif
  501. }
  502. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  503. {
  504. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  505. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  506. #else
  507. return juce::findMinimum (src, num);
  508. #endif
  509. }
  510. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  511. {
  512. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  513. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  514. #else
  515. return juce::findMaximum (src, num);
  516. #endif
  517. }
  518. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  519. {
  520. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  521. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  522. #else
  523. return juce::findMaximum (src, num);
  524. #endif
  525. }
  526. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  527. {
  528. #if JUCE_USE_SSE_INTRINSICS
  529. if (FloatVectorHelpers::isSSE2Available())
  530. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  531. #endif
  532. (void) shouldEnable;
  533. }
  534. //==============================================================================
  535. //==============================================================================
  536. #if JUCE_UNIT_TESTS
  537. class FloatVectorOperationsTests : public UnitTest
  538. {
  539. public:
  540. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  541. template <typename ValueType>
  542. struct TestRunner
  543. {
  544. static void runTest (UnitTest& u, Random random)
  545. {
  546. const int range = random.nextBool() ? 500 : 10;
  547. const int num = random.nextInt (range) + 1;
  548. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  549. HeapBlock<int> buffer3 ((size_t) num + 16);
  550. #if JUCE_ARM
  551. ValueType* const data1 = buffer1;
  552. ValueType* const data2 = buffer2;
  553. int* const int1 = buffer3;
  554. #else
  555. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  556. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  557. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  558. #endif
  559. fillRandomly (random, data1, num);
  560. fillRandomly (random, data2, num);
  561. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  562. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  563. u.expect (minMax1 == minMax2);
  564. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  565. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  566. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  567. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  568. FloatVectorOperations::clear (data1, num);
  569. u.expect (areAllValuesEqual (data1, num, 0));
  570. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  571. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  572. FloatVectorOperations::add (data1, (ValueType) 2, num);
  573. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  574. FloatVectorOperations::copy (data2, data1, num);
  575. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  576. FloatVectorOperations::add (data2, data1, num);
  577. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  578. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  579. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  580. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  581. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  582. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  583. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  584. FloatVectorOperations::multiply (data1, data2, num);
  585. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  586. FloatVectorOperations::negate (data2, data1, num);
  587. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  588. FloatVectorOperations::subtract (data1, data2, num);
  589. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  590. fillRandomly (random, int1, num);
  591. doConversionTest (u, data1, data2, int1, num);
  592. }
  593. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  594. {
  595. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  596. convertFixed (data2, int1, 2.0f, num);
  597. u.expect (buffersMatch (data1, data2, num));
  598. }
  599. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  600. static void fillRandomly (Random& random, ValueType* d, int num)
  601. {
  602. while (--num >= 0)
  603. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  604. }
  605. static void fillRandomly (Random& random, int* d, int num)
  606. {
  607. while (--num >= 0)
  608. *d++ = random.nextInt();
  609. }
  610. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  611. {
  612. while (--num >= 0)
  613. *d++ = *s++ * multiplier;
  614. }
  615. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  616. {
  617. while (--num >= 0)
  618. if (*d++ != target)
  619. return false;
  620. return true;
  621. }
  622. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  623. {
  624. while (--num >= 0)
  625. if (! valuesMatch (*d1++, *d2++))
  626. return false;
  627. return true;
  628. }
  629. static bool valuesMatch (ValueType v1, ValueType v2)
  630. {
  631. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  632. }
  633. };
  634. void runTest()
  635. {
  636. beginTest ("FloatVectorOperations");
  637. for (int i = 1000; --i >= 0;)
  638. {
  639. TestRunner<float>::runTest (*this, getRandom());
  640. TestRunner<double>::runTest (*this, getRandom());
  641. }
  642. }
  643. };
  644. static FloatVectorOperationsTests vectorOpTests;
  645. #endif