Audio plugin host https://kx.studio/carla
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

894 lines
37KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  19. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  20. #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  21. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. static bool sse2Present = false;
  24. static bool isSSE2Available() noexcept
  25. {
  26. if (sse2Present)
  27. return true;
  28. sse2Present = SystemStats::hasSSE2();
  29. return sse2Present;
  30. }
  31. inline static bool isAligned (const void* p) noexcept
  32. {
  33. return (((pointer_sized_int) p) & 15) == 0;
  34. }
  35. struct BasicOps32
  36. {
  37. typedef float Type;
  38. typedef __m128 ParallelType;
  39. enum { numParallel = 4 };
  40. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  41. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  42. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  43. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  44. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  45. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  46. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  47. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  48. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  49. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  50. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  51. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  52. };
  53. struct BasicOps64
  54. {
  55. typedef double Type;
  56. typedef __m128d ParallelType;
  57. enum { numParallel = 2 };
  58. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  59. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  60. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  61. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  62. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  63. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  64. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  65. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  66. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  67. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  68. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  69. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  70. };
  71. #define JUCE_BEGIN_VEC_OP \
  72. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  73. if (FloatVectorHelpers::isSSE2Available()) \
  74. { \
  75. const int numLongOps = num / Mode::numParallel;
  76. #define JUCE_FINISH_VEC_OP(normalOp) \
  77. num &= (Mode::numParallel - 1); \
  78. if (num == 0) return; \
  79. } \
  80. for (int i = 0; i < num; ++i) normalOp;
  81. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  82. JUCE_BEGIN_VEC_OP \
  83. setupOp \
  84. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  85. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  86. JUCE_FINISH_VEC_OP (normalOp)
  87. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  88. JUCE_BEGIN_VEC_OP \
  89. setupOp \
  90. if (FloatVectorHelpers::isAligned (dest)) \
  91. { \
  92. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  93. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  94. }\
  95. else \
  96. { \
  97. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  98. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  99. } \
  100. JUCE_FINISH_VEC_OP (normalOp)
  101. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  102. JUCE_BEGIN_VEC_OP \
  103. setupOp \
  104. { \
  105. Mode::ParallelType (&loadSrc1) (const Mode::Type* v) = FloatVectorHelpers::isAligned (src1) ? Mode::loadA : Mode::loadU; \
  106. Mode::ParallelType (&loadSrc2) (const Mode::Type* v) = FloatVectorHelpers::isAligned (src2) ? Mode::loadA : Mode::loadU; \
  107. void (&storeDst) (Mode::Type* dest, Mode::ParallelType a) = FloatVectorHelpers::isAligned (dest) ? Mode::storeA : Mode::storeU; \
  108. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, loadSrc1, loadSrc2, storeDst, locals, increment); \
  109. } \
  110. JUCE_FINISH_VEC_OP (normalOp)
  111. //==============================================================================
  112. #elif JUCE_USE_ARM_NEON
  113. struct BasicOps32
  114. {
  115. typedef float Type;
  116. typedef float32x4_t ParallelType;
  117. enum { numParallel = 4 };
  118. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  119. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  120. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  121. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  122. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  123. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  124. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  125. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  126. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  127. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  128. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  129. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  130. };
  131. struct BasicOps64
  132. {
  133. typedef double Type;
  134. typedef double ParallelType;
  135. enum { numParallel = 1 };
  136. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  137. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  138. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  139. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  140. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  141. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  142. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  143. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  144. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  145. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  146. static forcedinline Type max (ParallelType a) noexcept { return a; }
  147. static forcedinline Type min (ParallelType a) noexcept { return a; }
  148. };
  149. #define JUCE_BEGIN_VEC_OP \
  150. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  151. if (Mode::numParallel > 1) \
  152. { \
  153. const int numLongOps = num / Mode::numParallel;
  154. #define JUCE_FINISH_VEC_OP(normalOp) \
  155. num &= (Mode::numParallel - 1); \
  156. if (num == 0) return; \
  157. } \
  158. for (int i = 0; i < num; ++i) normalOp;
  159. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  160. JUCE_BEGIN_VEC_OP \
  161. setupOp \
  162. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  163. JUCE_FINISH_VEC_OP (normalOp)
  164. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  165. JUCE_BEGIN_VEC_OP \
  166. setupOp \
  167. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  168. JUCE_FINISH_VEC_OP (normalOp)
  169. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  170. JUCE_BEGIN_VEC_OP \
  171. setupOp \
  172. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  173. JUCE_FINISH_VEC_OP (normalOp)
  174. //==============================================================================
  175. #else
  176. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  177. for (int i = 0; i < num; ++i) normalOp;
  178. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  179. for (int i = 0; i < num; ++i) normalOp;
  180. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  181. for (int i = 0; i < num; ++i) normalOp;
  182. #endif
  183. //==============================================================================
  184. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  185. for (int i = 0; i < numLongOps; ++i) \
  186. { \
  187. locals (srcLoad, dstLoad); \
  188. dstStore (dest, vecOp); \
  189. increment; \
  190. }
  191. #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
  192. for (int i = 0; i < numLongOps; ++i) \
  193. { \
  194. locals (src1Load, src2Load); \
  195. dstStore (dest, vecOp); \
  196. increment; \
  197. }
  198. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  199. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  200. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  201. #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
  202. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  203. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  204. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  205. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  206. template <typename Mode>
  207. struct MinMax
  208. {
  209. typedef typename Mode::Type Type;
  210. typedef typename Mode::ParallelType ParallelType;
  211. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  212. {
  213. int numLongOps = num / Mode::numParallel;
  214. #if JUCE_USE_SSE_INTRINSICS
  215. if (numLongOps > 1 && isSSE2Available())
  216. #else
  217. if (numLongOps > 1)
  218. #endif
  219. {
  220. ParallelType val;
  221. #if ! JUCE_USE_ARM_NEON
  222. if (isAligned (src))
  223. {
  224. val = Mode::loadA (src);
  225. if (isMinimum)
  226. {
  227. while (--numLongOps > 0)
  228. {
  229. src += Mode::numParallel;
  230. val = Mode::min (val, Mode::loadA (src));
  231. }
  232. }
  233. else
  234. {
  235. while (--numLongOps > 0)
  236. {
  237. src += Mode::numParallel;
  238. val = Mode::max (val, Mode::loadA (src));
  239. }
  240. }
  241. }
  242. else
  243. #endif
  244. {
  245. val = Mode::loadU (src);
  246. if (isMinimum)
  247. {
  248. while (--numLongOps > 0)
  249. {
  250. src += Mode::numParallel;
  251. val = Mode::min (val, Mode::loadU (src));
  252. }
  253. }
  254. else
  255. {
  256. while (--numLongOps > 0)
  257. {
  258. src += Mode::numParallel;
  259. val = Mode::max (val, Mode::loadU (src));
  260. }
  261. }
  262. }
  263. Type result = isMinimum ? Mode::min (val)
  264. : Mode::max (val);
  265. num &= (Mode::numParallel - 1);
  266. src += Mode::numParallel;
  267. for (int i = 0; i < num; ++i)
  268. result = isMinimum ? jmin (result, src[i])
  269. : jmax (result, src[i]);
  270. return result;
  271. }
  272. return isMinimum ? juce::findMinimum (src, num)
  273. : juce::findMaximum (src, num);
  274. }
  275. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  276. {
  277. int numLongOps = num / Mode::numParallel;
  278. #if JUCE_USE_SSE_INTRINSICS
  279. if (numLongOps > 1 && isSSE2Available())
  280. #else
  281. if (numLongOps > 1)
  282. #endif
  283. {
  284. ParallelType mn, mx;
  285. #if ! JUCE_USE_ARM_NEON
  286. if (isAligned (src))
  287. {
  288. mn = Mode::loadA (src);
  289. mx = mn;
  290. while (--numLongOps > 0)
  291. {
  292. src += Mode::numParallel;
  293. const ParallelType v = Mode::loadA (src);
  294. mn = Mode::min (mn, v);
  295. mx = Mode::max (mx, v);
  296. }
  297. }
  298. else
  299. #endif
  300. {
  301. mn = Mode::loadU (src);
  302. mx = mn;
  303. while (--numLongOps > 0)
  304. {
  305. src += Mode::numParallel;
  306. const ParallelType v = Mode::loadU (src);
  307. mn = Mode::min (mn, v);
  308. mx = Mode::max (mx, v);
  309. }
  310. }
  311. Range<Type> result (Mode::min (mn),
  312. Mode::max (mx));
  313. num &= (Mode::numParallel - 1);
  314. src += Mode::numParallel;
  315. for (int i = 0; i < num; ++i)
  316. result = result.getUnionWith (src[i]);
  317. return result;
  318. }
  319. return Range<Type>::findMinAndMax (src, num);
  320. }
  321. };
  322. #endif
  323. }
  324. //==============================================================================
  325. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  326. {
  327. #if JUCE_USE_VDSP_FRAMEWORK
  328. vDSP_vclr (dest, 1, (size_t) num);
  329. #else
  330. zeromem (dest, num * sizeof (float));
  331. #endif
  332. }
  333. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  334. {
  335. #if JUCE_USE_VDSP_FRAMEWORK
  336. vDSP_vclrD (dest, 1, (size_t) num);
  337. #else
  338. zeromem (dest, num * sizeof (double));
  339. #endif
  340. }
  341. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  342. {
  343. #if JUCE_USE_VDSP_FRAMEWORK
  344. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  345. #else
  346. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  347. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  348. #endif
  349. }
  350. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  351. {
  352. #if JUCE_USE_VDSP_FRAMEWORK
  353. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  354. #else
  355. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  356. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  357. #endif
  358. }
  359. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  360. {
  361. memcpy (dest, src, (size_t) num * sizeof (float));
  362. }
  363. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  364. {
  365. memcpy (dest, src, (size_t) num * sizeof (double));
  366. }
  367. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  368. {
  369. #if JUCE_USE_VDSP_FRAMEWORK
  370. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  371. #else
  372. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  373. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  374. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  375. #endif
  376. }
  377. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  378. {
  379. #if JUCE_USE_VDSP_FRAMEWORK
  380. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  381. #else
  382. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  383. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  384. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  385. #endif
  386. }
  387. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  388. {
  389. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  390. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  391. }
  392. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  393. {
  394. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  395. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  396. }
  397. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float* src, float amount, int num) noexcept
  398. {
  399. #if JUCE_USE_VDSP_FRAMEWORK
  400. vDSP_vsadd (src, 1, &amount, dest, 1, (vDSP_Length) num);
  401. #else
  402. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  403. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  404. const Mode::ParallelType am = Mode::load1 (amount);)
  405. #endif
  406. }
  407. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double* src, double amount, int num) noexcept
  408. {
  409. #if JUCE_USE_VDSP_FRAMEWORK
  410. vDSP_vsaddD (src, 1, &amount, dest, 1, (vDSP_Length) num);
  411. #else
  412. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  413. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  414. const Mode::ParallelType am = Mode::load1 (amount);)
  415. #endif
  416. }
  417. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  418. {
  419. #if JUCE_USE_VDSP_FRAMEWORK
  420. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  421. #else
  422. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  423. #endif
  424. }
  425. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  426. {
  427. #if JUCE_USE_VDSP_FRAMEWORK
  428. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  429. #else
  430. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  431. #endif
  432. }
  433. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  434. {
  435. #if JUCE_USE_VDSP_FRAMEWORK
  436. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  437. #else
  438. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  439. #endif
  440. }
  441. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  442. {
  443. #if JUCE_USE_VDSP_FRAMEWORK
  444. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  445. #else
  446. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  447. #endif
  448. }
  449. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  450. {
  451. #if JUCE_USE_VDSP_FRAMEWORK
  452. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  453. #else
  454. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  455. #endif
  456. }
  457. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  458. {
  459. #if JUCE_USE_VDSP_FRAMEWORK
  460. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  461. #else
  462. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  463. #endif
  464. }
  465. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  466. {
  467. #if JUCE_USE_VDSP_FRAMEWORK
  468. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  469. #else
  470. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  471. #endif
  472. }
  473. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  474. {
  475. #if JUCE_USE_VDSP_FRAMEWORK
  476. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  477. #else
  478. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  479. #endif
  480. }
  481. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  482. {
  483. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  484. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  485. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  486. }
  487. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  488. {
  489. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  490. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  491. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  492. }
  493. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  494. {
  495. #if JUCE_USE_VDSP_FRAMEWORK
  496. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  497. #else
  498. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  499. #endif
  500. }
  501. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  502. {
  503. #if JUCE_USE_VDSP_FRAMEWORK
  504. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  505. #else
  506. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  507. #endif
  508. }
  509. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  510. {
  511. #if JUCE_USE_VDSP_FRAMEWORK
  512. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  513. #else
  514. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  515. #endif
  516. }
  517. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  518. {
  519. #if JUCE_USE_VDSP_FRAMEWORK
  520. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  521. #else
  522. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  523. #endif
  524. }
  525. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  526. {
  527. #if JUCE_USE_VDSP_FRAMEWORK
  528. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  529. #else
  530. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  531. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  532. #endif
  533. }
  534. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  535. {
  536. #if JUCE_USE_VDSP_FRAMEWORK
  537. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  538. #else
  539. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  540. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  541. #endif
  542. }
  543. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  544. {
  545. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  546. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  547. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  548. }
  549. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  550. {
  551. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  552. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  553. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  554. }
  555. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  556. {
  557. #if JUCE_USE_VDSP_FRAMEWORK
  558. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  559. #else
  560. copyWithMultiply (dest, src, -1.0f, num);
  561. #endif
  562. }
  563. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  564. {
  565. #if JUCE_USE_VDSP_FRAMEWORK
  566. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  567. #else
  568. copyWithMultiply (dest, src, -1.0f, num);
  569. #endif
  570. }
  571. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  572. {
  573. #if JUCE_USE_ARM_NEON
  574. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  575. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  576. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  577. #else
  578. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  579. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  580. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  581. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  582. #endif
  583. }
  584. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  585. {
  586. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  587. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  588. #else
  589. return Range<float>::findMinAndMax (src, num);
  590. #endif
  591. }
  592. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  593. {
  594. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  595. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  596. #else
  597. return Range<double>::findMinAndMax (src, num);
  598. #endif
  599. }
  600. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  601. {
  602. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  603. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  604. #else
  605. return juce::findMinimum (src, num);
  606. #endif
  607. }
  608. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  609. {
  610. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  611. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  612. #else
  613. return juce::findMinimum (src, num);
  614. #endif
  615. }
  616. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  617. {
  618. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  619. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  620. #else
  621. return juce::findMaximum (src, num);
  622. #endif
  623. }
  624. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  625. {
  626. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  627. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  628. #else
  629. return juce::findMaximum (src, num);
  630. #endif
  631. }
  632. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  633. {
  634. #if JUCE_USE_SSE_INTRINSICS
  635. if (FloatVectorHelpers::isSSE2Available())
  636. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  637. #endif
  638. (void) shouldEnable;
  639. }
  640. //==============================================================================
  641. //==============================================================================
  642. #if JUCE_UNIT_TESTS
  643. class FloatVectorOperationsTests : public UnitTest
  644. {
  645. public:
  646. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  647. template <typename ValueType>
  648. struct TestRunner
  649. {
  650. static void runTest (UnitTest& u, Random random)
  651. {
  652. const int range = random.nextBool() ? 500 : 10;
  653. const int num = random.nextInt (range) + 1;
  654. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  655. HeapBlock<int> buffer3 ((size_t) num + 16);
  656. #if JUCE_ARM
  657. ValueType* const data1 = buffer1;
  658. ValueType* const data2 = buffer2;
  659. int* const int1 = buffer3;
  660. #else
  661. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  662. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  663. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  664. #endif
  665. fillRandomly (random, data1, num);
  666. fillRandomly (random, data2, num);
  667. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  668. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  669. u.expect (minMax1 == minMax2);
  670. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  671. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  672. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  673. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  674. FloatVectorOperations::clear (data1, num);
  675. u.expect (areAllValuesEqual (data1, num, 0));
  676. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  677. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  678. FloatVectorOperations::add (data1, (ValueType) 2, num);
  679. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  680. FloatVectorOperations::copy (data2, data1, num);
  681. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  682. FloatVectorOperations::add (data2, data1, num);
  683. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  684. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  685. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  686. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  687. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  688. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  689. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  690. FloatVectorOperations::multiply (data1, data2, num);
  691. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  692. FloatVectorOperations::negate (data2, data1, num);
  693. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  694. FloatVectorOperations::subtract (data1, data2, num);
  695. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  696. fillRandomly (random, int1, num);
  697. doConversionTest (u, data1, data2, int1, num);
  698. }
  699. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  700. {
  701. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  702. convertFixed (data2, int1, 2.0f, num);
  703. u.expect (buffersMatch (data1, data2, num));
  704. }
  705. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  706. static void fillRandomly (Random& random, ValueType* d, int num)
  707. {
  708. while (--num >= 0)
  709. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  710. }
  711. static void fillRandomly (Random& random, int* d, int num)
  712. {
  713. while (--num >= 0)
  714. *d++ = random.nextInt();
  715. }
  716. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  717. {
  718. while (--num >= 0)
  719. *d++ = *s++ * multiplier;
  720. }
  721. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  722. {
  723. while (--num >= 0)
  724. if (*d++ != target)
  725. return false;
  726. return true;
  727. }
  728. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  729. {
  730. while (--num >= 0)
  731. if (! valuesMatch (*d1++, *d2++))
  732. return false;
  733. return true;
  734. }
  735. static bool valuesMatch (ValueType v1, ValueType v2)
  736. {
  737. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  738. }
  739. };
  740. void runTest()
  741. {
  742. beginTest ("FloatVectorOperations");
  743. for (int i = 1000; --i >= 0;)
  744. {
  745. TestRunner<float>::runTest (*this, getRandom());
  746. TestRunner<double>::runTest (*this, getRandom());
  747. }
  748. }
  749. };
  750. static FloatVectorOperationsTests vectorOpTests;
  751. #endif