Audio plugin host https://kx.studio/carla
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

juce_FloatVectorOperations.cpp 31KB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance steps passed as the `increment` argument of JUCE_VEC_LOOP.
  // Each one moves the pointer(s) forward by 16 bytes' worth of elements, where
  // the element size is taken from *dest (src is advanced by the same element
  // count, so both pointers are expected to have equally sized element types).
  #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  21. #if JUCE_USE_SSE_INTRINSICS
  22. static bool sse2Present = false;
  23. static bool isSSE2Available() noexcept
  24. {
  25. if (sse2Present)
  26. return true;
  27. sse2Present = SystemStats::hasSSE2();
  28. return sse2Present;
  29. }
  30. inline static bool isAligned (const void* p) noexcept
  31. {
  32. return (((pointer_sized_int) p) & 15) == 0;
  33. }
  34. struct BasicOps32
  35. {
  36. typedef float Type;
  37. typedef __m128 ParallelType;
  38. enum { numParallel = 4 };
  39. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  40. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  41. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  42. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  43. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  44. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  45. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  46. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  47. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  48. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  49. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  50. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  51. };
  52. struct BasicOps64
  53. {
  54. typedef double Type;
  55. typedef __m128d ParallelType;
  56. enum { numParallel = 2 };
  57. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  58. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  59. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  60. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  61. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  62. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  63. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  64. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  65. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  66. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  67. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  68. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  69. };
  // SSE flavour of the vector-op scaffolding. All of these macros expect the
  // enclosing function to have `dest` and `num` (and, for the SRC_DEST form,
  // `src`) in scope, and JUCE_FINISH_VEC_OP contains a bare `return`.

  // Opens the vector path: picks the BasicOps type from the element size of
  // *dest and, when SSE2 is available, enters a block in which numLongOps is
  // the number of whole vectors contained in num. The block is left open and
  // is closed by JUCE_FINISH_VEC_OP.
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      if (FloatVectorHelpers::isSSE2Available()) \
      { \
          const int numLongOps = num / Mode::numParallel;

  // Closes the block opened by JUCE_BEGIN_VEC_OP: reduces num to the elements
  // left over after the whole vectors (returning early if there are none) and
  // then runs normalOp once per remaining element. When the SSE2 block was
  // skipped, num is untouched and the loop handles every element.
  #define JUCE_FINISH_VEC_OP(normalOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) normalOp;

  // In-place operation on dest only. The aligned load/store pair is used when
  // dest passes isAligned(), the unaligned pair otherwise. `dummy` fills the
  // unused source-load slot of JUCE_VEC_LOOP.
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
      else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (normalOp)

  // Operation reading src and writing dest. The four branches cover each
  // combination of aligned/unaligned dest and src so that every pointer gets
  // the matching load/store variant.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
          else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
          else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      } \
      JUCE_FINISH_VEC_OP (normalOp)
  100. //==============================================================================
  101. #elif JUCE_USE_ARM_NEON
  102. struct BasicOps32
  103. {
  104. typedef float Type;
  105. typedef float32x4_t ParallelType;
  106. enum { numParallel = 4 };
  107. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  108. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  109. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  110. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  111. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  112. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  113. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  114. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  115. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  116. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  117. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  118. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  119. };
  120. struct BasicOps64
  121. {
  122. typedef double Type;
  123. typedef double ParallelType;
  124. enum { numParallel = 1 };
  125. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  126. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  127. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  128. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  129. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  130. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  131. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  132. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  133. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  134. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  135. static forcedinline Type max (ParallelType a) noexcept { return a; }
  136. static forcedinline Type min (ParallelType a) noexcept { return a; }
  137. };
  // NEON flavour of the vector-op scaffolding. As with the SSE version, these
  // macros expect `dest`, `num` (and `src` for the SRC_DEST form) to be in
  // scope, and JUCE_FINISH_VEC_OP contains a bare `return`.

  // Opens the vector path: picks the BasicOps type from the element size of
  // *dest and enters the vector block only when that type processes more than
  // one element at a time. The block is closed by JUCE_FINISH_VEC_OP.
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      if (Mode::numParallel > 1) \
      { \
          const int numLongOps = num / Mode::numParallel;

  // Closes the block opened by JUCE_BEGIN_VEC_OP: reduces num to the elements
  // left over after the whole vectors (returning early if there are none) and
  // then runs normalOp once per remaining element.
  #define JUCE_FINISH_VEC_OP(normalOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) normalOp;

  // In-place operation on dest only; always uses the loadU/storeU variants.
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (normalOp)

  // Operation reading src and writing dest; always uses the loadU/storeU variants.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      JUCE_FINISH_VEC_OP (normalOp)
  158. //==============================================================================
  159. #else
  // Scalar fallback used when neither SSE nor NEON is enabled: only normalOp is
  // expanded, once per element, and the vector-related arguments are discarded
  // without ever being compiled.
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
      for (int i = 0; i < num; ++i) normalOp;
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
      for (int i = 0; i < num; ++i) normalOp;
  164. #endif
  165. //==============================================================================
  // Core vector loop shared by the JUCE_PERFORM_VEC_OP_* macros. For each of
  // the numLongOps whole vectors it declares the per-iteration locals (one of
  // the JUCE_LOAD_* macros below), stores the result of vecOp to dest and then
  // advances the pointers with `increment`.
  #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (srcLoad, dstLoad); \
          dstStore (dest, vecOp); \
          increment; \
      }

  // Choices for the `locals` argument. They declare the vectors that vecOp may
  // refer to: `d` holds the current vector loaded from dest and `s` the one
  // loaded from src.
  #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  177. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  178. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  179. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  180. template <typename Mode>
  181. struct MinMax
  182. {
  183. typedef typename Mode::Type Type;
  184. typedef typename Mode::ParallelType ParallelType;
  185. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  186. {
  187. const int numLongOps = num / Mode::numParallel;
  188. #if JUCE_USE_SSE_INTRINSICS
  189. if (numLongOps > 1 && isSSE2Available())
  190. #else
  191. if (numLongOps > 1)
  192. #endif
  193. {
  194. ParallelType val;
  195. #if ! JUCE_USE_ARM_NEON
  196. if (isAligned (src))
  197. {
  198. val = Mode::loadA (src);
  199. if (isMinimum)
  200. {
  201. for (int i = 1; i < numLongOps; ++i)
  202. {
  203. src += Mode::numParallel;
  204. val = Mode::min (val, Mode::loadA (src));
  205. }
  206. }
  207. else
  208. {
  209. for (int i = 1; i < numLongOps; ++i)
  210. {
  211. src += Mode::numParallel;
  212. val = Mode::max (val, Mode::loadA (src));
  213. }
  214. }
  215. }
  216. else
  217. #endif
  218. {
  219. val = Mode::loadU (src);
  220. if (isMinimum)
  221. {
  222. for (int i = 1; i < numLongOps; ++i)
  223. {
  224. src += Mode::numParallel;
  225. val = Mode::min (val, Mode::loadU (src));
  226. }
  227. }
  228. else
  229. {
  230. for (int i = 1; i < numLongOps; ++i)
  231. {
  232. src += Mode::numParallel;
  233. val = Mode::max (val, Mode::loadU (src));
  234. }
  235. }
  236. }
  237. Type result = isMinimum ? Mode::min (val)
  238. : Mode::max (val);
  239. num &= (Mode::numParallel - 1);
  240. for (int i = 0; i < num; ++i)
  241. result = isMinimum ? jmin (result, src[i])
  242. : jmax (result, src[i]);
  243. return result;
  244. }
  245. return isMinimum ? juce::findMinimum (src, num)
  246. : juce::findMaximum (src, num);
  247. }
  248. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  249. {
  250. const int numLongOps = num / Mode::numParallel;
  251. #if JUCE_USE_SSE_INTRINSICS
  252. if (numLongOps > 1 && isSSE2Available())
  253. #else
  254. if (numLongOps > 1)
  255. #endif
  256. {
  257. ParallelType mn, mx;
  258. #if ! JUCE_USE_ARM_NEON
  259. if (isAligned (src))
  260. {
  261. mn = Mode::loadA (src);
  262. mx = mn;
  263. for (int i = 1; i < numLongOps; ++i)
  264. {
  265. src += Mode::numParallel;
  266. const ParallelType v = Mode::loadA (src);
  267. mn = Mode::min (mn, v);
  268. mx = Mode::max (mx, v);
  269. }
  270. }
  271. else
  272. #endif
  273. {
  274. mn = Mode::loadU (src);
  275. mx = mn;
  276. for (int i = 1; i < numLongOps; ++i)
  277. {
  278. src += Mode::numParallel;
  279. const ParallelType v = Mode::loadU (src);
  280. mn = Mode::min (mn, v);
  281. mx = Mode::max (mx, v);
  282. }
  283. }
  284. Range<Type> result (Mode::min (mn),
  285. Mode::max (mx));
  286. num &= 3;
  287. for (int i = 0; i < num; ++i)
  288. result = result.getUnionWith (src[i]);
  289. return result;
  290. }
  291. return Range<Type>::findMinAndMax (src, num);
  292. }
  293. };
  294. #endif
  295. }
  296. //==============================================================================
  297. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  298. {
  299. #if JUCE_USE_VDSP_FRAMEWORK
  300. vDSP_vclr (dest, 1, (size_t) num);
  301. #else
  302. zeromem (dest, num * sizeof (float));
  303. #endif
  304. }
  305. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  306. {
  307. #if JUCE_USE_VDSP_FRAMEWORK
  308. vDSP_vclrD (dest, 1, (size_t) num);
  309. #else
  310. zeromem (dest, num * sizeof (double));
  311. #endif
  312. }
  313. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  314. {
  315. #if JUCE_USE_VDSP_FRAMEWORK
  316. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  317. #else
  318. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  319. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  320. #endif
  321. }
  322. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  323. {
  324. #if JUCE_USE_VDSP_FRAMEWORK
  325. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  326. #else
  327. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  328. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  329. #endif
  330. }
  331. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  332. {
  333. memcpy (dest, src, (size_t) num * sizeof (float));
  334. }
  335. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  336. {
  337. memcpy (dest, src, (size_t) num * sizeof (double));
  338. }
  339. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  340. {
  341. #if JUCE_USE_VDSP_FRAMEWORK
  342. vDSP_vsmul (src, 1, &multiplier, dest, 1, num);
  343. #else
  344. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  345. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  346. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  347. #endif
  348. }
  349. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  350. {
  351. #if JUCE_USE_VDSP_FRAMEWORK
  352. vDSP_vsmulD (src, 1, &multiplier, dest, 1, num);
  353. #else
  354. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  355. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  356. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  357. #endif
  358. }
  359. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  360. {
  361. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  362. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  363. }
  364. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  365. {
  366. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  367. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  368. }
  369. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  370. {
  371. #if JUCE_USE_VDSP_FRAMEWORK
  372. vDSP_vadd (src, 1, dest, 1, dest, 1, num);
  373. #else
  374. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  375. #endif
  376. }
  377. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  378. {
  379. #if JUCE_USE_VDSP_FRAMEWORK
  380. vDSP_vaddD (src, 1, dest, 1, dest, 1, num);
  381. #else
  382. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  383. #endif
  384. }
  385. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  386. {
  387. #if JUCE_USE_VDSP_FRAMEWORK
  388. vDSP_vsub (src, 1, dest, 1, dest, 1, num);
  389. #else
  390. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  391. #endif
  392. }
  393. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  394. {
  395. #if JUCE_USE_VDSP_FRAMEWORK
  396. vDSP_vsubD (src, 1, dest, 1, dest, 1, num);
  397. #else
  398. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  399. #endif
  400. }
  401. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  402. {
  403. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  404. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  405. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  406. }
  407. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  408. {
  409. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  410. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  411. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  412. }
  413. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  414. {
  415. #if JUCE_USE_VDSP_FRAMEWORK
  416. vDSP_vmul (src, 1, dest, 1, dest, 1, num);
  417. #else
  418. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  419. #endif
  420. }
  421. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  422. {
  423. #if JUCE_USE_VDSP_FRAMEWORK
  424. vDSP_vmulD (src, 1, dest, 1, dest, 1, num);
  425. #else
  426. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  427. #endif
  428. }
  429. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  430. {
  431. #if JUCE_USE_VDSP_FRAMEWORK
  432. vDSP_vsmul (dest, 1, &multiplier, dest, 1, num);
  433. #else
  434. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  435. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  436. #endif
  437. }
  438. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  439. {
  440. #if JUCE_USE_VDSP_FRAMEWORK
  441. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, num);
  442. #else
  443. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  444. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  445. #endif
  446. }
  447. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  448. {
  449. #if JUCE_USE_VDSP_FRAMEWORK
  450. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  451. #else
  452. copyWithMultiply (dest, src, -1.0f, num);
  453. #endif
  454. }
  455. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  456. {
  457. #if JUCE_USE_VDSP_FRAMEWORK
  458. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  459. #else
  460. copyWithMultiply (dest, src, -1.0f, num);
  461. #endif
  462. }
  463. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  464. {
  465. #if JUCE_USE_ARM_NEON
  466. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  467. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  468. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  469. #else
  470. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  471. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  472. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  473. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  474. #endif
  475. }
  476. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  477. {
  478. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  479. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  480. #else
  481. return Range<float>::findMinAndMax (src, num);
  482. #endif
  483. }
  484. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  485. {
  486. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  487. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  488. #else
  489. return Range<double>::findMinAndMax (src, num);
  490. #endif
  491. }
  492. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  493. {
  494. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  495. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  496. #else
  497. return juce::findMinimum (src, num);
  498. #endif
  499. }
  500. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  501. {
  502. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  503. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  504. #else
  505. return juce::findMinimum (src, num);
  506. #endif
  507. }
  508. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  509. {
  510. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  511. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  512. #else
  513. return juce::findMaximum (src, num);
  514. #endif
  515. }
  516. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  517. {
  518. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  519. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  520. #else
  521. return juce::findMaximum (src, num);
  522. #endif
  523. }
  524. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  525. {
  526. #if JUCE_USE_SSE_INTRINSICS
  527. if (FloatVectorHelpers::isSSE2Available())
  528. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  529. #endif
  530. (void) shouldEnable;
  531. }
  532. //==============================================================================
  533. //==============================================================================
  534. #if JUCE_UNIT_TESTS
  535. class FloatVectorOperationsTests : public UnitTest
  536. {
  537. public:
  538. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  539. template <typename ValueType>
  540. struct TestRunner
  541. {
  542. static void runTest (UnitTest& u, Random random)
  543. {
  544. const int range = random.nextBool() ? 500 : 10;
  545. const int num = random.nextInt (range) + 1;
  546. HeapBlock<ValueType> buffer1 (num + 16), buffer2 (num + 16);
  547. HeapBlock<int> buffer3 (num + 16);
  548. #if JUCE_ARM
  549. ValueType* const data1 = buffer1;
  550. ValueType* const data2 = buffer2;
  551. int* const int1 = buffer3;
  552. #else
  553. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  554. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  555. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  556. #endif
  557. fillRandomly (random, data1, num);
  558. fillRandomly (random, data2, num);
  559. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  560. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  561. u.expect (minMax1 == minMax2);
  562. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  563. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  564. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  565. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  566. FloatVectorOperations::clear (data1, num);
  567. u.expect (areAllValuesEqual (data1, num, 0));
  568. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  569. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  570. FloatVectorOperations::add (data1, (ValueType) 2, num);
  571. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  572. FloatVectorOperations::copy (data2, data1, num);
  573. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  574. FloatVectorOperations::add (data2, data1, num);
  575. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  576. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  577. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  578. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  579. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  580. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  581. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  582. FloatVectorOperations::multiply (data1, data2, num);
  583. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  584. FloatVectorOperations::negate (data2, data1, num);
  585. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  586. FloatVectorOperations::subtract (data1, data2, num);
  587. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  588. fillRandomly (random, int1, num);
  589. doConversionTest (u, data1, data2, int1, num);
  590. }
  591. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  592. {
  593. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  594. convertFixed (data2, int1, 2.0f, num);
  595. u.expect (buffersMatch (data1, data2, num));
  596. }
  597. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  598. static void fillRandomly (Random& random, ValueType* d, int num)
  599. {
  600. while (--num >= 0)
  601. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  602. }
  603. static void fillRandomly (Random& random, int* d, int num)
  604. {
  605. while (--num >= 0)
  606. *d++ = random.nextInt();
  607. }
  608. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  609. {
  610. while (--num >= 0)
  611. *d++ = *s++ * multiplier;
  612. }
  613. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  614. {
  615. while (--num >= 0)
  616. if (*d++ != target)
  617. return false;
  618. return true;
  619. }
  620. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  621. {
  622. while (--num >= 0)
  623. if (! valuesMatch (*d1++, *d2++))
  624. return false;
  625. return true;
  626. }
  627. static bool valuesMatch (ValueType v1, ValueType v2)
  628. {
  629. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  630. }
  631. };
  632. void runTest()
  633. {
  634. beginTest ("FloatVectorOperations");
  635. for (int i = 1000; --i >= 0;)
  636. {
  637. TestRunner<float>::runTest (*this, getRandom());
  638. TestRunner<double>::runTest (*this, getRandom());
  639. }
  640. }
  641. };
  642. static FloatVectorOperationsTests vectorOpTests;
  643. #endif