Audio plugin host https://kx.studio/carla
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

juce_FloatVectorOperations.cpp 32KB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774
  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance steps passed as the `increment` argument of JUCE_VEC_LOOP.
  // Each pointer moves forward by 16 / sizeof (*dest) elements, i.e. 16 bytes' worth of
  // dest-sized elements. Note that src is advanced by a count derived from dest's element
  // size, not from its own.
  19. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  20. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  21. #if JUCE_USE_SSE_INTRINSICS
  22. static bool sse2Present = false;
  23. static bool isSSE2Available() noexcept
  24. {
  25. if (sse2Present)
  26. return true;
  27. sse2Present = SystemStats::hasSSE2();
  28. return sse2Present;
  29. }
  30. inline static bool isAligned (const void* p) noexcept
  31. {
  32. return (((pointer_sized_int) p) & 15) == 0;
  33. }
  // SSE operation set for float. Wraps the __m128 intrinsics (numParallel = 4 floats per
  // vector) behind the load/store/add/sub/mul/max/min names that the vector macros and
  // MinMax reach through `Mode`.
  34. struct BasicOps32
  35. {
  36. typedef float Type;
  37. typedef __m128 ParallelType;
  38. enum { numParallel = 4 };
  // load1 broadcasts a single value into every lane. loadA/storeA use the aligned
  // intrinsics, loadU/storeU the unaligned ones.
  39. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  40. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  41. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  42. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  43. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  // Lane-wise arithmetic and lane-wise max/min of two vectors.
  44. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  45. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  46. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  47. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  48. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  // Horizontal reductions: the vector is written to a local array with the unaligned store
  // and the four lanes are reduced with jmax/jmin.
  49. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  50. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  51. };
  // SSE2 operation set for double. Same interface as BasicOps32, but built on the __m128d
  // intrinsics, so each vector holds numParallel = 2 doubles.
  52. struct BasicOps64
  53. {
  54. typedef double Type;
  55. typedef __m128d ParallelType;
  56. enum { numParallel = 2 };
  // load1 broadcasts a single value into both lanes. loadA/storeA use the aligned
  // intrinsics, loadU/storeU the unaligned ones.
  57. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  58. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  59. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  60. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  61. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  // Lane-wise arithmetic and lane-wise max/min of two vectors.
  62. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  63. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  64. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  65. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  66. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  // Horizontal reductions: the vector is written to a local array with the unaligned store
  // and the two lanes are reduced with jmax/jmin.
  67. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  68. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  69. };
  // SSE build: opens the vector fast path. Selects the operation set (Mode) from the element
  // size of dest, starts an `if (isSSE2Available())` block and computes how many whole
  // vectors fit into num. The block is intentionally left open; JUCE_FINISH_VEC_OP closes it.
  70. #define JUCE_BEGIN_VEC_OP \
  71. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  72. if (FloatVectorHelpers::isSSE2Available()) \
  73. { \
  74. const int numLongOps = num / Mode::numParallel;
  // Closes the block opened by JUCE_BEGIN_VEC_OP. Inside that block num is reduced to the
  // number of leftover elements and the enclosing function returns if there are none. The
  // trailing scalar loop then applies normalOp to the leftovers (the vector loop's increment
  // has already advanced the pointers), or to all num elements when the SSE2 block was skipped.
  75. #define JUCE_FINISH_VEC_OP(normalOp) \
  76. num &= (Mode::numParallel - 1); \
  77. if (num == 0) return; \
  78. } \
  79. for (int i = 0; i < num; ++i) normalOp;
  // In-place operation on dest only. setupOp is emitted once before the loop; the aligned
  // load/store pair is used when dest is 16-byte aligned, the unaligned pair otherwise.
  // `dummy` fills the unused source-load slot of JUCE_VEC_LOOP.
  80. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  81. JUCE_BEGIN_VEC_OP \
  82. setupOp \
  83. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  84. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  85. JUCE_FINISH_VEC_OP (normalOp)
  // Operation reading src and writing dest. The alignment of dest and src is tested
  // independently, giving four loops that differ only in which aligned/unaligned load and
  // store functions are handed to JUCE_VEC_LOOP (argument order: source load, dest load,
  // dest store).
  86. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  87. JUCE_BEGIN_VEC_OP \
  88. setupOp \
  89. if (FloatVectorHelpers::isAligned (dest)) \
  90. { \
  91. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  92. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  93. }\
  94. else \
  95. { \
  96. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  97. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  98. } \
  99. JUCE_FINISH_VEC_OP (normalOp)
  100. //==============================================================================
  101. #elif JUCE_USE_ARM_NEON
  // ARM NEON operation set for float, built on float32x4_t (numParallel = 4 floats per
  // vector). loadA/loadU and storeA/storeU call the same intrinsic here, so this set makes
  // no distinction between aligned and unaligned access.
  102. struct BasicOps32
  103. {
  104. typedef float Type;
  105. typedef float32x4_t ParallelType;
  106. enum { numParallel = 4 };
  107. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  108. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  109. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  110. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  111. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  // Lane-wise arithmetic and lane-wise max/min of two vectors.
  112. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  113. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  114. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  115. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  116. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  // Horizontal reductions: the vector is written to a local array and the four lanes are
  // reduced with jmax/jmin.
  117. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  118. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  119. };
  // Scalar stand-in for the double operation set in the NEON build. ParallelType is a plain
  // double and numParallel is 1, so every "vector" operation here is ordinary scalar code.
  // With numParallel == 1 the NEON JUCE_BEGIN_VEC_OP guard (numParallel > 1) is false, so the
  // macros fall through to their scalar loop for doubles.
  120. struct BasicOps64
  121. {
  122. typedef double Type;
  123. typedef double ParallelType;
  124. enum { numParallel = 1 };
  125. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  126. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  127. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  128. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  129. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  130. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  131. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  132. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  133. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  134. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  // With a single lane the horizontal reduction is the value itself.
  135. static forcedinline Type max (ParallelType a) noexcept { return a; }
  136. static forcedinline Type min (ParallelType a) noexcept { return a; }
  137. };
  // NEON build: opens the vector fast path. Selects the operation set (Mode) from the element
  // size of dest and enters the vector block only when that set really has more than one
  // lane. The block is intentionally left open; JUCE_FINISH_VEC_OP closes it.
  138. #define JUCE_BEGIN_VEC_OP \
  139. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  140. if (Mode::numParallel > 1) \
  141. { \
  142. const int numLongOps = num / Mode::numParallel;
  // Closes the block opened by JUCE_BEGIN_VEC_OP. Inside that block num is reduced to the
  // number of leftover elements and the enclosing function returns if there are none. The
  // trailing scalar loop applies normalOp to the leftovers, or to all num elements when the
  // vector block was skipped.
  143. #define JUCE_FINISH_VEC_OP(normalOp) \
  144. num &= (Mode::numParallel - 1); \
  145. if (num == 0) return; \
  146. } \
  147. for (int i = 0; i < num; ++i) normalOp;
  // In-place operation on dest only. No alignment test is made in this build; the loadU/storeU
  // functions are always used. `dummy` fills the unused source-load slot of JUCE_VEC_LOOP.
  148. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  149. JUCE_BEGIN_VEC_OP \
  150. setupOp \
  151. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  152. JUCE_FINISH_VEC_OP (normalOp)
  // Operation reading src and writing dest, again always through loadU/storeU.
  153. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  154. JUCE_BEGIN_VEC_OP \
  155. setupOp \
  156. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  157. JUCE_FINISH_VEC_OP (normalOp)
  158. //==============================================================================
  159. #else
  // Build with neither SSE nor NEON: both macros expand to a plain scalar loop over normalOp.
  // The vecOp, locals, increment and setupOp arguments are accepted but never expanded.
  160. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  161. for (int i = 0; i < num; ++i) normalOp;
  162. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  163. for (int i = 0; i < num; ++i) normalOp;
  164. #endif
  165. //==============================================================================
  // Core vector loop shared by the SSE and NEON macros. It runs numLongOps times; each pass
  // expands `locals` (one of the JUCE_LOAD_* macros below) to declare the inputs, stores the
  // value of vecOp to dest with dstStore, then runs `increment` to advance the pointers.
  166. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  167. for (int i = 0; i < numLongOps; ++i) \
  168. { \
  169. locals (srcLoad, dstLoad); \
  170. dstStore (dest, vecOp); \
  171. increment; \
  172. }
  // Choices for the `locals` argument. They declare `d` (current vector loaded from dest)
  // and/or `s` (current vector loaded from src) for use inside vecOp; JUCE_LOAD_NONE
  // declares nothing.
  173. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  174. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  175. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  176. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  177. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  178. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  179. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  // Vectorised minimum / maximum searches over an array, parameterised on an operation set
  // (Mode is BasicOps32 or BasicOps64).
  180. template <typename Mode>
  181. struct MinMax
  182. {
  183. typedef typename Mode::Type Type;
  184. typedef typename Mode::ParallelType ParallelType;
  // Returns the smallest (isMinimum == true) or largest (isMinimum == false) of the num
  // values at src. The vector path is taken only when at least two whole vectors fit into
  // num (and, in SSE builds, SSE2 is available); otherwise the call is forwarded to
  // juce::findMinimum / juce::findMaximum.
  185. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  186. {
  187. int numLongOps = num / Mode::numParallel;
  188. #if JUCE_USE_SSE_INTRINSICS
  189. if (numLongOps > 1 && isSSE2Available())
  190. #else
  191. if (numLongOps > 1)
  192. #endif
  193. {
  194. ParallelType val;
  // The aligned-load variant is compiled only for non-NEON builds.
  195. #if ! JUCE_USE_ARM_NEON
  196. if (isAligned (src))
  197. {
  // The first vector seeds the running result, so the pre-decrementing loops below
  // handle the remaining numLongOps - 1 vectors.
  198. val = Mode::loadA (src);
  199. if (isMinimum)
  200. {
  201. while (--numLongOps > 0)
  202. {
  203. src += Mode::numParallel;
  204. val = Mode::min (val, Mode::loadA (src));
  205. }
  206. }
  207. else
  208. {
  209. while (--numLongOps > 0)
  210. {
  211. src += Mode::numParallel;
  212. val = Mode::max (val, Mode::loadA (src));
  213. }
  214. }
  215. }
  216. else
  217. #endif
  218. {
  // Same traversal as above, using unaligned loads.
  219. val = Mode::loadU (src);
  220. if (isMinimum)
  221. {
  222. while (--numLongOps > 0)
  223. {
  224. src += Mode::numParallel;
  225. val = Mode::min (val, Mode::loadU (src));
  226. }
  227. }
  228. else
  229. {
  230. while (--numLongOps > 0)
  231. {
  232. src += Mode::numParallel;
  233. val = Mode::max (val, Mode::loadU (src));
  234. }
  235. }
  236. }
  // Collapse the per-lane results into one scalar.
  237. Type result = isMinimum ? Mode::min (val)
  238. : Mode::max (val);
  // src still points at the last vector that was read; step past it so that src[i]
  // addresses the leftover elements, then fold those in one at a time.
  239. num &= (Mode::numParallel - 1);
  240. src += Mode::numParallel;
  241. for (int i = 0; i < num; ++i)
  242. result = isMinimum ? jmin (result, src[i])
  243. : jmax (result, src[i]);
  244. return result;
  245. }
  246. return isMinimum ? juce::findMinimum (src, num)
  247. : juce::findMaximum (src, num);
  248. }
  // Returns a Range built from the smallest and largest of the num values at src, tracking
  // both in a single pass. The same conditions as findMinOrMax decide whether the vector path
  // is used; otherwise the call is forwarded to Range<Type>::findMinAndMax.
  249. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  250. {
  251. int numLongOps = num / Mode::numParallel;
  252. #if JUCE_USE_SSE_INTRINSICS
  253. if (numLongOps > 1 && isSSE2Available())
  254. #else
  255. if (numLongOps > 1)
  256. #endif
  257. {
  258. ParallelType mn, mx;
  // The aligned-load variant is compiled only for non-NEON builds.
  259. #if ! JUCE_USE_ARM_NEON
  260. if (isAligned (src))
  261. {
  // The first vector seeds both running results; the loop handles the other
  // numLongOps - 1 vectors.
  262. mn = Mode::loadA (src);
  263. mx = mn;
  264. while (--numLongOps > 0)
  265. {
  266. src += Mode::numParallel;
  267. const ParallelType v = Mode::loadA (src);
  268. mn = Mode::min (mn, v);
  269. mx = Mode::max (mx, v);
  270. }
  271. }
  272. else
  273. #endif
  274. {
  // Same traversal as above, using unaligned loads.
  275. mn = Mode::loadU (src);
  276. mx = mn;
  277. while (--numLongOps > 0)
  278. {
  279. src += Mode::numParallel;
  280. const ParallelType v = Mode::loadU (src);
  281. mn = Mode::min (mn, v);
  282. mx = Mode::max (mx, v);
  283. }
  284. }
  // Collapse the per-lane minima and maxima into the two ends of the range.
  285. Range<Type> result (Mode::min (mn),
  286. Mode::max (mx));
  // Step past the last vector that was read, then widen the range with each leftover element.
  287. num &= (Mode::numParallel - 1);
  288. src += Mode::numParallel;
  289. for (int i = 0; i < num; ++i)
  290. result = result.getUnionWith (src[i]);
  291. return result;
  292. }
  293. return Range<Type>::findMinAndMax (src, num);
  294. }
  295. };
  296. #endif
  297. }
  298. //==============================================================================
  299. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  300. {
  301. #if JUCE_USE_VDSP_FRAMEWORK
  302. vDSP_vclr (dest, 1, (size_t) num);
  303. #else
  304. zeromem (dest, num * sizeof (float));
  305. #endif
  306. }
  307. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  308. {
  309. #if JUCE_USE_VDSP_FRAMEWORK
  310. vDSP_vclrD (dest, 1, (size_t) num);
  311. #else
  312. zeromem (dest, num * sizeof (double));
  313. #endif
  314. }
  /** Sets each of the num floats at dest to valueToFill.

      Uses vDSP_vfill when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the work is done by
      JUCE_PERFORM_VEC_OP_DEST: `val` is the value broadcast into a vector once before the
      loop, and `dest[i] = valueToFill` is the scalar form.
  */
  315. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  316. {
  317. #if JUCE_USE_VDSP_FRAMEWORK
  318. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  319. #else
  320. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  321. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  322. #endif
  323. }
  /** Sets each of the num doubles at dest to valueToFill.

      Uses vDSP_vfillD when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the work is done by
      JUCE_PERFORM_VEC_OP_DEST: `val` is the value broadcast into a vector once before the
      loop, and `dest[i] = valueToFill` is the scalar form.
  */
  324. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  325. {
  326. #if JUCE_USE_VDSP_FRAMEWORK
  327. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  328. #else
  329. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  330. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  331. #endif
  332. }
  333. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  334. {
  335. memcpy (dest, src, (size_t) num * sizeof (float));
  336. }
  337. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  338. {
  339. memcpy (dest, src, (size_t) num * sizeof (double));
  340. }
  /** Writes src[i] * multiplier into dest[i] for each of the num floats.

      Uses vDSP_vsmul when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the work is done by
      JUCE_PERFORM_VEC_OP_SRC_DEST: only src is loaded per step (JUCE_LOAD_SRC) and `mult`
      is the multiplier broadcast into a vector once before the loop.
  */
  341. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  342. {
  343. #if JUCE_USE_VDSP_FRAMEWORK
  344. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  345. #else
  346. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  347. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  348. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  349. #endif
  350. }
  /** Writes src[i] * multiplier into dest[i] for each of the num doubles.

      Uses vDSP_vsmulD when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the work is done by
      JUCE_PERFORM_VEC_OP_SRC_DEST: only src is loaded per step (JUCE_LOAD_SRC) and `mult`
      is the multiplier broadcast into a vector once before the loop.
  */
  351. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  352. {
  353. #if JUCE_USE_VDSP_FRAMEWORK
  354. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  355. #else
  356. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  357. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  358. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  359. #endif
  360. }
  /** Adds amount to each of the num floats at dest, in place.

      Done through JUCE_PERFORM_VEC_OP_DEST: dest is loaded per step (JUCE_LOAD_DEST) and
      `amountToAdd` is the amount broadcast into a vector once before the loop.
  */
  361. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  362. {
  363. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  364. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  365. }
  /** Adds amount to each of the num doubles at dest, in place.

      Done through JUCE_PERFORM_VEC_OP_DEST: dest is loaded per step (JUCE_LOAD_DEST) and
      `amountToAdd` is the amount broadcast into a vector once before the loop.
  */
  366. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  367. {
  368. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  369. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  370. }
  /** Adds src[i] to dest[i] for each of the num floats (dest[i] += src[i]).

      Uses vDSP_vadd when JUCE_USE_VDSP_FRAMEWORK is set, otherwise
      JUCE_PERFORM_VEC_OP_SRC_DEST with both arrays loaded per step.
  */
  371. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  372. {
  373. #if JUCE_USE_VDSP_FRAMEWORK
  374. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  375. #else
  376. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  377. #endif
  378. }
  /** Adds src[i] to dest[i] for each of the num doubles (dest[i] += src[i]).

      Uses vDSP_vaddD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise
      JUCE_PERFORM_VEC_OP_SRC_DEST with both arrays loaded per step.
  */
  379. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  380. {
  381. #if JUCE_USE_VDSP_FRAMEWORK
  382. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  383. #else
  384. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  385. #endif
  386. }
  /** Subtracts src[i] from dest[i] for each of the num floats (dest[i] -= src[i]).

      Uses vDSP_vsub when JUCE_USE_VDSP_FRAMEWORK is set, otherwise
      JUCE_PERFORM_VEC_OP_SRC_DEST with both arrays loaded per step.
  */
  387. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  388. {
  389. #if JUCE_USE_VDSP_FRAMEWORK
  390. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  391. #else
  392. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  393. #endif
  394. }
  /** Subtracts src[i] from dest[i] for each of the num doubles (dest[i] -= src[i]).

      Uses vDSP_vsubD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise
      JUCE_PERFORM_VEC_OP_SRC_DEST with both arrays loaded per step.
  */
  395. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  396. {
  397. #if JUCE_USE_VDSP_FRAMEWORK
  398. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  399. #else
  400. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  401. #endif
  402. }
  /** Adds src[i] * multiplier to dest[i] for each of the num floats.

      Done through JUCE_PERFORM_VEC_OP_SRC_DEST: both arrays are loaded per step and `mult`
      is the multiplier broadcast into a vector once before the loop.
  */
  403. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  404. {
  405. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  406. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  407. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  408. }
  /** Adds src[i] * multiplier to dest[i] for each of the num doubles.

      Done through JUCE_PERFORM_VEC_OP_SRC_DEST: both arrays are loaded per step and `mult`
      is the multiplier broadcast into a vector once before the loop.
  */
  409. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  410. {
  411. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  412. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  413. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  414. }
  /** Multiplies dest[i] by src[i] for each of the num floats (dest[i] *= src[i]).

      Uses vDSP_vmul when JUCE_USE_VDSP_FRAMEWORK is set, otherwise
      JUCE_PERFORM_VEC_OP_SRC_DEST with both arrays loaded per step.
  */
  415. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  416. {
  417. #if JUCE_USE_VDSP_FRAMEWORK
  418. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  419. #else
  420. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  421. #endif
  422. }
  /** Multiplies dest[i] by src[i] for each of the num doubles (dest[i] *= src[i]).

      Uses vDSP_vmulD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise
      JUCE_PERFORM_VEC_OP_SRC_DEST with both arrays loaded per step.
  */
  423. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  424. {
  425. #if JUCE_USE_VDSP_FRAMEWORK
  426. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  427. #else
  428. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  429. #endif
  430. }
  /** Multiplies each of the num floats at dest by multiplier, in place.

      Uses vDSP_vsmul (with dest as both input and output) when JUCE_USE_VDSP_FRAMEWORK is
      set. Otherwise the work is done by JUCE_PERFORM_VEC_OP_DEST, with `mult` being the
      multiplier broadcast into a vector once before the loop.
  */
  431. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  432. {
  433. #if JUCE_USE_VDSP_FRAMEWORK
  434. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  435. #else
  436. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  437. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  438. #endif
  439. }
  /** Multiplies each of the num doubles at dest by multiplier, in place.

      Uses vDSP_vsmulD (with dest as both input and output) when JUCE_USE_VDSP_FRAMEWORK is
      set. Otherwise the work is done by JUCE_PERFORM_VEC_OP_DEST, with `mult` being the
      multiplier broadcast into a vector once before the loop.
  */
  440. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  441. {
  442. #if JUCE_USE_VDSP_FRAMEWORK
  443. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  444. #else
  445. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  446. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  447. #endif
  448. }
  449. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  450. {
  451. #if JUCE_USE_VDSP_FRAMEWORK
  452. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  453. #else
  454. copyWithMultiply (dest, src, -1.0f, num);
  455. #endif
  456. }
  457. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  458. {
  459. #if JUCE_USE_VDSP_FRAMEWORK
  460. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  461. #else
  462. copyWithMultiply (dest, src, -1.0f, num);
  463. #endif
  464. }
  /** Converts num ints at src to floats scaled by multiplier: dest[i] = src[i] * multiplier.

      The NEON branch converts and scales four ints per step with NEON intrinsics. The other
      branch passes an SSE expression (unaligned integer load, int-to-float conversion,
      multiply by the broadcast `mult`) as the vector operation; in a build with neither SSE
      nor NEON the JUCE_PERFORM_VEC_OP_SRC_DEST macro discards that argument and only the
      scalar form is compiled. JUCE_LOAD_NONE is used because the vector expression loads
      src itself.
  */
  465. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  466. {
  467. #if JUCE_USE_ARM_NEON
  468. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  469. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  470. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  471. #else
  472. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  473. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  474. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  475. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  476. #endif
  477. }
  478. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  479. {
  480. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  481. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  482. #else
  483. return Range<float>::findMinAndMax (src, num);
  484. #endif
  485. }
  486. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  487. {
  488. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  489. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  490. #else
  491. return Range<double>::findMinAndMax (src, num);
  492. #endif
  493. }
  494. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  495. {
  496. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  497. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  498. #else
  499. return juce::findMinimum (src, num);
  500. #endif
  501. }
  502. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  503. {
  504. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  505. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  506. #else
  507. return juce::findMinimum (src, num);
  508. #endif
  509. }
  510. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  511. {
  512. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  513. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  514. #else
  515. return juce::findMaximum (src, num);
  516. #endif
  517. }
  518. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  519. {
  520. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  521. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  522. #else
  523. return juce::findMaximum (src, num);
  524. #endif
  525. }
  526. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  527. {
  528. #if JUCE_USE_SSE_INTRINSICS
  529. if (FloatVectorHelpers::isSSE2Available())
  530. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  531. #endif
  532. (void) shouldEnable;
  533. }
  534. //==============================================================================
  535. //==============================================================================
  536. #if JUCE_UNIT_TESTS
  537. class FloatVectorOperationsTests : public UnitTest
  538. {
  539. public:
  540. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  541. template <typename ValueType>
  542. struct TestRunner
  543. {
  544. static void runTest (UnitTest& u, Random random)
  545. {
  546. const int range = random.nextBool() ? 500 : 10;
  547. const int num = random.nextInt (range) + 1;
  548. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  549. HeapBlock<int> buffer3 ((size_t) num + 16);
  550. #if JUCE_ARM
  551. ValueType* const data1 = buffer1;
  552. ValueType* const data2 = buffer2;
  553. int* const int1 = buffer3;
  554. #else
  555. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  556. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  557. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  558. #endif
  559. fillRandomly (random, data1, num);
  560. fillRandomly (random, data2, num);
  561. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  562. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  563. u.expect (minMax1 == minMax2);
  564. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  565. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  566. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  567. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  568. FloatVectorOperations::clear (data1, num);
  569. u.expect (areAllValuesEqual (data1, num, 0));
  570. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  571. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  572. FloatVectorOperations::add (data1, (ValueType) 2, num);
  573. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  574. FloatVectorOperations::copy (data2, data1, num);
  575. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  576. FloatVectorOperations::add (data2, data1, num);
  577. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  578. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  579. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  580. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  581. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  582. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  583. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  584. FloatVectorOperations::multiply (data1, data2, num);
  585. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  586. FloatVectorOperations::negate (data2, data1, num);
  587. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  588. FloatVectorOperations::subtract (data1, data2, num);
  589. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  590. fillRandomly (random, int1, num);
  591. doConversionTest (u, data1, data2, int1, num);
  592. }
  593. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  594. {
  595. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  596. convertFixed (data2, int1, 2.0f, num);
  597. u.expect (buffersMatch (data1, data2, num));
  598. }
  599. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  600. static void fillRandomly (Random& random, ValueType* d, int num)
  601. {
  602. while (--num >= 0)
  603. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  604. }
  605. static void fillRandomly (Random& random, int* d, int num)
  606. {
  607. while (--num >= 0)
  608. *d++ = random.nextInt();
  609. }
  610. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  611. {
  612. while (--num >= 0)
  613. *d++ = *s++ * multiplier;
  614. }
  615. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  616. {
  617. while (--num >= 0)
  618. if (*d++ != target)
  619. return false;
  620. return true;
  621. }
  622. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  623. {
  624. while (--num >= 0)
  625. if (! valuesMatch (*d1++, *d2++))
  626. return false;
  627. return true;
  628. }
  629. static bool valuesMatch (ValueType v1, ValueType v2)
  630. {
  631. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  632. }
  633. };
  634. void runTest()
  635. {
  636. beginTest ("FloatVectorOperations");
  637. for (int i = 1000; --i >= 0;)
  638. {
  639. TestRunner<float>::runTest (*this, getRandom());
  640. TestRunner<double>::runTest (*this, getRandom());
  641. }
  642. }
  643. };
  644. static FloatVectorOperationsTests vectorOpTests;
  645. #endif