Audio plugin host https://kx.studio/carla
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

juce_FloatVectorOperations.cpp 37KB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893
  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance snippets passed as the `increment` step of the vector loops.
  // Every pointer named in a macro is moved forward by 16 / sizeof (*dest) elements.
  // Note that the step for src/src1/src2 is also derived from sizeof (*dest), not
  // from the source pointer's own element type.
  19. #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
  20. #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  21. #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. static bool sse2Present = false;
  24. static bool isSSE2Available() noexcept
  25. {
  26. if (sse2Present)
  27. return true;
  28. sse2Present = SystemStats::hasSSE2();
  29. return sse2Present;
  30. }
  31. inline static bool isAligned (const void* p) noexcept
  32. {
  33. return (((pointer_sized_int) p) & 15) == 0;
  34. }
  35. struct BasicOps32
  36. {
  37. typedef float Type;
  38. typedef __m128 ParallelType;
  39. enum { numParallel = 4 };
  40. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  41. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  42. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  43. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  44. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  45. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  46. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  47. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  48. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  49. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  50. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  51. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  52. };
  53. struct BasicOps64
  54. {
  55. typedef double Type;
  56. typedef __m128d ParallelType;
  57. enum { numParallel = 2 };
  58. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  59. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  60. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  61. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  62. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  63. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  64. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  65. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  66. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  67. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  68. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  69. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  70. };
  // SSE flavour of the operation-building macros. They expect the enclosing function
  // to have `dest` and `num` (plus `src` or `src1`/`src2` where relevant) in scope.
  //
  // JUCE_BEGIN_VEC_OP: selects Mode from sizeof (*dest) through ModeType and, when
  // isSSE2Available() is true, opens a block in which numLongOps is the number of
  // whole vectors contained in num. The block is deliberately left open; it is
  // closed by JUCE_FINISH_VEC_OP.
  71. #define JUCE_BEGIN_VEC_OP \
  72. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  73. if (FloatVectorHelpers::isSSE2Available()) \
  74. { \
  75. const int numLongOps = num / Mode::numParallel;
  // JUCE_FINISH_VEC_OP: still inside the block, masks num down to the leftover
  // element count and returns from the enclosing function if nothing is left (so
  // the macro only fits functions returning void). After the closing brace, the
  // scalar loop applies normalOp to the leftovers - or to all num elements when the
  // SSE2 block was not entered.
  // NOTE(review): the mask is applied to a signed int, so a negative num could
  // yield a positive remainder; presumably callers always pass num >= 0 - confirm.
  76. #define JUCE_FINISH_VEC_OP(normalOp) \
  77. num &= (Mode::numParallel - 1); \
  78. if (num == 0) return; \
  79. } \
  80. for (int i = 0; i < num; ++i) normalOp;
  // JUCE_PERFORM_VEC_OP_DEST: operation that touches dest only. setupOp is emitted
  // once before the loop; the loop uses the aligned load/store pair when dest passes
  // isAligned and the unaligned pair otherwise. `dummy` fills the source-loader slot,
  // which the `locals` macros used with this form do not expand.
  81. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  82. JUCE_BEGIN_VEC_OP \
  83. setupOp \
  84. if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
  85. else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  86. JUCE_FINISH_VEC_OP (normalOp)
  // JUCE_PERFORM_VEC_OP_SRC_DEST: operation reading src and writing dest. One of four
  // JUCE_VEC_LOOP expansions is chosen from the alignment of dest (aligned or
  // unaligned dest load/store) and of src (aligned or unaligned src load).
  87. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  88. JUCE_BEGIN_VEC_OP \
  89. setupOp \
  90. if (FloatVectorHelpers::isAligned (dest)) \
  91. { \
  92. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
  93. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
  94. }\
  95. else \
  96. { \
  97. if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
  98. else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  99. } \
  // JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST: operation reading src1 and src2 and writing
  // dest. Rather than expanding one loop per alignment combination, it binds function
  // references (loadSrc1, loadSrc2, storeDst) to the aligned or unaligned primitive
  // for each pointer and expands a single JUCE_VEC_LOOP_TWO_SOURCES.
  100. JUCE_FINISH_VEC_OP (normalOp)
  101. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  102. JUCE_BEGIN_VEC_OP \
  103. setupOp \
  104. { \
  105. Mode::ParallelType (&loadSrc1) (const Mode::Type* v) = FloatVectorHelpers::isAligned (src1) ? Mode::loadA : Mode::loadU; \
  106. Mode::ParallelType (&loadSrc2) (const Mode::Type* v) = FloatVectorHelpers::isAligned (src2) ? Mode::loadA : Mode::loadU; \
  107. void (&storeDst) (Mode::Type* dest, Mode::ParallelType a) = FloatVectorHelpers::isAligned (dest) ? Mode::storeA : Mode::storeU; \
  108. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, loadSrc1, loadSrc2, storeDst, locals, increment); \
  109. } \
  110. JUCE_FINISH_VEC_OP (normalOp)
  111. //==============================================================================
  112. #elif JUCE_USE_ARM_NEON
  113. struct BasicOps32
  114. {
  115. typedef float Type;
  116. typedef float32x4_t ParallelType;
  117. enum { numParallel = 4 };
  118. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  119. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  120. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  121. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  122. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  123. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  124. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  125. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  126. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  127. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  128. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  129. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  130. };
  131. struct BasicOps64
  132. {
  133. typedef double Type;
  134. typedef double ParallelType;
  135. enum { numParallel = 1 };
  136. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  137. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  138. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  139. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  140. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  141. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  142. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  143. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  144. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  145. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  146. static forcedinline Type max (ParallelType a) noexcept { return a; }
  147. static forcedinline Type min (ParallelType a) noexcept { return a; }
  148. };
  // ARM NEON flavour of the operation-building macros. They expect the enclosing
  // function to have `dest` and `num` (plus `src` or `src1`/`src2`) in scope.
  //
  // JUCE_BEGIN_VEC_OP: selects Mode from sizeof (*dest) through ModeType and opens a
  // block only when Mode::numParallel > 1, with numLongOps set to the number of whole
  // vectors in num. The block is left open and closed by JUCE_FINISH_VEC_OP.
  149. #define JUCE_BEGIN_VEC_OP \
  150. typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
  151. if (Mode::numParallel > 1) \
  152. { \
  153. const int numLongOps = num / Mode::numParallel;
  // JUCE_FINISH_VEC_OP: masks num down to the leftover element count, returns from
  // the enclosing (void) function if nothing is left, closes the block, then runs
  // normalOp in a scalar loop over whatever num now holds.
  // NOTE(review): the mask is applied to a signed int, so a negative num could
  // yield a positive remainder; presumably callers always pass num >= 0 - confirm.
  154. #define JUCE_FINISH_VEC_OP(normalOp) \
  155. num &= (Mode::numParallel - 1); \
  156. if (num == 0) return; \
  157. } \
  158. for (int i = 0; i < num; ++i) normalOp;
  // The three PERFORM macros below make no alignment distinction: each expands a
  // single loop using Mode::loadU / Mode::storeU. `dummy` fills the unused
  // source-loader slot in the dest-only form.
  159. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  160. JUCE_BEGIN_VEC_OP \
  161. setupOp \
  162. JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
  163. JUCE_FINISH_VEC_OP (normalOp)
  164. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  165. JUCE_BEGIN_VEC_OP \
  166. setupOp \
  167. JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  168. JUCE_FINISH_VEC_OP (normalOp)
  169. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  170. JUCE_BEGIN_VEC_OP \
  171. setupOp \
  172. JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
  173. JUCE_FINISH_VEC_OP (normalOp)
  174. //==============================================================================
  175. #else
  // Fallback used when neither the SSE nor the NEON branch is compiled: each macro
  // keeps the same parameter list as its SIMD counterpart but expands only to a
  // scalar loop running normalOp for i in [0, num). The vecOp, locals, increment and
  // setupOp arguments are discarded.
  176. #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
  177. for (int i = 0; i < num; ++i) normalOp;
  178. #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
  179. for (int i = 0; i < num; ++i) normalOp;
  180. #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
  181. for (int i = 0; i < num; ++i) normalOp;
  182. #endif
  183. //==============================================================================
  // JUCE_VEC_LOOP: the vector loop body shared by the PERFORM macros. It runs
  // numLongOps times; each pass expands `locals` with the chosen loaders, writes the
  // value of vecOp to dest through dstStore, then executes `increment`.
  184. #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
  185. for (int i = 0; i < numLongOps; ++i) \
  186. { \
  187. locals (srcLoad, dstLoad); \
  188. dstStore (dest, vecOp); \
  189. increment; \
  190. }
  // JUCE_VEC_LOOP_TWO_SOURCES: same loop shape, but `locals` receives the loaders
  // for the two source pointers instead of a source/dest pair.
  191. #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
  192. for (int i = 0; i < numLongOps; ++i) \
  193. { \
  194. locals (src1Load, src2Load); \
  195. dstStore (dest, vecOp); \
  196. increment; \
  197. }
  // `locals` choices. Each declares the per-iteration vectors that a vecOp expression
  // may name: d (loaded from dest), s (from src), s1/s2 (from src1/src2).
  // JUCE_LOAD_NONE declares nothing.
  198. #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  199. #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
  200. #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
  201. #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
  202. #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
  203. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  204. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  205. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  206. template <typename Mode>
  207. struct MinMax
  208. {
  209. typedef typename Mode::Type Type;
  210. typedef typename Mode::ParallelType ParallelType;
  211. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  212. {
  213. int numLongOps = num / Mode::numParallel;
  214. #if JUCE_USE_SSE_INTRINSICS
  215. if (numLongOps > 1 && isSSE2Available())
  216. #else
  217. if (numLongOps > 1)
  218. #endif
  219. {
  220. ParallelType val;
  221. #if ! JUCE_USE_ARM_NEON
  222. if (isAligned (src))
  223. {
  224. val = Mode::loadA (src);
  225. if (isMinimum)
  226. {
  227. while (--numLongOps > 0)
  228. {
  229. src += Mode::numParallel;
  230. val = Mode::min (val, Mode::loadA (src));
  231. }
  232. }
  233. else
  234. {
  235. while (--numLongOps > 0)
  236. {
  237. src += Mode::numParallel;
  238. val = Mode::max (val, Mode::loadA (src));
  239. }
  240. }
  241. }
  242. else
  243. #endif
  244. {
  245. val = Mode::loadU (src);
  246. if (isMinimum)
  247. {
  248. while (--numLongOps > 0)
  249. {
  250. src += Mode::numParallel;
  251. val = Mode::min (val, Mode::loadU (src));
  252. }
  253. }
  254. else
  255. {
  256. while (--numLongOps > 0)
  257. {
  258. src += Mode::numParallel;
  259. val = Mode::max (val, Mode::loadU (src));
  260. }
  261. }
  262. }
  263. Type result = isMinimum ? Mode::min (val)
  264. : Mode::max (val);
  265. num &= (Mode::numParallel - 1);
  266. src += Mode::numParallel;
  267. for (int i = 0; i < num; ++i)
  268. result = isMinimum ? jmin (result, src[i])
  269. : jmax (result, src[i]);
  270. return result;
  271. }
  272. return isMinimum ? juce::findMinimum (src, num)
  273. : juce::findMaximum (src, num);
  274. }
  275. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  276. {
  277. int numLongOps = num / Mode::numParallel;
  278. #if JUCE_USE_SSE_INTRINSICS
  279. if (numLongOps > 1 && isSSE2Available())
  280. #else
  281. if (numLongOps > 1)
  282. #endif
  283. {
  284. ParallelType mn, mx;
  285. #if ! JUCE_USE_ARM_NEON
  286. if (isAligned (src))
  287. {
  288. mn = Mode::loadA (src);
  289. mx = mn;
  290. while (--numLongOps > 0)
  291. {
  292. src += Mode::numParallel;
  293. const ParallelType v = Mode::loadA (src);
  294. mn = Mode::min (mn, v);
  295. mx = Mode::max (mx, v);
  296. }
  297. }
  298. else
  299. #endif
  300. {
  301. mn = Mode::loadU (src);
  302. mx = mn;
  303. while (--numLongOps > 0)
  304. {
  305. src += Mode::numParallel;
  306. const ParallelType v = Mode::loadU (src);
  307. mn = Mode::min (mn, v);
  308. mx = Mode::max (mx, v);
  309. }
  310. }
  311. Range<Type> result (Mode::min (mn),
  312. Mode::max (mx));
  313. num &= (Mode::numParallel - 1);
  314. src += Mode::numParallel;
  315. for (int i = 0; i < num; ++i)
  316. result = result.getUnionWith (src[i]);
  317. return result;
  318. }
  319. return Range<Type>::findMinAndMax (src, num);
  320. }
  321. };
  322. #endif
  323. }
  324. //==============================================================================
  325. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  326. {
  327. #if JUCE_USE_VDSP_FRAMEWORK
  328. vDSP_vclr (dest, 1, (size_t) num);
  329. #else
  330. zeromem (dest, num * sizeof (float));
  331. #endif
  332. }
  333. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  334. {
  335. #if JUCE_USE_VDSP_FRAMEWORK
  336. vDSP_vclrD (dest, 1, (size_t) num);
  337. #else
  338. zeromem (dest, num * sizeof (double));
  339. #endif
  340. }
  341. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  342. {
  343. #if JUCE_USE_VDSP_FRAMEWORK
  344. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  345. #else
  346. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  347. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  348. #endif
  349. }
  350. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  351. {
  352. #if JUCE_USE_VDSP_FRAMEWORK
  353. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  354. #else
  355. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  356. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  357. #endif
  358. }
  359. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  360. {
  361. memcpy (dest, src, (size_t) num * sizeof (float));
  362. }
  363. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  364. {
  365. memcpy (dest, src, (size_t) num * sizeof (double));
  366. }
  367. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  368. {
  369. #if JUCE_USE_VDSP_FRAMEWORK
  370. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  371. #else
  372. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  373. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  374. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  375. #endif
  376. }
  377. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  378. {
  379. #if JUCE_USE_VDSP_FRAMEWORK
  380. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  381. #else
  382. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  383. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  384. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  385. #endif
  386. }
  387. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  388. {
  389. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  390. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  391. }
  392. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  393. {
  394. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  395. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  396. }
  397. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float* src, float amount, int num) noexcept
  398. {
  399. #if JUCE_USE_VDSP_FRAMEWORK
  400. vDSP_vsadd (src, 1, &amount, dest, 1, (vDSP_Length) num);
  401. #else
  402. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  403. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  404. const Mode::ParallelType am = Mode::load1 (amount);)
  405. #endif
  406. }
  407. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double* src, double amount, int num) noexcept
  408. {
  409. #if JUCE_USE_VDSP_FRAMEWORK
  410. vDSP_vsaddD (src, 1, &amount, dest, 1, (vDSP_Length) num);
  411. #else
  412. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  413. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  414. const Mode::ParallelType am = Mode::load1 (amount);)
  415. #endif
  416. }
  417. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  418. {
  419. #if JUCE_USE_VDSP_FRAMEWORK
  420. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  421. #else
  422. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  423. #endif
  424. }
  425. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  426. {
  427. #if JUCE_USE_VDSP_FRAMEWORK
  428. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  429. #else
  430. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  431. #endif
  432. }
  433. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  434. {
  435. #if JUCE_USE_VDSP_FRAMEWORK
  436. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  437. #else
  438. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  439. #endif
  440. }
  441. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  442. {
  443. #if JUCE_USE_VDSP_FRAMEWORK
  444. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  445. #else
  446. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  447. #endif
  448. }
  449. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  450. {
  451. #if JUCE_USE_VDSP_FRAMEWORK
  452. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  453. #else
  454. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  455. #endif
  456. }
  457. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  458. {
  459. #if JUCE_USE_VDSP_FRAMEWORK
  460. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  461. #else
  462. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  463. #endif
  464. }
  465. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  466. {
  467. #if JUCE_USE_VDSP_FRAMEWORK
  468. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  469. #else
  470. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  471. #endif
  472. }
  473. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  474. {
  475. #if JUCE_USE_VDSP_FRAMEWORK
  476. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  477. #else
  478. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  479. #endif
  480. }
  481. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  482. {
  483. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  484. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  485. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  486. }
  487. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  488. {
  489. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  490. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  491. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  492. }
  493. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  494. {
  495. #if JUCE_USE_VDSP_FRAMEWORK
  496. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  497. #else
  498. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  499. #endif
  500. }
  501. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  502. {
  503. #if JUCE_USE_VDSP_FRAMEWORK
  504. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  505. #else
  506. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  507. #endif
  508. }
  509. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  510. {
  511. #if JUCE_USE_VDSP_FRAMEWORK
  512. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  513. #else
  514. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  515. #endif
  516. }
  517. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  518. {
  519. #if JUCE_USE_VDSP_FRAMEWORK
  520. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  521. #else
  522. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  523. #endif
  524. }
  525. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  526. {
  527. #if JUCE_USE_VDSP_FRAMEWORK
  528. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  529. #else
  530. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  531. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  532. #endif
  533. }
  534. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  535. {
  536. #if JUCE_USE_VDSP_FRAMEWORK
  537. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  538. #else
  539. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  540. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  541. #endif
  542. }
  543. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  544. {
  545. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  546. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  547. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  548. }
  549. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  550. {
  551. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  552. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  553. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  554. }
  555. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  556. {
  557. #if JUCE_USE_VDSP_FRAMEWORK
  558. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  559. #else
  560. copyWithMultiply (dest, src, -1.0f, num);
  561. #endif
  562. }
  563. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  564. {
  565. #if JUCE_USE_VDSP_FRAMEWORK
  566. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  567. #else
  568. copyWithMultiply (dest, src, -1.0f, num);
  569. #endif
  570. }
  571. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  572. {
  573. #if JUCE_USE_ARM_NEON
  574. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  575. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  576. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  577. #else
  578. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  579. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  580. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  581. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  582. #endif
  583. }
  584. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  585. {
  586. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  587. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  588. #else
  589. return Range<float>::findMinAndMax (src, num);
  590. #endif
  591. }
  592. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  593. {
  594. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  595. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  596. #else
  597. return Range<double>::findMinAndMax (src, num);
  598. #endif
  599. }
  600. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  601. {
  602. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  603. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  604. #else
  605. return juce::findMinimum (src, num);
  606. #endif
  607. }
  608. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  609. {
  610. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  611. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  612. #else
  613. return juce::findMinimum (src, num);
  614. #endif
  615. }
  616. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  617. {
  618. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  619. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  620. #else
  621. return juce::findMaximum (src, num);
  622. #endif
  623. }
  624. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  625. {
  626. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  627. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  628. #else
  629. return juce::findMaximum (src, num);
  630. #endif
  631. }
  632. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  633. {
  634. #if JUCE_USE_SSE_INTRINSICS
  635. if (FloatVectorHelpers::isSSE2Available())
  636. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  637. #endif
  638. (void) shouldEnable;
  639. }
  640. //==============================================================================
  641. //==============================================================================
  642. #if JUCE_UNIT_TESTS
  643. class FloatVectorOperationsTests : public UnitTest
  644. {
  645. public:
  646. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  647. template <typename ValueType>
  648. struct TestRunner
  649. {
  650. static void runTest (UnitTest& u, Random random)
  651. {
  652. const int range = random.nextBool() ? 500 : 10;
  653. const int num = random.nextInt (range) + 1;
  654. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  655. HeapBlock<int> buffer3 ((size_t) num + 16);
  656. #if JUCE_ARM
  657. ValueType* const data1 = buffer1;
  658. ValueType* const data2 = buffer2;
  659. int* const int1 = buffer3;
  660. #else
  661. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  662. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  663. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  664. #endif
  665. fillRandomly (random, data1, num);
  666. fillRandomly (random, data2, num);
  667. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  668. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  669. u.expect (minMax1 == minMax2);
  670. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  671. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  672. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  673. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  674. FloatVectorOperations::clear (data1, num);
  675. u.expect (areAllValuesEqual (data1, num, 0));
  676. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  677. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  678. FloatVectorOperations::add (data1, (ValueType) 2, num);
  679. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  680. FloatVectorOperations::copy (data2, data1, num);
  681. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  682. FloatVectorOperations::add (data2, data1, num);
  683. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  684. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  685. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  686. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  687. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  688. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  689. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  690. FloatVectorOperations::multiply (data1, data2, num);
  691. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  692. FloatVectorOperations::negate (data2, data1, num);
  693. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  694. FloatVectorOperations::subtract (data1, data2, num);
  695. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  696. fillRandomly (random, int1, num);
  697. doConversionTest (u, data1, data2, int1, num);
  698. }
  699. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  700. {
  701. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  702. convertFixed (data2, int1, 2.0f, num);
  703. u.expect (buffersMatch (data1, data2, num));
  704. }
  705. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  706. static void fillRandomly (Random& random, ValueType* d, int num)
  707. {
  708. while (--num >= 0)
  709. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  710. }
  711. static void fillRandomly (Random& random, int* d, int num)
  712. {
  713. while (--num >= 0)
  714. *d++ = random.nextInt();
  715. }
  716. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  717. {
  718. while (--num >= 0)
  719. *d++ = *s++ * multiplier;
  720. }
  721. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  722. {
  723. while (--num >= 0)
  724. if (*d++ != target)
  725. return false;
  726. return true;
  727. }
  728. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  729. {
  730. while (--num >= 0)
  731. if (! valuesMatch (*d1++, *d2++))
  732. return false;
  733. return true;
  734. }
  735. static bool valuesMatch (ValueType v1, ValueType v2)
  736. {
  737. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  738. }
  739. };
  740. void runTest()
  741. {
  742. beginTest ("FloatVectorOperations");
  743. for (int i = 1000; --i >= 0;)
  744. {
  745. TestRunner<float>::runTest (*this, getRandom());
  746. TestRunner<double>::runTest (*this, getRandom());
  747. }
  748. }
  749. };
  750. static FloatVectorOperationsTests vectorOpTests;
  751. #endif