Audio plugin host https://kx.studio/carla
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

juce_FloatVectorOperations.cpp 52KB

9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
9 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
9 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2015 - ROLI Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  /* Pointer-advance steps used as the `increment` argument of the vector loops.
     Every pointer moves forward by the number of elements that fit in 16 bytes,
     always measured with the element type that `dest` points at. */
  #define JUCE_INCREMENT_SRC_DEST \
      dest += (16 / sizeof (*dest)); \
      src  += (16 / sizeof (*dest));

  #define JUCE_INCREMENT_SRC1_SRC2_DEST \
      dest += (16 / sizeof (*dest)); \
      src1 += (16 / sizeof (*dest)); \
      src2 += (16 / sizeof (*dest));

  #define JUCE_INCREMENT_DEST \
      dest += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. static bool sse2Present = false;
  24. static bool isSSE2Available() noexcept
  25. {
  26. if (sse2Present)
  27. return true;
  28. sse2Present = SystemStats::hasSSE2();
  29. return sse2Present;
  30. }
  31. inline static bool isAligned (const void* p) noexcept
  32. {
  33. return (((pointer_sized_int) p) & 15) == 0;
  34. }
  35. struct BasicOps32
  36. {
  37. typedef float Type;
  38. typedef __m128 ParallelType;
  39. typedef __m128 IntegerType;
  40. enum { numParallel = 4 };
  41. // Integer and parallel types are the same for SSE. On neon they have different types
  42. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  43. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  44. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  45. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  46. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  47. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  48. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  49. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  50. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  51. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  52. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  53. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  54. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  55. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  56. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  57. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  58. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  59. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  60. };
  61. struct BasicOps64
  62. {
  63. typedef double Type;
  64. typedef __m128d ParallelType;
  65. typedef __m128d IntegerType;
  66. enum { numParallel = 2 };
  67. // Integer and parallel types are the same for SSE. On neon they have different types
  68. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  69. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  70. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  71. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  72. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  73. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  74. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  75. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  76. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  77. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  78. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  79. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  80. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  81. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  82. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  83. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  84. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  85. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  86. };
  /* Opening half of a vectorised operation (SSE build).
     Picks Mode from the element size of `dest`, then opens a block that only runs
     when SSE2 is available; numLongOps is the number of whole vectors in `num`.
     The block is closed by JUCE_FINISH_VEC_OP, so the two must be used as a pair. */
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      if (FloatVectorHelpers::isSSE2Available()) \
      { \
          const int numLongOps = num / Mode::numParallel;

  /* Closing half: reduces `num` to the left-over element count and returns from the
     enclosing function when nothing is left. The trailing scalar loop runs scalarOp
     for the left-overs, or for every element when the SSE2 block was skipped. */
  #define JUCE_FINISH_VEC_OP(scalarOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) scalarOp;
  /* Runs an operation that only touches `dest` (SSE build).
     The aligned load/store pair is used when `dest` passes isAligned(), the unaligned
     pair otherwise. `dummy` fills the unused source-load slot of JUCE_VEC_LOOP. */
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorOp, loadLocals, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      if (FloatVectorHelpers::isAligned (dest)) \
          JUCE_VEC_LOOP (vectorOp, dummy, Mode::loadA, Mode::storeA, loadLocals, JUCE_INCREMENT_DEST) \
      else \
          JUCE_VEC_LOOP (vectorOp, dummy, Mode::loadU, Mode::storeU, loadLocals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (scalarOp)
  /* Runs an operation that reads `src` and writes `dest` (SSE build).
     One of four loops is chosen so that each pointer gets the aligned or unaligned
     access functions according to its own isAligned() result. */
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
              JUCE_VEC_LOOP (vectorOp, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, step) \
          else \
              JUCE_VEC_LOOP (vectorOp, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, step) \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
              JUCE_VEC_LOOP (vectorOp, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, step) \
          else \
              JUCE_VEC_LOOP (vectorOp, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, step) \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  /* Runs an operation that reads `src1` and `src2` and writes `dest` (SSE build).
     One of eight loops is chosen so that each of the three pointers gets the aligned
     or unaligned access function according to its own isAligned() result. */
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, step) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadA, Mode::loadU, Mode::storeA, loadLocals, step) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, step) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadU, Mode::loadU, Mode::storeA, loadLocals, step) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadA, Mode::loadA, Mode::storeU, loadLocals, step) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, step) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadU, Mode::loadA, Mode::storeU, loadLocals, step) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, step) \
          } \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  /* Runs an operation that reads `src1`, `src2` and the current contents of `dest`,
     then writes `dest` (SSE build). One of eight loops is chosen from the alignment
     of the three pointers; the load and the store of `dest` always use the same
     alignment variant. */
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, step) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, step) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, step) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, step) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, step) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, step) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, step) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, step) \
          } \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  177. //==============================================================================
  178. #elif JUCE_USE_ARM_NEON
  179. struct BasicOps32
  180. {
  181. typedef float Type;
  182. typedef float32x4_t ParallelType;
  183. typedef uint32x4_t IntegerType;
  184. enum { numParallel = 4 };
  185. static forcedinline IntegerType toint (ParallelType v) noexcept { union { ParallelType f; IntegerType i; } u; u.f = v; return u.i; }
  186. static forcedinline ParallelType toflt (IntegerType v) noexcept { union { ParallelType f; IntegerType i; } u; u.i = v; return u.f; }
  187. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  188. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  189. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  190. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  191. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  192. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  193. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  194. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  195. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  196. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  197. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  198. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  199. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  200. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  201. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  202. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  203. };
  204. struct BasicOps64
  205. {
  206. typedef double Type;
  207. typedef double ParallelType;
  208. typedef uint64 IntegerType;
  209. enum { numParallel = 1 };
  210. static forcedinline IntegerType toint (ParallelType v) noexcept { union { ParallelType f; IntegerType i; } u; u.f = v; return u.i; }
  211. static forcedinline ParallelType toflt (IntegerType v) noexcept { union { ParallelType f; IntegerType i; } u; u.i = v; return u.f; }
  212. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  213. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  214. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  215. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  216. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  217. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  218. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  219. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  220. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  221. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  222. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  223. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  224. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  225. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  226. static forcedinline Type max (ParallelType a) noexcept { return a; }
  227. static forcedinline Type min (ParallelType a) noexcept { return a; }
  228. };
  /* Opening half of a vectorised operation (NEON build).
     Picks Mode from the element size of `dest`, then opens a block that only runs
     when Mode handles more than one element at a time; numLongOps is the number of
     whole vectors in `num`. The block is closed by JUCE_FINISH_VEC_OP. */
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      if (Mode::numParallel > 1) \
      { \
          const int numLongOps = num / Mode::numParallel;

  /* Closing half: reduces `num` to the left-over element count and returns from the
     enclosing function when nothing is left. The trailing scalar loop runs scalarOp
     for the left-overs, or for every element when the vector block was skipped. */
  #define JUCE_FINISH_VEC_OP(scalarOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) scalarOp;
  /* NEON versions of the JUCE_PERFORM_VEC_OP_* family. No alignment dispatch is done
     here: every variant passes the U load/store functions to its loop. */

  /* Operation that only touches `dest`; `dummy` fills the unused source-load slot. */
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorOp, loadLocals, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      JUCE_VEC_LOOP (vectorOp, dummy, Mode::loadU, Mode::storeU, loadLocals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (scalarOp)

  /* Operation that reads `src` and writes `dest`. */
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      JUCE_VEC_LOOP (vectorOp, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, step) \
      JUCE_FINISH_VEC_OP (scalarOp)

  /* Operation that reads `src1` and `src2` and writes `dest`. */
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      JUCE_VEC_LOOP_TWO_SOURCES (vectorOp, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, step) \
      JUCE_FINISH_VEC_OP (scalarOp)

  /* Operation that reads `src1`, `src2` and `dest`, then writes `dest`. */
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      JUCE_BEGIN_VEC_OP \
      setup \
      JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, step) \
      JUCE_FINISH_VEC_OP (scalarOp)
  259. //==============================================================================
  260. #else
  /* Fallback versions of the JUCE_PERFORM_VEC_OP_* family for builds with neither SSE
     nor NEON: each one is a plain loop that runs scalarOp for i = 0 .. num - 1.
     All other arguments are accepted and ignored. */
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorOp, loadLocals, setup) \
      for (int i = 0; i < num; ++i) \
          scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      for (int i = 0; i < num; ++i) \
          scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      for (int i = 0; i < num; ++i) \
          scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorOp, loadLocals, step, setup) \
      for (int i = 0; i < num; ++i) \
          scalarOp;
  269. #endif
  270. //==============================================================================
  /* Loop bodies shared by the JUCE_PERFORM_VEC_OP_* macros. Each one iterates
     numLongOps times; per iteration it expands `loadLocals` with the given load
     functions, stores the value of `vectorOp` to `dest` with `storeFn`, and then
     runs `step`. They differ only in how many load functions reach `loadLocals`. */

  /* One source load function plus one destination load function. */
  #define JUCE_VEC_LOOP(vectorOp, srcLoadFn, dstLoadFn, storeFn, loadLocals, step) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          loadLocals (srcLoadFn, dstLoadFn); \
          storeFn (dest, vectorOp); \
          step; \
      }

  /* Two source load functions. */
  #define JUCE_VEC_LOOP_TWO_SOURCES(vectorOp, src1LoadFn, src2LoadFn, storeFn, loadLocals, step) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          loadLocals (src1LoadFn, src2LoadFn); \
          storeFn (dest, vectorOp); \
          step; \
      }

  /* Two source load functions plus one destination load function. */
  #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vectorOp, src1LoadFn, src2LoadFn, dstLoadFn, storeFn, loadLocals, step) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          loadLocals (src1LoadFn, src2LoadFn, dstLoadFn); \
          storeFn (dest, vectorOp); \
          step; \
      }
  /* `locals` arguments for the vector loops. Each declares the const Mode::ParallelType
     values the vector expression may refer to: `d` is loaded from `dest`, `s` from
     `src`, and `s1` / `s2` from `src1` / `src2`, using the load functions passed in. */

  /* Declares nothing. */
  #define JUCE_LOAD_NONE(srcLoad, dstLoad)

  #define JUCE_LOAD_DEST(srcLoad, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest);

  #define JUCE_LOAD_SRC(srcLoad, dstLoad) \
      const Mode::ParallelType s = srcLoad (src);

  #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) \
      const Mode::ParallelType s1 = src1Load (src1); \
      const Mode::ParallelType s2 = src2Load (src2);

  #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest); \
      const Mode::ParallelType s1 = src1Load (src1); \
      const Mode::ParallelType s2 = src2Load (src2);

  #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest); \
      const Mode::ParallelType s = srcLoad (src);
  298. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  299. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  300. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  301. template <typename Mode>
  302. struct MinMax
  303. {
  304. typedef typename Mode::Type Type;
  305. typedef typename Mode::ParallelType ParallelType;
  306. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  307. {
  308. int numLongOps = num / Mode::numParallel;
  309. #if JUCE_USE_SSE_INTRINSICS
  310. if (numLongOps > 1 && isSSE2Available())
  311. #else
  312. if (numLongOps > 1)
  313. #endif
  314. {
  315. ParallelType val;
  316. #if ! JUCE_USE_ARM_NEON
  317. if (isAligned (src))
  318. {
  319. val = Mode::loadA (src);
  320. if (isMinimum)
  321. {
  322. while (--numLongOps > 0)
  323. {
  324. src += Mode::numParallel;
  325. val = Mode::min (val, Mode::loadA (src));
  326. }
  327. }
  328. else
  329. {
  330. while (--numLongOps > 0)
  331. {
  332. src += Mode::numParallel;
  333. val = Mode::max (val, Mode::loadA (src));
  334. }
  335. }
  336. }
  337. else
  338. #endif
  339. {
  340. val = Mode::loadU (src);
  341. if (isMinimum)
  342. {
  343. while (--numLongOps > 0)
  344. {
  345. src += Mode::numParallel;
  346. val = Mode::min (val, Mode::loadU (src));
  347. }
  348. }
  349. else
  350. {
  351. while (--numLongOps > 0)
  352. {
  353. src += Mode::numParallel;
  354. val = Mode::max (val, Mode::loadU (src));
  355. }
  356. }
  357. }
  358. Type result = isMinimum ? Mode::min (val)
  359. : Mode::max (val);
  360. num &= (Mode::numParallel - 1);
  361. src += Mode::numParallel;
  362. for (int i = 0; i < num; ++i)
  363. result = isMinimum ? jmin (result, src[i])
  364. : jmax (result, src[i]);
  365. return result;
  366. }
  367. return isMinimum ? juce::findMinimum (src, num)
  368. : juce::findMaximum (src, num);
  369. }
  370. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  371. {
  372. int numLongOps = num / Mode::numParallel;
  373. #if JUCE_USE_SSE_INTRINSICS
  374. if (numLongOps > 1 && isSSE2Available())
  375. #else
  376. if (numLongOps > 1)
  377. #endif
  378. {
  379. ParallelType mn, mx;
  380. #if ! JUCE_USE_ARM_NEON
  381. if (isAligned (src))
  382. {
  383. mn = Mode::loadA (src);
  384. mx = mn;
  385. while (--numLongOps > 0)
  386. {
  387. src += Mode::numParallel;
  388. const ParallelType v = Mode::loadA (src);
  389. mn = Mode::min (mn, v);
  390. mx = Mode::max (mx, v);
  391. }
  392. }
  393. else
  394. #endif
  395. {
  396. mn = Mode::loadU (src);
  397. mx = mn;
  398. while (--numLongOps > 0)
  399. {
  400. src += Mode::numParallel;
  401. const ParallelType v = Mode::loadU (src);
  402. mn = Mode::min (mn, v);
  403. mx = Mode::max (mx, v);
  404. }
  405. }
  406. Range<Type> result (Mode::min (mn),
  407. Mode::max (mx));
  408. num &= (Mode::numParallel - 1);
  409. src += Mode::numParallel;
  410. for (int i = 0; i < num; ++i)
  411. result = result.getUnionWith (src[i]);
  412. return result;
  413. }
  414. return Range<Type>::findMinAndMax (src, num);
  415. }
  416. };
  417. #endif
  418. }
  419. //==============================================================================
  420. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  421. {
  422. #if JUCE_USE_VDSP_FRAMEWORK
  423. vDSP_vclr (dest, 1, (size_t) num);
  424. #else
  425. zeromem (dest, (size_t) num * sizeof (float));
  426. #endif
  427. }
  428. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  429. {
  430. #if JUCE_USE_VDSP_FRAMEWORK
  431. vDSP_vclrD (dest, 1, (size_t) num);
  432. #else
  433. zeromem (dest, (size_t) num * sizeof (double));
  434. #endif
  435. }
  436. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  437. {
  438. #if JUCE_USE_VDSP_FRAMEWORK
  439. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  440. #else
  441. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  442. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  443. #endif
  444. }
  445. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  446. {
  447. #if JUCE_USE_VDSP_FRAMEWORK
  448. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  449. #else
  450. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  451. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  452. #endif
  453. }
  454. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  455. {
  456. memcpy (dest, src, (size_t) num * sizeof (float));
  457. }
  458. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  459. {
  460. memcpy (dest, src, (size_t) num * sizeof (double));
  461. }
  462. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  463. {
  464. #if JUCE_USE_VDSP_FRAMEWORK
  465. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  466. #else
  467. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  468. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  469. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  470. #endif
  471. }
  472. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  473. {
  474. #if JUCE_USE_VDSP_FRAMEWORK
  475. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  476. #else
  477. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  478. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  479. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  480. #endif
  481. }
  482. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  483. {
  484. #if JUCE_USE_VDSP_FRAMEWORK
  485. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  486. #else
  487. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  488. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  489. #endif
  490. }
  491. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  492. {
  493. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  494. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  495. }
  496. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float* src, float amount, int num) noexcept
  497. {
  498. #if JUCE_USE_VDSP_FRAMEWORK
  499. vDSP_vsadd (src, 1, &amount, dest, 1, (vDSP_Length) num);
  500. #else
  501. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  502. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  503. const Mode::ParallelType am = Mode::load1 (amount);)
  504. #endif
  505. }
  506. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double* src, double amount, int num) noexcept
  507. {
  508. #if JUCE_USE_VDSP_FRAMEWORK
  509. vDSP_vsaddD (src, 1, &amount, dest, 1, (vDSP_Length) num);
  510. #else
  511. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  512. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  513. const Mode::ParallelType am = Mode::load1 (amount);)
  514. #endif
  515. }
  516. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  517. {
  518. #if JUCE_USE_VDSP_FRAMEWORK
  519. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  520. #else
  521. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  522. #endif
  523. }
  524. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  525. {
  526. #if JUCE_USE_VDSP_FRAMEWORK
  527. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  528. #else
  529. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  530. #endif
  531. }
  532. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  533. {
  534. #if JUCE_USE_VDSP_FRAMEWORK
  535. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  536. #else
  537. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  538. #endif
  539. }
  540. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  541. {
  542. #if JUCE_USE_VDSP_FRAMEWORK
  543. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  544. #else
  545. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  546. #endif
  547. }
  548. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  549. {
  550. #if JUCE_USE_VDSP_FRAMEWORK
  551. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  552. #else
  553. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  554. #endif
  555. }
  556. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  557. {
  558. #if JUCE_USE_VDSP_FRAMEWORK
  559. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  560. #else
  561. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  562. #endif
  563. }
  564. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  565. {
  566. #if JUCE_USE_VDSP_FRAMEWORK
  567. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  568. #else
  569. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  570. #endif
  571. }
  /** Element-wise double difference of two arrays: dest[i] = src1[i] - src2[i].
      Uses vDSP_vsubD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
      JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined outside this section). */
  572. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  573. {
  574. #if JUCE_USE_VDSP_FRAMEWORK
  // src2 is passed ahead of src1. NOTE(review): presumably vDSP_vsubD subtracts its first
  // operand from its second - confirm against the vDSP reference.
  575. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  576. #else
  577. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  578. #endif
  579. }
  /** Scaled accumulate for floats: dest[i] += src[i] * multiplier.
      Uses vDSP_vsma when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the
      JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section) is used; its last
      argument declares `mult`, built once from the scalar multiplier via Mode::load1, and
      the vector expression is Mode::add (d, Mode::mul (mult, s)). */
  580. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  581. {
  582. #if JUCE_USE_VDSP_FRAMEWORK
  583. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  584. #else
  585. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  586. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  587. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  588. #endif
  589. }
  /** Scaled accumulate for doubles: dest[i] += src[i] * multiplier.
      Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section);
      `mult` is declared by the macro's last argument from the scalar multiplier via Mode::load1.
      NOTE(review): unlike the float overload, this one has no JUCE_USE_VDSP_FRAMEWORK branch. */
  590. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  591. {
  592. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  593. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  594. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  595. }
  /** Multiply-accumulate of two float arrays: dest[i] += src1[i] * src2[i].
      Uses vDSP_vma when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
      JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro (defined outside this section) with
      Mode::add (d, Mode::mul (s1, s2)). */
  596. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  597. {
  598. #if JUCE_USE_VDSP_FRAMEWORK
  // The casts drop const from the inputs; presumably the vDSP_vma prototype in the targeted SDK
  // takes non-const pointers - confirm.
  599. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  600. #else
  601. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  602. JUCE_LOAD_SRC1_SRC2_DEST,
  603. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  604. #endif
  605. }
  /** Multiply-accumulate of two double arrays: dest[i] += src1[i] * src2[i].
      Uses vDSP_vmaD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
      JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro (defined outside this section) with
      Mode::add (d, Mode::mul (s1, s2)). */
  606. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  607. {
  608. #if JUCE_USE_VDSP_FRAMEWORK
  // The casts drop const from the inputs; presumably required by the vDSP_vmaD prototype in the
  // targeted SDK - confirm.
  609. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  610. #else
  611. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  612. JUCE_LOAD_SRC1_SRC2_DEST,
  613. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  614. #endif
  615. }
  /** In-place element-wise float product: dest[i] *= src[i].
      Uses vDSP_vmul (writing back into dest) when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
      JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section) with Mode::mul (d, s). */
  616. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  617. {
  618. #if JUCE_USE_VDSP_FRAMEWORK
  619. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  620. #else
  621. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  622. #endif
  623. }
  /** In-place element-wise double product: dest[i] *= src[i].
      Uses vDSP_vmulD (writing back into dest) when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
      JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section) with Mode::mul (d, s). */
  624. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  625. {
  626. #if JUCE_USE_VDSP_FRAMEWORK
  627. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  628. #else
  629. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  630. #endif
  631. }
  /** Element-wise float product of two arrays: dest[i] = src1[i] * src2[i].
      Uses vDSP_vmul when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
      JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined outside this section) with Mode::mul (s1, s2). */
  632. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  633. {
  634. #if JUCE_USE_VDSP_FRAMEWORK
  635. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  636. #else
  637. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  638. #endif
  639. }
  /** Element-wise double product of two arrays: dest[i] = src1[i] * src2[i].
      Uses vDSP_vmulD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
      JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined outside this section) with Mode::mul (s1, s2). */
  640. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  641. {
  642. #if JUCE_USE_VDSP_FRAMEWORK
  643. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  644. #else
  645. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  646. #endif
  647. }
  /** Scales a float array in place by a constant: dest[i] *= multiplier.
      Uses vDSP_vsmul with dest as both input and output when JUCE_USE_VDSP_FRAMEWORK is set.
      Otherwise the JUCE_PERFORM_VEC_OP_DEST macro (defined outside this section) is used, with
      `mult` declared by its last argument from the scalar via Mode::load1. */
  648. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  649. {
  650. #if JUCE_USE_VDSP_FRAMEWORK
  651. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  652. #else
  653. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  654. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  655. #endif
  656. }
  /** Scales a double array in place by a constant: dest[i] *= multiplier.
      Uses vDSP_vsmulD with dest as both input and output when JUCE_USE_VDSP_FRAMEWORK is set.
      Otherwise the JUCE_PERFORM_VEC_OP_DEST macro (defined outside this section) is used, with
      `mult` declared by its last argument from the scalar via Mode::load1. */
  657. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  658. {
  659. #if JUCE_USE_VDSP_FRAMEWORK
  660. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  661. #else
  662. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  663. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  664. #endif
  665. }
  /** Writes a scaled copy of a float array: dest[i] = src[i] * multiplier.
      Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section);
      only the source is loaded (JUCE_LOAD_SRC) and `mult` is declared by the macro's last
      argument from the scalar via Mode::load1. There is no vDSP branch in this overload. */
  666. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  667. {
  668. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  669. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  670. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  671. }
  /** Writes a scaled copy of a double array: dest[i] = src[i] * multiplier.
      Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section);
      only the source is loaded (JUCE_LOAD_SRC) and `mult` is declared by the macro's last
      argument from the scalar via Mode::load1. There is no vDSP branch in this overload. */
  672. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  673. {
  674. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  675. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  676. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  677. }
  /** Writes the negation of each of the num floats in src to dest.
      Uses vDSP_vneg when JUCE_USE_VDSP_FRAMEWORK is set; otherwise it is expressed as
      copyWithMultiply with a multiplier of -1.0f (copyWithMultiply is defined outside this section). */
  678. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  679. {
  680. #if JUCE_USE_VDSP_FRAMEWORK
  // The cast drops const from src; presumably required by the vDSP_vneg prototype - confirm.
  681. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  682. #else
  683. copyWithMultiply (dest, src, -1.0f, num);
  684. #endif
  685. }
  /** Writes the negation of each of the num doubles in src to dest.
      Uses vDSP_vnegD when JUCE_USE_VDSP_FRAMEWORK is set; otherwise it is expressed as
      copyWithMultiply with a multiplier of -1.0 (copyWithMultiply is defined outside this section).
      The multiplier is written as a double literal so that the double overload is called
      without relying on an implicit float-to-double promotion. */
  686. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  687. {
  688. #if JUCE_USE_VDSP_FRAMEWORK
  // The cast drops const from src; presumably required by the vDSP_vnegD prototype - confirm.
  689. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  690. #else
  691. copyWithMultiply (dest, src, -1.0, num);
  692. #endif
  693. }
  /** Writes the absolute value of each float in src to dest: dest[i] = fabsf (src[i]).
      Uses vDSP_vabs when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the
      JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section) is given a
      Mode::bit_and of the source with a constant mask. */
  694. void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
  695. {
  696. #if JUCE_USE_VDSP_FRAMEWORK
  697. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  698. #else
  // The union reinterprets an integer bit pattern as a float. 0x7fffffff has every bit set
  // except the top one, so AND-ing with it clears that bit (the sign bit, assuming 32-bit IEEE-754 floats).
  699. union {float f; uint32 i;} signMask;
  700. signMask.i = 0x7fffffffUL;
  701. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabsf (src[i]), Mode::bit_and (s, mask),
  702. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  703. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  704. #endif
  705. }
  /** Writes the absolute value of each double in src to dest: dest[i] = fabs (src[i]).
      Uses vDSP_vabsD when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the
      JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section) is given a
      Mode::bit_and of the source with a constant mask. */
  706. void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
  707. {
  708. #if JUCE_USE_VDSP_FRAMEWORK
  709. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  710. #else
  // The union reinterprets a 64-bit integer pattern as a double. 0x7fffffffffffffff has every bit
  // set except the top one, so AND-ing with it clears that bit (the sign bit, assuming 64-bit IEEE-754 doubles).
  711. union {double d; uint64 i;} signMask;
  712. signMask.i = 0x7fffffffffffffffULL;
  713. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabs (src[i]), Mode::bit_and (s, mask),
  714. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  715. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  716. #endif
  717. }
  /** Converts integers to floats and scales them: dest[i] = src[i] * multiplier.
      Both branches use the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section)
      with JUCE_LOAD_NONE, because the vector expression loads directly from src itself.
      The NEON branch uses vld1q_s32 / vcvtq_f32_s32 / vmulq_n_f32; the other branch uses
      _mm_loadu_si128 / _mm_cvtepi32_ps and Mode::mul with a broadcast multiplier. */
  718. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  719. {
  720. #if JUCE_USE_ARM_NEON
  721. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  722. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  723. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  724. #else
  // NOTE(review): this branch names SSE intrinsics without checking JUCE_USE_SSE_INTRINSICS;
  // presumably the macro discards the vector expression when SSE is unavailable - confirm.
  725. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  726. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  727. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  728. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  729. #endif
  730. }
  /** Caps each float at a constant: dest[i] = jmin (src[i], comp).
      Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section);
      `cmp` is declared by the macro's last argument from comp via Mode::load1 and combined
      with the source using Mode::min. */
  731. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
  732. {
  733. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  734. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  735. const Mode::ParallelType cmp = Mode::load1 (comp);)
  736. }
  /** Caps each double at a constant: dest[i] = jmin (src[i], comp).
      Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section);
      `cmp` is declared by the macro's last argument from comp via Mode::load1 and combined
      with the source using Mode::min. */
  737. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
  738. {
  739. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  740. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  741. const Mode::ParallelType cmp = Mode::load1 (comp);)
  742. }
  /** Element-wise minimum of two float arrays: dest[i] = jmin (src1[i], src2[i]).
      Uses vDSP_vmin when JUCE_USE_VDSP_FRAMEWORK is set (the casts drop const from the inputs),
      otherwise the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined outside this section)
      with Mode::min (s1, s2). */
  743. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
  744. {
  745. #if JUCE_USE_VDSP_FRAMEWORK
  746. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  747. #else
  748. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  749. #endif
  750. }
  /** Element-wise minimum of two double arrays: dest[i] = jmin (src1[i], src2[i]).
      Uses vDSP_vminD when JUCE_USE_VDSP_FRAMEWORK is set (the casts drop const from the inputs),
      otherwise the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined outside this section)
      with Mode::min (s1, s2). */
  751. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
  752. {
  753. #if JUCE_USE_VDSP_FRAMEWORK
  754. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  755. #else
  756. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  757. #endif
  758. }
  /** Raises each float to at least a constant: dest[i] = jmax (src[i], comp).
      Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section);
      `cmp` is declared by the macro's last argument from comp via Mode::load1 and combined
      with the source using Mode::max. */
  759. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
  760. {
  761. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  762. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  763. const Mode::ParallelType cmp = Mode::load1 (comp);)
  764. }
  /** Raises each double to at least a constant: dest[i] = jmax (src[i], comp).
      Always goes through the JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section);
      `cmp` is declared by the macro's last argument from comp via Mode::load1 and combined
      with the source using Mode::max. */
  765. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
  766. {
  767. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  768. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  769. const Mode::ParallelType cmp = Mode::load1 (comp);)
  770. }
  /** Element-wise maximum of two float arrays: dest[i] = jmax (src1[i], src2[i]).
      Uses vDSP_vmax when JUCE_USE_VDSP_FRAMEWORK is set (the casts drop const from the inputs),
      otherwise the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined outside this section)
      with Mode::max (s1, s2). */
  771. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
  772. {
  773. #if JUCE_USE_VDSP_FRAMEWORK
  774. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  775. #else
  776. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  777. #endif
  778. }
  /** Element-wise maximum of two double arrays: dest[i] = jmax (src1[i], src2[i]).
      Uses vDSP_vmaxD when JUCE_USE_VDSP_FRAMEWORK is set (the casts drop const from the inputs),
      otherwise the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro (defined outside this section)
      with Mode::max (s1, s2). */
  779. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
  780. {
  781. #if JUCE_USE_VDSP_FRAMEWORK
  782. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  783. #else
  784. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  785. #endif
  786. }
  /** Limits each float to the range [low, high]: dest[i] = jmax (jmin (src[i], high), low).
      The caller is expected to pass high >= low (checked with jassert).
      Uses vDSP_vclip when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the
      JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section) is used, with `lo` and
      `hi` declared by its last argument from the two bounds via Mode::load1. */
  787. void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
  788. {
  789. jassert(high >= low);
  790. #if JUCE_USE_VDSP_FRAMEWORK
  791. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  792. #else
  // The upper bound is applied first and the lower bound second, in both the scalar and vector forms.
  793. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  794. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  795. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  796. #endif
  797. }
  /** Limits each double to the range [low, high]: dest[i] = jmax (jmin (src[i], high), low).
      The caller is expected to pass high >= low (checked with jassert).
      Uses vDSP_vclipD when JUCE_USE_VDSP_FRAMEWORK is set. Otherwise the
      JUCE_PERFORM_VEC_OP_SRC_DEST macro (defined outside this section) is used, with `lo` and
      `hi` declared by its last argument from the two bounds via Mode::load1. */
  798. void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
  799. {
  800. jassert(high >= low);
  801. #if JUCE_USE_VDSP_FRAMEWORK
  802. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  803. #else
  // The upper bound is applied first and the lower bound second, in both the scalar and vector forms.
  804. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  805. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  806. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  807. #endif
  808. }
  /** Returns the minimum and maximum of num floats at src as a Range<float>.
      With SSE or NEON enabled this forwards to FloatVectorHelpers::MinMax<BasicOps32>
      (defined outside this section); otherwise to Range<float>::findMinAndMax. */
  809. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  810. {
  811. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  812. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  813. #else
  814. return Range<float>::findMinAndMax (src, num);
  815. #endif
  816. }
  /** Returns the minimum and maximum of num doubles at src as a Range<double>.
      With SSE or NEON enabled this forwards to FloatVectorHelpers::MinMax<BasicOps64>
      (defined outside this section); otherwise to Range<double>::findMinAndMax. */
  817. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  818. {
  819. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  820. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  821. #else
  822. return Range<double>::findMinAndMax (src, num);
  823. #endif
  824. }
  /** Returns the smallest of num floats at src.
      With SSE or NEON enabled this calls FloatVectorHelpers::MinMax<BasicOps32>::findMinOrMax
      with its flag set to true (the flag selects the minimum here; the helper is defined
      outside this section); otherwise it calls juce::findMinimum. */
  825. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  826. {
  827. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  828. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  829. #else
  830. return juce::findMinimum (src, num);
  831. #endif
  832. }
  /** Returns the smallest of num doubles at src.
      With SSE or NEON enabled this calls FloatVectorHelpers::MinMax<BasicOps64>::findMinOrMax
      with its flag set to true (the flag selects the minimum here; the helper is defined
      outside this section); otherwise it calls juce::findMinimum. */
  833. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  834. {
  835. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  836. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  837. #else
  838. return juce::findMinimum (src, num);
  839. #endif
  840. }
  /** Returns the largest of num floats at src.
      With SSE or NEON enabled this calls FloatVectorHelpers::MinMax<BasicOps32>::findMinOrMax
      with its flag set to false (the flag selects the maximum here; the helper is defined
      outside this section); otherwise it calls juce::findMaximum. */
  841. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  842. {
  843. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  844. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  845. #else
  846. return juce::findMaximum (src, num);
  847. #endif
  848. }
  /** Returns the largest of num doubles at src.
      With SSE or NEON enabled this calls FloatVectorHelpers::MinMax<BasicOps64>::findMinOrMax
      with its flag set to false (the flag selects the maximum here; the helper is defined
      outside this section); otherwise it calls juce::findMaximum. */
  849. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  850. {
  851. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  852. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  853. #else
  854. return juce::findMaximum (src, num);
  855. #endif
  856. }
  /** Switches the SSE flush-to-zero mode on or off.
      Only has an effect when JUCE_USE_SSE_INTRINSICS is set and
      FloatVectorHelpers::isSSE2Available() returns true; in every other case the call does nothing. */
  857. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  858. {
  859. #if JUCE_USE_SSE_INTRINSICS
  860. if (FloatVectorHelpers::isSSE2Available())
  861. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  862. #endif
  // Keeps the parameter referenced when the SSE branch above is compiled out.
  863. (void) shouldEnable;
  864. }
  865. //==============================================================================
  866. //==============================================================================
  867. #if JUCE_UNIT_TESTS
  /** Unit test that checks FloatVectorOperations against scalar reference code and
      against hand-computed constants, for both float and double. */
  868. class FloatVectorOperationsTests : public UnitTest
  869. {
  870. public:
  871. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}
  /** Holds the test sequence and its helpers for one sample type;
      instantiated for float and double in runTest() further down. */
  872. template <typename ValueType>
  873. struct TestRunner
  874. {
  /** Runs one pass of all checks, reporting through u.
      random is taken by value, so this function draws from its own copy of the generator. */
  875. static void runTest (UnitTest& u, Random random)
  876. {
  // Pick either a large (500) or a small (10) upper bound, then a length of at least 1.
  // NOTE(review): assumes nextInt (range) yields 0 .. range-1 - confirm.
  877. const int range = random.nextBool() ? 500 : 10;
  878. const int num = random.nextInt (range) + 1;
  // 16 spare elements per buffer leave room for the pointer offsets applied below.
  879. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  880. HeapBlock<int> buffer3 ((size_t) num + 16);
  // On ARM the buffers are used as allocated; elsewhere each pointer is shifted by a random
  // number of bytes (presumably to exercise unaligned code paths - confirm).
  881. #if JUCE_ARM
  882. ValueType* const data1 = buffer1;
  883. ValueType* const data2 = buffer2;
  884. int* const int1 = buffer3;
  885. #else
  886. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  887. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  888. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  889. #endif
  // Min/max searches on random data are compared with the scalar reference implementations.
  890. fillRandomly (random, data1, num);
  891. fillRandomly (random, data2, num);
  892. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  893. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  894. u.expect (minMax1 == minMax2);
  895. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  896. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  897. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  898. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  // From here on each step builds on the previous one, so the order of the calls matters.
  899. FloatVectorOperations::clear (data1, num);
  900. u.expect (areAllValuesEqual (data1, num, 0));
  901. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  902. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  // data1: 2 + 2 = 4
  903. FloatVectorOperations::add (data1, (ValueType) 2, num);
  904. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  905. FloatVectorOperations::copy (data2, data1, num);
  906. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  // data2: 4 + 4 = 8
  907. FloatVectorOperations::add (data2, data1, num);
  908. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  // data2: 4 * 4 = 16
  909. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  910. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  // data2: 16 + 4 * 4 = 32
  911. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  912. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  // data1: 4 * 2 = 8
  913. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  914. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  // data1: 8 * 32 = 256
  915. FloatVectorOperations::multiply (data1, data2, num);
  916. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  917. FloatVectorOperations::negate (data2, data1, num);
  918. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  // data1: 256 - (-256) = 512
  919. FloatVectorOperations::subtract (data1, data2, num);
  920. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  // abs of -256 into data1, then abs of +256 back into data2.
  921. FloatVectorOperations::abs (data1, data2, num);
  922. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  923. FloatVectorOperations::abs (data2, data1, num);
  924. u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
  // Integer-to-float conversion; overload resolution makes this a no-op for double.
  925. fillRandomly (random, int1, num);
  926. doConversionTest (u, data1, data2, int1, num);
  // dest also acts as the first source here: 2 + 2 * 3 = 8
  927. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  928. FloatVectorOperations::fill (data2, (ValueType) 3, num);
  929. FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
  930. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  931. }
  /** Float case: compares convertFixedToFloat (multiplier 2.0f) with the scalar convertFixed helper. */
  932. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  933. {
  934. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  935. convertFixed (data2, int1, 2.0f, num);
  936. u.expect (buffersMatch (data1, data2, num));
  937. }
  /** Double case: intentionally empty, so no conversion check is run for double buffers. */
  938. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  /** Fills num values at d with random.nextDouble() * 1000.0 converted to ValueType. */
  939. static void fillRandomly (Random& random, ValueType* d, int num)
  940. {
  941. while (--num >= 0)
  942. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  943. }
  /** Fills num ints at d with values from random.nextInt(). */
  944. static void fillRandomly (Random& random, int* d, int num)
  945. {
  946. while (--num >= 0)
  947. *d++ = random.nextInt();
  948. }
  /** Scalar reference for the conversion test: d[i] = s[i] * multiplier. */
  949. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  950. {
  951. while (--num >= 0)
  952. *d++ = *s++ * multiplier;
  953. }
  /** Returns true only if every one of the num values at d compares exactly equal to target. */
  954. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  955. {
  956. while (--num >= 0)
  957. if (*d++ != target)
  958. return false;
  959. return true;
  960. }
  /** Returns true only if each pair of corresponding values in d1 and d2 satisfies valuesMatch. */
  961. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  962. {
  963. while (--num >= 0)
  964. if (! valuesMatch (*d1++, *d2++))
  965. return false;
  966. return true;
  967. }
  /** True when the absolute difference of v1 and v2 is below numeric_limits<ValueType>::epsilon(). */
  968. static bool valuesMatch (ValueType v1, ValueType v2)
  969. {
  970. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  971. }
  972. };
  /** Runs the float and double test sequences 1000 times each under a single test heading. */
  973. void runTest() override
  974. {
  975. beginTest ("FloatVectorOperations");
  976. for (int i = 1000; --i >= 0;)
  977. {
  978. TestRunner<float>::runTest (*this, getRandom());
  979. TestRunner<double>::runTest (*this, getRandom());
  980. }
  981. }
  982. };
  // File-scope instance of the test class.
  // NOTE(review): presumably constructing a UnitTest registers it with the test framework - confirm.
  983. static FloatVectorOperationsTests vectorOpTests;
  984. #endif