The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1203 lines
54KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2017 - ROLI Ltd.
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. The code included in this file is provided under the terms of the ISC license
  8. http://www.isc.org/downloads/software-support-policy/isc-license. Permission
  9. To use, copy, modify, and/or distribute this software for any purpose with or
  10. without fee is hereby granted provided that the above copyright notice and
  11. this permission notice appear in all copies.
  12. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  13. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  14. DISCLAIMED.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance helpers used as the `increment` step of the vector loops.
  // Each one moves the named pointers forward by 16 / sizeof (*dest) elements,
  // i.e. by 16 bytes' worth of dest-sized elements. The element size is always
  // taken from dest, including for the source pointers.
  #define JUCE_INCREMENT_SRC_DEST \
      dest += (16 / sizeof (*dest)); \
      src += (16 / sizeof (*dest));

  #define JUCE_INCREMENT_SRC1_SRC2_DEST \
      dest += (16 / sizeof (*dest)); \
      src1 += (16 / sizeof (*dest)); \
      src2 += (16 / sizeof (*dest));

  #define JUCE_INCREMENT_DEST \
      dest += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. inline static bool isAligned (const void* p) noexcept
  24. {
  25. return (((pointer_sized_int) p) & 15) == 0;
  26. }
  27. struct BasicOps32
  28. {
  29. typedef float Type;
  30. typedef __m128 ParallelType;
  31. typedef __m128 IntegerType;
  32. enum { numParallel = 4 };
  33. // Integer and parallel types are the same for SSE. On neon they have different types
  34. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  35. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  36. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  37. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  38. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  39. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  40. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  41. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  42. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  43. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  44. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  45. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  46. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  47. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  48. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  49. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  50. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  51. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  52. };
  53. struct BasicOps64
  54. {
  55. typedef double Type;
  56. typedef __m128d ParallelType;
  57. typedef __m128d IntegerType;
  58. enum { numParallel = 2 };
  59. // Integer and parallel types are the same for SSE. On neon they have different types
  60. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  61. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  62. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  63. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  64. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  65. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  66. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  67. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  68. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  69. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  70. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  71. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  72. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  73. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  74. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  75. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  76. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  77. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  78. };
  // JUCE_BEGIN_VEC_OP and JUCE_FINISH_VEC_OP bracket a vectorised operation and
  // must always be used as a pair: BEGIN opens a scope that FINISH closes.
  //
  // BEGIN picks Mode from the element size of dest and computes numLongOps, the
  // number of whole vectors contained in num.
  //
  // FINISH masks num down to the leftover element count, returns from the
  // enclosing function when nothing is left, and otherwise evaluates normalOp
  // once per leftover element with the index variable i.
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      { \
          const int numLongOps = num / Mode::numParallel;

  #define JUCE_FINISH_VEC_OP(normalOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) normalOp;
  // In-place operation on dest only. Runs setupOp, then the vector loop with
  // aligned loads/stores when dest is 16-byte aligned and unaligned ones
  // otherwise, then the scalar tail from JUCE_FINISH_VEC_OP. No source pointer
  // is involved, so the srcLoad slot of JUCE_VEC_LOOP gets the placeholder
  // token `dummy`.
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
          JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
      else \
          JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (normalOp)
  // One-source operation writing to dest. After setupOp, one of four vector
  // loops is chosen from the 16-byte alignment of dest and src, so each pointer
  // uses the aligned load/store only when it is itself aligned. The scalar tail
  // follows via JUCE_FINISH_VEC_OP.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
              JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
          else \
              JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
              JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
          else \
              JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      } \
      JUCE_FINISH_VEC_OP (normalOp)
  // Two-source operation writing to dest (dest itself is not loaded). After
  // setupOp, one of eight vector loops is chosen from the 16-byte alignment of
  // dest, src1 and src2; the argument order passed to the loop is
  // (src1 load, src2 load, dest store). The scalar tail follows via
  // JUCE_FINISH_VEC_OP.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeA, locals, increment) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeA, locals, increment) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeU, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeU, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
          } \
      } \
      JUCE_FINISH_VEC_OP (normalOp)
  // Two-source operation that also reads dest (e.g. accumulate into dest).
  // After setupOp, one of eight vector loops is chosen from the 16-byte
  // alignment of dest, src1 and src2; the argument order passed to the loop is
  // (src1 load, src2 load, dest load, dest store), with the dest load and store
  // always sharing the same alignment flavour. The scalar tail follows via
  // JUCE_FINISH_VEC_OP.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2)) \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
              else \
                  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
          } \
      } \
      JUCE_FINISH_VEC_OP (normalOp)
  168. //==============================================================================
  169. #elif JUCE_USE_ARM_NEON
  170. struct BasicOps32
  171. {
  172. typedef float Type;
  173. typedef float32x4_t ParallelType;
  174. typedef uint32x4_t IntegerType;
  175. union signMaskUnion { ParallelType f; IntegerType i; };
  176. enum { numParallel = 4 };
  177. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  178. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  179. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  180. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  181. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  182. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  183. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  184. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  185. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  186. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  187. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  188. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  189. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  190. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  191. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  192. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  193. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  194. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  195. };
  196. struct BasicOps64
  197. {
  198. typedef double Type;
  199. typedef double ParallelType;
  200. typedef uint64 IntegerType;
  201. union signMaskUnion { ParallelType f; IntegerType i; };
  202. enum { numParallel = 1 };
  203. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  204. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  205. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  206. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  207. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  208. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  209. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  210. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  211. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  212. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  213. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  214. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  215. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  216. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  217. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  218. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  219. static forcedinline Type max (ParallelType a) noexcept { return a; }
  220. static forcedinline Type min (ParallelType a) noexcept { return a; }
  221. };
  // NEON versions of the paired BEGIN / FINISH macros. BEGIN opens a scope
  // that FINISH closes, so they must always be used together.
  //
  // The vector section only runs when Mode::numParallel is greater than 1;
  // for a single-lane Mode it is skipped entirely and the trailing scalar loop
  // in FINISH processes all num elements. Inside the vector section FINISH
  // masks num down to the leftover count and returns early when it is zero.
  #define JUCE_BEGIN_VEC_OP \
      typedef FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode Mode; \
      if (Mode::numParallel > 1) \
      { \
          const int numLongOps = num / Mode::numParallel;

  #define JUCE_FINISH_VEC_OP(normalOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) normalOp;
  // NEON versions of the JUCE_PERFORM_VEC_OP_* family. Unlike the SSE versions
  // there is no alignment dispatch: each macro runs setupOp, a single vector
  // loop using the U load/store helpers, and then the scalar tail.

  // In-place operation on dest only (`dummy` fills the unused srcLoad slot).
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (normalOp)

  // One source, writing to dest.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      JUCE_FINISH_VEC_OP (normalOp)

  // Two sources, writing to dest.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      JUCE_FINISH_VEC_OP (normalOp)

  // Two sources plus a load of dest, writing back to dest.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
      JUCE_BEGIN_VEC_OP \
      setupOp \
      JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
      JUCE_FINISH_VEC_OP (normalOp)
  252. //==============================================================================
  253. #else
  // Scalar fallbacks used when neither SSE nor NEON is enabled. Every variant
  // ignores its vector-related arguments and evaluates normalOp once for each
  // index i in [0, num).
  #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp)                          for (int i = 0; i < num; ++i) normalOp;
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp)           for (int i = 0; i < num; ++i) normalOp;
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp)     for (int i = 0; i < num; ++i) normalOp;
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) for (int i = 0; i < num; ++i) normalOp;
  262. #endif
  263. //==============================================================================
  // Vector loop for operations with at most one source. Runs numLongOps
  // iterations; each iteration expands `locals (srcLoad, dstLoad)`, stores
  // vecOp to dest through dstStore, then performs `increment`. The names
  // numLongOps and dest must be in scope at the point of use.
  #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (srcLoad, dstLoad); \
          dstStore (dest, vecOp); \
          increment; \
      }
  // Vector loop for operations with two sources. Runs numLongOps iterations;
  // each iteration expands `locals (src1Load, src2Load)`, stores vecOp to dest
  // through dstStore, then performs `increment`. The names numLongOps and dest
  // must be in scope at the point of use.
  #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (src1Load, src2Load); \
          dstStore (dest, vecOp); \
          increment; \
      }
  // Vector loop for operations with two sources that also read dest. Runs
  // numLongOps iterations; each iteration expands
  // `locals (src1Load, src2Load, dstLoad)`, stores vecOp to dest through
  // dstStore, then performs `increment`. The names numLongOps and dest must be
  // in scope at the point of use.
  #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (src1Load, src2Load, dstLoad); \
          dstStore (dest, vecOp); \
          increment; \
      }
  // `locals` helpers for the vector loops. Each declares the const vector
  // variables that the accompanying vecOp expression refers to:
  //   d      - loaded from dest
  //   s      - loaded from src
  //   s1, s2 - loaded from src1 and src2
  // JUCE_LOAD_NONE declares nothing.
  #define JUCE_LOAD_NONE(srcLoad, dstLoad)

  #define JUCE_LOAD_DEST(srcLoad, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest);

  #define JUCE_LOAD_SRC(srcLoad, dstLoad) \
      const Mode::ParallelType s = srcLoad (src);

  #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) \
      const Mode::ParallelType s1 = src1Load (src1); \
      const Mode::ParallelType s2 = src2Load (src2);

  #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest); \
      const Mode::ParallelType s1 = src1Load (src1); \
      const Mode::ParallelType s2 = src2Load (src2);

  #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) \
      const Mode::ParallelType d = dstLoad (dest); \
      const Mode::ParallelType s = srcLoad (src);
  291. union signMask32 { float f; uint32 i; };
  292. union signMask64 { double d; uint64 i; };
  293. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  294. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  295. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  296. template <typename Mode>
  297. struct MinMax
  298. {
  299. typedef typename Mode::Type Type;
  300. typedef typename Mode::ParallelType ParallelType;
  301. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  302. {
  303. int numLongOps = num / Mode::numParallel;
  304. if (numLongOps > 1)
  305. {
  306. ParallelType val;
  307. #if ! JUCE_USE_ARM_NEON
  308. if (isAligned (src))
  309. {
  310. val = Mode::loadA (src);
  311. if (isMinimum)
  312. {
  313. while (--numLongOps > 0)
  314. {
  315. src += Mode::numParallel;
  316. val = Mode::min (val, Mode::loadA (src));
  317. }
  318. }
  319. else
  320. {
  321. while (--numLongOps > 0)
  322. {
  323. src += Mode::numParallel;
  324. val = Mode::max (val, Mode::loadA (src));
  325. }
  326. }
  327. }
  328. else
  329. #endif
  330. {
  331. val = Mode::loadU (src);
  332. if (isMinimum)
  333. {
  334. while (--numLongOps > 0)
  335. {
  336. src += Mode::numParallel;
  337. val = Mode::min (val, Mode::loadU (src));
  338. }
  339. }
  340. else
  341. {
  342. while (--numLongOps > 0)
  343. {
  344. src += Mode::numParallel;
  345. val = Mode::max (val, Mode::loadU (src));
  346. }
  347. }
  348. }
  349. Type result = isMinimum ? Mode::min (val)
  350. : Mode::max (val);
  351. num &= (Mode::numParallel - 1);
  352. src += Mode::numParallel;
  353. for (int i = 0; i < num; ++i)
  354. result = isMinimum ? jmin (result, src[i])
  355. : jmax (result, src[i]);
  356. return result;
  357. }
  358. return isMinimum ? juce::findMinimum (src, num)
  359. : juce::findMaximum (src, num);
  360. }
  361. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  362. {
  363. int numLongOps = num / Mode::numParallel;
  364. if (numLongOps > 1)
  365. {
  366. ParallelType mn, mx;
  367. #if ! JUCE_USE_ARM_NEON
  368. if (isAligned (src))
  369. {
  370. mn = Mode::loadA (src);
  371. mx = mn;
  372. while (--numLongOps > 0)
  373. {
  374. src += Mode::numParallel;
  375. const ParallelType v = Mode::loadA (src);
  376. mn = Mode::min (mn, v);
  377. mx = Mode::max (mx, v);
  378. }
  379. }
  380. else
  381. #endif
  382. {
  383. mn = Mode::loadU (src);
  384. mx = mn;
  385. while (--numLongOps > 0)
  386. {
  387. src += Mode::numParallel;
  388. const ParallelType v = Mode::loadU (src);
  389. mn = Mode::min (mn, v);
  390. mx = Mode::max (mx, v);
  391. }
  392. }
  393. Range<Type> result (Mode::min (mn),
  394. Mode::max (mx));
  395. num &= (Mode::numParallel - 1);
  396. src += Mode::numParallel;
  397. for (int i = 0; i < num; ++i)
  398. result = result.getUnionWith (src[i]);
  399. return result;
  400. }
  401. return Range<Type>::findMinAndMax (src, num);
  402. }
  403. };
  404. #endif
  405. }
  406. //==============================================================================
  namespace
  {
     #if JUCE_USE_VDSP_FRAMEWORK
      /** Returns arg with its const qualifier removed.

          The vDSP function signatures differ slightly in the OSX 10.8 SDK and
          below, which is why const has to be cast away at some call sites. This
          helper can be removed once those SDKs are obsolete.
      */
      template <typename ValueType>
      ValueType* osx108sdkCompatibilityCast (const ValueType* arg) noexcept
      {
          return const_cast<ValueType*> (arg);
      }
     #endif
  }
  416. //==============================================================================
  417. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  418. {
  419. #if JUCE_USE_VDSP_FRAMEWORK
  420. vDSP_vclr (dest, 1, (size_t) num);
  421. #else
  422. zeromem (dest, (size_t) num * sizeof (float));
  423. #endif
  424. }
  425. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  426. {
  427. #if JUCE_USE_VDSP_FRAMEWORK
  428. vDSP_vclrD (dest, 1, (size_t) num);
  429. #else
  430. zeromem (dest, (size_t) num * sizeof (double));
  431. #endif
  432. }
  433. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  434. {
  435. #if JUCE_USE_VDSP_FRAMEWORK
  436. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  437. #else
  438. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  439. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  440. #endif
  441. }
  442. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  443. {
  444. #if JUCE_USE_VDSP_FRAMEWORK
  445. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  446. #else
  447. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  448. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  449. #endif
  450. }
  451. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  452. {
  453. memcpy (dest, src, (size_t) num * sizeof (float));
  454. }
  455. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  456. {
  457. memcpy (dest, src, (size_t) num * sizeof (double));
  458. }
  459. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  460. {
  461. #if JUCE_USE_VDSP_FRAMEWORK
  462. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  463. #else
  464. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  465. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  466. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  467. #endif
  468. }
  469. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  470. {
  471. #if JUCE_USE_VDSP_FRAMEWORK
  472. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  473. #else
  474. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  475. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  476. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  477. #endif
  478. }
  479. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  480. {
  481. #if JUCE_USE_VDSP_FRAMEWORK
  482. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  483. #else
  484. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  485. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  486. #endif
  487. }
  488. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  489. {
  490. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  491. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  492. }
  493. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, float amount, int num) noexcept
  494. {
  495. #if JUCE_USE_VDSP_FRAMEWORK
  496. vDSP_vsadd (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  497. #else
  498. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  499. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  500. const Mode::ParallelType am = Mode::load1 (amount);)
  501. #endif
  502. }
  503. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, double amount, int num) noexcept
  504. {
  505. #if JUCE_USE_VDSP_FRAMEWORK
  506. vDSP_vsaddD (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  507. #else
  508. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  509. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  510. const Mode::ParallelType am = Mode::load1 (amount);)
  511. #endif
  512. }
  513. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  514. {
  515. #if JUCE_USE_VDSP_FRAMEWORK
  516. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  517. #else
  518. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  519. #endif
  520. }
  521. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  522. {
  523. #if JUCE_USE_VDSP_FRAMEWORK
  524. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  525. #else
  526. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  527. #endif
  528. }
  529. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  530. {
  531. #if JUCE_USE_VDSP_FRAMEWORK
  532. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  533. #else
  534. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  535. #endif
  536. }
  537. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  538. {
  539. #if JUCE_USE_VDSP_FRAMEWORK
  540. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  541. #else
  542. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  543. #endif
  544. }
  545. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  546. {
  547. #if JUCE_USE_VDSP_FRAMEWORK
  548. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  549. #else
  550. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  551. #endif
  552. }
  553. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  554. {
  555. #if JUCE_USE_VDSP_FRAMEWORK
  556. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  557. #else
  558. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  559. #endif
  560. }
  561. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  562. {
  563. #if JUCE_USE_VDSP_FRAMEWORK
  564. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  565. #else
  566. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  567. #endif
  568. }
  569. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  570. {
  571. #if JUCE_USE_VDSP_FRAMEWORK
  572. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  573. #else
  574. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  575. #endif
  576. }
  577. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  578. {
  579. #if JUCE_USE_VDSP_FRAMEWORK
  580. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  581. #else
  582. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  583. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  584. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  585. #endif
  586. }
  587. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  588. {
  589. #if JUCE_USE_VDSP_FRAMEWORK
  590. vDSP_vsmaD (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  591. #else
  592. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  593. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  594. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  595. #endif
  596. }
  597. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  598. {
  599. #if JUCE_USE_VDSP_FRAMEWORK
  600. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  601. #else
  602. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  603. JUCE_LOAD_SRC1_SRC2_DEST,
  604. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  605. #endif
  606. }
  // Element-wise multiply-accumulate of two source buffers (double version).
  // Per the scalar expression: dest[i] = dest[i] + src1[i] * src2[i].
  // The vDSP branch calls vDSP_vmaD (const is cast away from src1/src2 for that
  // call); the fallback uses JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST.
  607. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  608. {
  609. #if JUCE_USE_VDSP_FRAMEWORK
  610. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  611. #else
  612. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  613. JUCE_LOAD_SRC1_SRC2_DEST,
  614. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  615. #endif
  616. }
  // Subtracts a scaled copy of src from dest (float version).
  // Per the scalar expression: dest[i] = dest[i] - src[i] * multiplier.
  // There is no vDSP branch here; the macro path is always used, with `mult`
  // prepared from the multiplier by Mode::load1.
  617. void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  618. {
  619. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier, Mode::sub (d, Mode::mul (mult, s)),
  620. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  621. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  622. }
  // Subtracts a scaled copy of src from dest (double version).
  // Per the scalar expression: dest[i] = dest[i] - src[i] * multiplier.
  // Always uses the JUCE_PERFORM_VEC_OP_SRC_DEST macro path (no vDSP branch).
  623. void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  624. {
  625. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier, Mode::sub (d, Mode::mul (mult, s)),
  626. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  627. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  628. }
  // Subtracts the element-wise product of two buffers from dest (float version).
  // Per the scalar expression: dest[i] = dest[i] - src1[i] * src2[i].
  // Always uses the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro path.
  629. void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  630. {
  631. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i], Mode::sub (d, Mode::mul (s1, s2)),
  632. JUCE_LOAD_SRC1_SRC2_DEST,
  633. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  634. }
  // Subtracts the element-wise product of two buffers from dest (double version).
  // Per the scalar expression: dest[i] = dest[i] - src1[i] * src2[i].
  // Always uses the JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST macro path.
  635. void JUCE_CALLTYPE FloatVectorOperations::subtractWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  636. {
  637. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i], Mode::sub (d, Mode::mul (s1, s2)),
  638. JUCE_LOAD_SRC1_SRC2_DEST,
  639. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  640. }
  // In-place element-wise multiply (float version): dest[i] = dest[i] * src[i].
  // The vDSP branch passes dest as both the second input and the output of
  // vDSP_vmul; the fallback uses JUCE_PERFORM_VEC_OP_SRC_DEST with Mode::mul.
  641. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  642. {
  643. #if JUCE_USE_VDSP_FRAMEWORK
  644. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  645. #else
  646. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  647. #endif
  648. }
  // In-place element-wise multiply (double version): dest[i] = dest[i] * src[i].
  // The vDSP branch passes dest as both the second input and the output of
  // vDSP_vmulD; the fallback uses JUCE_PERFORM_VEC_OP_SRC_DEST with Mode::mul.
  649. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  650. {
  651. #if JUCE_USE_VDSP_FRAMEWORK
  652. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  653. #else
  654. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  655. #endif
  656. }
  // Element-wise product of two buffers written to a third (float version):
  // dest[i] = src1[i] * src2[i]. The previous contents of dest are not read.
  // Uses vDSP_vmul when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
  // JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro.
  657. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  658. {
  659. #if JUCE_USE_VDSP_FRAMEWORK
  660. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  661. #else
  662. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  663. #endif
  664. }
  // Element-wise product of two buffers written to a third (double version):
  // dest[i] = src1[i] * src2[i]. The previous contents of dest are not read.
  // Uses vDSP_vmulD when JUCE_USE_VDSP_FRAMEWORK is set, otherwise the
  // JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST macro.
  665. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  666. {
  667. #if JUCE_USE_VDSP_FRAMEWORK
  668. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  669. #else
  670. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  671. #endif
  672. }
  // Scales a buffer in place by a constant (float version): dest[i] = dest[i] * multiplier.
  // The vDSP branch calls vDSP_vsmul with dest as both input and output; the
  // fallback uses JUCE_PERFORM_VEC_OP_DEST, with `mult` prepared from the
  // multiplier by Mode::load1.
  673. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  674. {
  675. #if JUCE_USE_VDSP_FRAMEWORK
  676. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  677. #else
  678. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  679. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  680. #endif
  681. }
  // Scales a buffer in place by a constant (double version): dest[i] = dest[i] * multiplier.
  // The vDSP branch calls vDSP_vsmulD with dest as both input and output; the
  // fallback uses JUCE_PERFORM_VEC_OP_DEST, with `mult` prepared from the
  // multiplier by Mode::load1.
  682. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  683. {
  684. #if JUCE_USE_VDSP_FRAMEWORK
  685. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  686. #else
  687. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  688. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  689. #endif
  690. }
  // Writes a scaled copy of src into dest (float version): dest[i] = src[i] * multiplier.
  // Only src is loaded (JUCE_LOAD_SRC), so the old contents of dest are not used.
  // There is no vDSP branch; the macro path is always taken.
  691. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  692. {
  693. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  694. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  695. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  696. }
  // Writes a scaled copy of src into dest (double version): dest[i] = src[i] * multiplier.
  // Only src is loaded (JUCE_LOAD_SRC), so the old contents of dest are not used.
  // There is no vDSP branch; the macro path is always taken.
  697. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  698. {
  699. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  700. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  701. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  702. }
  703. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  704. {
  705. #if JUCE_USE_VDSP_FRAMEWORK
  706. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  707. #else
  708. copyWithMultiply (dest, src, -1.0f, num);
  709. #endif
  710. }
  711. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  712. {
  713. #if JUCE_USE_VDSP_FRAMEWORK
  714. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  715. #else
  716. copyWithMultiply (dest, src, -1.0f, num);
  717. #endif
  718. }
  // Writes the absolute value of each src element into dest (float version).
  // vDSP builds call vDSP_vabs (const is cast away from src for that call).
  // Otherwise the scalar form is fabsf (src[i]) and the Mode:: form is a bitwise
  // AND with a mask built from 0x7fffffff, i.e. every bit set except the top one.
  // The mask is written through signMask.i and read back as a float through
  // signMask.f so that it can be given to Mode::load1.
  719. void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
  720. {
  721. #if JUCE_USE_VDSP_FRAMEWORK
  722. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  723. #else
  724. FloatVectorHelpers::signMask32 signMask;
  725. signMask.i = 0x7fffffffUL;
  726. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabsf (src[i]), Mode::bit_and (s, mask),
  727. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  728. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  // NOTE(review): presumably needed because the macro may expand without using
  // the setup statement on builds that have no SIMD support - confirm.
  729. ignoreUnused (signMask);
  730. #endif
  731. }
  // Writes the absolute value of each src element into dest (double version).
  // vDSP builds call vDSP_vabsD (const is cast away from src for that call).
  // Otherwise the scalar form is fabs (src[i]) and the Mode:: form is a bitwise
  // AND with a mask built from 0x7fffffffffffffff, i.e. every bit set except the
  // top one. The mask is written through signMask.i and read back as a double
  // through signMask.d so that it can be given to Mode::load1.
  732. void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
  733. {
  734. #if JUCE_USE_VDSP_FRAMEWORK
  735. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  736. #else
  737. FloatVectorHelpers::signMask64 signMask;
  738. signMask.i = 0x7fffffffffffffffULL;
  739. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabs (src[i]), Mode::bit_and (s, mask),
  740. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  741. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  // NOTE(review): presumably needed because the macro may expand without using
  // the setup statement on builds that have no SIMD support - confirm.
  742. ignoreUnused (signMask);
  743. #endif
  744. }
  // Converts integer samples to floats and scales them: dest[i] = src[i] * multiplier.
  // The vector expression is written directly with intrinsics instead of Mode::
  // loads (JUCE_LOAD_NONE), because the source is int rather than float:
  //  - NEON builds: load four ints, convert to float, multiply by the scalar.
  //  - Other builds: unaligned 128-bit integer load, convert to float, then
  //    multiply by `mult`, which is prepared from the multiplier by Mode::load1.
  745. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  746. {
  747. #if JUCE_USE_ARM_NEON
  748. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  749. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  750. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  751. #else
  752. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
  753. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  754. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  755. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  756. #endif
  757. }
  // Limits each element from above by a constant (float version):
  // dest[i] = jmin (src[i], comp). `cmp` is prepared from comp by Mode::load1.
  // There is no vDSP branch; the macro path is always taken.
  758. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
  759. {
  760. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  761. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  762. const Mode::ParallelType cmp = Mode::load1 (comp);)
  763. }
  // Limits each element from above by a constant (double version):
  // dest[i] = jmin (src[i], comp). `cmp` is prepared from comp by Mode::load1.
  // There is no vDSP branch; the macro path is always taken.
  764. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
  765. {
  766. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  767. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  768. const Mode::ParallelType cmp = Mode::load1 (comp);)
  769. }
  // Element-wise minimum of two buffers (float version):
  // dest[i] = jmin (src1[i], src2[i]).
  // vDSP builds call vDSP_vmin (const is cast away from the sources for that
  // call); other builds use JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST with Mode::min.
  770. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
  771. {
  772. #if JUCE_USE_VDSP_FRAMEWORK
  773. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  774. #else
  775. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  776. #endif
  777. }
  // Element-wise minimum of two buffers (double version):
  // dest[i] = jmin (src1[i], src2[i]).
  // vDSP builds call vDSP_vminD (const is cast away from the sources for that
  // call); other builds use JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST with Mode::min.
  778. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
  779. {
  780. #if JUCE_USE_VDSP_FRAMEWORK
  781. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  782. #else
  783. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  784. #endif
  785. }
  // Limits each element from below by a constant (float version):
  // dest[i] = jmax (src[i], comp). `cmp` is prepared from comp by Mode::load1.
  // There is no vDSP branch; the macro path is always taken.
  786. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
  787. {
  788. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  789. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  790. const Mode::ParallelType cmp = Mode::load1 (comp);)
  791. }
  // Limits each element from below by a constant (double version):
  // dest[i] = jmax (src[i], comp). `cmp` is prepared from comp by Mode::load1.
  // There is no vDSP branch; the macro path is always taken.
  792. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
  793. {
  794. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  795. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  796. const Mode::ParallelType cmp = Mode::load1 (comp);)
  797. }
  // Element-wise maximum of two buffers (float version):
  // dest[i] = jmax (src1[i], src2[i]).
  // vDSP builds call vDSP_vmax (const is cast away from the sources for that
  // call); other builds use JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST with Mode::max.
  798. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
  799. {
  800. #if JUCE_USE_VDSP_FRAMEWORK
  801. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  802. #else
  803. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  804. #endif
  805. }
  // Element-wise maximum of two buffers (double version):
  // dest[i] = jmax (src1[i], src2[i]).
  // vDSP builds call vDSP_vmaxD (const is cast away from the sources for that
  // call); other builds use JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST with Mode::max.
  806. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
  807. {
  808. #if JUCE_USE_VDSP_FRAMEWORK
  809. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  810. #else
  811. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  812. #endif
  813. }
  // Clamps each src element into [low, high] and stores it in dest (float version):
  // dest[i] = jmax (jmin (src[i], high), low).
  // The caller must supply high >= low; this is only checked by a jassert.
  // vDSP builds call vDSP_vclip; other builds use the macro path with `lo` and
  // `hi` prepared from the two limits by Mode::load1.
  814. void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
  815. {
  816. jassert(high >= low);
  817. #if JUCE_USE_VDSP_FRAMEWORK
  818. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  819. #else
  820. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  821. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  822. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  823. #endif
  824. }
  // Clamps each src element into [low, high] and stores it in dest (double version):
  // dest[i] = jmax (jmin (src[i], high), low).
  // The caller must supply high >= low; this is only checked by a jassert.
  // vDSP builds call vDSP_vclipD; other builds use the macro path with `lo` and
  // `hi` prepared from the two limits by Mode::load1.
  825. void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
  826. {
  827. jassert(high >= low);
  828. #if JUCE_USE_VDSP_FRAMEWORK
  829. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  830. #else
  831. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  832. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  833. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  834. #endif
  835. }
  836. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  837. {
  838. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  839. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  840. #else
  841. return Range<float>::findMinAndMax (src, num);
  842. #endif
  843. }
  844. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  845. {
  846. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  847. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  848. #else
  849. return Range<double>::findMinAndMax (src, num);
  850. #endif
  851. }
  852. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  853. {
  854. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  855. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  856. #else
  857. return juce::findMinimum (src, num);
  858. #endif
  859. }
  860. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  861. {
  862. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  863. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  864. #else
  865. return juce::findMinimum (src, num);
  866. #endif
  867. }
  868. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  869. {
  870. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  871. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  872. #else
  873. return juce::findMaximum (src, num);
  874. #endif
  875. }
  876. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  877. {
  878. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  879. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  880. #else
  881. return juce::findMaximum (src, num);
  882. #endif
  883. }
  884. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  885. {
  886. #if JUCE_USE_SSE_INTRINSICS
  887. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  888. #endif
  889. ignoreUnused (shouldEnable);
  890. }
  891. void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport() noexcept
  892. {
  893. #if JUCE_USE_SSE_INTRINSICS
  894. const unsigned int mxcsr = _mm_getcsr();
  895. _mm_setcsr (mxcsr | 0x8040); // add the DAZ and FZ bits
  896. #endif
  897. }
  898. //==============================================================================
  899. //==============================================================================
  // Unit test for FloatVectorOperations, compiled only when JUCE_UNIT_TESTS is set.
  // Each run fills randomly sized buffers, applies the vector operations one after
  // another and checks every result against a known constant or a plain scalar
  // reference implementation.
  900. #if JUCE_UNIT_TESTS
  901. class FloatVectorOperationsTests : public UnitTest
  902. {
  903. public:
  904. FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations", "Audio") {}
  // Holds the test body once per sample type; instantiated below for float and double.
  905. template <typename ValueType>
  906. struct TestRunner
  907. {
  // Runs one pass of all checks. `random` is taken by value, so the draws made
  // here do not advance the caller's generator object.
  908. static void runTest (UnitTest& u, Random random)
  909. {
  // Buffer length is 1..500 or 1..10, chosen at random, so that both long and
  // very short inputs are exercised.
  910. const int range = random.nextBool() ? 500 : 10;
  911. const int num = random.nextInt (range) + 1;
  // 16 spare elements leave room for the byte offsets applied below.
  912. HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
  913. HeapBlock<int> buffer3 ((size_t) num + 16);
  // On ARM the buffers are used exactly as allocated; elsewhere a random byte
  // offset (0..15) is added to each pointer.
  914. #if JUCE_ARM
  915. ValueType* const data1 = buffer1;
  916. ValueType* const data2 = buffer2;
  917. int* const int1 = buffer3;
  918. #else
  919. // These tests deliberately operate on misaligned memory and will be flagged up by
  920. // checks for undefined behavior!
  921. ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
  922. ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
  923. int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
  924. #endif
  925. fillRandomly (random, data1, num);
  926. fillRandomly (random, data2, num);
  // Min/max searches are compared against the generic Range / juce:: versions.
  927. Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
  928. Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
  929. u.expect (minMax1 == minMax2);
  930. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
  931. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
  932. u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
  933. u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
  // From here on every step builds on the previous buffer contents, so the
  // expected constants depend on the statements staying in this order.
  934. FloatVectorOperations::clear (data1, num);
  935. u.expect (areAllValuesEqual (data1, num, 0));
  936. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  937. u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
  938. FloatVectorOperations::add (data1, (ValueType) 2, num);
  939. u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
  940. FloatVectorOperations::copy (data2, data1, num);
  941. u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
  942. FloatVectorOperations::add (data2, data1, num);
  943. u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
  944. FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
  945. u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
  946. FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
  947. u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
  948. FloatVectorOperations::multiply (data1, (ValueType) 2, num);
  949. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  950. FloatVectorOperations::multiply (data1, data2, num);
  951. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  952. FloatVectorOperations::negate (data2, data1, num);
  953. u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
  954. FloatVectorOperations::subtract (data1, data2, num);
  955. u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
  956. FloatVectorOperations::abs (data1, data2, num);
  957. u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
  958. FloatVectorOperations::abs (data2, data1, num);
  959. u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
  960. fillRandomly (random, int1, num);
  961. doConversionTest (u, data1, data2, int1, num);
  // Two-source multiply-accumulate with dest aliasing src1: 2 + 2 * 3 == 8.
  962. FloatVectorOperations::fill (data1, (ValueType) 2, num);
  963. FloatVectorOperations::fill (data2, (ValueType) 3, num);
  964. FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
  965. u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
  966. }
  // Float overload: checks convertFixedToFloat against the scalar convertFixed loop.
  967. static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
  968. {
  969. FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
  970. convertFixed (data2, int1, 2.0f, num);
  971. u.expect (buffersMatch (data1, data2, num));
  972. }
  // Double overload: intentionally empty, so no conversion check runs for double.
  973. static void doConversionTest (UnitTest&, double*, double*, int*, int) {}
  // Fills `num` elements with random.nextDouble() * 1000.0 converted to ValueType.
  974. static void fillRandomly (Random& random, ValueType* d, int num)
  975. {
  976. while (--num >= 0)
  977. *d++ = (ValueType) (random.nextDouble() * 1000.0);
  978. }
  // Fills `num` ints with values from random.nextInt().
  979. static void fillRandomly (Random& random, int* d, int num)
  980. {
  981. while (--num >= 0)
  982. *d++ = random.nextInt();
  983. }
  // Scalar reference for convertFixedToFloat: d[i] = (float) s[i] * multiplier.
  984. static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
  985. {
  986. while (--num >= 0)
  987. *d++ = (float) *s++ * multiplier;
  988. }
  // True only if all `num` elements compare exactly equal (==) to `target`.
  989. static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
  990. {
  991. while (--num >= 0)
  992. if (*d++ != target)
  993. return false;
  994. return true;
  995. }
  // True only if every pair of corresponding elements passes valuesMatch.
  996. static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
  997. {
  998. while (--num >= 0)
  999. if (! valuesMatch (*d1++, *d2++))
  1000. return false;
  1001. return true;
  1002. }
  // Two values match when their absolute difference is below the type's epsilon
  // (an absolute tolerance, not one scaled by magnitude).
  1003. static bool valuesMatch (ValueType v1, ValueType v2)
  1004. {
  1005. return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
  1006. }
  1007. };
  // Runs the float and double variants 1000 times each, passing the value
  // returned by getRandom() to each run.
  1008. void runTest() override
  1009. {
  1010. beginTest ("FloatVectorOperations");
  1011. for (int i = 1000; --i >= 0;)
  1012. {
  1013. TestRunner<float>::runTest (*this, getRandom());
  1014. TestRunner<double>::runTest (*this, getRandom());
  1015. }
  1016. }
  1017. };
  // File-scope instance of the test class.
  // NOTE(review): presumably constructing it is what registers the test with the
  // UnitTest framework - confirm against UnitTest's constructor.
  1018. static FloatVectorOperationsTests vectorOpTests;
  1019. #endif