The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1169 lines
52KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2017 - ROLI Ltd.
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. The code included in this file is provided under the terms of the ISC license
  8. http://www.isc.org/downloads/software-support-policy/isc-license. Permission
  9. To use, copy, modify, and/or distribute this software for any purpose with or
  10. without fee is hereby granted provided that the above copyright notice and
  11. this permission notice appear in all copies.
  12. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  13. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  14. DISCLAIMED.
  15. ==============================================================================
  16. */
  17. namespace FloatVectorHelpers
  18. {
  // Pointer-advance snippets handed to the vector loops as their "increment" step.
  // Every pointer is moved on by (16 / sizeof (*dest)) elements; the element size is
  // always taken from dest, including for the source pointers.
  #define JUCE_INCREMENT_DEST            dest += (16 / sizeof (*dest));
  #define JUCE_INCREMENT_SRC_DEST        JUCE_INCREMENT_DEST src += (16 / sizeof (*dest));
  #define JUCE_INCREMENT_SRC1_SRC2_DEST  JUCE_INCREMENT_DEST src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
  22. #if JUCE_USE_SSE_INTRINSICS
  23. inline static bool isAligned (const void* p) noexcept
  24. {
  25. return (((pointer_sized_int) p) & 15) == 0;
  26. }
  27. struct BasicOps32
  28. {
  29. typedef float Type;
  30. typedef __m128 ParallelType;
  31. typedef __m128 IntegerType;
  32. enum { numParallel = 4 };
  33. // Integer and parallel types are the same for SSE. On neon they have different types
  34. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  35. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  36. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_ps (&v); }
  37. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_ps (v); }
  38. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_ps (v); }
  39. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_ps (dest, a); }
  40. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_ps (dest, a); }
  41. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_ps (a, b); }
  42. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_ps (a, b); }
  43. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_ps (a, b); }
  44. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_ps (a, b); }
  45. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_ps (a, b); }
  46. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_ps (a, b); }
  47. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_ps (a, b); }
  48. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_ps (a, b); }
  49. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_ps (a, b); }
  50. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  51. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  52. };
  53. struct BasicOps64
  54. {
  55. typedef double Type;
  56. typedef __m128d ParallelType;
  57. typedef __m128d IntegerType;
  58. enum { numParallel = 2 };
  59. // Integer and parallel types are the same for SSE. On neon they have different types
  60. static forcedinline IntegerType toint (ParallelType v) noexcept { return v; }
  61. static forcedinline ParallelType toflt (IntegerType v) noexcept { return v; }
  62. static forcedinline ParallelType load1 (Type v) noexcept { return _mm_load1_pd (&v); }
  63. static forcedinline ParallelType loadA (const Type* v) noexcept { return _mm_load_pd (v); }
  64. static forcedinline ParallelType loadU (const Type* v) noexcept { return _mm_loadu_pd (v); }
  65. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { _mm_store_pd (dest, a); }
  66. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { _mm_storeu_pd (dest, a); }
  67. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return _mm_add_pd (a, b); }
  68. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return _mm_sub_pd (a, b); }
  69. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return _mm_mul_pd (a, b); }
  70. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return _mm_max_pd (a, b); }
  71. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return _mm_min_pd (a, b); }
  72. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return _mm_and_pd (a, b); }
  73. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return _mm_andnot_pd (a, b); }
  74. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return _mm_or_pd (a, b); }
  75. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return _mm_xor_pd (a, b); }
  76. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1]); }
  77. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1]); }
  78. };
  // JUCE_BEGIN_VEC_OP and JUCE_FINISH_VEC_OP bracket the vectorised part of an operation and
  // must be used as a pair: the first opens a scope that the second closes.
  //
  // BEGIN:  chooses Mode from the size of *dest and computes numLongOps, the number of whole
  //         vectors contained in num.
  // FINISH: reduces num to the element count left over after the whole vectors, returns from
  //         the enclosing function if nothing is left, and otherwise runs scalarOp once per
  //         remaining element with the index i.
  #define JUCE_BEGIN_VEC_OP \
      using Mode = FloatVectorHelpers::ModeType<sizeof (*dest)>::Mode; \
      { \
          const int numLongOps = num / Mode::numParallel;

  #define JUCE_FINISH_VEC_OP(scalarOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) scalarOp;
  // In-place operation on dest only. Uses the aligned load/store pair when dest is 16-byte
  // aligned and the unaligned pair otherwise; leftover elements go through scalarOp.
  //   scalarOp   - statement applied to each leftover element (may use the index i)
  //   vectorExpr - expression producing the vector that is stored to dest
  //   loadLocals - one of the JUCE_LOAD_* macros declaring the locals vectorExpr reads
  //   prologue   - declarations emitted once before the loop
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorExpr, loadLocals, prologue) \
      JUCE_BEGIN_VEC_OP \
      prologue \
      if (FloatVectorHelpers::isAligned (dest))  JUCE_VEC_LOOP (vectorExpr, dummy, Mode::loadA, Mode::storeA, loadLocals, JUCE_INCREMENT_DEST) \
      else                                       JUCE_VEC_LOOP (vectorExpr, dummy, Mode::loadU, Mode::storeU, loadLocals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (scalarOp)
  // One-source operation writing to dest. The alignment of dest selects the dest load/store
  // pair and the alignment of src selects the src load, giving four loop variants.
  //   scalarOp   - statement applied to each leftover element (may use the index i)
  //   vectorExpr - expression producing the vector that is stored to dest
  //   loadLocals - one of the JUCE_LOAD_* macros declaring the locals vectorExpr reads
  //   advance    - pointer increments performed after each vector
  //   prologue   - declarations emitted once before the loop
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      JUCE_BEGIN_VEC_OP \
      prologue \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src))  JUCE_VEC_LOOP (vectorExpr, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, advance) \
          else                                      JUCE_VEC_LOOP (vectorExpr, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, advance) \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src))  JUCE_VEC_LOOP (vectorExpr, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, advance) \
          else                                      JUCE_VEC_LOOP (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  // Two-source operation writing to dest. The alignment of dest, src1 and src2 is tested
  // independently, so one of eight loop variants runs: storeA/storeU follows dest, the first
  // load follows src1 and the second load follows src2.
  //   scalarOp   - statement applied to each leftover element (may use the index i)
  //   vectorExpr - expression producing the vector that is stored to dest
  //   loadLocals - one of the JUCE_LOAD_* macros declaring the locals vectorExpr reads
  //   advance    - pointer increments performed after each vector
  //   prologue   - declarations emitted once before the loop
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      JUCE_BEGIN_VEC_OP \
      prologue \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2))  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, advance) \
              else                                       JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadA, Mode::loadU, Mode::storeA, loadLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2))  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, advance) \
              else                                       JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeA, loadLocals, advance) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2))  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadA, Mode::loadA, Mode::storeU, loadLocals, advance) \
              else                                       JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2))  JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadA, Mode::storeU, loadLocals, advance) \
              else                                       JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
          } \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  // Two-source operation that also reads the current contents of dest. As in the plain
  // two-source form there are eight loop variants; here the dest load and the dest store are
  // both chosen by the alignment of dest (loadA + storeA, or loadU + storeU).
  //   scalarOp   - statement applied to each leftover element (may use the index i)
  //   vectorExpr - expression producing the vector that is stored to dest
  //   loadLocals - one of the JUCE_LOAD_* macros declaring the locals vectorExpr reads
  //   advance    - pointer increments performed after each vector
  //   prologue   - declarations emitted once before the loop
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      JUCE_BEGIN_VEC_OP \
      prologue \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2))  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, advance) \
              else                                       JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2))  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, loadLocals, advance) \
              else                                       JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, loadLocals, advance) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src1)) \
          { \
              if (FloatVectorHelpers::isAligned (src2))  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, advance) \
              else                                       JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (src2))  JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, loadLocals, advance) \
              else                                       JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
          } \
      } \
      JUCE_FINISH_VEC_OP (scalarOp)
  168. //==============================================================================
  169. #elif JUCE_USE_ARM_NEON
  170. struct BasicOps32
  171. {
  172. typedef float Type;
  173. typedef float32x4_t ParallelType;
  174. typedef uint32x4_t IntegerType;
  175. union signMaskUnion { ParallelType f; IntegerType i; };
  176. enum { numParallel = 4 };
  177. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  178. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  179. static forcedinline ParallelType load1 (Type v) noexcept { return vld1q_dup_f32 (&v); }
  180. static forcedinline ParallelType loadA (const Type* v) noexcept { return vld1q_f32 (v); }
  181. static forcedinline ParallelType loadU (const Type* v) noexcept { return vld1q_f32 (v); }
  182. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  183. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { vst1q_f32 (dest, a); }
  184. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return vaddq_f32 (a, b); }
  185. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return vsubq_f32 (a, b); }
  186. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return vmulq_f32 (a, b); }
  187. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return vmaxq_f32 (a, b); }
  188. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return vminq_f32 (a, b); }
  189. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (vandq_u32 (toint (a), toint (b))); }
  190. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt (vbicq_u32 (toint (a), toint (b))); }
  191. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (vorrq_u32 (toint (a), toint (b))); }
  192. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (veorq_u32 (toint (a), toint (b))); }
  193. static forcedinline Type max (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmax (v[0], v[1], v[2], v[3]); }
  194. static forcedinline Type min (ParallelType a) noexcept { Type v[numParallel]; storeU (v, a); return jmin (v[0], v[1], v[2], v[3]); }
  195. };
  196. struct BasicOps64
  197. {
  198. typedef double Type;
  199. typedef double ParallelType;
  200. typedef uint64 IntegerType;
  201. union signMaskUnion { ParallelType f; IntegerType i; };
  202. enum { numParallel = 1 };
  203. static forcedinline IntegerType toint (ParallelType v) noexcept { signMaskUnion u; u.f = v; return u.i; }
  204. static forcedinline ParallelType toflt (IntegerType v) noexcept { signMaskUnion u; u.i = v; return u.f; }
  205. static forcedinline ParallelType load1 (Type v) noexcept { return v; }
  206. static forcedinline ParallelType loadA (const Type* v) noexcept { return *v; }
  207. static forcedinline ParallelType loadU (const Type* v) noexcept { return *v; }
  208. static forcedinline void storeA (Type* dest, ParallelType a) noexcept { *dest = a; }
  209. static forcedinline void storeU (Type* dest, ParallelType a) noexcept { *dest = a; }
  210. static forcedinline ParallelType add (ParallelType a, ParallelType b) noexcept { return a + b; }
  211. static forcedinline ParallelType sub (ParallelType a, ParallelType b) noexcept { return a - b; }
  212. static forcedinline ParallelType mul (ParallelType a, ParallelType b) noexcept { return a * b; }
  213. static forcedinline ParallelType max (ParallelType a, ParallelType b) noexcept { return jmax (a, b); }
  214. static forcedinline ParallelType min (ParallelType a, ParallelType b) noexcept { return jmin (a, b); }
  215. static forcedinline ParallelType bit_and (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) & toint (b)); }
  216. static forcedinline ParallelType bit_not (ParallelType a, ParallelType b) noexcept { return toflt ((~toint (a)) & toint (b)); }
  217. static forcedinline ParallelType bit_or (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) | toint (b)); }
  218. static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b) noexcept { return toflt (toint (a) ^ toint (b)); }
  219. static forcedinline Type max (ParallelType a) noexcept { return a; }
  220. static forcedinline Type min (ParallelType a) noexcept { return a; }
  221. };
  // JUCE_BEGIN_VEC_OP and JUCE_FINISH_VEC_OP bracket the vectorised part of an operation and
  // must be used as a pair: the first opens a scope that the second closes.
  //
  // BEGIN:  chooses Mode from the size of *dest. The vector section is entered only when
  //         Mode::numParallel is greater than 1; inside it numLongOps is the number of whole
  //         vectors contained in num.
  // FINISH: still inside that section, reduces num to the leftover element count and returns
  //         from the enclosing function if nothing is left. After the section, scalarOp runs
  //         once per remaining element with the index i (that is every element when the
  //         vector section was skipped).
  #define JUCE_BEGIN_VEC_OP \
      using Mode = FloatVectorHelpers::ModeType<sizeof (*dest)>::Mode; \
      if (Mode::numParallel > 1) \
      { \
          const int numLongOps = num / Mode::numParallel;

  #define JUCE_FINISH_VEC_OP(scalarOp) \
          num &= (Mode::numParallel - 1); \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) scalarOp;
  // NEON versions of the operation drivers. No alignment tests are made: every variant uses
  // Mode::loadU / Mode::storeU.
  //   scalarOp   - statement applied to each leftover element (may use the index i)
  //   vectorExpr - expression producing the vector that is stored to dest
  //   loadLocals - one of the JUCE_LOAD_* macros declaring the locals vectorExpr reads
  //   advance    - pointer increments performed after each vector
  //   prologue   - declarations emitted once before the loop

  // In-place operation on dest only.
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorExpr, loadLocals, prologue) \
      JUCE_BEGIN_VEC_OP \
      prologue \
      JUCE_VEC_LOOP (vectorExpr, dummy, Mode::loadU, Mode::storeU, loadLocals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_VEC_OP (scalarOp)

  // One source, written to dest.
  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      JUCE_BEGIN_VEC_OP \
      prologue \
      JUCE_VEC_LOOP (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
      JUCE_FINISH_VEC_OP (scalarOp)

  // Two sources, written to dest.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      JUCE_BEGIN_VEC_OP \
      prologue \
      JUCE_VEC_LOOP_TWO_SOURCES (vectorExpr, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
      JUCE_FINISH_VEC_OP (scalarOp)

  // Two sources plus the existing contents of dest, written to dest.
  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      JUCE_BEGIN_VEC_OP \
      prologue \
      JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vectorExpr, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, loadLocals, advance) \
      JUCE_FINISH_VEC_OP (scalarOp)
  252. //==============================================================================
  253. #else
  // Scalar fallback used when neither SSE nor NEON is enabled: each driver expands to a
  // plain loop that applies scalarOp to every element with the index i. All other
  // arguments are accepted for signature compatibility and discarded.
  #define JUCE_PERFORM_VEC_OP_DEST(scalarOp, vectorExpr, loadLocals, prologue) \
      for (int i = 0; i < num; ++i) scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      for (int i = 0; i < num; ++i) scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      for (int i = 0; i < num; ++i) scalarOp;

  #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(scalarOp, vectorExpr, loadLocals, advance, prologue) \
      for (int i = 0; i < num; ++i) scalarOp;
  262. #endif
  263. //==============================================================================
  // Inner loops shared by the operation drivers. Each runs numLongOps times and per
  // iteration: declares the locals via declareLocals, stores vectorExpr to dest through
  // storeDst, then performs the pointer increments in advance.

  // Zero or one source; declareLocals receives the source and dest load functions.
  #define JUCE_VEC_LOOP(vectorExpr, loadSrc, loadDst, storeDst, declareLocals, advance) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          declareLocals (loadSrc, loadDst); \
          storeDst (dest, vectorExpr); \
          advance; \
      }

  // Two sources; declareLocals receives the two source load functions.
  #define JUCE_VEC_LOOP_TWO_SOURCES(vectorExpr, loadSrc1, loadSrc2, storeDst, declareLocals, advance) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          declareLocals (loadSrc1, loadSrc2); \
          storeDst (dest, vectorExpr); \
          advance; \
      }

  // Two sources plus dest; declareLocals receives all three load functions.
  #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vectorExpr, loadSrc1, loadSrc2, loadDst, storeDst, declareLocals, advance) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          declareLocals (loadSrc1, loadSrc2, loadDst); \
          storeDst (dest, vectorExpr); \
          advance; \
      }
  // Local-variable declarations for the vector loops. Each macro receives the load
  // functions chosen by the caller and declares const Mode::ParallelType values under
  // fixed names that the vector expressions refer to: d (from dest), s (from src),
  // s1 (from src1) and s2 (from src2).
  #define JUCE_LOAD_NONE(loadSrc, loadDst)
  #define JUCE_LOAD_DEST(loadSrc, loadDst)                      const Mode::ParallelType d = loadDst (dest);
  #define JUCE_LOAD_SRC(loadSrc, loadDst)                       const Mode::ParallelType s = loadSrc (src);
  #define JUCE_LOAD_SRC1_SRC2(loadSrc1, loadSrc2)               const Mode::ParallelType s1 = loadSrc1 (src1); const Mode::ParallelType s2 = loadSrc2 (src2);
  #define JUCE_LOAD_SRC1_SRC2_DEST(loadSrc1, loadSrc2, loadDst) const Mode::ParallelType d = loadDst (dest); const Mode::ParallelType s1 = loadSrc1 (src1); const Mode::ParallelType s2 = loadSrc2 (src2);
  #define JUCE_LOAD_SRC_DEST(loadSrc, loadDst)                  const Mode::ParallelType d = loadDst (dest); const Mode::ParallelType s = loadSrc (src);
  291. union signMask32 { float f; uint32 i; };
  292. union signMask64 { double d; uint64 i; };
  293. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  294. template<int typeSize> struct ModeType { typedef BasicOps32 Mode; };
  295. template<> struct ModeType<8> { typedef BasicOps64 Mode; };
  296. template <typename Mode>
  297. struct MinMax
  298. {
  299. typedef typename Mode::Type Type;
  300. typedef typename Mode::ParallelType ParallelType;
  301. static Type findMinOrMax (const Type* src, int num, const bool isMinimum) noexcept
  302. {
  303. int numLongOps = num / Mode::numParallel;
  304. if (numLongOps > 1)
  305. {
  306. ParallelType val;
  307. #if ! JUCE_USE_ARM_NEON
  308. if (isAligned (src))
  309. {
  310. val = Mode::loadA (src);
  311. if (isMinimum)
  312. {
  313. while (--numLongOps > 0)
  314. {
  315. src += Mode::numParallel;
  316. val = Mode::min (val, Mode::loadA (src));
  317. }
  318. }
  319. else
  320. {
  321. while (--numLongOps > 0)
  322. {
  323. src += Mode::numParallel;
  324. val = Mode::max (val, Mode::loadA (src));
  325. }
  326. }
  327. }
  328. else
  329. #endif
  330. {
  331. val = Mode::loadU (src);
  332. if (isMinimum)
  333. {
  334. while (--numLongOps > 0)
  335. {
  336. src += Mode::numParallel;
  337. val = Mode::min (val, Mode::loadU (src));
  338. }
  339. }
  340. else
  341. {
  342. while (--numLongOps > 0)
  343. {
  344. src += Mode::numParallel;
  345. val = Mode::max (val, Mode::loadU (src));
  346. }
  347. }
  348. }
  349. Type result = isMinimum ? Mode::min (val)
  350. : Mode::max (val);
  351. num &= (Mode::numParallel - 1);
  352. src += Mode::numParallel;
  353. for (int i = 0; i < num; ++i)
  354. result = isMinimum ? jmin (result, src[i])
  355. : jmax (result, src[i]);
  356. return result;
  357. }
  358. return isMinimum ? juce::findMinimum (src, num)
  359. : juce::findMaximum (src, num);
  360. }
  361. static Range<Type> findMinAndMax (const Type* src, int num) noexcept
  362. {
  363. int numLongOps = num / Mode::numParallel;
  364. if (numLongOps > 1)
  365. {
  366. ParallelType mn, mx;
  367. #if ! JUCE_USE_ARM_NEON
  368. if (isAligned (src))
  369. {
  370. mn = Mode::loadA (src);
  371. mx = mn;
  372. while (--numLongOps > 0)
  373. {
  374. src += Mode::numParallel;
  375. const ParallelType v = Mode::loadA (src);
  376. mn = Mode::min (mn, v);
  377. mx = Mode::max (mx, v);
  378. }
  379. }
  380. else
  381. #endif
  382. {
  383. mn = Mode::loadU (src);
  384. mx = mn;
  385. while (--numLongOps > 0)
  386. {
  387. src += Mode::numParallel;
  388. const ParallelType v = Mode::loadU (src);
  389. mn = Mode::min (mn, v);
  390. mx = Mode::max (mx, v);
  391. }
  392. }
  393. Range<Type> result (Mode::min (mn),
  394. Mode::max (mx));
  395. num &= (Mode::numParallel - 1);
  396. src += Mode::numParallel;
  397. for (int i = 0; i < num; ++i)
  398. result = result.getUnionWith (src[i]);
  399. return result;
  400. }
  401. return Range<Type>::findMinAndMax (src, num);
  402. }
  403. };
  404. #endif
  405. }
  406. //==============================================================================
  407. namespace
  408. {
  409. #if JUCE_USE_VDSP_FRAMEWORK
  // Strips const from a pointer so that it can be handed to vDSP functions whose
  // signatures differ slightly in the OSX 10.8 SDK and below. Can be safely removed
  // once those SDKs are obsolete.
  template <typename ValueType>
  ValueType* osx108sdkCompatibilityCast (const ValueType* arg) noexcept
  {
      ValueType* const withoutConst = const_cast<ValueType*> (arg);
      return withoutConst;
  }
  414. #endif
  415. }
  416. //==============================================================================
  417. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  418. {
  419. #if JUCE_USE_VDSP_FRAMEWORK
  420. vDSP_vclr (dest, 1, (size_t) num);
  421. #else
  422. zeromem (dest, (size_t) num * sizeof (float));
  423. #endif
  424. }
  425. void JUCE_CALLTYPE FloatVectorOperations::clear (double* dest, int num) noexcept
  426. {
  427. #if JUCE_USE_VDSP_FRAMEWORK
  428. vDSP_vclrD (dest, 1, (size_t) num);
  429. #else
  430. zeromem (dest, (size_t) num * sizeof (double));
  431. #endif
  432. }
  433. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  434. {
  435. #if JUCE_USE_VDSP_FRAMEWORK
  436. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  437. #else
  438. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  439. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  440. #endif
  441. }
  442. void JUCE_CALLTYPE FloatVectorOperations::fill (double* dest, double valueToFill, int num) noexcept
  443. {
  444. #if JUCE_USE_VDSP_FRAMEWORK
  445. vDSP_vfillD (&valueToFill, dest, 1, (size_t) num);
  446. #else
  447. JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE,
  448. const Mode::ParallelType val = Mode::load1 (valueToFill);)
  449. #endif
  450. }
  451. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  452. {
  453. memcpy (dest, src, (size_t) num * sizeof (float));
  454. }
  455. void JUCE_CALLTYPE FloatVectorOperations::copy (double* dest, const double* src, int num) noexcept
  456. {
  457. memcpy (dest, src, (size_t) num * sizeof (double));
  458. }
  459. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  460. {
  461. #if JUCE_USE_VDSP_FRAMEWORK
  462. vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  463. #else
  464. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  465. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  466. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  467. #endif
  468. }
  469. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  470. {
  471. #if JUCE_USE_VDSP_FRAMEWORK
  472. vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
  473. #else
  474. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  475. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  476. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  477. #endif
  478. }
  479. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  480. {
  481. #if JUCE_USE_VDSP_FRAMEWORK
  482. vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
  483. #else
  484. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  485. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  486. #endif
  487. }
  488. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, double amount, int num) noexcept
  489. {
  490. JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount, Mode::add (d, amountToAdd), JUCE_LOAD_DEST,
  491. const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
  492. }
  493. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, float amount, int num) noexcept
  494. {
  495. #if JUCE_USE_VDSP_FRAMEWORK
  496. vDSP_vsadd (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  497. #else
  498. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  499. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  500. const Mode::ParallelType am = Mode::load1 (amount);)
  501. #endif
  502. }
  503. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, double amount, int num) noexcept
  504. {
  505. #if JUCE_USE_VDSP_FRAMEWORK
  506. vDSP_vsaddD (osx108sdkCompatibilityCast (src), 1, &amount, dest, 1, (vDSP_Length) num);
  507. #else
  508. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount, Mode::add (am, s),
  509. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  510. const Mode::ParallelType am = Mode::load1 (amount);)
  511. #endif
  512. }
  513. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  514. {
  515. #if JUCE_USE_VDSP_FRAMEWORK
  516. vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  517. #else
  518. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  519. #endif
  520. }
  521. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src, int num) noexcept
  522. {
  523. #if JUCE_USE_VDSP_FRAMEWORK
  524. vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  525. #else
  526. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i], Mode::add (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  527. #endif
  528. }
  529. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src1, const float* src2, int num) noexcept
  530. {
  531. #if JUCE_USE_VDSP_FRAMEWORK
  532. vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  533. #else
  534. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  535. #endif
  536. }
  537. void JUCE_CALLTYPE FloatVectorOperations::add (double* dest, const double* src1, const double* src2, int num) noexcept
  538. {
  539. #if JUCE_USE_VDSP_FRAMEWORK
  540. vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  541. #else
  542. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i], Mode::add (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  543. #endif
  544. }
  545. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src, int num) noexcept
  546. {
  547. #if JUCE_USE_VDSP_FRAMEWORK
  548. vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  549. #else
  550. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  551. #endif
  552. }
  553. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src, int num) noexcept
  554. {
  555. #if JUCE_USE_VDSP_FRAMEWORK
  556. vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  557. #else
  558. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i], Mode::sub (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  559. #endif
  560. }
  561. void JUCE_CALLTYPE FloatVectorOperations::subtract (float* dest, const float* src1, const float* src2, int num) noexcept
  562. {
  563. #if JUCE_USE_VDSP_FRAMEWORK
  564. vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  565. #else
  566. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  567. #endif
  568. }
  569. void JUCE_CALLTYPE FloatVectorOperations::subtract (double* dest, const double* src1, const double* src2, int num) noexcept
  570. {
  571. #if JUCE_USE_VDSP_FRAMEWORK
  572. vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
  573. #else
  574. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i], Mode::sub (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  575. #endif
  576. }
  577. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  578. {
  579. #if JUCE_USE_VDSP_FRAMEWORK
  580. vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
  581. #else
  582. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  583. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  584. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  585. #endif
  586. }
  587. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src, double multiplier, int num) noexcept
  588. {
  589. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier, Mode::add (d, Mode::mul (mult, s)),
  590. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST,
  591. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  592. }
  593. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src1, const float* src2, int num) noexcept
  594. {
  595. #if JUCE_USE_VDSP_FRAMEWORK
  596. vDSP_vma ((float*) src1, 1, (float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  597. #else
  598. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  599. JUCE_LOAD_SRC1_SRC2_DEST,
  600. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  601. #endif
  602. }
  603. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (double* dest, const double* src1, const double* src2, int num) noexcept
  604. {
  605. #if JUCE_USE_VDSP_FRAMEWORK
  606. vDSP_vmaD ((double*) src1, 1, (double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
  607. #else
  608. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i], Mode::add (d, Mode::mul (s1, s2)),
  609. JUCE_LOAD_SRC1_SRC2_DEST,
  610. JUCE_INCREMENT_SRC1_SRC2_DEST, )
  611. #endif
  612. }
  613. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  614. {
  615. #if JUCE_USE_VDSP_FRAMEWORK
  616. vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  617. #else
  618. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  619. #endif
  620. }
  621. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, int num) noexcept
  622. {
  623. #if JUCE_USE_VDSP_FRAMEWORK
  624. vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
  625. #else
  626. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i], Mode::mul (d, s), JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST, )
  627. #endif
  628. }
  629. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src1, const float* src2, int num) noexcept
  630. {
  631. #if JUCE_USE_VDSP_FRAMEWORK
  632. vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  633. #else
  634. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  635. #endif
  636. }
  637. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src1, const double* src2, int num) noexcept
  638. {
  639. #if JUCE_USE_VDSP_FRAMEWORK
  640. vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
  641. #else
  642. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i], Mode::mul (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  643. #endif
  644. }
  645. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  646. {
  647. #if JUCE_USE_VDSP_FRAMEWORK
  648. vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  649. #else
  650. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  651. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  652. #endif
  653. }
  654. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, double multiplier, int num) noexcept
  655. {
  656. #if JUCE_USE_VDSP_FRAMEWORK
  657. vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
  658. #else
  659. JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier, Mode::mul (d, mult), JUCE_LOAD_DEST,
  660. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  661. #endif
  662. }
  663. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, float multiplier, int num) noexcept
  664. {
  665. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  666. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  667. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  668. }
  669. void JUCE_CALLTYPE FloatVectorOperations::multiply (double* dest, const double* src, double multiplier, int num) noexcept
  670. {
  671. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier, Mode::mul (mult, s),
  672. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  673. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  674. }
  675. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  676. {
  677. #if JUCE_USE_VDSP_FRAMEWORK
  678. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  679. #else
  680. copyWithMultiply (dest, src, -1.0f, num);
  681. #endif
  682. }
  683. void FloatVectorOperations::negate (double* dest, const double* src, int num) noexcept
  684. {
  685. #if JUCE_USE_VDSP_FRAMEWORK
  686. vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  687. #else
  688. copyWithMultiply (dest, src, -1.0f, num);
  689. #endif
  690. }
  691. void FloatVectorOperations::abs (float* dest, const float* src, int num) noexcept
  692. {
  693. #if JUCE_USE_VDSP_FRAMEWORK
  694. vDSP_vabs ((float*) src, 1, dest, 1, (vDSP_Length) num);
  695. #else
  696. FloatVectorHelpers::signMask32 signMask;
  697. signMask.i = 0x7fffffffUL;
  698. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabsf (src[i]), Mode::bit_and (s, mask),
  699. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  700. const Mode::ParallelType mask = Mode::load1 (signMask.f);)
  701. ignoreUnused (signMask);
  702. #endif
  703. }
  704. void FloatVectorOperations::abs (double* dest, const double* src, int num) noexcept
  705. {
  706. #if JUCE_USE_VDSP_FRAMEWORK
  707. vDSP_vabsD ((double*) src, 1, dest, 1, (vDSP_Length) num);
  708. #else
  709. FloatVectorHelpers::signMask64 signMask;
  710. signMask.i = 0x7fffffffffffffffULL;
  711. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = fabs (src[i]), Mode::bit_and (s, mask),
  712. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  713. const Mode::ParallelType mask = Mode::load1 (signMask.d);)
  714. ignoreUnused (signMask);
  715. #endif
  716. }
  717. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  718. {
  719. #if JUCE_USE_ARM_NEON
  720. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  721. vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
  722. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST, )
  723. #else
  724. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  725. Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  726. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST,
  727. const Mode::ParallelType mult = Mode::load1 (multiplier);)
  728. #endif
  729. }
  730. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src, float comp, int num) noexcept
  731. {
  732. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  733. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  734. const Mode::ParallelType cmp = Mode::load1 (comp);)
  735. }
  736. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src, double comp, int num) noexcept
  737. {
  738. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp), Mode::min (s, cmp),
  739. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  740. const Mode::ParallelType cmp = Mode::load1 (comp);)
  741. }
  742. void JUCE_CALLTYPE FloatVectorOperations::min (float* dest, const float* src1, const float* src2, int num) noexcept
  743. {
  744. #if JUCE_USE_VDSP_FRAMEWORK
  745. vDSP_vmin ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  746. #else
  747. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  748. #endif
  749. }
  750. void JUCE_CALLTYPE FloatVectorOperations::min (double* dest, const double* src1, const double* src2, int num) noexcept
  751. {
  752. #if JUCE_USE_VDSP_FRAMEWORK
  753. vDSP_vminD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  754. #else
  755. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]), Mode::min (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  756. #endif
  757. }
  758. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src, float comp, int num) noexcept
  759. {
  760. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  761. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  762. const Mode::ParallelType cmp = Mode::load1 (comp);)
  763. }
  764. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src, double comp, int num) noexcept
  765. {
  766. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp), Mode::max (s, cmp),
  767. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  768. const Mode::ParallelType cmp = Mode::load1 (comp);)
  769. }
  770. void JUCE_CALLTYPE FloatVectorOperations::max (float* dest, const float* src1, const float* src2, int num) noexcept
  771. {
  772. #if JUCE_USE_VDSP_FRAMEWORK
  773. vDSP_vmax ((float*) src1, 1, (float*) src2, 1, dest, 1, (vDSP_Length) num);
  774. #else
  775. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  776. #endif
  777. }
  778. void JUCE_CALLTYPE FloatVectorOperations::max (double* dest, const double* src1, const double* src2, int num) noexcept
  779. {
  780. #if JUCE_USE_VDSP_FRAMEWORK
  781. vDSP_vmaxD ((double*) src1, 1, (double*) src2, 1, dest, 1, (vDSP_Length) num);
  782. #else
  783. JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]), Mode::max (s1, s2), JUCE_LOAD_SRC1_SRC2, JUCE_INCREMENT_SRC1_SRC2_DEST, )
  784. #endif
  785. }
  786. void JUCE_CALLTYPE FloatVectorOperations::clip (float* dest, const float* src, float low, float high, int num) noexcept
  787. {
  788. jassert(high >= low);
  789. #if JUCE_USE_VDSP_FRAMEWORK
  790. vDSP_vclip ((float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  791. #else
  792. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  793. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  794. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  795. #endif
  796. }
  797. void JUCE_CALLTYPE FloatVectorOperations::clip (double* dest, const double* src, double low, double high, int num) noexcept
  798. {
  799. jassert(high >= low);
  800. #if JUCE_USE_VDSP_FRAMEWORK
  801. vDSP_vclipD ((double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
  802. #else
  803. JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low), Mode::max (Mode::min (s, hi), lo),
  804. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST,
  805. const Mode::ParallelType lo = Mode::load1 (low); const Mode::ParallelType hi = Mode::load1 (high);)
  806. #endif
  807. }
  808. Range<float> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num) noexcept
  809. {
  810. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  811. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
  812. #else
  813. return Range<float>::findMinAndMax (src, num);
  814. #endif
  815. }
  816. Range<double> JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const double* src, int num) noexcept
  817. {
  818. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  819. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
  820. #else
  821. return Range<double>::findMinAndMax (src, num);
  822. #endif
  823. }
  824. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  825. {
  826. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  827. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, true);
  828. #else
  829. return juce::findMinimum (src, num);
  830. #endif
  831. }
  832. double JUCE_CALLTYPE FloatVectorOperations::findMinimum (const double* src, int num) noexcept
  833. {
  834. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  835. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, true);
  836. #else
  837. return juce::findMinimum (src, num);
  838. #endif
  839. }
  840. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  841. {
  842. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  843. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num, false);
  844. #else
  845. return juce::findMaximum (src, num);
  846. #endif
  847. }
  848. double JUCE_CALLTYPE FloatVectorOperations::findMaximum (const double* src, int num) noexcept
  849. {
  850. #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
  851. return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num, false);
  852. #else
  853. return juce::findMaximum (src, num);
  854. #endif
  855. }
  856. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  857. {
  858. #if JUCE_USE_SSE_INTRINSICS
  859. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  860. #endif
  861. ignoreUnused (shouldEnable);
  862. }
  863. void JUCE_CALLTYPE FloatVectorOperations::disableDenormalisedNumberSupport() noexcept
  864. {
  865. #if JUCE_USE_SSE_INTRINSICS
  866. const unsigned int mxcsr = _mm_getcsr();
  867. _mm_setcsr (mxcsr | 0x8040); // add the DAZ and FZ bits
  868. #endif
  869. }
  870. //==============================================================================
  871. //==============================================================================
  #if JUCE_UNIT_TESTS

  /** Unit tests that run FloatVectorOperations on randomly sized buffers and
      compare the results with known values or simple scalar loops.
  */
  class FloatVectorOperationsTests  : public UnitTest
  {
  public:
      FloatVectorOperationsTests() : UnitTest ("FloatVectorOperations") {}

      /** Holds one pass of the checks for a given sample type (float or double). */
      template <typename ValueType>
      struct TestRunner
      {
          /** Runs a single randomised pass.

              The generator is taken by reference: with a by-value parameter each
              call would receive a fresh copy in the same state, and every pass
              would replay exactly the same lengths, offsets and data.

              @param u        the test that failures are reported to
              @param random   generator for buffer length, offsets and contents;
                              it is advanced by this call
          */
          static void runTest (UnitTest& u, Random& random)
          {
              // Mix of short (1..10) and longer (1..500) buffers.
              const int range = random.nextBool() ? 500 : 10;
              const int num = random.nextInt (range) + 1;

              // 16 spare elements leave room for the byte offsets applied below.
              HeapBlock<ValueType> buffer1 ((size_t) num + 16), buffer2 ((size_t) num + 16);
              HeapBlock<int> buffer3 ((size_t) num + 16);

             #if JUCE_ARM
              // On ARM the buffers are used exactly as allocated.
              ValueType* const data1 = buffer1;
              ValueType* const data2 = buffer2;
              int* const int1 = buffer3;
             #else
              // Elsewhere each buffer is shifted by a random 0..15 bytes.
              ValueType* const data1 = addBytesToPointer (buffer1.getData(), random.nextInt (16));
              ValueType* const data2 = addBytesToPointer (buffer2.getData(), random.nextInt (16));
              int* const int1 = addBytesToPointer (buffer3.getData(), random.nextInt (16));
             #endif

              fillRandomly (random, data1, num);
              fillRandomly (random, data2, num);

              // Min/max searches must agree with the generic implementations.
              Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
              Range<ValueType> minMax2 (Range<ValueType>::findMinAndMax (data1, num));
              u.expect (minMax1 == minMax2);

              u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
              u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));

              u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
              u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));

              // From here on each step builds on the constant left behind by
              // the previous one, so the order of the calls matters.
              FloatVectorOperations::clear (data1, num);
              u.expect (areAllValuesEqual (data1, num, 0));

              FloatVectorOperations::fill (data1, (ValueType) 2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 2));

              FloatVectorOperations::add (data1, (ValueType) 2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 4));

              FloatVectorOperations::copy (data2, data1, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 4));

              FloatVectorOperations::add (data2, data1, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 8));

              FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 16));

              FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 32));

              FloatVectorOperations::multiply (data1, (ValueType) 2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 8));

              FloatVectorOperations::multiply (data1, data2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 256));

              FloatVectorOperations::negate (data2, data1, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) -256));

              FloatVectorOperations::subtract (data1, data2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 512));

              FloatVectorOperations::abs (data1, data2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 256));

              FloatVectorOperations::abs (data2, data1, num);
              u.expect (areAllValuesEqual (data2, num, (ValueType) 256));

              fillRandomly (random, int1, num);
              doConversionTest (u, data1, data2, int1, num);

              // 2 + 2 * 3 == 8, with data1 acting as destination and first source.
              FloatVectorOperations::fill (data1, (ValueType) 2, num);
              FloatVectorOperations::fill (data2, (ValueType) 3, num);
              FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
              u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
          }

          /** Compares convertFixedToFloat with the scalar convertFixed loop (float only). */
          static void doConversionTest (UnitTest& u, float* data1, float* data2, int* const int1, int num)
          {
              FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
              convertFixed (data2, int1, 2.0f, num);
              u.expect (buffersMatch (data1, data2, num));
          }

          /** The double variant has nothing to test, so this overload is a no-op. */
          static void doConversionTest (UnitTest&, double*, double*, int*, int) {}

          /** Fills num values with random numbers scaled by 1000. */
          static void fillRandomly (Random& random, ValueType* d, int num)
          {
              while (--num >= 0)
                  *d++ = (ValueType) (random.nextDouble() * 1000.0);
          }

          /** Fills num ints with random values. */
          static void fillRandomly (Random& random, int* d, int num)
          {
              while (--num >= 0)
                  *d++ = random.nextInt();
          }

          /** Scalar reference for convertFixedToFloat: d[i] = s[i] * multiplier. */
          static void convertFixed (float* d, const int* s, ValueType multiplier, int num)
          {
              while (--num >= 0)
                  *d++ = *s++ * multiplier;
          }

          /** Returns true if all num values compare exactly equal to target. */
          static bool areAllValuesEqual (const ValueType* d, int num, ValueType target)
          {
              while (--num >= 0)
                  if (*d++ != target)
                      return false;

              return true;
          }

          /** Returns true if every pair of corresponding values satisfies valuesMatch(). */
          static bool buffersMatch (const ValueType* d1, const ValueType* d2, int num)
          {
              while (--num >= 0)
                  if (! valuesMatch (*d1++, *d2++))
                      return false;

              return true;
          }

          /** Returns true if the two values differ by less than the type's epsilon. */
          static bool valuesMatch (ValueType v1, ValueType v2)
          {
              return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
          }
      };

      void runTest() override
      {
          beginTest ("FloatVectorOperations");

          // One generator is shared by every pass and advanced as it goes, so the
          // 1000 iterations use different lengths, offsets and data.
          Random random = getRandom();

          for (int i = 1000; --i >= 0;)
          {
              TestRunner<float>::runTest (*this, random);
              TestRunner<double>::runTest (*this, random);
          }
      }
  };

  static FloatVectorOperationsTests vectorOpTests;

  #endif