Audio plugin host https://kx.studio/carla
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

353 lines
12KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. #if JUCE_USE_SSE_INTRINSICS
  18. namespace FloatVectorHelpers
  19. {
  20. static bool sse2Present = false;
  21. static bool isSSE2Available() noexcept
  22. {
  23. if (sse2Present)
  24. return true;
  25. sse2Present = SystemStats::hasSSE2();
  26. return sse2Present;
  27. }
  28. inline static bool isAligned (const void* p) noexcept
  29. {
  30. return (((pointer_sized_int) p) & 15) == 0;
  31. }
  32. static inline float findMinimumOrMaximum (const float* src, int num, const bool isMinimum) noexcept
  33. {
  34. #if JUCE_USE_SSE_INTRINSICS
  35. const int numLongOps = num / 4;
  36. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  37. {
  38. __m128 val;
  39. #define JUCE_MINIMUMMAXIMUM_SSE_LOOP(loadOp, minMaxOp) \
  40. val = loadOp (src); \
  41. src += 4; \
  42. for (int i = 1; i < numLongOps; ++i) \
  43. { \
  44. const __m128 s = loadOp (src); \
  45. val = minMaxOp (val, s); \
  46. src += 4; \
  47. }
  48. if (isMinimum)
  49. {
  50. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps, _mm_min_ps) }
  51. else { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps, _mm_min_ps) }
  52. }
  53. else
  54. {
  55. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps, _mm_max_ps) }
  56. else { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps,_mm_max_ps) }
  57. }
  58. float localVal;
  59. {
  60. float vals[4];
  61. _mm_storeu_ps (vals, val);
  62. localVal = isMinimum ? jmin (vals[0], vals[1], vals[2], vals[3])
  63. : jmax (vals[0], vals[1], vals[2], vals[3]);
  64. }
  65. num &= 3;
  66. for (int i = 0; i < num; ++i)
  67. localVal = isMinimum ? jmin (localVal, src[i])
  68. : jmax (localVal, src[i]);
  69. return localVal;
  70. }
  71. #endif
  72. return isMinimum ? juce::findMinimum (src, num)
  73. : juce::findMaximum (src, num);
  74. }
  75. }
  // -------------------------------------------------------------------------
  // Building blocks for the element-wise operations in this file.
  //
  // Each macro expands inside a function body that has `dest` and `num` in
  // scope (plus `src` for the SRC_DEST variants). The expansion advances
  // those pointers, overwrites `num`, and may `return` from the enclosing
  // function, so it has to be the last thing that function does.
  // -------------------------------------------------------------------------

  // Opens the SSE section. It is entered only when there is at least one
  // element to process and SSE2 was detected at runtime. The `num > 0` test
  // keeps a negative count away from the `num &= 3` in JUCE_FINISH_SSE_OP,
  // which could otherwise turn it into 1..3 and make the scalar tail touch
  // elements that were never requested; with the guard a non-positive count
  // is a no-op, exactly as in the non-SSE build.
  #define JUCE_BEGIN_SSE_OP \
      if (num > 0 && FloatVectorHelpers::isSSE2Available()) \
      { \
          const int numLongOps = num / 4;

  // Closes the SSE section: reduces `num` to the 0..3 leftover elements,
  // returns straight away if there are none, and otherwise falls through to
  // the scalar loop. That same loop handles the whole range when the SSE
  // section was skipped.
  #define JUCE_FINISH_SSE_OP(normalOp) \
          num &= 3; \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) normalOp;

  // One pass over the groups of four: declare the per-iteration locals, store
  // the result of `sseOp` to `dest`, then advance the pointer(s).
  #define JUCE_SSE_LOOP(sseOp, srcLoad, dstLoad, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (srcLoad, dstLoad); \
          dstStore (dest, sseOp); \
          increment; \
      }

  // Pointer-advance steps usable as the `increment` argument.
  #define JUCE_INCREMENT_SRC_DEST         dest += 4; src += 4;
  #define JUCE_INCREMENT_DEST             dest += 4;

  // Choices for the `locals` argument: they declare `d` (four floats loaded
  // from dest) and/or `s` (four floats loaded from src) for use in `sseOp`.
  #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  #define JUCE_LOAD_DEST(srcLoad, dstLoad)        const __m128 d = dstLoad (dest);
  #define JUCE_LOAD_SRC(srcLoad, dstLoad)         const __m128 s = srcLoad (src);
  #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad)    const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);

  // In-place operation on `dest` only. `dummy` stands in for the source load,
  // so `locals` must be one that ignores it (JUCE_LOAD_NONE or JUCE_LOAD_DEST).
  #define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) \
      JUCE_BEGIN_SSE_OP \
        if (FloatVectorHelpers::isAligned (dest))   JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps,  _mm_store_ps,  locals, JUCE_INCREMENT_DEST) \
        else                                        JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_SSE_OP (normalOp)

  // Operation reading `src` and writing `dest`. Aligned or unaligned
  // loads/stores are chosen independently for each of the two pointers.
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
      JUCE_BEGIN_SSE_OP \
        if (FloatVectorHelpers::isAligned (dest)) \
        { \
            if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps,  _mm_load_ps,  _mm_store_ps,  locals, increment) \
            else                                     JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_load_ps,  _mm_store_ps,  locals, increment) \
        } \
        else \
        { \
            if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps,  _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
            else                                     JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
        } \
      JUCE_FINISH_SSE_OP (normalOp)
  116. #else
  // Build without SSE intrinsics: both helpers reduce to a plain scalar loop
  // over `num` elements that evaluates only the first argument. The remaining
  // arguments exist so call sites read the same in either build.
  #define JUCE_PERFORM_SSE_OP_DEST(scalarOp, unusedSseOp, unusedLocals) \
      for (int i = 0; i < num; ++i) scalarOp;
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(scalarOp, unusedSseOp, unusedLocals, unusedIncrement) \
      for (int i = 0; i < num; ++i) scalarOp;
  119. #endif
  120. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  121. {
  122. #if JUCE_USE_VDSP_FRAMEWORK
  123. vDSP_vclr (dest, 1, (size_t) num);
  124. #else
  125. zeromem (dest, num * sizeof (float));
  126. #endif
  127. }
  128. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  129. {
  130. #if JUCE_USE_VDSP_FRAMEWORK
  131. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  132. #else
  133. #if JUCE_USE_SSE_INTRINSICS
  134. const __m128 val = _mm_load1_ps (&valueToFill);
  135. #endif
  136. JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
  137. #endif
  138. }
  139. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  140. {
  141. memcpy (dest, src, (size_t) num * sizeof (float));
  142. }
  143. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  144. {
  145. #if JUCE_USE_VDSP_FRAMEWORK
  146. vDSP_vsmul (src, 1, &multiplier, dest, 1, num);
  147. #else
  148. #if JUCE_USE_SSE_INTRINSICS
  149. const __m128 mult = _mm_load1_ps (&multiplier);
  150. #endif
  151. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  152. _mm_mul_ps (mult, s),
  153. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
  154. #endif
  155. }
  156. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  157. {
  158. #if JUCE_USE_VDSP_FRAMEWORK
  159. vDSP_vadd (src, 1, dest, 1, dest, 1, num);
  160. #else
  161. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i],
  162. _mm_add_ps (d, s),
  163. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  164. #endif
  165. }
  166. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  167. {
  168. #if JUCE_USE_SSE_INTRINSICS
  169. const __m128 amountToAdd = _mm_load1_ps (&amount);
  170. #endif
  171. JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
  172. _mm_add_ps (d, amountToAdd),
  173. JUCE_LOAD_DEST)
  174. }
  175. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  176. {
  177. #if JUCE_USE_SSE_INTRINSICS
  178. const __m128 mult = _mm_load1_ps (&multiplier);
  179. #endif
  180. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
  181. _mm_add_ps (d, _mm_mul_ps (mult, s)),
  182. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  183. }
  184. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  185. {
  186. #if JUCE_USE_VDSP_FRAMEWORK
  187. vDSP_vmul (src, 1, dest, 1, dest, 1, num);
  188. #else
  189. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i],
  190. _mm_mul_ps (d, s),
  191. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  192. #endif
  193. }
  194. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  195. {
  196. #if JUCE_USE_VDSP_FRAMEWORK
  197. vDSP_vsmul (dest, 1, &multiplier, dest, 1, num);
  198. #else
  199. #if JUCE_USE_SSE_INTRINSICS
  200. const __m128 mult = _mm_load1_ps (&multiplier);
  201. #endif
  202. JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
  203. _mm_mul_ps (d, mult),
  204. JUCE_LOAD_DEST)
  205. #endif
  206. }
  207. void FloatVectorOperations::negate (float* dest, const float* src, int num) noexcept
  208. {
  209. #if JUCE_USE_VDSP_FRAMEWORK
  210. vDSP_vneg ((float*) src, 1, dest, 1, (vDSP_Length) num);
  211. #else
  212. copyWithMultiply (dest, src, -1.0f, num);
  213. #endif
  214. }
  215. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  216. {
  217. #if JUCE_USE_SSE_INTRINSICS
  218. const __m128 mult = _mm_load1_ps (&multiplier);
  219. #endif
  220. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  221. _mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  222. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
  223. }
  224. void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
  225. {
  226. #if JUCE_USE_SSE_INTRINSICS
  227. const int numLongOps = num / 4;
  228. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  229. {
  230. __m128 mn, mx;
  231. #define JUCE_MINMAX_SSE_LOOP(loadOp) \
  232. mn = loadOp (src); \
  233. mx = mn; \
  234. src += 4; \
  235. for (int i = 1; i < numLongOps; ++i) \
  236. { \
  237. const __m128 s = loadOp (src); \
  238. mn = _mm_min_ps (mn, s); \
  239. mx = _mm_max_ps (mx, s); \
  240. src += 4; \
  241. }
  242. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINMAX_SSE_LOOP (_mm_load_ps) }
  243. else { JUCE_MINMAX_SSE_LOOP (_mm_loadu_ps) }
  244. float localMin, localMax;
  245. {
  246. float mns[4], mxs[4];
  247. _mm_storeu_ps (mns, mn);
  248. _mm_storeu_ps (mxs, mx);
  249. localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
  250. localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
  251. }
  252. num &= 3;
  253. for (int i = 0; i < num; ++i)
  254. {
  255. const float s = src[i];
  256. localMin = jmin (localMin, s);
  257. localMax = jmax (localMax, s);
  258. }
  259. minResult = localMin;
  260. maxResult = localMax;
  261. return;
  262. }
  263. #endif
  264. juce::findMinAndMax (src, num, minResult, maxResult);
  265. }
  266. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  267. {
  268. #if JUCE_USE_SSE_INTRINSICS
  269. return FloatVectorHelpers::findMinimumOrMaximum (src, num, true);
  270. #else
  271. return juce::findMinimum (src, num);
  272. #endif
  273. }
  274. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  275. {
  276. #if JUCE_USE_SSE_INTRINSICS
  277. return FloatVectorHelpers::findMinimumOrMaximum (src, num, false);
  278. #else
  279. return juce::findMaximum (src, num);
  280. #endif
  281. }
  282. void JUCE_CALLTYPE FloatVectorOperations::enableFlushToZeroMode (bool shouldEnable) noexcept
  283. {
  284. #if JUCE_USE_SSE_INTRINSICS
  285. if (FloatVectorHelpers::isSSE2Available())
  286. _MM_SET_FLUSH_ZERO_MODE (shouldEnable ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF);
  287. #endif
  288. (void) shouldEnable;
  289. }