Audio plugin host https://kx.studio/carla
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

345 lines
12KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. #if JUCE_USE_SSE_INTRINSICS
  18. namespace FloatVectorHelpers
  19. {
  20. static bool sse2Present = false;
  21. static bool isSSE2Available() noexcept
  22. {
  23. if (sse2Present)
  24. return true;
  25. sse2Present = SystemStats::hasSSE2();
  26. return sse2Present;
  27. }
  28. inline static bool isAligned (const void* p) noexcept
  29. {
  30. return (((pointer_sized_int) p) & 15) == 0;
  31. }
  32. inline static void mmEmpty() noexcept
  33. {
  34. #if ! JUCE_64BIT
  35. _mm_empty();
  36. #endif
  37. }
  38. static inline float findMinimumOrMaximum (const float* src, int num, const bool isMinimum) noexcept
  39. {
  40. #if JUCE_USE_SSE_INTRINSICS
  41. const int numLongOps = num / 4;
  42. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  43. {
  44. __m128 val;
  45. #define JUCE_MINIMUMMAXIMUM_SSE_LOOP(loadOp, minMaxOp) \
  46. val = loadOp (src); \
  47. src += 4; \
  48. for (int i = 1; i < numLongOps; ++i) \
  49. { \
  50. const __m128 s = loadOp (src); \
  51. val = minMaxOp (val, s); \
  52. src += 4; \
  53. }
  54. if (isMinimum)
  55. {
  56. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps, _mm_min_ps) }
  57. else { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps, _mm_min_ps) }
  58. }
  59. else
  60. {
  61. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps, _mm_max_ps) }
  62. else { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps,_mm_max_ps) }
  63. }
  64. float localVal;
  65. {
  66. float vals[4];
  67. _mm_storeu_ps (vals, val);
  68. FloatVectorHelpers::mmEmpty();
  69. localVal = isMinimum ? jmin (vals[0], vals[1], vals[2], vals[3])
  70. : jmax (vals[0], vals[1], vals[2], vals[3]);
  71. }
  72. num &= 3;
  73. for (int i = 0; i < num; ++i)
  74. localVal = isMinimum ? jmin (localVal, src[i])
  75. : jmax (localVal, src[i]);
  76. return localVal;
  77. }
  78. #endif
  79. return isMinimum ? juce::findMinimum (src, num)
  80. : juce::findMaximum (src, num);
  81. }
  82. }
// Building blocks for the element-wise operations below. They are meant to be
// expanded inside a void function that has `dest`, `num` and (for the SRC_DEST
// variant) `src` in scope.

// Opens the SSE branch: an `if (isSSE2Available())` block that declares
// numLongOps, the number of complete groups of four floats. The block is left
// open here and is closed by JUCE_FINISH_SSE_OP.
#define JUCE_BEGIN_SSE_OP \
if (FloatVectorHelpers::isSSE2Available()) \
{ \
const int numLongOps = num / 4;
// Closes the SSE branch opened by JUCE_BEGIN_SSE_OP: calls mmEmpty(), reduces
// num to the left-over count (num & 3) and returns from the enclosing function
// if nothing is left. After the closing brace, normalOp is run once per
// remaining element with index `i`; when the SSE branch was not taken, num is
// untouched, so this loop processes every element.
#define JUCE_FINISH_SSE_OP(normalOp) \
FloatVectorHelpers::mmEmpty(); \
num &= 3; \
if (num == 0) return; \
} \
for (int i = 0; i < num; ++i) normalOp;
// One pass over the complete groups of four: expands `locals` (one of the
// JUCE_LOAD_* macros) to declare the vector inputs, writes the result of sseOp
// to dest with dstStore, then runs `increment` to advance the pointers.
#define JUCE_SSE_LOOP(sseOp, srcLoad, dstLoad, dstStore, locals, increment) \
for (int i = 0; i < numLongOps; ++i) \
{ \
locals (srcLoad, dstLoad); \
dstStore (dest, sseOp); \
increment; \
}
// Pointer steps for JUCE_SSE_LOOP: move on by one group of four floats.
#define JUCE_INCREMENT_SRC_DEST dest += 4; src += 4;
#define JUCE_INCREMENT_DEST dest += 4;
// `locals` choices for JUCE_SSE_LOOP: declare nothing, `d` (loaded from dest),
// `s` (loaded from src), or both. sseOp expressions refer to these names.
#define JUCE_LOAD_NONE(srcLoad, dstLoad)
#define JUCE_LOAD_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest);
#define JUCE_LOAD_SRC(srcLoad, dstLoad) const __m128 s = srcLoad (src);
#define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);
// In-place operation on dest only. Uses the aligned load/store intrinsics when
// dest is 16-byte aligned and the unaligned ones otherwise. `dummy` fills the
// srcLoad slot and is only a placeholder: it is never expanded as long as
// `locals` is JUCE_LOAD_NONE or JUCE_LOAD_DEST, neither of which uses srcLoad.
#define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) \
JUCE_BEGIN_SSE_OP \
if (FloatVectorHelpers::isAligned (dest)) JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps, _mm_store_ps, locals, JUCE_INCREMENT_DEST) \
else JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
JUCE_FINISH_SSE_OP (normalOp)
// Operation reading src and writing dest. The alignment of dest and of src is
// tested once each, selecting one of four loops that pair aligned or unaligned
// loads for src with aligned or unaligned loads/stores for dest.
#define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
JUCE_BEGIN_SSE_OP \
if (FloatVectorHelpers::isAligned (dest)) \
{ \
if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
else JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
}\
else \
{ \
if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
else JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
} \
JUCE_FINISH_SSE_OP (normalOp)
  124. #else
// Fallbacks used when JUCE_USE_SSE_INTRINSICS is off: the SSE-related arguments
// are discarded and normalOp is simply run for every index i in [0, num).
#define JUCE_PERFORM_SSE_OP_DEST(normalOp, unused1, unused2) for (int i = 0; i < num; ++i) normalOp;
#define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) for (int i = 0; i < num; ++i) normalOp;
  127. #endif
  128. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  129. {
  130. #if JUCE_USE_VDSP_FRAMEWORK
  131. vDSP_vclr (dest, 1, (size_t) num);
  132. #else
  133. zeromem (dest, num * sizeof (float));
  134. #endif
  135. }
  136. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  137. {
  138. #if JUCE_USE_VDSP_FRAMEWORK
  139. vDSP_vfill (&valueToFill, dest, 1, (size_t) num);
  140. #else
  141. #if JUCE_USE_SSE_INTRINSICS
  142. const __m128 val = _mm_load1_ps (&valueToFill);
  143. #endif
  144. JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
  145. #endif
  146. }
  147. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  148. {
  149. memcpy (dest, src, (size_t) num * sizeof (float));
  150. }
  151. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  152. {
  153. #if JUCE_USE_VDSP_FRAMEWORK
  154. vDSP_vsmul (src, 1, &multiplier, dest, 1, num);
  155. #else
  156. #if JUCE_USE_SSE_INTRINSICS
  157. const __m128 mult = _mm_load1_ps (&multiplier);
  158. #endif
  159. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  160. _mm_mul_ps (mult, s),
  161. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
  162. #endif
  163. }
  164. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  165. {
  166. #if JUCE_USE_VDSP_FRAMEWORK
  167. vDSP_vadd (src, 1, dest, 1, dest, 1, num);
  168. #else
  169. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i],
  170. _mm_add_ps (d, s),
  171. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  172. #endif
  173. }
  174. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  175. {
  176. #if JUCE_USE_SSE_INTRINSICS
  177. const __m128 amountToAdd = _mm_load1_ps (&amount);
  178. #endif
  179. JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
  180. _mm_add_ps (d, amountToAdd),
  181. JUCE_LOAD_DEST)
  182. }
  183. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  184. {
  185. #if JUCE_USE_SSE_INTRINSICS
  186. const __m128 mult = _mm_load1_ps (&multiplier);
  187. #endif
  188. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
  189. _mm_add_ps (d, _mm_mul_ps (mult, s)),
  190. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  191. }
  192. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  193. {
  194. #if JUCE_USE_VDSP_FRAMEWORK
  195. vDSP_vmul (src, 1, dest, 1, dest, 1, num);
  196. #else
  197. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i],
  198. _mm_mul_ps (d, s),
  199. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  200. #endif
  201. }
  202. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  203. {
  204. #if JUCE_USE_VDSP_FRAMEWORK
  205. vDSP_vsmul (dest, 1, &multiplier, dest, 1, num);
  206. #else
  207. #if JUCE_USE_SSE_INTRINSICS
  208. const __m128 mult = _mm_load1_ps (&multiplier);
  209. #endif
  210. JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
  211. _mm_mul_ps (d, mult),
  212. JUCE_LOAD_DEST)
  213. #endif
  214. }
  215. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  216. {
  217. #if JUCE_USE_SSE_INTRINSICS
  218. const __m128 mult = _mm_load1_ps (&multiplier);
  219. #endif
  220. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  221. _mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  222. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
  223. }
  224. void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
  225. {
  226. #if JUCE_USE_SSE_INTRINSICS
  227. const int numLongOps = num / 4;
  228. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  229. {
  230. __m128 mn, mx;
  231. #define JUCE_MINMAX_SSE_LOOP(loadOp) \
  232. mn = loadOp (src); \
  233. mx = mn; \
  234. src += 4; \
  235. for (int i = 1; i < numLongOps; ++i) \
  236. { \
  237. const __m128 s = loadOp (src); \
  238. mn = _mm_min_ps (mn, s); \
  239. mx = _mm_max_ps (mx, s); \
  240. src += 4; \
  241. }
  242. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINMAX_SSE_LOOP (_mm_load_ps) }
  243. else { JUCE_MINMAX_SSE_LOOP (_mm_loadu_ps) }
  244. float localMin, localMax;
  245. {
  246. float mns[4], mxs[4];
  247. _mm_storeu_ps (mns, mn);
  248. _mm_storeu_ps (mxs, mx);
  249. FloatVectorHelpers::mmEmpty();
  250. localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
  251. localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
  252. }
  253. num &= 3;
  254. for (int i = 0; i < num; ++i)
  255. {
  256. const float s = src[i];
  257. localMin = jmin (localMin, s);
  258. localMax = jmax (localMax, s);
  259. }
  260. minResult = localMin;
  261. maxResult = localMax;
  262. return;
  263. }
  264. #endif
  265. juce::findMinAndMax (src, num, minResult, maxResult);
  266. }
  267. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  268. {
  269. #if JUCE_USE_SSE_INTRINSICS
  270. return FloatVectorHelpers::findMinimumOrMaximum (src, num, true);
  271. #else
  272. return juce::findMinimum (src, num);
  273. #endif
  274. }
  275. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  276. {
  277. #if JUCE_USE_SSE_INTRINSICS
  278. return FloatVectorHelpers::findMinimumOrMaximum (src, num, false);
  279. #else
  280. return juce::findMaximum (src, num);
  281. #endif
  282. }