The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

276 lines
9.2KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. #if JUCE_USE_SSE_INTRINSICS
  19. namespace FloatVectorHelpers
  20. {
  21. static bool sse2Present = false;
  22. static bool isSSE2Available() noexcept
  23. {
  24. if (sse2Present)
  25. return true;
  26. sse2Present = SystemStats::hasSSE2();
  27. return sse2Present;
  28. }
  29. inline static bool isAligned (const void* p) noexcept
  30. {
  31. return (((pointer_sized_int) p) & 15) == 0;
  32. }
  33. inline static void mmEmpty() noexcept
  34. {
  35. #if ! JUCE_64BIT
  36. _mm_empty();
  37. #endif
  38. }
  39. }
  // Opens the SSE fast path. The statements that follow this macro only run when
  // FloatVectorHelpers::isSSE2Available() returns true. An int called `num` must
  // be in scope; `numLongOps` (num / 4) is declared for use by JUCE_SSE_LOOP.
  // The brace opened here is closed inside JUCE_FINISH_SSE_OP.
  #define JUCE_BEGIN_SSE_OP \
  if (FloatVectorHelpers::isSSE2Available()) \
  { \
  const int numLongOps = num / 4;
  // Ends the SSE fast path and then handles whatever is left with scalar code.
  // Inside the SSE branch it calls mmEmpty(), reduces `num` to `num & 3` and
  // returns from the enclosing function when nothing remains - so this macro can
  // only be used in functions returning void, with a modifiable `num`.
  // After the closing brace, `normalOp` is executed `num` times with the loop
  // index named `i`; when the SSE branch was skipped that is the full count.
  #define JUCE_FINISH_SSE_OP(normalOp) \
  FloatVectorHelpers::mmEmpty(); \
  num &= 3; \
  if (num == 0) return; \
  } \
  for (int i = 0; i < num; ++i) normalOp;
  // Runs `numLongOps` iterations. Each iteration expands `locals (srcLoad, dstLoad)`
  // to declare any temporaries, writes the value of `sseOp` to `dest` through
  // `dstStore`, and then expands `increment` to advance the pointers.
  #define JUCE_SSE_LOOP(sseOp, srcLoad, dstLoad, dstStore, locals, increment) \
  for (int i = 0; i < numLongOps; ++i) \
  { \
  locals (srcLoad, dstLoad); \
  dstStore (dest, sseOp); \
  increment; \
  }
  // Pointer-advance snippets for the `increment` argument of JUCE_SSE_LOOP:
  // each step moves on by four elements.
  #define JUCE_INCREMENT_SRC_DEST dest += 4; src += 4;
  #define JUCE_INCREMENT_DEST dest += 4;
  // Choices for the `locals` argument of JUCE_SSE_LOOP. They declare the __m128
  // temporaries that an sseOp expression may refer to: `d` is loaded from `dest`
  // with dstLoad, `s` is loaded from `src` with srcLoad. JUCE_LOAD_NONE declares
  // nothing and ignores both loaders.
  #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  #define JUCE_LOAD_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest);
  #define JUCE_LOAD_SRC(srcLoad, dstLoad) const __m128 s = srcLoad (src);
  #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);
  // Applies an operation in place to `dest` (requires `dest` and a modifiable int
  // `num` in the enclosing void function).
  //   normalOp - scalar statement for one element, indexed with `i`
  //   sseOp    - __m128 expression producing four results
  //   locals   - one of the JUCE_LOAD_* macros; it must not use its srcLoad
  //              argument, because the placeholder `dummy` is passed for it
  // Aligned load/store intrinsics are used when `dest` passes isAligned(),
  // otherwise the unaligned variants are used.
  #define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) \
  JUCE_BEGIN_SSE_OP \
  if (FloatVectorHelpers::isAligned (dest)) JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps, _mm_store_ps, locals, JUCE_INCREMENT_DEST) \
  else JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
  JUCE_FINISH_SSE_OP (normalOp)
  // Applies an operation that reads `src` and writes `dest` (requires `dest`,
  // `src` and a modifiable int `num` in the enclosing void function).
  // The alignment of `dest` and of `src` is tested independently, giving four
  // combinations of aligned/unaligned load and store intrinsics.
  // `increment` is the pointer-advance snippet run after each group of four.
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
  JUCE_BEGIN_SSE_OP \
  if (FloatVectorHelpers::isAligned (dest)) \
  { \
  if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
  else JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_load_ps, _mm_store_ps, locals, increment) \
  }\
  else \
  { \
  if (FloatVectorHelpers::isAligned (src)) JUCE_SSE_LOOP (sseOp, _mm_load_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
  else JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
  } \
  JUCE_FINISH_SSE_OP (normalOp)
  81. #else
  // Scalar fallbacks used when SSE intrinsics are compiled out.
  // Both macros take the same number of arguments as their SSE counterparts so
  // that every call site compiles in either configuration; only `normalOp` is
  // used, and it is run `num` times with the loop index named `i`.
  // (The in-place variant used to be declared with two parameters while being
  // invoked with three, which broke non-SSE builds.)
  #define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) for (int i = 0; i < num; ++i) normalOp;
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) for (int i = 0; i < num; ++i) normalOp;
  84. #endif
  85. void FloatVectorOperations::clear (float* dest, const int num) noexcept
  86. {
  87. #if JUCE_USE_VDSP_FRAMEWORK
  88. vDSP_vclr (dest, 1, num);
  89. #else
  90. zeromem (dest, num * sizeof (float));
  91. #endif
  92. }
  93. void FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  94. {
  95. #if JUCE_USE_VDSP_FRAMEWORK
  96. vDSP_vfill (&valueToFill, dest, 1, num);
  97. #else
  98. #if JUCE_USE_SSE_INTRINSICS
  99. const __m128 val = _mm_load1_ps (&valueToFill);
  100. #endif
  101. JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
  102. #endif
  103. }
  104. void FloatVectorOperations::copy (float* dest, const float* src, const int num) noexcept
  105. {
  106. memcpy (dest, src, num * sizeof (float));
  107. }
  108. void FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  109. {
  110. #if JUCE_USE_VDSP_FRAMEWORK
  111. vDSP_vsmul (src, 1, &multiplier, dest, 1, num);
  112. #else
  113. #if JUCE_USE_SSE_INTRINSICS
  114. const __m128 mult = _mm_load1_ps (&multiplier);
  115. #endif
  116. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  117. _mm_mul_ps (mult, s),
  118. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
  119. #endif
  120. }
  121. void FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  122. {
  123. #if JUCE_USE_VDSP_FRAMEWORK
  124. vDSP_vadd (src, 1, dest, 1, dest, 1, num);
  125. #else
  126. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i],
  127. _mm_add_ps (d, s),
  128. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  129. #endif
  130. }
  131. void FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  132. {
  133. #if JUCE_USE_SSE_INTRINSICS
  134. const __m128 amountToAdd = _mm_load1_ps (&amount);
  135. #endif
  136. JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
  137. _mm_add_ps (d, amountToAdd),
  138. JUCE_LOAD_DEST)
  139. }
  140. void FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  141. {
  142. #if JUCE_USE_SSE_INTRINSICS
  143. const __m128 mult = _mm_load1_ps (&multiplier);
  144. #endif
  145. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
  146. _mm_add_ps (d, _mm_mul_ps (mult, s)),
  147. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  148. }
  149. void FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  150. {
  151. #if JUCE_USE_VDSP_FRAMEWORK
  152. vDSP_vmul (src, 1, dest, 1, dest, 1, num);
  153. #else
  154. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i],
  155. _mm_mul_ps (d, s),
  156. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  157. #endif
  158. }
  159. void FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  160. {
  161. #if JUCE_USE_VDSP_FRAMEWORK
  162. vDSP_vsmul (dest, 1, &multiplier, dest, 1, num);
  163. #else
  164. #if JUCE_USE_SSE_INTRINSICS
  165. const __m128 mult = _mm_load1_ps (&multiplier);
  166. #endif
  167. JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
  168. _mm_mul_ps (d, mult),
  169. JUCE_LOAD_DEST)
  170. #endif
  171. }
  172. void FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  173. {
  174. #if JUCE_USE_SSE_INTRINSICS
  175. const __m128 mult = _mm_load1_ps (&multiplier);
  176. #endif
  177. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  178. _mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  179. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
  180. }
  181. void FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
  182. {
  183. #if JUCE_USE_SSE_INTRINSICS
  184. const int numLongOps = num / 4;
  185. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  186. {
  187. __m128 mn, mx;
  188. #define JUCE_MINMAX_SSE_LOOP(loadOp) \
  189. mn = loadOp (src); \
  190. mx = mn; \
  191. src += 4; \
  192. for (int i = 1; i < numLongOps; ++i) \
  193. { \
  194. const __m128 s = loadOp (src); \
  195. mn = _mm_min_ps (mn, s); \
  196. mx = _mm_max_ps (mx, s); \
  197. src += 4; \
  198. }
  199. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINMAX_SSE_LOOP (_mm_load_ps) }
  200. else { JUCE_MINMAX_SSE_LOOP (_mm_loadu_ps) }
  201. float localMin, localMax;
  202. {
  203. float mns[4], mxs[4];
  204. _mm_storeu_ps (mns, mn);
  205. _mm_storeu_ps (mxs, mx);
  206. FloatVectorHelpers::mmEmpty();
  207. localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
  208. localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
  209. }
  210. num &= 3;
  211. if (num != 0)
  212. {
  213. for (int i = 0; i < num; ++i)
  214. {
  215. const float s = src[i];
  216. localMin = jmin (localMin, s);
  217. localMax = jmax (localMax, s);
  218. }
  219. }
  220. minResult = localMin;
  221. maxResult = localMax;
  222. return;
  223. }
  224. #endif
  225. juce::findMinAndMax (src, num, minResult, maxResult);
  226. }