The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

252 lines
8.7KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. #if JUCE_USE_SSE_INTRINSICS
  19. namespace FloatVectorHelpers
  20. {
  21. static bool sse2Present = false;
  22. static bool isSSE2Available() noexcept
  23. {
  24. if (sse2Present)
  25. return true;
  26. sse2Present = SystemStats::hasSSE2();
  27. return sse2Present;
  28. }
  29. inline static bool isAligned (const void* p) noexcept
  30. {
  31. return (((pointer_sized_int) p) & 15) == 0;
  32. }
  33. inline static void mmEmpty() noexcept
  34. {
  35. #if ! JUCE_64BIT
  36. _mm_empty();
  37. #endif
  38. }
  39. }
  /** Opens the vectorised section of an operation.

      Expands to an *unclosed* `if (SSE2 is available) {` block which declares
      numLongOps, the number of complete groups of four values contained in
      `num`. The block has to be closed by JUCE_FINISH_SSE_OP. A variable named
      `num` must be in scope where the macro is used.
  */
  #define JUCE_BEGIN_SSE_OP \
      if (FloatVectorHelpers::isSSE2Available()) \
      { \
          const int numLongOps = num / 4;

  /** Closes the block opened by JUCE_BEGIN_SSE_OP and emits the scalar loop.

      Inside the SSE block it calls FloatVectorHelpers::mmEmpty(), masks `num`
      down to its two lowest bits (what is left over after the groups of four)
      and returns from the enclosing function if nothing remains. After the
      block, scalarOp is executed for every index i in [0, num) - which is the
      whole of `num` when the SSE block was not entered.

      scalarOp may refer to the loop index by the name `i`.
  */
  #define JUCE_FINISH_SSE_OP(scalarOp) \
          FloatVectorHelpers::mmEmpty(); \
          num &= 3; \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) scalarOp;
  /** Emits the loop that processes numLongOps groups of values.

      On each pass the expansion:
        - invokes declareLocals (loadSrc, loadDest), which may declare the
          values that vectorOp refers to (or expand to nothing);
        - invokes storeDest (dest, vectorOp);
        - runs the `advance` statement(s).

      Variables named `numLongOps` and `dest` must be in scope at the point
      of use.
  */
  #define JUCE_SSE_LOOP(vectorOp, loadSrc, loadDest, storeDest, declareLocals, advance) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          declareLocals (loadSrc, loadDest); \
          storeDest (dest, vectorOp); \
          advance; \
      }
  /** Pointer-advance steps for JUCE_SSE_LOOP: each moves the named pointer(s)
      on by four elements. They expect variables called `dest` (and `src`).
  */
  #define JUCE_INCREMENT_SRC_DEST                 src += 4; dest += 4;
  #define JUCE_INCREMENT_DEST                     dest += 4;

  /** Local-declaration steps for JUCE_SSE_LOOP.

      Each takes the source-load and destination-load operations and declares
      the values the vector expression may use: `s` loaded from `src` and/or
      `d` loaded from `dest`. JUCE_LOAD_NONE expands to nothing.
  */
  #define JUCE_LOAD_NONE(loadSrc, loadDest)
  #define JUCE_LOAD_DEST(loadSrc, loadDest)       const __m128 d = loadDest (dest);
  #define JUCE_LOAD_SRC(loadSrc, loadDest)        const __m128 s = loadSrc (src);
  #define JUCE_LOAD_SRC_DEST(loadSrc, loadDest)   const __m128 d = loadDest (dest); const __m128 s = loadSrc (src);
  /** Builds the whole body of an operation that reads and writes only `dest`.

      When SSE2 is available, groups of four values are processed with
      vectorOp, using the aligned load/store intrinsics if `dest` is 16-byte
      aligned and the unaligned ones otherwise. Whatever is left over (or
      everything, without SSE2) is handled by scalarOp, one index `i` at a time.

      declareLocals is one of the JUCE_LOAD_* macros. The source-load argument
      passed to JUCE_SSE_LOOP is the placeholder token `dummy`; none of the
      JUCE_LOAD_* macros used by the callers in this file expand it.
  */
  #define JUCE_PERFORM_SSE_OP_DEST(scalarOp, vectorOp, declareLocals) \
      JUCE_BEGIN_SSE_OP \
          if (! FloatVectorHelpers::isAligned (dest)) JUCE_SSE_LOOP (vectorOp, dummy, _mm_loadu_ps, _mm_storeu_ps, declareLocals, JUCE_INCREMENT_DEST) \
          else                                        JUCE_SSE_LOOP (vectorOp, dummy, _mm_load_ps,  _mm_store_ps,  declareLocals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_SSE_OP (scalarOp)
  /** Builds the whole body of an operation that reads `src` and writes `dest`.

      When SSE2 is available, groups of four values are processed with
      vectorOp. The alignment of `src` and of `dest` is tested separately and
      each pointer gets the aligned intrinsic when it is 16-byte aligned and
      the unaligned one otherwise (the store always follows `dest`). Whatever
      is left over (or everything, without SSE2) is handled by scalarOp, one
      index `i` at a time.

      declareLocals is one of the JUCE_LOAD_* macros and `advance` one of the
      JUCE_INCREMENT_* macros.
  */
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(scalarOp, vectorOp, declareLocals, advance) \
      JUCE_BEGIN_SSE_OP \
          if (FloatVectorHelpers::isAligned (src)) \
          { \
              if (FloatVectorHelpers::isAligned (dest)) JUCE_SSE_LOOP (vectorOp, _mm_load_ps, _mm_load_ps,  _mm_store_ps,  declareLocals, advance) \
              else                                      JUCE_SSE_LOOP (vectorOp, _mm_load_ps, _mm_loadu_ps, _mm_storeu_ps, declareLocals, advance) \
          } \
          else \
          { \
              if (FloatVectorHelpers::isAligned (dest)) JUCE_SSE_LOOP (vectorOp, _mm_loadu_ps, _mm_load_ps,  _mm_store_ps,  declareLocals, advance) \
              else                                      JUCE_SSE_LOOP (vectorOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, declareLocals, advance) \
          } \
      JUCE_FINISH_SSE_OP (scalarOp)
  81. #else
  /** Scalar fallbacks used when JUCE_USE_SSE_INTRINSICS is not set.

      Both macros accept the same argument lists as their SSE counterparts so
      that call sites compile unchanged, but only normalOp is used: it is run
      once for every index i in [0, num). The remaining arguments are never
      expanded, so they may name things that do not exist in this build.

      JUCE_PERFORM_SSE_OP_DEST takes three parameters because every caller
      passes (normalOp, sseOp, locals); a two-parameter definition would be
      rejected by the preprocessor at each call site.
  */
  #define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals)                for (int i = 0; i < num; ++i) normalOp;
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) for (int i = 0; i < num; ++i) normalOp;
  84. #endif
  85. void FloatVectorOperations::clear (float* dest, const int num) noexcept
  86. {
  87. zeromem (dest, num * sizeof (float));
  88. }
  89. void FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  90. {
  91. #if JUCE_USE_SSE_INTRINSICS
  92. const __m128 val = _mm_load1_ps (&valueToFill);
  93. #endif
  94. JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
  95. }
  96. void FloatVectorOperations::copy (float* dest, const float* src, const int num) noexcept
  97. {
  98. memcpy (dest, src, num * sizeof (float));
  99. }
  100. void FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  101. {
  102. #if JUCE_USE_SSE_INTRINSICS
  103. const __m128 mult = _mm_load1_ps (&multiplier);
  104. #endif
  105. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  106. _mm_mul_ps (mult, s),
  107. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
  108. }
  109. void FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  110. {
  111. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i],
  112. _mm_add_ps (d, s),
  113. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  114. }
  115. void FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  116. {
  117. #if JUCE_USE_SSE_INTRINSICS
  118. const __m128 amountToAdd = _mm_load1_ps (&amount);
  119. #endif
  120. JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
  121. _mm_add_ps (d, amountToAdd),
  122. JUCE_LOAD_DEST)
  123. }
  124. void FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  125. {
  126. #if JUCE_USE_SSE_INTRINSICS
  127. const __m128 mult = _mm_load1_ps (&multiplier);
  128. #endif
  129. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
  130. _mm_add_ps (d, _mm_mul_ps (mult, s)),
  131. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  132. }
  133. void FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  134. {
  135. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i],
  136. _mm_mul_ps (d, s),
  137. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  138. }
  139. void FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  140. {
  141. #if JUCE_USE_SSE_INTRINSICS
  142. const __m128 mult = _mm_load1_ps (&multiplier);
  143. #endif
  144. JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
  145. _mm_mul_ps (d, mult),
  146. JUCE_LOAD_DEST)
  147. }
  148. void FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  149. {
  150. #if JUCE_USE_SSE_INTRINSICS
  151. const __m128 mult = _mm_load1_ps (&multiplier);
  152. #endif
  153. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  154. _mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  155. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
  156. }
  157. void FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
  158. {
  159. #if JUCE_USE_SSE_INTRINSICS
  160. const int numLongOps = num / 4;
  161. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  162. {
  163. __m128 mn, mx;
  164. #define JUCE_MINMAX_SSE_LOOP(loadOp) \
  165. mn = loadOp (src); \
  166. mx = mn; \
  167. src += 4; \
  168. for (int i = 1; i < numLongOps; ++i) \
  169. { \
  170. const __m128 s = loadOp (src); \
  171. mn = _mm_min_ps (mn, s); \
  172. mx = _mm_max_ps (mx, s); \
  173. src += 4; \
  174. }
  175. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINMAX_SSE_LOOP (_mm_load_ps) }
  176. else { JUCE_MINMAX_SSE_LOOP (_mm_loadu_ps) }
  177. float localMin, localMax;
  178. {
  179. float mns[4], mxs[4];
  180. _mm_storeu_ps (mns, mn);
  181. _mm_storeu_ps (mxs, mx);
  182. FloatVectorHelpers::mmEmpty();
  183. localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
  184. localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
  185. }
  186. num &= 3;
  187. if (num != 0)
  188. {
  189. for (int i = 0; i < num; ++i)
  190. {
  191. const float s = src[i];
  192. localMin = jmin (localMin, s);
  193. localMax = jmax (localMax, s);
  194. }
  195. }
  196. minResult = localMin;
  197. maxResult = localMax;
  198. return;
  199. }
  200. #endif
  201. juce::findMinAndMax (src, num, minResult, maxResult);
  202. }