The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

235 lines
8.3KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. #if JUCE_USE_SSE_INTRINSICS
  19. namespace FloatVectorHelpers
  20. {
  21. static bool sse2Present = false;
  22. static bool isSSE2Available()
  23. {
  24. if (sse2Present)
  25. return true;
  26. sse2Present = SystemStats::hasSSE2();
  27. return sse2Present;
  28. }
  29. inline static bool isAligned (const void* p)
  30. {
  31. return (((pointer_sized_int) p) & 15) == 0;
  32. }
  33. }
  // Opens the vector path: if SSE2 is available, a scope is started and
  // numLongOps is set to the number of complete groups of four in 'num'.
  // The scope is deliberately left open; JUCE_FINISH_SSE_OP closes it.
  // Expects an int variable called 'num' to be visible at the point of use.
  #define JUCE_BEGIN_SSE_OP \
      if (FloatVectorHelpers::isSSE2Available()) \
      { \
          const int numLongOps = num / 4;

  // Closes the scope opened by JUCE_BEGIN_SSE_OP. Inside that scope it calls
  // _mm_empty(), reduces 'num' to its low two bits (the count left over after
  // the groups of four) and returns from the enclosing function if nothing is
  // left. After the scope, scalarOp is run for i = 0 .. num-1; this loop also
  // serves as the complete implementation when the SSE2 branch was not taken.
  #define JUCE_FINISH_SSE_OP(scalarOp) \
          _mm_empty(); \
          num &= 3; \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) scalarOp;
  // Runs numLongOps iterations. Each iteration:
  //   1. expands declareLocals (loadSrc, loadDst) to set up any local vectors,
  //   2. passes 'dest' and the vectorOp expression to storeDst,
  //   3. expands 'advance' to move the pointers on.
  // Expects 'numLongOps' and 'dest' to be visible at the point of use.
  #define JUCE_SSE_LOOP(vectorOp, loadSrc, loadDst, storeDst, declareLocals, advance) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          declareLocals (loadSrc, loadDst); \
          storeDst (dest, vectorOp); \
          advance; \
      }
  // Pointer-advance steps: move 'dest' (and, for the first form, 'src' too)
  // forward by four elements.
  #define JUCE_INCREMENT_SRC_DEST   dest += 4; src += 4;
  #define JUCE_INCREMENT_DEST       dest += 4;

  // Local-declaration steps. Each takes the source and destination load
  // operations and declares whichever of the vectors 'd' (loaded from dest)
  // and 's' (loaded from src) it is named after; JUCE_LOAD_NONE declares nothing.
  #define JUCE_LOAD_NONE(loadSrc, loadDst)
  #define JUCE_LOAD_DEST(loadSrc, loadDst)      const __m128 d = loadDst (dest);
  #define JUCE_LOAD_SRC(loadSrc, loadDst)       const __m128 s = loadSrc (src);
  #define JUCE_LOAD_SRC_DEST(loadSrc, loadDst)  const __m128 d = loadDst (dest); const __m128 s = loadSrc (src);
  // In-place operation on 'dest' only. Between JUCE_BEGIN_SSE_OP and
  // JUCE_FINISH_SSE_OP it runs JUCE_SSE_LOOP with the aligned load/store pair
  // when 'dest' passes FloatVectorHelpers::isAligned, and with the unaligned
  // pair otherwise. 'dummy' fills the source-load slot, which JUCE_LOAD_DEST
  // never expands. scalarOp is handed to JUCE_FINISH_SSE_OP for the scalar loop.
  #define JUCE_PERFORM_SSE_OP_DEST(scalarOp, vectorOp) \
      JUCE_BEGIN_SSE_OP \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          JUCE_SSE_LOOP (vectorOp, dummy, _mm_load_ps, _mm_store_ps, JUCE_LOAD_DEST, JUCE_INCREMENT_DEST) \
      } \
      else \
      { \
          JUCE_SSE_LOOP (vectorOp, dummy, _mm_loadu_ps, _mm_storeu_ps, JUCE_LOAD_DEST, JUCE_INCREMENT_DEST) \
      } \
      JUCE_FINISH_SSE_OP (scalarOp)
  // Operation reading 'src' and writing 'dest'. Between JUCE_BEGIN_SSE_OP and
  // JUCE_FINISH_SSE_OP it picks one of four JUCE_SSE_LOOP variants: the
  // destination load/store pair is chosen by the alignment of 'dest', and the
  // source load by the alignment of 'src'. declareLocals and advance are
  // forwarded to JUCE_SSE_LOOP unchanged; scalarOp goes to JUCE_FINISH_SSE_OP.
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(scalarOp, vectorOp, declareLocals, advance) \
      JUCE_BEGIN_SSE_OP \
      if (FloatVectorHelpers::isAligned (dest)) \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
          { \
              JUCE_SSE_LOOP (vectorOp, _mm_load_ps, _mm_load_ps, _mm_store_ps, declareLocals, advance) \
          } \
          else \
          { \
              JUCE_SSE_LOOP (vectorOp, _mm_loadu_ps, _mm_load_ps, _mm_store_ps, declareLocals, advance) \
          } \
      } \
      else \
      { \
          if (FloatVectorHelpers::isAligned (src)) \
          { \
              JUCE_SSE_LOOP (vectorOp, _mm_load_ps, _mm_loadu_ps, _mm_storeu_ps, declareLocals, advance) \
          } \
          else \
          { \
              JUCE_SSE_LOOP (vectorOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, declareLocals, advance) \
          } \
      } \
      JUCE_FINISH_SSE_OP (scalarOp)
  75. #else
  // Scalar-only forms of the two entry-point macros: every argument except
  // scalarOp is discarded, and scalarOp is run for i = 0 .. num-1.
  #define JUCE_PERFORM_SSE_OP_DEST(scalarOp, unusedVectorOp) \
      for (int i = 0; i < num; ++i) scalarOp;
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(scalarOp, unusedVectorOp, unusedLocals, unusedIncrement) \
      for (int i = 0; i < num; ++i) scalarOp;
  78. #endif
  79. void FloatVectorOperations::clear (float* dest, const int num) noexcept
  80. {
  81. zeromem (dest, num * sizeof (float));
  82. }
  83. void FloatVectorOperations::copy (float* dest, const float* src, const int num) noexcept
  84. {
  85. memcpy (dest, src, num * sizeof (float));
  86. }
  87. void FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  88. {
  89. #if JUCE_USE_SSE_INTRINSICS
  90. const __m128 mult = _mm_load1_ps (&multiplier);
  91. #endif
  92. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  93. _mm_mul_ps (mult, s),
  94. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
  95. }
  96. void FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  97. {
  98. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i],
  99. _mm_add_ps (d, s),
  100. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  101. }
  102. void FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  103. {
  104. #if JUCE_USE_SSE_INTRINSICS
  105. const __m128 amountToAdd = _mm_load1_ps (&amount);
  106. #endif
  107. JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
  108. _mm_add_ps (d, amountToAdd))
  109. }
  110. void FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  111. {
  112. #if JUCE_USE_SSE_INTRINSICS
  113. const __m128 mult = _mm_load1_ps (&multiplier);
  114. #endif
  115. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
  116. _mm_add_ps (d, _mm_mul_ps (mult, s)),
  117. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  118. }
  119. void FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  120. {
  121. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i],
  122. _mm_mul_ps (d, s),
  123. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  124. }
  125. void FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  126. {
  127. #if JUCE_USE_SSE_INTRINSICS
  128. const __m128 mult = _mm_load1_ps (&multiplier);
  129. #endif
  130. JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
  131. _mm_mul_ps (d, mult))
  132. }
  133. void FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  134. {
  135. #if JUCE_USE_SSE_INTRINSICS
  136. const __m128 mult = _mm_load1_ps (&multiplier);
  137. #endif
  138. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  139. _mm_mul_ps (mult, _mm_movelh_ps (_mm_cvt_pi2ps (_mm_setzero_ps(), ((const __m64*) src)[0]),
  140. _mm_cvt_pi2ps (_mm_setzero_ps(), ((const __m64*) src)[1]))),
  141. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
  142. }
  143. void FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
  144. {
  145. #if JUCE_USE_SSE_INTRINSICS
  146. const int numLongOps = num / 4;
  147. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  148. {
  149. __m128 mn, mx;
  150. #define JUCE_MINMAX_SSE_LOOP(loadOp) \
  151. mn = loadOp (src); \
  152. mx = mn; \
  153. src += 4; \
  154. for (int i = 1; i < numLongOps; ++i) \
  155. { \
  156. const __m128 s = loadOp (src); \
  157. mn = _mm_min_ps (mn, s); \
  158. mx = _mm_max_ps (mx, s); \
  159. src += 4; \
  160. }
  161. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINMAX_SSE_LOOP (_mm_load_ps) }
  162. else { JUCE_MINMAX_SSE_LOOP (_mm_loadu_ps) }
  163. float localMin, localMax;
  164. {
  165. float mns[4], mxs[4];
  166. _mm_storeu_ps (mns, mn);
  167. _mm_storeu_ps (mxs, mx);
  168. _mm_empty();
  169. localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
  170. localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
  171. }
  172. num &= 3;
  173. if (num != 0)
  174. {
  175. for (int i = 0; i < num; ++i)
  176. {
  177. const float s = src[i];
  178. localMin = jmin (localMin, s);
  179. localMax = jmax (localMax, s);
  180. }
  181. }
  182. minResult = localMin;
  183. maxResult = localMax;
  184. return;
  185. }
  186. #endif
  187. juce::findMinAndMax (src, num, minResult, maxResult);
  188. }