The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

346 lines
12KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. #if JUCE_USE_SSE_INTRINSICS
  19. namespace FloatVectorHelpers
  20. {
  21. static bool sse2Present = false;
  22. static bool isSSE2Available() noexcept
  23. {
  24. if (sse2Present)
  25. return true;
  26. sse2Present = SystemStats::hasSSE2();
  27. return sse2Present;
  28. }
  29. inline static bool isAligned (const void* p) noexcept
  30. {
  31. return (((pointer_sized_int) p) & 15) == 0;
  32. }
  33. inline static void mmEmpty() noexcept
  34. {
  35. #if ! JUCE_64BIT
  36. _mm_empty();
  37. #endif
  38. }
  39. static inline float findMinimumOrMaximum (const float* src, int num, const bool isMinimum) noexcept
  40. {
  41. #if JUCE_USE_SSE_INTRINSICS
  42. const int numLongOps = num / 4;
  43. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  44. {
  45. __m128 val;
  46. #define JUCE_MINIMUMMAXIMUM_SSE_LOOP(loadOp, minMaxOp) \
  47. val = loadOp (src); \
  48. src += 4; \
  49. for (int i = 1; i < numLongOps; ++i) \
  50. { \
  51. const __m128 s = loadOp (src); \
  52. val = minMaxOp (val, s); \
  53. src += 4; \
  54. }
  55. if (isMinimum)
  56. {
  57. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps, _mm_min_ps) }
  58. else { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps, _mm_min_ps) }
  59. }
  60. else
  61. {
  62. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_load_ps, _mm_max_ps) }
  63. else { JUCE_MINIMUMMAXIMUM_SSE_LOOP (_mm_loadu_ps,_mm_max_ps) }
  64. }
  65. float localVal;
  66. {
  67. float vals[4];
  68. _mm_storeu_ps (vals, val);
  69. FloatVectorHelpers::mmEmpty();
  70. localVal = isMinimum ? jmin (vals[0], vals[1], vals[2], vals[3])
  71. : jmax (vals[0], vals[1], vals[2], vals[3]);
  72. }
  73. num &= 3;
  74. for (int i = 0; i < num; ++i)
  75. localVal = isMinimum ? jmin (localVal, src[i])
  76. : jmax (localVal, src[i]);
  77. return localVal;
  78. }
  79. #endif
  80. return isMinimum ? juce::findMinimum (src, num)
  81. : juce::findMaximum (src, num);
  82. }
  83. }
  /*  Helper macros from which the SSE-accelerated element-wise operations are built.

      They are expanded inside void functions that have a modifiable int `num` and
      a modifiable pointer `dest` in scope (plus `src` for the two-buffer variant).
      The scalar expression passed as `normalOp` may refer to the loop index `i`.
  */

  /*  Opens the SSE section and computes the number of complete groups of four.

      The section is only entered for a positive count. With a negative num the
      remainder mask applied in JUCE_FINISH_SSE_OP (num &= 3) can produce a value
      between 1 and 3, which would make the scalar loop process elements that were
      never requested; the scalar-only build performs no work for such a count.
  */
  #define JUCE_BEGIN_SSE_OP \
      if (num > 0 && FloatVectorHelpers::isSSE2Available()) \
      { \
          const int numLongOps = num / 4;

  /*  Closes the SSE section: reduces num to the count of left-over elements,
      returns if there are none, and otherwise falls through to the scalar loop.
      When the SSE section was skipped, the scalar loop handles all num elements.
  */
  #define JUCE_FINISH_SSE_OP(normalOp) \
          FloatVectorHelpers::mmEmpty(); \
          num &= 3; \
          if (num == 0) return; \
      } \
      for (int i = 0; i < num; ++i) normalOp;

  /*  Runs numLongOps iterations; each declares the requested locals, stores the
      result of sseOp to dest and then advances the pointers.
  */
  #define JUCE_SSE_LOOP(sseOp, srcLoad, dstLoad, dstStore, locals, increment) \
      for (int i = 0; i < numLongOps; ++i) \
      { \
          locals (srcLoad, dstLoad); \
          dstStore (dest, sseOp); \
          increment; \
      }

  // Pointer advances for one group of four elements.
  #define JUCE_INCREMENT_SRC_DEST     dest += 4; src += 4;
  #define JUCE_INCREMENT_DEST         dest += 4;

  // Choices for the `locals` argument: which of `d` (loaded from dest) and `s`
  // (loaded from src) are made available to the sseOp expression.
  #define JUCE_LOAD_NONE(srcLoad, dstLoad)
  #define JUCE_LOAD_DEST(srcLoad, dstLoad)        const __m128 d = dstLoad (dest);
  #define JUCE_LOAD_SRC(srcLoad, dstLoad)         const __m128 s = srcLoad (src);
  #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad)    const __m128 d = dstLoad (dest); const __m128 s = srcLoad (src);

  /*  Applies an operation to dest alone, picking aligned or unaligned
      load/store intrinsics according to the alignment of dest.
  */
  #define JUCE_PERFORM_SSE_OP_DEST(normalOp, sseOp, locals) \
      JUCE_BEGIN_SSE_OP \
        if (FloatVectorHelpers::isAligned (dest))   JUCE_SSE_LOOP (sseOp, dummy, _mm_load_ps,  _mm_store_ps,  locals, JUCE_INCREMENT_DEST) \
        else                                        JUCE_SSE_LOOP (sseOp, dummy, _mm_loadu_ps, _mm_storeu_ps, locals, JUCE_INCREMENT_DEST) \
      JUCE_FINISH_SSE_OP (normalOp)

  /*  Applies an operation that reads src and writes dest, picking aligned or
      unaligned intrinsics independently for each of the two pointers.
  */
  #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
      JUCE_BEGIN_SSE_OP \
        if (FloatVectorHelpers::isAligned (dest)) \
        { \
            if (FloatVectorHelpers::isAligned (src))    JUCE_SSE_LOOP (sseOp, _mm_load_ps,  _mm_load_ps,  _mm_store_ps,  locals, increment) \
            else                                        JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_load_ps,  _mm_store_ps,  locals, increment) \
        } \
        else \
        { \
            if (FloatVectorHelpers::isAligned (src))    JUCE_SSE_LOOP (sseOp, _mm_load_ps,  _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
            else                                        JUCE_SSE_LOOP (sseOp, _mm_loadu_ps, _mm_loadu_ps, _mm_storeu_ps, locals, increment) \
        } \
      JUCE_FINISH_SSE_OP (normalOp)
  125. #else
  /*  Scalar-only versions of the operation macros: the SSE-related arguments are
      accepted but ignored, and the scalar expression is evaluated once for each
      index i in [0, num).
  */
  #define JUCE_PERFORM_SSE_OP_DEST(normalOp, unused1, unused2) \
      for (int i = 0; i < num; ++i) \
          normalOp;

  #define JUCE_PERFORM_SSE_OP_SRC_DEST(normalOp, sseOp, locals, increment) \
      for (int i = 0; i < num; ++i) \
          normalOp;
  128. #endif
  129. void JUCE_CALLTYPE FloatVectorOperations::clear (float* dest, int num) noexcept
  130. {
  131. #if JUCE_USE_VDSP_FRAMEWORK
  132. vDSP_vclr (dest, 1, num);
  133. #else
  134. zeromem (dest, num * sizeof (float));
  135. #endif
  136. }
  137. void JUCE_CALLTYPE FloatVectorOperations::fill (float* dest, float valueToFill, int num) noexcept
  138. {
  139. #if JUCE_USE_VDSP_FRAMEWORK
  140. vDSP_vfill (&valueToFill, dest, 1, num);
  141. #else
  142. #if JUCE_USE_SSE_INTRINSICS
  143. const __m128 val = _mm_load1_ps (&valueToFill);
  144. #endif
  145. JUCE_PERFORM_SSE_OP_DEST (dest[i] = valueToFill, val, JUCE_LOAD_NONE)
  146. #endif
  147. }
  148. void JUCE_CALLTYPE FloatVectorOperations::copy (float* dest, const float* src, int num) noexcept
  149. {
  150. memcpy (dest, src, num * sizeof (float));
  151. }
  152. void JUCE_CALLTYPE FloatVectorOperations::copyWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  153. {
  154. #if JUCE_USE_VDSP_FRAMEWORK
  155. vDSP_vsmul (src, 1, &multiplier, dest, 1, num);
  156. #else
  157. #if JUCE_USE_SSE_INTRINSICS
  158. const __m128 mult = _mm_load1_ps (&multiplier);
  159. #endif
  160. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  161. _mm_mul_ps (mult, s),
  162. JUCE_LOAD_SRC, JUCE_INCREMENT_SRC_DEST)
  163. #endif
  164. }
  165. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, const float* src, int num) noexcept
  166. {
  167. #if JUCE_USE_VDSP_FRAMEWORK
  168. vDSP_vadd (src, 1, dest, 1, dest, 1, num);
  169. #else
  170. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i],
  171. _mm_add_ps (d, s),
  172. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  173. #endif
  174. }
  175. void JUCE_CALLTYPE FloatVectorOperations::add (float* dest, float amount, int num) noexcept
  176. {
  177. #if JUCE_USE_SSE_INTRINSICS
  178. const __m128 amountToAdd = _mm_load1_ps (&amount);
  179. #endif
  180. JUCE_PERFORM_SSE_OP_DEST (dest[i] += amount,
  181. _mm_add_ps (d, amountToAdd),
  182. JUCE_LOAD_DEST)
  183. }
  184. void JUCE_CALLTYPE FloatVectorOperations::addWithMultiply (float* dest, const float* src, float multiplier, int num) noexcept
  185. {
  186. #if JUCE_USE_SSE_INTRINSICS
  187. const __m128 mult = _mm_load1_ps (&multiplier);
  188. #endif
  189. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] += src[i] * multiplier,
  190. _mm_add_ps (d, _mm_mul_ps (mult, s)),
  191. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  192. }
  193. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, const float* src, int num) noexcept
  194. {
  195. #if JUCE_USE_VDSP_FRAMEWORK
  196. vDSP_vmul (src, 1, dest, 1, dest, 1, num);
  197. #else
  198. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] *= src[i],
  199. _mm_mul_ps (d, s),
  200. JUCE_LOAD_SRC_DEST, JUCE_INCREMENT_SRC_DEST)
  201. #endif
  202. }
  203. void JUCE_CALLTYPE FloatVectorOperations::multiply (float* dest, float multiplier, int num) noexcept
  204. {
  205. #if JUCE_USE_VDSP_FRAMEWORK
  206. vDSP_vsmul (dest, 1, &multiplier, dest, 1, num);
  207. #else
  208. #if JUCE_USE_SSE_INTRINSICS
  209. const __m128 mult = _mm_load1_ps (&multiplier);
  210. #endif
  211. JUCE_PERFORM_SSE_OP_DEST (dest[i] *= multiplier,
  212. _mm_mul_ps (d, mult),
  213. JUCE_LOAD_DEST)
  214. #endif
  215. }
  216. void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (float* dest, const int* src, float multiplier, int num) noexcept
  217. {
  218. #if JUCE_USE_SSE_INTRINSICS
  219. const __m128 mult = _mm_load1_ps (&multiplier);
  220. #endif
  221. JUCE_PERFORM_SSE_OP_SRC_DEST (dest[i] = src[i] * multiplier,
  222. _mm_mul_ps (mult, _mm_cvtepi32_ps (_mm_loadu_si128 ((const __m128i*) src))),
  223. JUCE_LOAD_NONE, JUCE_INCREMENT_SRC_DEST)
  224. }
  225. void JUCE_CALLTYPE FloatVectorOperations::findMinAndMax (const float* src, int num, float& minResult, float& maxResult) noexcept
  226. {
  227. #if JUCE_USE_SSE_INTRINSICS
  228. const int numLongOps = num / 4;
  229. if (numLongOps > 1 && FloatVectorHelpers::isSSE2Available())
  230. {
  231. __m128 mn, mx;
  232. #define JUCE_MINMAX_SSE_LOOP(loadOp) \
  233. mn = loadOp (src); \
  234. mx = mn; \
  235. src += 4; \
  236. for (int i = 1; i < numLongOps; ++i) \
  237. { \
  238. const __m128 s = loadOp (src); \
  239. mn = _mm_min_ps (mn, s); \
  240. mx = _mm_max_ps (mx, s); \
  241. src += 4; \
  242. }
  243. if (FloatVectorHelpers::isAligned (src)) { JUCE_MINMAX_SSE_LOOP (_mm_load_ps) }
  244. else { JUCE_MINMAX_SSE_LOOP (_mm_loadu_ps) }
  245. float localMin, localMax;
  246. {
  247. float mns[4], mxs[4];
  248. _mm_storeu_ps (mns, mn);
  249. _mm_storeu_ps (mxs, mx);
  250. FloatVectorHelpers::mmEmpty();
  251. localMin = jmin (mns[0], mns[1], mns[2], mns[3]);
  252. localMax = jmax (mxs[0], mxs[1], mxs[2], mxs[3]);
  253. }
  254. num &= 3;
  255. for (int i = 0; i < num; ++i)
  256. {
  257. const float s = src[i];
  258. localMin = jmin (localMin, s);
  259. localMax = jmax (localMax, s);
  260. }
  261. minResult = localMin;
  262. maxResult = localMax;
  263. return;
  264. }
  265. #endif
  266. juce::findMinAndMax (src, num, minResult, maxResult);
  267. }
  268. float JUCE_CALLTYPE FloatVectorOperations::findMinimum (const float* src, int num) noexcept
  269. {
  270. #if JUCE_USE_SSE_INTRINSICS
  271. return FloatVectorHelpers::findMinimumOrMaximum (src, num, true);
  272. #else
  273. return juce::findMinimum (src, num);
  274. #endif
  275. }
  276. float JUCE_CALLTYPE FloatVectorOperations::findMaximum (const float* src, int num) noexcept
  277. {
  278. #if JUCE_USE_SSE_INTRINSICS
  279. return FloatVectorHelpers::findMinimumOrMaximum (src, num, false);
  280. #else
  281. return juce::findMaximum (src, num);
  282. #endif
  283. }