/*
  ==============================================================================

   This file is part of the JUCE library.
   Copyright (c) 2020 - Raw Material Software Limited

   JUCE is an open source library subject to commercial or open-source
   licensing.

   By using JUCE, you agree to the terms of both the JUCE 6 End-User License
   Agreement and JUCE Privacy Policy (both effective as of the 16th June 2020).

   End User License Agreement: www.juce.com/juce-6-licence
   Privacy Policy: www.juce.com/juce-privacy-policy

   Or: You may also use this code under the terms of the GPL v3 (see
   www.gnu.org/licenses).

   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
   DISCLAIMED.

  ==============================================================================
*/
namespace juce
{
namespace dsp
{

struct FFT::Instance
{
    virtual ~Instance() = default;
    virtual void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept = 0;
    virtual void performRealOnlyForwardTransform (float*, bool) const noexcept = 0;
    virtual void performRealOnlyInverseTransform (float*) const noexcept = 0;
};

struct FFT::Engine
{
    Engine (int priorityToUse) : enginePriority (priorityToUse)
    {
        auto& list = getEngines();
        list.add (this);
        std::sort (list.begin(), list.end(), [] (Engine* a, Engine* b) { return b->enginePriority < a->enginePriority; });
    }

    virtual ~Engine() = default;

    virtual FFT::Instance* create (int order) const = 0;

    //==============================================================================
    static FFT::Instance* createBestEngineForPlatform (int order)
    {
        for (auto* engine : getEngines())
            if (auto* instance = engine->create (order))
                return instance;

        jassertfalse; // This should never happen as the fallback engine should always work!
        return nullptr;
    }

private:
    static Array<Engine*>& getEngines()
    {
        static Array<Engine*> engines;
        return engines;
    }

    int enginePriority; // used so that faster engines have priority over slower ones
};

template <typename InstanceToUse>
struct FFT::EngineImpl : public FFT::Engine
{
    EngineImpl() : FFT::Engine (InstanceToUse::priority) {}
    FFT::Instance* create (int order) const override { return InstanceToUse::create (order); }
};
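
/*  Each backend below follows the same pattern: an FFT::Instance subclass with a
    static `priority` constant and a static `create (int order)` that may return
    nullptr when the backend is unavailable, registered by a file-scope
    FFT::EngineImpl<...> object whose constructor adds it to the engine list.
    A minimal sketch of what an additional backend would look like (MyFFT and
    myFFTEngine are illustrative names only, not part of JUCE):

        struct MyFFT : public FFT::Instance
        {
            static constexpr int priority = 4;             // tried before lower-priority engines
            static MyFFT* create (int order)               { return order <= 16 ? new MyFFT() : nullptr; }

            void perform (const Complex<float>*, Complex<float>*, bool) const noexcept override  {}
            void performRealOnlyForwardTransform (float*, bool) const noexcept override          {}
            void performRealOnlyInverseTransform (float*) const noexcept override                {}
        };

        static FFT::EngineImpl<MyFFT> myFFTEngine;         // registration happens in the Engine constructor
*/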
//==============================================================================
//==============================================================================
struct FFTFallback : public FFT::Instance
{
    // this should have the least priority of all engines
    static constexpr int priority = -1;

    static FFTFallback* create (int order)
    {
        return new FFTFallback (order);
    }

    FFTFallback (int order)
    {
        configForward.reset (new FFTConfig (1 << order, false));
        configInverse.reset (new FFTConfig (1 << order, true));

        size = 1 << order;
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        if (size == 1)
        {
            *output = *input;
            return;
        }

        const SpinLock::ScopedLockType sl (processLock);

        jassert (configForward != nullptr);

        if (inverse)
        {
            configInverse->perform (input, output);

            const float scaleFactor = 1.0f / (float) size;

            for (int i = 0; i < size; ++i)
                output[i] *= scaleFactor;
        }
        else
        {
            configForward->perform (input, output);
        }
    }

    const size_t maxFFTScratchSpaceToAlloca = 256 * 1024;

    void performRealOnlyForwardTransform (float* d, bool) const noexcept override
    {
        if (size == 1)
            return;

        const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);

        if (scratchSize < maxFFTScratchSpaceToAlloca)
        {
            performRealOnlyForwardTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
        }
        else
        {
            HeapBlock<char> heapSpace (scratchSize);
            performRealOnlyForwardTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
        }
    }

    void performRealOnlyInverseTransform (float* d) const noexcept override
    {
        if (size == 1)
            return;

        const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);

        if (scratchSize < maxFFTScratchSpaceToAlloca)
        {
            performRealOnlyInverseTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
        }
        else
        {
            HeapBlock<char> heapSpace (scratchSize);
            performRealOnlyInverseTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
        }
    }

    void performRealOnlyForwardTransform (Complex<float>* scratch, float* d) const noexcept
    {
        for (int i = 0; i < size; ++i)
            scratch[i] = { d[i], 0 };

        perform (scratch, reinterpret_cast<Complex<float>*> (d), false);
    }

    void performRealOnlyInverseTransform (Complex<float>* scratch, float* d) const noexcept
    {
        auto* input = reinterpret_cast<Complex<float>*> (d);

        for (int i = size >> 1; i < size; ++i)
            input[i] = std::conj (input[size - i]);

        perform (input, scratch, true);

        for (int i = 0; i < size; ++i)
        {
            d[i]        = scratch[i].real();
            d[i + size] = scratch[i].imag();
        }
    }

    //==============================================================================
    struct FFTConfig
    {
        FFTConfig (int sizeOfFFT, bool isInverse)
            : fftSize (sizeOfFFT), inverse (isInverse), twiddleTable ((size_t) sizeOfFFT)
        {
            auto inverseFactor = (inverse ? 2.0 : -2.0) * MathConstants<double>::pi / (double) fftSize;

            if (fftSize <= 4)
            {
                for (int i = 0; i < fftSize; ++i)
                {
                    auto phase = i * inverseFactor;

                    twiddleTable[i] = { (float) std::cos (phase),
                                        (float) std::sin (phase) };
                }
            }
            else
            {
                for (int i = 0; i < fftSize / 4; ++i)
                {
                    auto phase = i * inverseFactor;

                    twiddleTable[i] = { (float) std::cos (phase),
                                        (float) std::sin (phase) };
                }

                for (int i = fftSize / 4; i < fftSize / 2; ++i)
                {
                    auto other = twiddleTable[i - fftSize / 4];

                    twiddleTable[i] = { inverse ? -other.imag() :  other.imag(),
                                        inverse ?  other.real() : -other.real() };
                }

                twiddleTable[fftSize / 2].real (-1.0f);
                twiddleTable[fftSize / 2].imag (0.0f);

                for (int i = fftSize / 2; i < fftSize; ++i)
                {
                    auto index = fftSize / 2 - (i - fftSize / 2);
                    twiddleTable[i] = conj (twiddleTable[index]);
                }
            }

            auto root = (int) std::sqrt ((double) fftSize);
            int divisor = 4, n = fftSize;

            for (int i = 0; i < numElementsInArray (factors); ++i)
            {
                while ((n % divisor) != 0)
                {
                    if (divisor == 2)       divisor = 3;
                    else if (divisor == 4)  divisor = 2;
                    else                    divisor += 2;

                    if (divisor > root)
                        divisor = n;
                }

                n /= divisor;

                jassert (divisor == 1 || divisor == 2 || divisor == 4);
                factors[i].radix = divisor;
                factors[i].length = n;
            }
        }

        void perform (const Complex<float>* input, Complex<float>* output) const noexcept
        {
            perform (input, output, 1, 1, factors);
        }

        const int fftSize;
        const bool inverse;

        struct Factor { int radix, length; };
        Factor factors[32];

        HeapBlock<Complex<float>> twiddleTable;

        void perform (const Complex<float>* input, Complex<float>* output, int stride, int strideIn, const Factor* facs) const noexcept
        {
            auto factor = *facs++;
            auto* originalOutput = output;
            auto* outputEnd = output + factor.radix * factor.length;

            if (stride == 1 && factor.radix <= 5)
            {
                for (int i = 0; i < factor.radix; ++i)
                    perform (input + stride * strideIn * i, output + i * factor.length, stride * factor.radix, strideIn, facs);

                butterfly (factor, output, stride);
                return;
            }

            if (factor.length == 1)
            {
                do
                {
                    *output++ = *input;
                    input += stride * strideIn;
                }
                while (output < outputEnd);
            }
            else
            {
                do
                {
                    perform (input, output, stride * factor.radix, strideIn, facs);
                    input += stride * strideIn;
                    output += factor.length;
                }
                while (output < outputEnd);
            }

            butterfly (factor, originalOutput, stride);
        }

        void butterfly (const Factor factor, Complex<float>* data, int stride) const noexcept
        {
            switch (factor.radix)
            {
                case 1:   break;
                case 2:   butterfly2 (data, stride, factor.length); return;
                case 4:   butterfly4 (data, stride, factor.length); return;
                default:  jassertfalse; break;
            }

            auto* scratch = static_cast<Complex<float>*> (alloca ((size_t) factor.radix * sizeof (Complex<float>)));

            for (int i = 0; i < factor.length; ++i)
            {
                for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
                {
                    scratch[q1] = data[k];
                    k += factor.length;
                }

                for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
                {
                    int twiddleIndex = 0;
                    data[k] = scratch[0];

                    for (int q = 1; q < factor.radix; ++q)
                    {
                        twiddleIndex += stride * k;

                        if (twiddleIndex >= fftSize)
                            twiddleIndex -= fftSize;

                        data[k] += scratch[q] * twiddleTable[twiddleIndex];
                    }

                    k += factor.length;
                }
            }
        }

        void butterfly2 (Complex<float>* data, const int stride, const int length) const noexcept
        {
            auto* dataEnd = data + length;
            auto* tw = twiddleTable.getData();

            for (int i = length; --i >= 0;)
            {
                auto s = *dataEnd;
                s *= (*tw);
                tw += stride;
                *dataEnd++ = *data - s;
                *data++ += s;
            }
        }

        void butterfly4 (Complex<float>* data, const int stride, const int length) const noexcept
        {
            auto lengthX2 = length * 2;
            auto lengthX3 = length * 3;

            auto strideX2 = stride * 2;
            auto strideX3 = stride * 3;

            auto* twiddle1 = twiddleTable.getData();
            auto* twiddle2 = twiddle1;
            auto* twiddle3 = twiddle1;

            for (int i = length; --i >= 0;)
            {
                auto s0 = data[length]   * *twiddle1;
                auto s1 = data[lengthX2] * *twiddle2;
                auto s2 = data[lengthX3] * *twiddle3;
                auto s3 = s0;             s3 += s2;
                auto s4 = s0;             s4 -= s2;
                auto s5 = *data;          s5 -= s1;

                *data += s1;
                data[lengthX2] = *data;
                data[lengthX2] -= s3;

                twiddle1 += stride;
                twiddle2 += strideX2;
                twiddle3 += strideX3;

                *data += s3;

                if (inverse)
                {
                    data[length] = { s5.real() - s4.imag(),
                                     s5.imag() + s4.real() };

                    data[lengthX3] = { s5.real() + s4.imag(),
                                       s5.imag() - s4.real() };
                }
                else
                {
                    data[length] = { s5.real() + s4.imag(),
                                     s5.imag() - s4.real() };

                    data[lengthX3] = { s5.real() - s4.imag(),
                                       s5.imag() + s4.real() };
                }

                ++data;
            }
        }

        JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (FFTConfig)
    };

    //==============================================================================
    SpinLock processLock;
    std::unique_ptr<FFTConfig> configForward, configInverse;
    int size;
};

FFT::EngineImpl<FFTFallback> fftFallback;
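
/*  With only the fallback registered, the public juce::dsp::FFT wrapper defined at
    the end of this file already works on every platform. A minimal complex
    round-trip sketch (buffer names are arbitrary):

        juce::dsp::FFT fft (9);                                     // 2^9 = 512-point transform
        std::vector<juce::dsp::Complex<float>> time (512), freq (512);

        fft.perform (time.data(), freq.data(), false);              // forward
        fft.perform (freq.data(), time.data(), true);               // inverse; engines scale by 1/N
*/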
//==============================================================================
//==============================================================================
#if (JUCE_MAC || JUCE_IOS) && JUCE_USE_VDSP_FRAMEWORK
struct AppleFFT : public FFT::Instance
{
    static constexpr int priority = 5;

    static AppleFFT* create (int order)
    {
        return new AppleFFT (order);
    }

    AppleFFT (int orderToUse)
        : order (static_cast<vDSP_Length> (orderToUse)),
          fftSetup (vDSP_create_fftsetup (order, 2)),
          forwardNormalisation (0.5f),
          inverseNormalisation (1.0f / static_cast<float> (1 << order))
    {}

    ~AppleFFT() override
    {
        if (fftSetup != nullptr)
        {
            vDSP_destroy_fftsetup (fftSetup);
            fftSetup = nullptr;
        }
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        auto size = (1 << order);

        DSPSplitComplex splitInput  (toSplitComplex (const_cast<Complex<float>*> (input)));
        DSPSplitComplex splitOutput (toSplitComplex (output));

        vDSP_fft_zop (fftSetup, &splitInput, 2, &splitOutput, 2,
                      order, inverse ? kFFTDirection_Inverse : kFFTDirection_Forward);

        float factor = (inverse ? inverseNormalisation : forwardNormalisation * 2.0f);
        vDSP_vsmul ((float*) output, 1, &factor, (float*) output, 1, static_cast<size_t> (size << 1));
    }

    void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
    {
        auto size = (1 << order);
        auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
        auto splitInOut (toSplitComplex (inout));

        inoutData[size] = 0.0f;
        vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Forward);
        vDSP_vsmul (inoutData, 1, &forwardNormalisation, inoutData, 1, static_cast<size_t> (size << 1));

        mirrorResult (inout, ignoreNegativeFreqs);
    }

    void performRealOnlyInverseTransform (float* inoutData) const noexcept override
    {
        auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
        auto size = (1 << order);
        auto splitInOut (toSplitComplex (inout));

        // Imaginary part of nyquist and DC frequencies are always zero
        // so Apple uses the imaginary part of the DC frequency to store
        // the real part of the nyquist frequency
        if (size != 1)
            inout[0] = Complex<float> (inout[0].real(), inout[size >> 1].real());

        vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Inverse);
        vDSP_vsmul (inoutData, 1, &inverseNormalisation, inoutData, 1, static_cast<size_t> (size << 1));
        vDSP_vclr  (inoutData + size, 1, static_cast<size_t> (size));
    }

private:
    //==============================================================================
    void mirrorResult (Complex<float>* out, bool ignoreNegativeFreqs) const noexcept
    {
        auto size = (1 << order);
        auto i = size >> 1;

        // Imaginary part of nyquist and DC frequencies are always zero
        // so Apple uses the imaginary part of the DC frequency to store
        // the real part of the nyquist frequency
        out[i++] = { out[0].imag(), 0.0 };
        out[0]   = { out[0].real(), 0.0 };

        if (! ignoreNegativeFreqs)
            for (; i < size; ++i)
                out[i] = std::conj (out[size - i]);
    }

    static DSPSplitComplex toSplitComplex (Complex<float>* data) noexcept
    {
        // this assumes that Complex interleaves real and imaginary parts
        // and is tightly packed.
        return { reinterpret_cast<float*> (data),
                 reinterpret_cast<float*> (data) + 1};
    }

    //==============================================================================
    vDSP_Length order;
    FFTSetup fftSetup;
    float forwardNormalisation, inverseNormalisation;
};

FFT::EngineImpl<AppleFFT> appleFFT;
#endif
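
/*  All of the real-only entry points work in place on a single float array that
    holds 2 * size floats: the first half carries the real input, and after the
    forward transform the array holds the complex bins as interleaved
    real/imaginary pairs (the mirrored negative-frequency bins are filled in
    unless the caller asks to skip them). A sketch using the public wrapper
    (variable names are arbitrary):

        juce::dsp::FFT fft (10);                                    // 1024-point transform
        std::vector<float> data (2 * (size_t) fft.getSize(), 0.0f);
        // ... fill data[0 .. 1023] with samples ...

        fft.performRealOnlyForwardTransform (data.data(), false);   // false: also fill negative-frequency bins
        fft.performRealOnlyInverseTransform (data.data());          // back to the time domain
*/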
//==============================================================================
//==============================================================================
#if JUCE_DSP_USE_SHARED_FFTW || JUCE_DSP_USE_STATIC_FFTW

#if JUCE_DSP_USE_STATIC_FFTW
extern "C"
{
    void* fftwf_plan_dft_1d     (int, void*, void*, int, int);
    void* fftwf_plan_dft_r2c_1d (int, void*, void*, int);
    void* fftwf_plan_dft_c2r_1d (int, void*, void*, int);
    void  fftwf_destroy_plan    (void*);
    void  fftwf_execute_dft     (void*, void*, void*);
    void  fftwf_execute_dft_r2c (void*, void*, void*);
    void  fftwf_execute_dft_c2r (void*, void*, void*);
}
#endif

struct FFTWImpl : public FFT::Instance
{
   #if JUCE_DSP_USE_STATIC_FFTW
    // if the JUCE developer has gone through the hassle of statically
    // linking in fftw, they probably want to use it
    static constexpr int priority = 10;
   #else
    static constexpr int priority = 3;
   #endif

    struct FFTWPlan;
    using FFTWPlanRef = FFTWPlan*;

    enum
    {
        measure   = 0,
        unaligned = (1 << 1),
        estimate  = (1 << 6)
    };

    struct Symbols
    {
        FFTWPlanRef (*plan_dft_fftw) (unsigned, Complex<float>*, Complex<float>*, int, unsigned);
        FFTWPlanRef (*plan_r2c_fftw) (unsigned, float*, Complex<float>*, unsigned);
        FFTWPlanRef (*plan_c2r_fftw) (unsigned, Complex<float>*, float*, unsigned);
        void (*destroy_fftw) (FFTWPlanRef);
        void (*execute_dft_fftw) (FFTWPlanRef, const Complex<float>*, Complex<float>*);
        void (*execute_r2c_fftw) (FFTWPlanRef, float*, Complex<float>*);
        void (*execute_c2r_fftw) (FFTWPlanRef, Complex<float>*, float*);

       #if JUCE_DSP_USE_STATIC_FFTW
        template <typename FuncPtr, typename ActualSymbolType>
        static bool symbol (FuncPtr& dst, ActualSymbolType sym)
        {
            dst = reinterpret_cast<FuncPtr> (sym);
            return true;
        }
       #else
        template <typename FuncPtr>
        static bool symbol (DynamicLibrary& lib, FuncPtr& dst, const char* name)
        {
            dst = reinterpret_cast<FuncPtr> (lib.getFunction (name));
            return (dst != nullptr);
        }
       #endif
    };

    static FFTWImpl* create (int order)
    {
        DynamicLibrary lib;

       #if ! JUCE_DSP_USE_STATIC_FFTW
        #if JUCE_MAC
         auto libName = "libfftw3f.dylib";
        #elif JUCE_WINDOWS
         auto libName = "libfftw3f.dll";
        #else
         auto libName = "libfftw3f.so";
        #endif

        if (lib.open (libName))
       #endif
        {
            Symbols symbols;

           #if JUCE_DSP_USE_STATIC_FFTW
            if (! Symbols::symbol (symbols.plan_dft_fftw, fftwf_plan_dft_1d)) return nullptr;
            if (! Symbols::symbol (symbols.plan_r2c_fftw, fftwf_plan_dft_r2c_1d)) return nullptr;
            if (! Symbols::symbol (symbols.plan_c2r_fftw, fftwf_plan_dft_c2r_1d)) return nullptr;

            if (! Symbols::symbol (symbols.destroy_fftw, fftwf_destroy_plan)) return nullptr;
            if (! Symbols::symbol (symbols.execute_dft_fftw, fftwf_execute_dft)) return nullptr;
            if (! Symbols::symbol (symbols.execute_r2c_fftw, fftwf_execute_dft_r2c)) return nullptr;
            if (! Symbols::symbol (symbols.execute_c2r_fftw, fftwf_execute_dft_c2r)) return nullptr;
           #else
            if (! Symbols::symbol (lib, symbols.plan_dft_fftw, "fftwf_plan_dft_1d")) return nullptr;
            if (! Symbols::symbol (lib, symbols.plan_r2c_fftw, "fftwf_plan_dft_r2c_1d")) return nullptr;
            if (! Symbols::symbol (lib, symbols.plan_c2r_fftw, "fftwf_plan_dft_c2r_1d")) return nullptr;

            if (! Symbols::symbol (lib, symbols.destroy_fftw, "fftwf_destroy_plan")) return nullptr;
            if (! Symbols::symbol (lib, symbols.execute_dft_fftw, "fftwf_execute_dft")) return nullptr;
            if (! Symbols::symbol (lib, symbols.execute_r2c_fftw, "fftwf_execute_dft_r2c")) return nullptr;
            if (! Symbols::symbol (lib, symbols.execute_c2r_fftw, "fftwf_execute_dft_c2r")) return nullptr;
           #endif

            return new FFTWImpl (static_cast<size_t> (order), std::move (lib), symbols);
        }

        return nullptr;
    }

    FFTWImpl (size_t orderToUse, DynamicLibrary&& libraryToUse, const Symbols& symbols)
        : fftwLibrary (std::move (libraryToUse)), fftw (symbols), order (static_cast<size_t> (orderToUse))
    {
        ScopedLock lock (getFFTWPlanLock());

        auto n = (1u << order);
        HeapBlock<Complex<float>> in (n), out (n);

        c2cForward = fftw.plan_dft_fftw (n, in.getData(), out.getData(), -1, unaligned | estimate);
        c2cInverse = fftw.plan_dft_fftw (n, in.getData(), out.getData(), +1, unaligned | estimate);

        r2c = fftw.plan_r2c_fftw (n, (float*) in.getData(), in.getData(), unaligned | estimate);
        c2r = fftw.plan_c2r_fftw (n, in.getData(), (float*) in.getData(), unaligned | estimate);
    }

    ~FFTWImpl() override
    {
        ScopedLock lock (getFFTWPlanLock());

        fftw.destroy_fftw (c2cForward);
        fftw.destroy_fftw (c2cInverse);
        fftw.destroy_fftw (r2c);
        fftw.destroy_fftw (c2r);
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        if (inverse)
        {
            auto n = (1u << order);
            fftw.execute_dft_fftw (c2cInverse, input, output);
            FloatVectorOperations::multiply ((float*) output, 1.0f / static_cast<float> (n), (int) n << 1);
        }
        else
        {
            fftw.execute_dft_fftw (c2cForward, input, output);
        }
    }

    void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
    {
        if (order == 0)
            return;

        auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);

        fftw.execute_r2c_fftw (r2c, inputOutputData, out);

        auto size = (1 << order);

        if (! ignoreNegativeFreqs)
            for (int i = size >> 1; i < size; ++i)
                out[i] = std::conj (out[size - i]);
    }

    void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
    {
        auto n = (1u << order);

        fftw.execute_c2r_fftw (c2r, (Complex<float>*) inputOutputData, inputOutputData);
        FloatVectorOperations::multiply ((float*) inputOutputData, 1.0f / static_cast<float> (n), (int) n);
    }

    //==============================================================================
    // fftw's plan_* and destroy_* methods are NOT thread safe. So we need to share
    // a lock between all instances of FFTWImpl
    static CriticalSection& getFFTWPlanLock() noexcept
    {
        static CriticalSection cs;
        return cs;
    }

    //==============================================================================
    DynamicLibrary fftwLibrary;
    Symbols fftw;
    size_t order;

    FFTWPlanRef c2cForward, c2cInverse, r2c, c2r;
};

FFT::EngineImpl<FFTWImpl> fftwEngine;
#endif
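
/*  Neither of the FFTW paths is compiled in by default: one of the two macros
    checked above has to be set to 1 in the project's preprocessor definitions
    (or AppConfig.h) before juce_dsp is built, e.g.:

        #define JUCE_DSP_USE_SHARED_FFTW 1   // load libfftw3f at runtime (priority 3)
        // or
        #define JUCE_DSP_USE_STATIC_FFTW 1   // statically linked fftwf (priority 10)

    The exact place to put the definition depends on how the project is set up.
*/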
//==============================================================================
//==============================================================================
#if JUCE_DSP_USE_INTEL_MKL
struct IntelFFT : public FFT::Instance
{
    static constexpr int priority = 8;

    static bool succeeded (MKL_LONG status) noexcept { return status == 0; }

    static IntelFFT* create (int orderToUse)
    {
        DFTI_DESCRIPTOR_HANDLE mklc2c, mklc2r;

        if (DftiCreateDescriptor (&mklc2c, DFTI_SINGLE, DFTI_COMPLEX, 1, 1 << orderToUse) == 0)
        {
            if (succeeded (DftiSetValue (mklc2c, DFTI_PLACEMENT, DFTI_NOT_INPLACE))
                 && succeeded (DftiSetValue (mklc2c, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
                 && succeeded (DftiCommitDescriptor (mklc2c)))
            {
                if (succeeded (DftiCreateDescriptor (&mklc2r, DFTI_SINGLE, DFTI_REAL, 1, 1 << orderToUse)))
                {
                    if (succeeded (DftiSetValue (mklc2r, DFTI_PLACEMENT, DFTI_INPLACE))
                         && succeeded (DftiSetValue (mklc2r, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
                         && succeeded (DftiCommitDescriptor (mklc2r)))
                    {
                        return new IntelFFT (static_cast<size_t> (orderToUse), mklc2c, mklc2r);
                    }

                    DftiFreeDescriptor (&mklc2r);
                }
            }

            DftiFreeDescriptor (&mklc2c);
        }

        return {};
    }

    IntelFFT (size_t orderToUse, DFTI_DESCRIPTOR_HANDLE c2cToUse, DFTI_DESCRIPTOR_HANDLE cr2ToUse)
        : order (orderToUse), c2c (c2cToUse), c2r (cr2ToUse)
    {}

    ~IntelFFT()
    {
        DftiFreeDescriptor (&c2c);
        DftiFreeDescriptor (&c2r);
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        if (inverse)
            DftiComputeBackward (c2c, (void*) input, output);
        else
            DftiComputeForward (c2c, (void*) input, output);
    }

    void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
    {
        if (order == 0)
            return;

        DftiComputeForward (c2r, inputOutputData);

        auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
        auto size = (1 << order);

        if (! ignoreNegativeFreqs)
            for (int i = size >> 1; i < size; ++i)
                out[i] = std::conj (out[size - i]);
    }

    void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
    {
        DftiComputeBackward (c2r, inputOutputData);
    }

    size_t order;
    DFTI_DESCRIPTOR_HANDLE c2c, c2r;
};

// named distinctly so that this registration cannot clash with the FFTW engine above
FFT::EngineImpl<IntelFFT> mklFFTEngine;
#endif
//==============================================================================
//==============================================================================
// Visual Studio should define no more than one of these, depending on the
// setting at 'Project' > 'Properties' > 'Configuration Properties' > 'Intel
// Performance Libraries' > 'Use Intel(R) IPP'
#if _IPP_SEQUENTIAL_STATIC || _IPP_SEQUENTIAL_DYNAMIC || _IPP_PARALLEL_STATIC || _IPP_PARALLEL_DYNAMIC
class IntelPerformancePrimitivesFFT : public FFT::Instance
{
public:
    static constexpr auto priority = 9;

    static IntelPerformancePrimitivesFFT* create (const int order)
    {
        auto complexContext = Context<ComplexTraits>::create (order);
        auto realContext    = Context<RealTraits>   ::create (order);

        if (complexContext.isValid() && realContext.isValid())
            return new IntelPerformancePrimitivesFFT (std::move (complexContext), std::move (realContext), order);

        return {};
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        if (inverse)
        {
            ippsFFTInv_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
                                  reinterpret_cast<Ipp32fc*> (output),
                                  cplx.specPtr,
                                  cplx.workBuf.get());
        }
        else
        {
            ippsFFTFwd_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
                                  reinterpret_cast<Ipp32fc*> (output),
                                  cplx.specPtr,
                                  cplx.workBuf.get());
        }
    }

    void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
    {
        ippsFFTFwd_RToCCS_32f_I (inoutData, real.specPtr, real.workBuf.get());

        if (order == 0)
            return;

        auto* out = reinterpret_cast<Complex<float>*> (inoutData);
        const auto size = (1 << order);

        if (! ignoreNegativeFreqs)
            for (auto i = size >> 1; i < size; ++i)
                out[i] = std::conj (out[size - i]);
    }

    void performRealOnlyInverseTransform (float* inoutData) const noexcept override
    {
        ippsFFTInv_CCSToR_32f_I (inoutData, real.specPtr, real.workBuf.get());
    }

private:
    static constexpr auto flag = IPP_FFT_DIV_INV_BY_N;
    static constexpr auto hint = ippAlgHintFast;

    struct IppFree
    {
        template <typename Ptr>
        void operator() (Ptr* ptr) const noexcept { ippsFree (ptr); }
    };

    using IppPtr = std::unique_ptr<Ipp8u[], IppFree>;

    template <typename Traits>
    struct Context
    {
        using SpecPtr = typename Traits::Spec*;

        static Context create (const int order)
        {
            int specSize = 0, initSize = 0, workSize = 0;

            if (Traits::getSize (order, flag, hint, &specSize, &initSize, &workSize) != ippStsNoErr)
                return {};

            const auto initBuf = IppPtr (ippsMalloc_8u (initSize));
            auto specBuf = IppPtr (ippsMalloc_8u (specSize));
            SpecPtr specPtr = nullptr;

            if (Traits::init (&specPtr, order, flag, hint, specBuf.get(), initBuf.get()) != ippStsNoErr)
                return {};

            if (reinterpret_cast<const Ipp8u*> (specPtr) != specBuf.get())
                return {};

            return { std::move (specBuf), IppPtr (ippsMalloc_8u (workSize)), specPtr };
        }

        Context() noexcept = default;

        Context (IppPtr&& spec, IppPtr&& work, typename Traits::Spec* ptr) noexcept
            : specBuf (std::move (spec)), workBuf (std::move (work)), specPtr (ptr)
        {}

        bool isValid() const noexcept { return specPtr != nullptr; }

        IppPtr specBuf, workBuf;
        SpecPtr specPtr = nullptr;
    };

    struct ComplexTraits
    {
        static constexpr auto getSize = ippsFFTGetSize_C_32fc;
        static constexpr auto init = ippsFFTInit_C_32fc;
        using Spec = IppsFFTSpec_C_32fc;
    };

    struct RealTraits
    {
        static constexpr auto getSize = ippsFFTGetSize_R_32f;
        static constexpr auto init = ippsFFTInit_R_32f;
        using Spec = IppsFFTSpec_R_32f;
    };

    IntelPerformancePrimitivesFFT (Context<ComplexTraits>&& complexToUse,
                                   Context<RealTraits>&& realToUse,
                                   const int orderToUse)
        : cplx (std::move (complexToUse)),
          real (std::move (realToUse)),
          order (orderToUse)
    {}

    Context<ComplexTraits> cplx;
    Context<RealTraits> real;
    int order = 0;
};

FFT::EngineImpl<IntelPerformancePrimitivesFFT> intelPerformancePrimitivesFFT;
#endif
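
//==============================================================================
// For reference, the engine priorities declared above (higher values are tried
// first by createBestEngineForPlatform):
//
//     FFTW, statically linked   10
//     Intel IPP                  9
//     Intel MKL                  8
//     Apple vDSP                 5
//     FFTW, loaded at runtime    3
//     FFTFallback               -1   (always succeeds)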
//==============================================================================
//==============================================================================
FFT::FFT (int order)
    : engine (FFT::Engine::createBestEngineForPlatform (order)),
      size (1 << order)
{
}

FFT::~FFT() {}

void FFT::perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept
{
    if (engine != nullptr)
        engine->perform (input, output, inverse);
}

void FFT::performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept
{
    if (engine != nullptr)
        engine->performRealOnlyForwardTransform (inputOutputData, ignoreNegativeFreqs);
}

void FFT::performRealOnlyInverseTransform (float* inputOutputData) const noexcept
{
    if (engine != nullptr)
        engine->performRealOnlyInverseTransform (inputOutputData);
}

void FFT::performFrequencyOnlyForwardTransform (float* inputOutputData) const noexcept
{
    if (size == 1)
        return;

    performRealOnlyForwardTransform (inputOutputData);

    auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);

    for (int i = 0; i < size; ++i)
        inputOutputData[i] = std::abs (out[i]);

    zeromem (&inputOutputData[size], static_cast<size_t> (size) * sizeof (float));
}
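
/*  performFrequencyOnlyForwardTransform is the usual way to get a magnitude
    spectrum. A minimal sketch (windowing and overlap omitted; names arbitrary):

        juce::dsp::FFT fft (11);                                    // 2048-point transform
        std::vector<float> data (2 * (size_t) fft.getSize(), 0.0f);
        // ... copy up to fft.getSize() samples into the first half ...

        fft.performFrequencyOnlyForwardTransform (data.data());
        // data[0 .. fft.getSize() - 1] now holds bin magnitudes; the upper half is zeroed
*/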
} // namespace dsp
} // namespace juce