The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes

/*
  ==============================================================================

   This file is part of the JUCE 6 technical preview.
   Copyright (c) 2017 - ROLI Ltd.

   You may use this code under the terms of the GPL v3
   (see www.gnu.org/licenses).

   For this technical preview, this file is not subject to commercial licensing.

   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
   DISCLAIMED.

  ==============================================================================
*/
namespace juce
{
namespace dsp
{

struct FFT::Instance
{
    virtual ~Instance() = default;
    virtual void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept = 0;
    virtual void performRealOnlyForwardTransform (float*, bool) const noexcept = 0;
    virtual void performRealOnlyInverseTransform (float*) const noexcept = 0;
};
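
// Engines register themselves in a global list when they are constructed (see the
// EngineImpl instances defined at file scope below). The list is kept sorted by
// priority, so createBestEngineForPlatform() simply returns the first engine that
// manages to create an instance for the requested order.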
struct FFT::Engine
{
    Engine (int priorityToUse) : enginePriority (priorityToUse)
    {
        auto& list = getEngines();
        list.add (this);
        std::sort (list.begin(), list.end(), [] (Engine* a, Engine* b) { return b->enginePriority < a->enginePriority; });
    }

    virtual ~Engine() = default;

    virtual FFT::Instance* create (int order) const = 0;

    //==============================================================================
    static FFT::Instance* createBestEngineForPlatform (int order)
    {
        for (auto* engine : getEngines())
            if (auto* instance = engine->create (order))
                return instance;

        jassertfalse; // This should never happen as the fallback engine should always work!
        return nullptr;
    }

private:
    static Array<Engine*>& getEngines()
    {
        static Array<Engine*> engines;
        return engines;
    }

    int enginePriority; // used so that faster engines have priority over slower ones
};

template <typename InstanceToUse>
struct FFT::EngineImpl : public FFT::Engine
{
    EngineImpl() : FFT::Engine (InstanceToUse::priority) {}
    FFT::Instance* create (int order) const override { return InstanceToUse::create (order); }
};

//==============================================================================
//==============================================================================
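// Plain C++ mixed-radix implementation, used when no vendor FFT library is available.
// It registers itself with the lowest priority, so any of the platform-specific
// engines below is preferred whenever one is compiled in.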
struct FFTFallback : public FFT::Instance
{
    // this should have the least priority of all engines
    static constexpr int priority = -1;

    static FFTFallback* create (int order)
    {
        return new FFTFallback (order);
    }

    FFTFallback (int order)
    {
        configForward.reset (new FFTConfig (1 << order, false));
        configInverse.reset (new FFTConfig (1 << order, true));

        size = 1 << order;
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        if (size == 1)
        {
            *output = *input;
            return;
        }

        const SpinLock::ScopedLockType sl (processLock);

        jassert (configForward != nullptr);

        if (inverse)
        {
            configInverse->perform (input, output);

            const float scaleFactor = 1.0f / size;

            for (int i = 0; i < size; ++i)
                output[i] *= scaleFactor;
        }
        else
        {
            configForward->perform (input, output);
        }
    }
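
    // The real-only transforms below need a scratch buffer of 'size' complex values:
    // small buffers are taken from the stack with alloca(), anything at or above this
    // threshold is allocated on the heap instead.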
    const size_t maxFFTScratchSpaceToAlloca = 256 * 1024;

    void performRealOnlyForwardTransform (float* d, bool) const noexcept override
    {
        if (size == 1)
            return;

        const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);

        if (scratchSize < maxFFTScratchSpaceToAlloca)
        {
            performRealOnlyForwardTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
        }
        else
        {
            HeapBlock<char> heapSpace (scratchSize);
            performRealOnlyForwardTransform (reinterpret_cast<Complex<float>*> (heapSpace.getData()), d);
        }
    }

    void performRealOnlyInverseTransform (float* d) const noexcept override
    {
        if (size == 1)
            return;

        const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);

        if (scratchSize < maxFFTScratchSpaceToAlloca)
        {
            performRealOnlyInverseTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
        }
        else
        {
            HeapBlock<char> heapSpace (scratchSize);
            performRealOnlyInverseTransform (reinterpret_cast<Complex<float>*> (heapSpace.getData()), d);
        }
    }
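
    // Private helpers: the forward version copies the real input into the complex scratch
    // buffer and writes the interleaved complex result back over 'd' (which must therefore
    // hold 2 * size floats); the inverse version first rebuilds the negative-frequency bins
    // from the conjugate symmetry of a real signal's spectrum, then stores the real and
    // imaginary parts of the result in the two halves of 'd'.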
    void performRealOnlyForwardTransform (Complex<float>* scratch, float* d) const noexcept
    {
        for (int i = 0; i < size; ++i)
            scratch[i] = { d[i], 0 };

        perform (scratch, reinterpret_cast<Complex<float>*> (d), false);
    }

    void performRealOnlyInverseTransform (Complex<float>* scratch, float* d) const noexcept
    {
        auto* input = reinterpret_cast<Complex<float>*> (d);

        for (auto i = size >> 1; i < size; ++i)
            input[i] = std::conj (input[size - i]);

        perform (input, scratch, true);

        for (int i = 0; i < size; ++i)
        {
            d[i]        = scratch[i].real();
            d[i + size] = scratch[i].imag();
        }
    }

    //==============================================================================
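    // Holds everything needed for one transform direction: a table of twiddle factors
    // e^(+/-2*pi*i*k/N) (built from a directly computed quarter wave plus symmetry for
    // larger sizes) and the radix factorisation of the FFT length, which for the
    // power-of-two sizes used here is a sequence of radix-4 and radix-2 stages.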
    struct FFTConfig
    {
        FFTConfig (int sizeOfFFT, bool isInverse)
            : fftSize (sizeOfFFT), inverse (isInverse), twiddleTable ((size_t) sizeOfFFT)
        {
            auto inverseFactor = (inverse ? 2.0 : -2.0) * MathConstants<double>::pi / (double) fftSize;

            if (fftSize <= 4)
            {
                for (int i = 0; i < fftSize; ++i)
                {
                    auto phase = i * inverseFactor;

                    twiddleTable[i] = { (float) std::cos (phase),
                                        (float) std::sin (phase) };
                }
            }
            else
            {
                for (int i = 0; i < fftSize / 4; ++i)
                {
                    auto phase = i * inverseFactor;

                    twiddleTable[i] = { (float) std::cos (phase),
                                        (float) std::sin (phase) };
                }

                for (int i = fftSize / 4; i < fftSize / 2; ++i)
                {
                    auto other = twiddleTable[i - fftSize / 4];

                    twiddleTable[i] = { inverse ? -other.imag() :  other.imag(),
                                        inverse ?  other.real() : -other.real() };
                }

                twiddleTable[fftSize / 2].real (-1.0f);
                twiddleTable[fftSize / 2].imag (0.0f);

                for (int i = fftSize / 2; i < fftSize; ++i)
                {
                    auto index = fftSize / 2 - (i - fftSize / 2);
                    twiddleTable[i] = std::conj (twiddleTable[index]);
                }
            }

            auto root = (int) std::sqrt ((double) fftSize);
            int divisor = 4, n = fftSize;

            for (int i = 0; i < numElementsInArray (factors); ++i)
            {
                while ((n % divisor) != 0)
                {
                    if (divisor == 2)       divisor = 3;
                    else if (divisor == 4)  divisor = 2;
                    else                    divisor += 2;

                    if (divisor > root)
                        divisor = n;
                }

                n /= divisor;

                jassert (divisor == 1 || divisor == 2 || divisor == 4);
                factors[i].radix = divisor;
                factors[i].length = n;
            }
        }

        void perform (const Complex<float>* input, Complex<float>* output) const noexcept
        {
            perform (input, output, 1, 1, factors);
        }

        const int fftSize;
        const bool inverse;

        struct Factor { int radix, length; };
        Factor factors[32];
        HeapBlock<Complex<float>> twiddleTable;
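
        // Recursive decimation step: split the input into 'radix' interleaved sub-sequences,
        // transform each of them recursively, then recombine the partial results with the
        // matching butterfly. The factor list built in the constructor drives the recursion.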
        void perform (const Complex<float>* input, Complex<float>* output, int stride, int strideIn, const Factor* facs) const noexcept
        {
            auto factor = *facs++;
            auto* originalOutput = output;
            auto* outputEnd = output + factor.radix * factor.length;

            if (stride == 1 && factor.radix <= 5)
            {
                for (int i = 0; i < factor.radix; ++i)
                    perform (input + stride * strideIn * i, output + i * factor.length, stride * factor.radix, strideIn, facs);

                butterfly (factor, output, stride);
                return;
            }

            if (factor.length == 1)
            {
                do
                {
                    *output++ = *input;
                    input += stride * strideIn;
                }
                while (output < outputEnd);
            }
            else
            {
                do
                {
                    perform (input, output, stride * factor.radix, strideIn, facs);
                    input += stride * strideIn;
                    output += factor.length;
                }
                while (output < outputEnd);
            }

            butterfly (factor, originalOutput, stride);
        }

        void butterfly (const Factor factor, Complex<float>* data, int stride) const noexcept
        {
            switch (factor.radix)
            {
                case 1:   break;
                case 2:   butterfly2 (data, stride, factor.length); return;
                case 4:   butterfly4 (data, stride, factor.length); return;
                default:  jassertfalse; break;
            }

            auto* scratch = static_cast<Complex<float>*> (alloca ((size_t) factor.radix * sizeof (Complex<float>)));

            for (int i = 0; i < factor.length; ++i)
            {
                for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
                {
                    scratch[q1] = data[k];
                    k += factor.length;
                }

                for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
                {
                    int twiddleIndex = 0;
                    data[k] = scratch[0];

                    for (int q = 1; q < factor.radix; ++q)
                    {
                        twiddleIndex += stride * k;

                        if (twiddleIndex >= fftSize)
                            twiddleIndex -= fftSize;

                        data[k] += scratch[q] * twiddleTable[twiddleIndex];
                    }

                    k += factor.length;
                }
            }
        }
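
        // Radix-2 butterfly: combines each element with its partner 'length' positions away,
        // applying one twiddle factor per pair.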
        void butterfly2 (Complex<float>* data, const int stride, const int length) const noexcept
        {
            auto* dataEnd = data + length;
            auto* tw = twiddleTable.getData();

            for (int i = length; --i >= 0;)
            {
                auto s = *dataEnd;
                s *= (*tw);
                tw += stride;

                *dataEnd++ = *data - s;
                *data++ += s;
            }
        }
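
        // Radix-4 butterfly: combines four elements spaced 'length' apart using three twiddle
        // factors, flipping the sign of the imaginary cross terms for the inverse transform.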
        void butterfly4 (Complex<float>* data, const int stride, const int length) const noexcept
        {
            auto lengthX2 = length * 2;
            auto lengthX3 = length * 3;

            auto strideX2 = stride * 2;
            auto strideX3 = stride * 3;

            auto* twiddle1 = twiddleTable.getData();
            auto* twiddle2 = twiddle1;
            auto* twiddle3 = twiddle1;

            for (int i = length; --i >= 0;)
            {
                auto s0 = data[length]   * *twiddle1;
                auto s1 = data[lengthX2] * *twiddle2;
                auto s2 = data[lengthX3] * *twiddle3;
                auto s3 = s0; s3 += s2;
                auto s4 = s0; s4 -= s2;
                auto s5 = *data; s5 -= s1;

                *data += s1;
                data[lengthX2] = *data;
                data[lengthX2] -= s3;
                twiddle1 += stride;
                twiddle2 += strideX2;
                twiddle3 += strideX3;
                *data += s3;

                if (inverse)
                {
                    data[length]   = { s5.real() - s4.imag(), s5.imag() + s4.real() };
                    data[lengthX3] = { s5.real() + s4.imag(), s5.imag() - s4.real() };
                }
                else
                {
                    data[length]   = { s5.real() + s4.imag(), s5.imag() - s4.real() };
                    data[lengthX3] = { s5.real() - s4.imag(), s5.imag() + s4.real() };
                }

                ++data;
            }
        }

        JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (FFTConfig)
    };

    //==============================================================================
    SpinLock processLock;
    std::unique_ptr<FFTConfig> configForward, configInverse;
    int size;
};

FFT::EngineImpl<FFTFallback> fftFallback;

//==============================================================================
//==============================================================================
#if (JUCE_MAC || JUCE_IOS) && JUCE_USE_VDSP_FRAMEWORK
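
// Engine backed by Apple's Accelerate/vDSP framework. vDSP's real transforms use a
// packed layout and their own scaling conventions, so the wrappers below renormalise
// the results and unpack the DC/nyquist packing to match the other engines.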
struct AppleFFT : public FFT::Instance
{
    static constexpr int priority = 5;

    static AppleFFT* create (int order)
    {
        return new AppleFFT (order);
    }

    AppleFFT (int orderToUse)
        : order (static_cast<vDSP_Length> (orderToUse)),
          fftSetup (vDSP_create_fftsetup (order, 2)),
          forwardNormalisation (0.5f),
          inverseNormalisation (1.0f / static_cast<float> (1 << order))
    {}

    ~AppleFFT() override
    {
        if (fftSetup != nullptr)
        {
            vDSP_destroy_fftsetup (fftSetup);
            fftSetup = nullptr;
        }
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        auto size = (1 << order);

        DSPSplitComplex splitInput  (toSplitComplex (const_cast<Complex<float>*> (input)));
        DSPSplitComplex splitOutput (toSplitComplex (output));

        vDSP_fft_zop (fftSetup, &splitInput, 2, &splitOutput, 2,
                      order, inverse ? kFFTDirection_Inverse : kFFTDirection_Forward);

        float factor = (inverse ? inverseNormalisation : forwardNormalisation * 2.0f);
        vDSP_vsmul ((float*) output, 1, &factor, (float*) output, 1, static_cast<size_t> (size << 1));
    }

    void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
    {
        auto size = (1 << order);
        auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
        auto splitInOut (toSplitComplex (inout));

        inoutData[size] = 0.0f;
        vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Forward);
        vDSP_vsmul (inoutData, 1, &forwardNormalisation, inoutData, 1, static_cast<size_t> (size << 1));

        mirrorResult (inout, ignoreNegativeFreqs);
    }

    void performRealOnlyInverseTransform (float* inoutData) const noexcept override
    {
        auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
        auto size = (1 << order);
        auto splitInOut (toSplitComplex (inout));

        // Imaginary part of nyquist and DC frequencies are always zero
        // so Apple uses the imaginary part of the DC frequency to store
        // the real part of the nyquist frequency
        if (size != 1)
            inout[0] = Complex<float> (inout[0].real(), inout[size >> 1].real());

        vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Inverse);
        vDSP_vsmul (inoutData, 1, &inverseNormalisation, inoutData, 1, static_cast<size_t> (size << 1));
        vDSP_vclr (inoutData + size, 1, static_cast<size_t> (size));
    }

private:
    //==============================================================================
    void mirrorResult (Complex<float>* out, bool ignoreNegativeFreqs) const noexcept
    {
        auto size = (1 << order);
        auto i = size >> 1;

        // Imaginary part of nyquist and DC frequencies are always zero
        // so Apple uses the imaginary part of the DC frequency to store
        // the real part of the nyquist frequency
        out[i++] = { out[0].imag(), 0.0 };
        out[0]   = { out[0].real(), 0.0 };

        if (! ignoreNegativeFreqs)
            for (; i < size; ++i)
                out[i] = std::conj (out[size - i]);
    }

    static DSPSplitComplex toSplitComplex (Complex<float>* data) noexcept
    {
        // this assumes that Complex interleaves real and imaginary parts
        // and is tightly packed.
        return { reinterpret_cast<float*> (data),
                 reinterpret_cast<float*> (data) + 1 };
    }

    //==============================================================================
    vDSP_Length order;
    FFTSetup fftSetup;
    float forwardNormalisation, inverseNormalisation;
};

FFT::EngineImpl<AppleFFT> appleFFT;
#endif

//==============================================================================
//==============================================================================
#if JUCE_DSP_USE_SHARED_FFTW || JUCE_DSP_USE_STATIC_FFTW

#if JUCE_DSP_USE_STATIC_FFTW
extern "C"
{
    void* fftwf_plan_dft_1d (int, void*, void*, int, int);
    void* fftwf_plan_dft_r2c_1d (int, void*, void*, int);
    void* fftwf_plan_dft_c2r_1d (int, void*, void*, int);
    void fftwf_destroy_plan (void*);
    void fftwf_execute_dft (void*, void*, void*);
    void fftwf_execute_dft_r2c (void*, void*, void*);
    void fftwf_execute_dft_c2r (void*, void*, void*);
}
#endif
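
// Engine backed by FFTW's single-precision ("fftwf") interface. The library is either
// loaded at runtime from libfftw3f via DynamicLibrary, or resolved from the statically
// linked symbols declared above when JUCE_DSP_USE_STATIC_FFTW is enabled.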
struct FFTWImpl : public FFT::Instance
{
   #if JUCE_DSP_USE_STATIC_FFTW
    // if the JUCE developer has gone through the hassle of statically
    // linking in fftw, they probably want to use it
    static constexpr int priority = 10;
   #else
    static constexpr int priority = 3;
   #endif

    struct FFTWPlan;
    using FFTWPlanRef = FFTWPlan*;

    enum
    {
        measure   = 0,
        unaligned = (1 << 1),
        estimate  = (1 << 6)
    };

    struct Symbols
    {
        FFTWPlanRef (*plan_dft_fftw) (unsigned, Complex<float>*, Complex<float>*, int, unsigned);
        FFTWPlanRef (*plan_r2c_fftw) (unsigned, float*, Complex<float>*, unsigned);
        FFTWPlanRef (*plan_c2r_fftw) (unsigned, Complex<float>*, float*, unsigned);
        void (*destroy_fftw) (FFTWPlanRef);
        void (*execute_dft_fftw) (FFTWPlanRef, const Complex<float>*, Complex<float>*);
        void (*execute_r2c_fftw) (FFTWPlanRef, float*, Complex<float>*);
        void (*execute_c2r_fftw) (FFTWPlanRef, Complex<float>*, float*);

       #if JUCE_DSP_USE_STATIC_FFTW
        template <typename FuncPtr, typename ActualSymbolType>
        static bool symbol (FuncPtr& dst, ActualSymbolType sym)
        {
            dst = reinterpret_cast<FuncPtr> (sym);
            return true;
        }
       #else
        template <typename FuncPtr>
        static bool symbol (DynamicLibrary& lib, FuncPtr& dst, const char* name)
        {
            dst = reinterpret_cast<FuncPtr> (lib.getFunction (name));
            return (dst != nullptr);
        }
       #endif
    };

    static FFTWImpl* create (int order)
    {
        DynamicLibrary lib;

       #if ! JUCE_DSP_USE_STATIC_FFTW
        #if JUCE_MAC
         auto libName = "libfftw3f.dylib";
        #elif JUCE_WINDOWS
         auto libName = "libfftw3f.dll";
        #else
         auto libName = "libfftw3f.so";
        #endif

        if (lib.open (libName))
       #endif
        {
            Symbols symbols;

           #if JUCE_DSP_USE_STATIC_FFTW
            if (! Symbols::symbol (symbols.plan_dft_fftw, fftwf_plan_dft_1d))        return nullptr;
            if (! Symbols::symbol (symbols.plan_r2c_fftw, fftwf_plan_dft_r2c_1d))    return nullptr;
            if (! Symbols::symbol (symbols.plan_c2r_fftw, fftwf_plan_dft_c2r_1d))    return nullptr;

            if (! Symbols::symbol (symbols.destroy_fftw, fftwf_destroy_plan))        return nullptr;
            if (! Symbols::symbol (symbols.execute_dft_fftw, fftwf_execute_dft))     return nullptr;
            if (! Symbols::symbol (symbols.execute_r2c_fftw, fftwf_execute_dft_r2c)) return nullptr;
            if (! Symbols::symbol (symbols.execute_c2r_fftw, fftwf_execute_dft_c2r)) return nullptr;
           #else
            if (! Symbols::symbol (lib, symbols.plan_dft_fftw, "fftwf_plan_dft_1d"))        return nullptr;
            if (! Symbols::symbol (lib, symbols.plan_r2c_fftw, "fftwf_plan_dft_r2c_1d"))    return nullptr;
            if (! Symbols::symbol (lib, symbols.plan_c2r_fftw, "fftwf_plan_dft_c2r_1d"))    return nullptr;

            if (! Symbols::symbol (lib, symbols.destroy_fftw, "fftwf_destroy_plan"))        return nullptr;
            if (! Symbols::symbol (lib, symbols.execute_dft_fftw, "fftwf_execute_dft"))     return nullptr;
            if (! Symbols::symbol (lib, symbols.execute_r2c_fftw, "fftwf_execute_dft_r2c")) return nullptr;
            if (! Symbols::symbol (lib, symbols.execute_c2r_fftw, "fftwf_execute_dft_c2r")) return nullptr;
           #endif

            return new FFTWImpl (static_cast<size_t> (order), std::move (lib), symbols);
        }

        return nullptr;
    }

    FFTWImpl (size_t orderToUse, DynamicLibrary&& libraryToUse, const Symbols& symbols)
        : fftwLibrary (std::move (libraryToUse)), fftw (symbols), order (static_cast<size_t> (orderToUse))
    {
        ScopedLock lock (getFFTWPlanLock());

        auto n = (1u << order);
        HeapBlock<Complex<float>> in (n), out (n);

        c2cForward = fftw.plan_dft_fftw (n, in.getData(), out.getData(), -1, unaligned | estimate);
        c2cInverse = fftw.plan_dft_fftw (n, in.getData(), out.getData(), +1, unaligned | estimate);

        r2c = fftw.plan_r2c_fftw (n, (float*) in.getData(), in.getData(), unaligned | estimate);
        c2r = fftw.plan_c2r_fftw (n, in.getData(), (float*) in.getData(), unaligned | estimate);
    }

    ~FFTWImpl() override
    {
        ScopedLock lock (getFFTWPlanLock());

        fftw.destroy_fftw (c2cForward);
        fftw.destroy_fftw (c2cInverse);
        fftw.destroy_fftw (r2c);
        fftw.destroy_fftw (c2r);
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        if (inverse)
        {
            auto n = (1u << order);
            fftw.execute_dft_fftw (c2cInverse, input, output);
            FloatVectorOperations::multiply ((float*) output, 1.0f / static_cast<float> (n), (int) n << 1);
        }
        else
        {
            fftw.execute_dft_fftw (c2cForward, input, output);
        }
    }

    void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
    {
        if (order == 0)
            return;

        auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);

        fftw.execute_r2c_fftw (r2c, inputOutputData, out);

        auto size = (1 << order);

        if (! ignoreNegativeFreqs)
            for (auto i = size >> 1; i < size; ++i)
                out[i] = std::conj (out[size - i]);
    }

    void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
    {
        auto n = (1u << order);

        fftw.execute_c2r_fftw (c2r, (Complex<float>*) inputOutputData, inputOutputData);
        FloatVectorOperations::multiply ((float*) inputOutputData, 1.0f / static_cast<float> (n), (int) n);
    }

    //==============================================================================
    // fftw's plan_* and destroy_* methods are NOT thread safe. So we need to share
    // a lock between all instances of FFTWImpl
    static CriticalSection& getFFTWPlanLock() noexcept
    {
        static CriticalSection cs;
        return cs;
    }

    //==============================================================================
    DynamicLibrary fftwLibrary;
    Symbols fftw;
    size_t order;

    FFTWPlanRef c2cForward, c2cInverse, r2c, c2r;
};

FFT::EngineImpl<FFTWImpl> fftwEngine;
#endif

//==============================================================================
//==============================================================================
#if JUCE_DSP_USE_INTEL_MKL
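
// Engine backed by Intel MKL's DFTI interface: one out-of-place complex descriptor and
// one in-place real descriptor, both created with the inverse scaling factor baked in
// via DFTI_BACKWARD_SCALE.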
struct IntelFFT : public FFT::Instance
{
    static constexpr int priority = 8;

    static bool succeeded (MKL_LONG status) noexcept { return status == 0; }

    static IntelFFT* create (int orderToUse)
    {
        DFTI_DESCRIPTOR_HANDLE mklc2c, mklc2r;

        if (DftiCreateDescriptor (&mklc2c, DFTI_SINGLE, DFTI_COMPLEX, 1, 1 << orderToUse) == 0)
        {
            if (succeeded (DftiSetValue (mklc2c, DFTI_PLACEMENT, DFTI_NOT_INPLACE))
                 && succeeded (DftiSetValue (mklc2c, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
                 && succeeded (DftiCommitDescriptor (mklc2c)))
            {
                if (succeeded (DftiCreateDescriptor (&mklc2r, DFTI_SINGLE, DFTI_REAL, 1, 1 << orderToUse)))
                {
                    if (succeeded (DftiSetValue (mklc2r, DFTI_PLACEMENT, DFTI_INPLACE))
                         && succeeded (DftiSetValue (mklc2r, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
                         && succeeded (DftiCommitDescriptor (mklc2r)))
                    {
                        return new IntelFFT (static_cast<size_t> (orderToUse), mklc2c, mklc2r);
                    }

                    DftiFreeDescriptor (&mklc2r);
                }
            }

            DftiFreeDescriptor (&mklc2c);
        }

        return {};
    }

    IntelFFT (size_t orderToUse, DFTI_DESCRIPTOR_HANDLE c2cToUse, DFTI_DESCRIPTOR_HANDLE cr2ToUse)
        : order (orderToUse), c2c (c2cToUse), c2r (cr2ToUse)
    {}

    ~IntelFFT()
    {
        DftiFreeDescriptor (&c2c);
        DftiFreeDescriptor (&c2r);
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        if (inverse)
            DftiComputeBackward (c2c, (void*) input, output);
        else
            DftiComputeForward (c2c, (void*) input, output);
    }

    void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
    {
        if (order == 0)
            return;

        DftiComputeForward (c2r, inputOutputData);

        auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
        auto size = (1 << order);

        if (! ignoreNegativeFreqs)
            for (auto i = size >> 1; i < size; ++i)
                out[i] = std::conj (out[size - i]);
    }

    void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
    {
        DftiComputeBackward (c2r, inputOutputData);
    }

    size_t order;
    DFTI_DESCRIPTOR_HANDLE c2c, c2r;
};

FFT::EngineImpl<IntelFFT> mklFFTEngine;
#endif

//==============================================================================
//==============================================================================
// Visual Studio should define no more than one of these, depending on the
// setting at 'Project' > 'Properties' > 'Configuration Properties' > 'Intel
// Performance Libraries' > 'Use Intel(R) IPP'
#if _IPP_SEQUENTIAL_STATIC || _IPP_SEQUENTIAL_DYNAMIC || _IPP_PARALLEL_STATIC || _IPP_PARALLEL_DYNAMIC
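
// Engine backed by Intel IPP. One FFT "spec" is created for complex transforms and one
// for real transforms; the Context helper below owns the spec buffer, the work buffer
// and the spec pointer, and is parameterised on the trait structs at the bottom of the
// class so that the same setup code serves both variants.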
class IntelPerformancePrimitivesFFT : public FFT::Instance
{
public:
    static constexpr auto priority = 9;

    static IntelPerformancePrimitivesFFT* create (const int order)
    {
        auto complexContext = Context<ComplexTraits>::create (order);
        auto realContext    = Context<RealTraits>   ::create (order);

        if (complexContext.isValid() && realContext.isValid())
            return new IntelPerformancePrimitivesFFT (std::move (complexContext), std::move (realContext), order);

        return {};
    }

    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
    {
        if (inverse)
        {
            ippsFFTInv_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
                                  reinterpret_cast<Ipp32fc*> (output),
                                  cplx.specPtr,
                                  cplx.workBuf.get());
        }
        else
        {
            ippsFFTFwd_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
                                  reinterpret_cast<Ipp32fc*> (output),
                                  cplx.specPtr,
                                  cplx.workBuf.get());
        }
    }

    void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
    {
        ippsFFTFwd_RToCCS_32f_I (inoutData, real.specPtr, real.workBuf.get());

        if (order == 0)
            return;

        auto* out = reinterpret_cast<Complex<float>*> (inoutData);
        const auto size = (1 << order);

        if (! ignoreNegativeFreqs)
            for (auto i = size >> 1; i < size; ++i)
                out[i] = std::conj (out[size - i]);
    }

    void performRealOnlyInverseTransform (float* inoutData) const noexcept override
    {
        ippsFFTInv_CCSToR_32f_I (inoutData, real.specPtr, real.workBuf.get());
    }

private:
    static constexpr auto flag = IPP_FFT_DIV_INV_BY_N;
    static constexpr auto hint = ippAlgHintFast;

    struct IppFree
    {
        template <typename Ptr>
        void operator() (Ptr* ptr) const noexcept { ippsFree (ptr); }
    };

    using IppPtr = std::unique_ptr<Ipp8u[], IppFree>;

    template <typename Traits>
    struct Context
    {
        using SpecPtr = typename Traits::Spec*;

        static Context create (const int order)
        {
            int specSize = 0, initSize = 0, workSize = 0;

            if (Traits::getSize (order, flag, hint, &specSize, &initSize, &workSize) != ippStsNoErr)
                return {};

            const auto initBuf = IppPtr (ippsMalloc_8u (initSize));
            auto specBuf = IppPtr (ippsMalloc_8u (specSize));
            SpecPtr specPtr = nullptr;

            if (Traits::init (&specPtr, order, flag, hint, specBuf.get(), initBuf.get()) != ippStsNoErr)
                return {};

            if (reinterpret_cast<const Ipp8u*> (specPtr) != specBuf.get())
                return {};

            return { std::move (specBuf), IppPtr (ippsMalloc_8u (workSize)), specPtr };
        }

        Context() noexcept = default;

        Context (IppPtr&& spec, IppPtr&& work, typename Traits::Spec* ptr) noexcept
            : specBuf (std::move (spec)), workBuf (std::move (work)), specPtr (ptr)
        {}

        bool isValid() const noexcept { return specPtr != nullptr; }

        IppPtr specBuf, workBuf;
        SpecPtr specPtr = nullptr;
    };

    struct ComplexTraits
    {
        static constexpr auto getSize = ippsFFTGetSize_C_32fc;
        static constexpr auto init = ippsFFTInit_C_32fc;
        using Spec = IppsFFTSpec_C_32fc;
    };

    struct RealTraits
    {
        static constexpr auto getSize = ippsFFTGetSize_R_32f;
        static constexpr auto init = ippsFFTInit_R_32f;
        using Spec = IppsFFTSpec_R_32f;
    };

    IntelPerformancePrimitivesFFT (Context<ComplexTraits>&& complexToUse,
                                   Context<RealTraits>&& realToUse,
                                   const int orderToUse)
        : cplx (std::move (complexToUse)),
          real (std::move (realToUse)),
          order (orderToUse)
    {}

    Context<ComplexTraits> cplx;
    Context<RealTraits> real;
    int order = 0;
};

FFT::EngineImpl<IntelPerformancePrimitivesFFT> intelPerformancePrimitivesFFT;
#endif

//==============================================================================
//==============================================================================
FFT::FFT (int order)
    : engine (FFT::Engine::createBestEngineForPlatform (order)),
      size (1 << order)
{
}

FFT::~FFT() {}

void FFT::perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept
{
    if (engine != nullptr)
        engine->perform (input, output, inverse);
}

void FFT::performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept
{
    if (engine != nullptr)
        engine->performRealOnlyForwardTransform (inputOutputData, ignoreNegativeFreqs);
}

void FFT::performRealOnlyInverseTransform (float* inputOutputData) const noexcept
{
    if (engine != nullptr)
        engine->performRealOnlyInverseTransform (inputOutputData);
}

void FFT::performFrequencyOnlyForwardTransform (float* inputOutputData) const noexcept
{
    if (size == 1)
        return;

    performRealOnlyForwardTransform (inputOutputData);

    auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);

    for (auto i = 0; i < size; ++i)
        inputOutputData[i] = std::abs (out[i]);

    zeromem (&inputOutputData[size], static_cast<size_t> (size) * sizeof (float));
}
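
/*  A minimal usage sketch (illustrative only; getSize() and the transform methods are
    declared in the corresponding header):

        juce::dsp::FFT fft (10);                           // 2^10 = 1024-point FFT
        std::vector<float> data (2 * (size_t) fft.getSize(), 0.0f);
        // ...fill the first fft.getSize() entries with samples...
        fft.performFrequencyOnlyForwardTransform (data.data());
        // data[0 .. getSize() - 1] now holds the magnitude spectrum
*/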

} // namespace dsp
} // namespace juce