The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

999 lines
34KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2022 - Raw Material Software Limited
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. By using JUCE, you agree to the terms of both the JUCE 7 End-User License
  8. Agreement and JUCE Privacy Policy.
  9. End User License Agreement: www.juce.com/juce-7-licence
  10. Privacy Policy: www.juce.com/juce-privacy-policy
  11. Or: You may also use this code under the terms of the GPL v3 (see
  12. www.gnu.org/licenses).
  13. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  14. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  15. DISCLAIMED.
  16. ==============================================================================
  17. */
  18. namespace juce::dsp
  19. {
  20. struct FFT::Instance
  21. {
  22. virtual ~Instance() = default;
  23. virtual void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept = 0;
  24. virtual void performRealOnlyForwardTransform (float*, bool) const noexcept = 0;
  25. virtual void performRealOnlyInverseTransform (float*) const noexcept = 0;
  26. };
  27. struct FFT::Engine
  28. {
  29. Engine (int priorityToUse) : enginePriority (priorityToUse)
  30. {
  31. auto& list = getEngines();
  32. list.add (this);
  33. std::sort (list.begin(), list.end(), [] (Engine* a, Engine* b) { return b->enginePriority < a->enginePriority; });
  34. }
  35. virtual ~Engine() = default;
  36. virtual FFT::Instance* create (int order) const = 0;
  37. //==============================================================================
  38. static FFT::Instance* createBestEngineForPlatform (int order)
  39. {
  40. for (auto* engine : getEngines())
  41. if (auto* instance = engine->create (order))
  42. return instance;
  43. jassertfalse; // This should never happen as the fallback engine should always work!
  44. return nullptr;
  45. }
  46. private:
  47. static Array<Engine*>& getEngines()
  48. {
  49. static Array<Engine*> engines;
  50. return engines;
  51. }
  52. int enginePriority; // used so that faster engines have priority over slower ones
  53. };
  54. template <typename InstanceToUse>
  55. struct FFT::EngineImpl : public FFT::Engine
  56. {
  57. EngineImpl() : FFT::Engine (InstanceToUse::priority) {}
  58. FFT::Instance* create (int order) const override { return InstanceToUse::create (order); }
  59. };
  60. //==============================================================================
  61. //==============================================================================
  62. struct FFTFallback : public FFT::Instance
  63. {
  64. // this should have the least priority of all engines
  65. static constexpr int priority = -1;
  66. static FFTFallback* create (int order)
  67. {
  68. return new FFTFallback (order);
  69. }
  70. FFTFallback (int order)
  71. {
  72. configForward.reset (new FFTConfig (1 << order, false));
  73. configInverse.reset (new FFTConfig (1 << order, true));
  74. size = 1 << order;
  75. }
  76. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  77. {
  78. if (size == 1)
  79. {
  80. *output = *input;
  81. return;
  82. }
  83. const SpinLock::ScopedLockType sl (processLock);
  84. jassert (configForward != nullptr);
  85. if (inverse)
  86. {
  87. configInverse->perform (input, output);
  88. const float scaleFactor = 1.0f / (float) size;
  89. for (int i = 0; i < size; ++i)
  90. output[i] *= scaleFactor;
  91. }
  92. else
  93. {
  94. configForward->perform (input, output);
  95. }
  96. }
  97. const size_t maxFFTScratchSpaceToAlloca = 256 * 1024;
  98. void performRealOnlyForwardTransform (float* d, bool) const noexcept override
  99. {
  100. if (size == 1)
  101. return;
  102. const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);
  103. if (scratchSize < maxFFTScratchSpaceToAlloca)
  104. {
  105. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255)
  106. performRealOnlyForwardTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
  107. JUCE_END_IGNORE_WARNINGS_MSVC
  108. }
  109. else
  110. {
  111. HeapBlock<char> heapSpace (scratchSize);
  112. performRealOnlyForwardTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
  113. }
  114. }
  115. void performRealOnlyInverseTransform (float* d) const noexcept override
  116. {
  117. if (size == 1)
  118. return;
  119. const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);
  120. if (scratchSize < maxFFTScratchSpaceToAlloca)
  121. {
  122. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255)
  123. performRealOnlyInverseTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
  124. JUCE_END_IGNORE_WARNINGS_MSVC
  125. }
  126. else
  127. {
  128. HeapBlock<char> heapSpace (scratchSize);
  129. performRealOnlyInverseTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
  130. }
  131. }
  132. void performRealOnlyForwardTransform (Complex<float>* scratch, float* d) const noexcept
  133. {
  134. for (int i = 0; i < size; ++i)
  135. scratch[i] = { d[i], 0 };
  136. perform (scratch, reinterpret_cast<Complex<float>*> (d), false);
  137. }
  138. void performRealOnlyInverseTransform (Complex<float>* scratch, float* d) const noexcept
  139. {
  140. auto* input = reinterpret_cast<Complex<float>*> (d);
  141. for (int i = size >> 1; i < size; ++i)
  142. input[i] = std::conj (input[size - i]);
  143. perform (input, scratch, true);
  144. for (int i = 0; i < size; ++i)
  145. {
  146. d[i] = scratch[i].real();
  147. d[i + size] = scratch[i].imag();
  148. }
  149. }
  150. //==============================================================================
  151. struct FFTConfig
  152. {
  153. FFTConfig (int sizeOfFFT, bool isInverse)
  154. : fftSize (sizeOfFFT), inverse (isInverse), twiddleTable ((size_t) sizeOfFFT)
  155. {
  156. auto inverseFactor = (inverse ? 2.0 : -2.0) * MathConstants<double>::pi / (double) fftSize;
  157. if (fftSize <= 4)
  158. {
  159. for (int i = 0; i < fftSize; ++i)
  160. {
  161. auto phase = i * inverseFactor;
  162. twiddleTable[i] = { (float) std::cos (phase),
  163. (float) std::sin (phase) };
  164. }
  165. }
  166. else
  167. {
  168. for (int i = 0; i < fftSize / 4; ++i)
  169. {
  170. auto phase = i * inverseFactor;
  171. twiddleTable[i] = { (float) std::cos (phase),
  172. (float) std::sin (phase) };
  173. }
  174. for (int i = fftSize / 4; i < fftSize / 2; ++i)
  175. {
  176. auto other = twiddleTable[i - fftSize / 4];
  177. twiddleTable[i] = { inverse ? -other.imag() : other.imag(),
  178. inverse ? other.real() : -other.real() };
  179. }
  180. twiddleTable[fftSize / 2].real (-1.0f);
  181. twiddleTable[fftSize / 2].imag (0.0f);
  182. for (int i = fftSize / 2; i < fftSize; ++i)
  183. {
  184. auto index = fftSize / 2 - (i - fftSize / 2);
  185. twiddleTable[i] = conj (twiddleTable[index]);
  186. }
  187. }
  188. auto root = (int) std::sqrt ((double) fftSize);
  189. int divisor = 4, n = fftSize;
  190. for (int i = 0; i < numElementsInArray (factors); ++i)
  191. {
  192. while ((n % divisor) != 0)
  193. {
  194. if (divisor == 2) divisor = 3;
  195. else if (divisor == 4) divisor = 2;
  196. else divisor += 2;
  197. if (divisor > root)
  198. divisor = n;
  199. }
  200. n /= divisor;
  201. jassert (divisor == 1 || divisor == 2 || divisor == 4);
  202. factors[i].radix = divisor;
  203. factors[i].length = n;
  204. }
  205. }
  206. void perform (const Complex<float>* input, Complex<float>* output) const noexcept
  207. {
  208. perform (input, output, 1, 1, factors);
  209. }
  210. const int fftSize;
  211. const bool inverse;
  212. struct Factor { int radix, length; };
  213. Factor factors[32];
  214. HeapBlock<Complex<float>> twiddleTable;
  215. void perform (const Complex<float>* input, Complex<float>* output, int stride, int strideIn, const Factor* facs) const noexcept
  216. {
  217. auto factor = *facs++;
  218. auto* originalOutput = output;
  219. auto* outputEnd = output + factor.radix * factor.length;
  220. if (stride == 1 && factor.radix <= 5)
  221. {
  222. for (int i = 0; i < factor.radix; ++i)
  223. perform (input + stride * strideIn * i, output + i * factor.length, stride * factor.radix, strideIn, facs);
  224. butterfly (factor, output, stride);
  225. return;
  226. }
  227. if (factor.length == 1)
  228. {
  229. do
  230. {
  231. *output++ = *input;
  232. input += stride * strideIn;
  233. }
  234. while (output < outputEnd);
  235. }
  236. else
  237. {
  238. do
  239. {
  240. perform (input, output, stride * factor.radix, strideIn, facs);
  241. input += stride * strideIn;
  242. output += factor.length;
  243. }
  244. while (output < outputEnd);
  245. }
  246. butterfly (factor, originalOutput, stride);
  247. }
  248. void butterfly (const Factor factor, Complex<float>* data, int stride) const noexcept
  249. {
  250. switch (factor.radix)
  251. {
  252. case 1: break;
  253. case 2: butterfly2 (data, stride, factor.length); return;
  254. case 4: butterfly4 (data, stride, factor.length); return;
  255. default: jassertfalse; break;
  256. }
  257. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255)
  258. auto* scratch = static_cast<Complex<float>*> (alloca ((size_t) factor.radix * sizeof (Complex<float>)));
  259. JUCE_END_IGNORE_WARNINGS_MSVC
  260. for (int i = 0; i < factor.length; ++i)
  261. {
  262. for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
  263. {
  264. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6386)
  265. scratch[q1] = data[k];
  266. JUCE_END_IGNORE_WARNINGS_MSVC
  267. k += factor.length;
  268. }
  269. for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
  270. {
  271. int twiddleIndex = 0;
  272. data[k] = scratch[0];
  273. for (int q = 1; q < factor.radix; ++q)
  274. {
  275. twiddleIndex += stride * k;
  276. if (twiddleIndex >= fftSize)
  277. twiddleIndex -= fftSize;
  278. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6385)
  279. data[k] += scratch[q] * twiddleTable[twiddleIndex];
  280. JUCE_END_IGNORE_WARNINGS_MSVC
  281. }
  282. k += factor.length;
  283. }
  284. }
  285. }
  286. void butterfly2 (Complex<float>* data, const int stride, const int length) const noexcept
  287. {
  288. auto* dataEnd = data + length;
  289. auto* tw = twiddleTable.getData();
  290. for (int i = length; --i >= 0;)
  291. {
  292. auto s = *dataEnd;
  293. s *= (*tw);
  294. tw += stride;
  295. *dataEnd++ = *data - s;
  296. *data++ += s;
  297. }
  298. }
  299. void butterfly4 (Complex<float>* data, const int stride, const int length) const noexcept
  300. {
  301. auto lengthX2 = length * 2;
  302. auto lengthX3 = length * 3;
  303. auto strideX2 = stride * 2;
  304. auto strideX3 = stride * 3;
  305. auto* twiddle1 = twiddleTable.getData();
  306. auto* twiddle2 = twiddle1;
  307. auto* twiddle3 = twiddle1;
  308. for (int i = length; --i >= 0;)
  309. {
  310. auto s0 = data[length] * *twiddle1;
  311. auto s1 = data[lengthX2] * *twiddle2;
  312. auto s2 = data[lengthX3] * *twiddle3;
  313. auto s3 = s0; s3 += s2;
  314. auto s4 = s0; s4 -= s2;
  315. auto s5 = *data; s5 -= s1;
  316. *data += s1;
  317. data[lengthX2] = *data;
  318. data[lengthX2] -= s3;
  319. twiddle1 += stride;
  320. twiddle2 += strideX2;
  321. twiddle3 += strideX3;
  322. *data += s3;
  323. if (inverse)
  324. {
  325. data[length] = { s5.real() - s4.imag(),
  326. s5.imag() + s4.real() };
  327. data[lengthX3] = { s5.real() + s4.imag(),
  328. s5.imag() - s4.real() };
  329. }
  330. else
  331. {
  332. data[length] = { s5.real() + s4.imag(),
  333. s5.imag() - s4.real() };
  334. data[lengthX3] = { s5.real() - s4.imag(),
  335. s5.imag() + s4.real() };
  336. }
  337. ++data;
  338. }
  339. }
  340. JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (FFTConfig)
  341. };
  342. //==============================================================================
  343. SpinLock processLock;
  344. std::unique_ptr<FFTConfig> configForward, configInverse;
  345. int size;
  346. };
  347. FFT::EngineImpl<FFTFallback> fftFallback;
  348. //==============================================================================
  349. //==============================================================================
  350. #if (JUCE_MAC || JUCE_IOS) && JUCE_USE_VDSP_FRAMEWORK
  351. struct AppleFFT : public FFT::Instance
  352. {
  353. static constexpr int priority = 5;
  354. static AppleFFT* create (int order)
  355. {
  356. return new AppleFFT (order);
  357. }
  358. AppleFFT (int orderToUse)
  359. : order (static_cast<vDSP_Length> (orderToUse)),
  360. fftSetup (vDSP_create_fftsetup (order, 2)),
  361. forwardNormalisation (0.5f),
  362. inverseNormalisation (1.0f / static_cast<float> (1 << order))
  363. {}
  364. ~AppleFFT() override
  365. {
  366. if (fftSetup != nullptr)
  367. {
  368. vDSP_destroy_fftsetup (fftSetup);
  369. fftSetup = nullptr;
  370. }
  371. }
  372. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  373. {
  374. auto size = (1 << order);
  375. DSPSplitComplex splitInput (toSplitComplex (const_cast<Complex<float>*> (input)));
  376. DSPSplitComplex splitOutput (toSplitComplex (output));
  377. vDSP_fft_zop (fftSetup, &splitInput, 2, &splitOutput, 2,
  378. order, inverse ? kFFTDirection_Inverse : kFFTDirection_Forward);
  379. float factor = (inverse ? inverseNormalisation : forwardNormalisation * 2.0f);
  380. vDSP_vsmul ((float*) output, 1, &factor, (float*) output, 1, static_cast<size_t> (size << 1));
  381. }
  382. void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
  383. {
  384. auto size = (1 << order);
  385. auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
  386. auto splitInOut (toSplitComplex (inout));
  387. inoutData[size] = 0.0f;
  388. vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Forward);
  389. vDSP_vsmul (inoutData, 1, &forwardNormalisation, inoutData, 1, static_cast<size_t> (size << 1));
  390. mirrorResult (inout, ignoreNegativeFreqs);
  391. }
  392. void performRealOnlyInverseTransform (float* inoutData) const noexcept override
  393. {
  394. auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
  395. auto size = (1 << order);
  396. auto splitInOut (toSplitComplex (inout));
  397. // Imaginary part of nyquist and DC frequencies are always zero
  398. // so Apple uses the imaginary part of the DC frequency to store
  399. // the real part of the nyquist frequency
  400. if (size != 1)
  401. inout[0] = Complex<float> (inout[0].real(), inout[size >> 1].real());
  402. vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Inverse);
  403. vDSP_vsmul (inoutData, 1, &inverseNormalisation, inoutData, 1, static_cast<size_t> (size << 1));
  404. vDSP_vclr (inoutData + size, 1, static_cast<size_t> (size));
  405. }
  406. private:
  407. //==============================================================================
  408. void mirrorResult (Complex<float>* out, bool ignoreNegativeFreqs) const noexcept
  409. {
  410. auto size = (1 << order);
  411. auto i = size >> 1;
  412. // Imaginary part of nyquist and DC frequencies are always zero
  413. // so Apple uses the imaginary part of the DC frequency to store
  414. // the real part of the nyquist frequency
  415. out[i++] = { out[0].imag(), 0.0 };
  416. out[0] = { out[0].real(), 0.0 };
  417. if (! ignoreNegativeFreqs)
  418. for (; i < size; ++i)
  419. out[i] = std::conj (out[size - i]);
  420. }
  421. static DSPSplitComplex toSplitComplex (Complex<float>* data) noexcept
  422. {
  423. // this assumes that Complex interleaves real and imaginary parts
  424. // and is tightly packed.
  425. return { reinterpret_cast<float*> (data),
  426. reinterpret_cast<float*> (data) + 1};
  427. }
  428. //==============================================================================
  429. vDSP_Length order;
  430. FFTSetup fftSetup;
  431. float forwardNormalisation, inverseNormalisation;
  432. };
  433. FFT::EngineImpl<AppleFFT> appleFFT;
  434. #endif
  435. //==============================================================================
  436. //==============================================================================
  437. #if JUCE_DSP_USE_SHARED_FFTW || JUCE_DSP_USE_STATIC_FFTW
  438. #if JUCE_DSP_USE_STATIC_FFTW
  439. extern "C"
  440. {
  441. void* fftwf_plan_dft_1d (int, void*, void*, int, int);
  442. void* fftwf_plan_dft_r2c_1d (int, void*, void*, int);
  443. void* fftwf_plan_dft_c2r_1d (int, void*, void*, int);
  444. void fftwf_destroy_plan (void*);
  445. void fftwf_execute_dft (void*, void*, void*);
  446. void fftwf_execute_dft_r2c (void*, void*, void*);
  447. void fftwf_execute_dft_c2r (void*, void*, void*);
  448. }
  449. #endif
  450. struct FFTWImpl : public FFT::Instance
  451. {
  452. #if JUCE_DSP_USE_STATIC_FFTW
  453. // if the JUCE developer has gone through the hassle of statically
  454. // linking in fftw, they probably want to use it
  455. static constexpr int priority = 10;
  456. #else
  457. static constexpr int priority = 3;
  458. #endif
  459. struct FFTWPlan;
  460. using FFTWPlanRef = FFTWPlan*;
  461. enum
  462. {
  463. measure = 0,
  464. unaligned = (1 << 1),
  465. estimate = (1 << 6)
  466. };
  467. struct Symbols
  468. {
  469. FFTWPlanRef (*plan_dft_fftw) (unsigned, Complex<float>*, Complex<float>*, int, unsigned);
  470. FFTWPlanRef (*plan_r2c_fftw) (unsigned, float*, Complex<float>*, unsigned);
  471. FFTWPlanRef (*plan_c2r_fftw) (unsigned, Complex<float>*, float*, unsigned);
  472. void (*destroy_fftw) (FFTWPlanRef);
  473. void (*execute_dft_fftw) (FFTWPlanRef, const Complex<float>*, Complex<float>*);
  474. void (*execute_r2c_fftw) (FFTWPlanRef, float*, Complex<float>*);
  475. void (*execute_c2r_fftw) (FFTWPlanRef, Complex<float>*, float*);
  476. #if JUCE_DSP_USE_STATIC_FFTW
  477. template <typename FuncPtr, typename ActualSymbolType>
  478. static bool symbol (FuncPtr& dst, ActualSymbolType sym)
  479. {
  480. dst = reinterpret_cast<FuncPtr> (sym);
  481. return true;
  482. }
  483. #else
  484. template <typename FuncPtr>
  485. static bool symbol (DynamicLibrary& lib, FuncPtr& dst, const char* name)
  486. {
  487. dst = reinterpret_cast<FuncPtr> (lib.getFunction (name));
  488. return (dst != nullptr);
  489. }
  490. #endif
  491. };
  492. static FFTWImpl* create (int order)
  493. {
  494. DynamicLibrary lib;
  495. #if ! JUCE_DSP_USE_STATIC_FFTW
  496. #if JUCE_MAC
  497. auto libName = "libfftw3f.dylib";
  498. #elif JUCE_WINDOWS
  499. auto libName = "libfftw3f.dll";
  500. #else
  501. auto libName = "libfftw3f.so";
  502. #endif
  503. if (lib.open (libName))
  504. #endif
  505. {
  506. Symbols symbols;
  507. #if JUCE_DSP_USE_STATIC_FFTW
  508. if (! Symbols::symbol (symbols.plan_dft_fftw, fftwf_plan_dft_1d)) return nullptr;
  509. if (! Symbols::symbol (symbols.plan_r2c_fftw, fftwf_plan_dft_r2c_1d)) return nullptr;
  510. if (! Symbols::symbol (symbols.plan_c2r_fftw, fftwf_plan_dft_c2r_1d)) return nullptr;
  511. if (! Symbols::symbol (symbols.destroy_fftw, fftwf_destroy_plan)) return nullptr;
  512. if (! Symbols::symbol (symbols.execute_dft_fftw, fftwf_execute_dft)) return nullptr;
  513. if (! Symbols::symbol (symbols.execute_r2c_fftw, fftwf_execute_dft_r2c)) return nullptr;
  514. if (! Symbols::symbol (symbols.execute_c2r_fftw, fftwf_execute_dft_c2r)) return nullptr;
  515. #else
  516. if (! Symbols::symbol (lib, symbols.plan_dft_fftw, "fftwf_plan_dft_1d")) return nullptr;
  517. if (! Symbols::symbol (lib, symbols.plan_r2c_fftw, "fftwf_plan_dft_r2c_1d")) return nullptr;
  518. if (! Symbols::symbol (lib, symbols.plan_c2r_fftw, "fftwf_plan_dft_c2r_1d")) return nullptr;
  519. if (! Symbols::symbol (lib, symbols.destroy_fftw, "fftwf_destroy_plan")) return nullptr;
  520. if (! Symbols::symbol (lib, symbols.execute_dft_fftw, "fftwf_execute_dft")) return nullptr;
  521. if (! Symbols::symbol (lib, symbols.execute_r2c_fftw, "fftwf_execute_dft_r2c")) return nullptr;
  522. if (! Symbols::symbol (lib, symbols.execute_c2r_fftw, "fftwf_execute_dft_c2r")) return nullptr;
  523. #endif
  524. return new FFTWImpl (static_cast<size_t> (order), std::move (lib), symbols);
  525. }
  526. return nullptr;
  527. }
  528. FFTWImpl (size_t orderToUse, DynamicLibrary&& libraryToUse, const Symbols& symbols)
  529. : fftwLibrary (std::move (libraryToUse)), fftw (symbols), order (static_cast<size_t> (orderToUse))
  530. {
  531. ScopedLock lock (getFFTWPlanLock());
  532. auto n = (1u << order);
  533. HeapBlock<Complex<float>> in (n), out (n);
  534. c2cForward = fftw.plan_dft_fftw (n, in.getData(), out.getData(), -1, unaligned | estimate);
  535. c2cInverse = fftw.plan_dft_fftw (n, in.getData(), out.getData(), +1, unaligned | estimate);
  536. r2c = fftw.plan_r2c_fftw (n, (float*) in.getData(), in.getData(), unaligned | estimate);
  537. c2r = fftw.plan_c2r_fftw (n, in.getData(), (float*) in.getData(), unaligned | estimate);
  538. }
  539. ~FFTWImpl() override
  540. {
  541. ScopedLock lock (getFFTWPlanLock());
  542. fftw.destroy_fftw (c2cForward);
  543. fftw.destroy_fftw (c2cInverse);
  544. fftw.destroy_fftw (r2c);
  545. fftw.destroy_fftw (c2r);
  546. }
  547. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  548. {
  549. if (inverse)
  550. {
  551. auto n = (1u << order);
  552. fftw.execute_dft_fftw (c2cInverse, input, output);
  553. FloatVectorOperations::multiply ((float*) output, 1.0f / static_cast<float> (n), (int) n << 1);
  554. }
  555. else
  556. {
  557. fftw.execute_dft_fftw (c2cForward, input, output);
  558. }
  559. }
  560. void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
  561. {
  562. if (order == 0)
  563. return;
  564. auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
  565. fftw.execute_r2c_fftw (r2c, inputOutputData, out);
  566. auto size = (1 << order);
  567. if (! ignoreNegativeFreqs)
  568. for (int i = size >> 1; i < size; ++i)
  569. out[i] = std::conj (out[size - i]);
  570. }
  571. void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
  572. {
  573. auto n = (1u << order);
  574. fftw.execute_c2r_fftw (c2r, (Complex<float>*) inputOutputData, inputOutputData);
  575. FloatVectorOperations::multiply ((float*) inputOutputData, 1.0f / static_cast<float> (n), (int) n);
  576. }
  577. //==============================================================================
  578. // fftw's plan_* and destroy_* methods are NOT thread safe. So we need to share
  579. // a lock between all instances of FFTWImpl
  580. static CriticalSection& getFFTWPlanLock() noexcept
  581. {
  582. static CriticalSection cs;
  583. return cs;
  584. }
  585. //==============================================================================
  586. DynamicLibrary fftwLibrary;
  587. Symbols fftw;
  588. size_t order;
  589. FFTWPlanRef c2cForward, c2cInverse, r2c, c2r;
  590. };
  591. FFT::EngineImpl<FFTWImpl> fftwEngine;
  592. #endif
  593. //==============================================================================
  594. //==============================================================================
  595. #if JUCE_DSP_USE_INTEL_MKL
  596. struct IntelFFT : public FFT::Instance
  597. {
  598. static constexpr int priority = 8;
  599. static bool succeeded (MKL_LONG status) noexcept { return status == 0; }
  600. static IntelFFT* create (int orderToUse)
  601. {
  602. DFTI_DESCRIPTOR_HANDLE mklc2c, mklc2r;
  603. if (DftiCreateDescriptor (&mklc2c, DFTI_SINGLE, DFTI_COMPLEX, 1, 1 << orderToUse) == 0)
  604. {
  605. if (succeeded (DftiSetValue (mklc2c, DFTI_PLACEMENT, DFTI_NOT_INPLACE))
  606. && succeeded (DftiSetValue (mklc2c, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
  607. && succeeded (DftiCommitDescriptor (mklc2c)))
  608. {
  609. if (succeeded (DftiCreateDescriptor (&mklc2r, DFTI_SINGLE, DFTI_REAL, 1, 1 << orderToUse)))
  610. {
  611. if (succeeded (DftiSetValue (mklc2r, DFTI_PLACEMENT, DFTI_INPLACE))
  612. && succeeded (DftiSetValue (mklc2r, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
  613. && succeeded (DftiCommitDescriptor (mklc2r)))
  614. {
  615. return new IntelFFT (static_cast<size_t> (orderToUse), mklc2c, mklc2r);
  616. }
  617. DftiFreeDescriptor (&mklc2r);
  618. }
  619. }
  620. DftiFreeDescriptor (&mklc2c);
  621. }
  622. return {};
  623. }
  624. IntelFFT (size_t orderToUse, DFTI_DESCRIPTOR_HANDLE c2cToUse, DFTI_DESCRIPTOR_HANDLE cr2ToUse)
  625. : order (orderToUse), c2c (c2cToUse), c2r (cr2ToUse)
  626. {}
  627. ~IntelFFT() override
  628. {
  629. DftiFreeDescriptor (&c2c);
  630. DftiFreeDescriptor (&c2r);
  631. }
  632. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  633. {
  634. if (inverse)
  635. DftiComputeBackward (c2c, (void*) input, output);
  636. else
  637. DftiComputeForward (c2c, (void*) input, output);
  638. }
  639. void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
  640. {
  641. if (order == 0)
  642. return;
  643. DftiComputeForward (c2r, inputOutputData);
  644. auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
  645. auto size = (1 << order);
  646. if (! ignoreNegativeFreqs)
  647. for (int i = size >> 1; i < size; ++i)
  648. out[i] = std::conj (out[size - i]);
  649. }
  650. void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
  651. {
  652. DftiComputeBackward (c2r, inputOutputData);
  653. }
  654. size_t order;
  655. DFTI_DESCRIPTOR_HANDLE c2c, c2r;
  656. };
  657. FFT::EngineImpl<IntelFFT> fftwEngine;
  658. #endif
  659. //==============================================================================
  660. //==============================================================================
  661. // Visual Studio should define no more than one of these, depending on the
  662. // setting at 'Project' > 'Properties' > 'Configuration Properties' > 'Intel
  663. // Performance Libraries' > 'Use Intel(R) IPP'
  664. #if _IPP_SEQUENTIAL_STATIC || _IPP_SEQUENTIAL_DYNAMIC || _IPP_PARALLEL_STATIC || _IPP_PARALLEL_DYNAMIC
  665. class IntelPerformancePrimitivesFFT : public FFT::Instance
  666. {
  667. public:
  668. static constexpr auto priority = 9;
  669. static IntelPerformancePrimitivesFFT* create (const int order)
  670. {
  671. auto complexContext = Context<ComplexTraits>::create (order);
  672. auto realContext = Context<RealTraits> ::create (order);
  673. if (complexContext.isValid() && realContext.isValid())
  674. return new IntelPerformancePrimitivesFFT (std::move (complexContext), std::move (realContext), order);
  675. return {};
  676. }
  677. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  678. {
  679. if (inverse)
  680. {
  681. ippsFFTInv_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
  682. reinterpret_cast<Ipp32fc*> (output),
  683. cplx.specPtr,
  684. cplx.workBuf.get());
  685. }
  686. else
  687. {
  688. ippsFFTFwd_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
  689. reinterpret_cast<Ipp32fc*> (output),
  690. cplx.specPtr,
  691. cplx.workBuf.get());
  692. }
  693. }
  694. void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
  695. {
  696. ippsFFTFwd_RToCCS_32f_I (inoutData, real.specPtr, real.workBuf.get());
  697. if (order == 0)
  698. return;
  699. auto* out = reinterpret_cast<Complex<float>*> (inoutData);
  700. const auto size = (1 << order);
  701. if (! ignoreNegativeFreqs)
  702. for (auto i = size >> 1; i < size; ++i)
  703. out[i] = std::conj (out[size - i]);
  704. }
  705. void performRealOnlyInverseTransform (float* inoutData) const noexcept override
  706. {
  707. ippsFFTInv_CCSToR_32f_I (inoutData, real.specPtr, real.workBuf.get());
  708. }
  709. private:
  710. static constexpr auto flag = IPP_FFT_DIV_INV_BY_N;
  711. static constexpr auto hint = ippAlgHintFast;
  712. struct IppFree
  713. {
  714. template <typename Ptr>
  715. void operator() (Ptr* ptr) const noexcept { ippsFree (ptr); }
  716. };
  717. using IppPtr = std::unique_ptr<Ipp8u[], IppFree>;
  718. template <typename Traits>
  719. struct Context
  720. {
  721. using SpecPtr = typename Traits::Spec*;
  722. static Context create (const int order)
  723. {
  724. int specSize = 0, initSize = 0, workSize = 0;
  725. if (Traits::getSize (order, flag, hint, &specSize, &initSize, &workSize) != ippStsNoErr)
  726. return {};
  727. const auto initBuf = IppPtr (ippsMalloc_8u (initSize));
  728. auto specBuf = IppPtr (ippsMalloc_8u (specSize));
  729. SpecPtr specPtr = nullptr;
  730. if (Traits::init (&specPtr, order, flag, hint, specBuf.get(), initBuf.get()) != ippStsNoErr)
  731. return {};
  732. return { std::move (specBuf), IppPtr (ippsMalloc_8u (workSize)), specPtr };
  733. }
  734. Context() noexcept = default;
  735. Context (IppPtr&& spec, IppPtr&& work, typename Traits::Spec* ptr) noexcept
  736. : specBuf (std::move (spec)), workBuf (std::move (work)), specPtr (ptr)
  737. {}
  738. bool isValid() const noexcept { return specPtr != nullptr; }
  739. IppPtr specBuf, workBuf;
  740. SpecPtr specPtr = nullptr;
  741. };
  742. struct ComplexTraits
  743. {
  744. static constexpr auto getSize = ippsFFTGetSize_C_32fc;
  745. static constexpr auto init = ippsFFTInit_C_32fc;
  746. using Spec = IppsFFTSpec_C_32fc;
  747. };
  748. struct RealTraits
  749. {
  750. static constexpr auto getSize = ippsFFTGetSize_R_32f;
  751. static constexpr auto init = ippsFFTInit_R_32f;
  752. using Spec = IppsFFTSpec_R_32f;
  753. };
  754. IntelPerformancePrimitivesFFT (Context<ComplexTraits>&& complexToUse,
  755. Context<RealTraits>&& realToUse,
  756. const int orderToUse)
  757. : cplx (std::move (complexToUse)),
  758. real (std::move (realToUse)),
  759. order (orderToUse)
  760. {}
  761. Context<ComplexTraits> cplx;
  762. Context<RealTraits> real;
  763. int order = 0;
  764. };
  765. FFT::EngineImpl<IntelPerformancePrimitivesFFT> intelPerformancePrimitivesFFT;
  766. #endif
  767. //==============================================================================
  768. //==============================================================================
  769. FFT::FFT (int order)
  770. : engine (FFT::Engine::createBestEngineForPlatform (order)),
  771. size (1 << order)
  772. {
  773. }
  774. FFT::FFT (FFT&&) noexcept = default;
  775. FFT& FFT::operator= (FFT&&) noexcept = default;
  776. FFT::~FFT() = default;
  777. void FFT::perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept
  778. {
  779. if (engine != nullptr)
  780. engine->perform (input, output, inverse);
  781. }
  782. void FFT::performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept
  783. {
  784. if (engine != nullptr)
  785. engine->performRealOnlyForwardTransform (inputOutputData, ignoreNegativeFreqs);
  786. }
  787. void FFT::performRealOnlyInverseTransform (float* inputOutputData) const noexcept
  788. {
  789. if (engine != nullptr)
  790. engine->performRealOnlyInverseTransform (inputOutputData);
  791. }
  792. void FFT::performFrequencyOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept
  793. {
  794. if (size == 1)
  795. return;
  796. performRealOnlyForwardTransform (inputOutputData, ignoreNegativeFreqs);
  797. auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
  798. const auto limit = ignoreNegativeFreqs ? (size / 2) + 1 : size;
  799. for (int i = 0; i < limit; ++i)
  800. inputOutputData[i] = std::abs (out[i]);
  801. zeromem (inputOutputData + limit, static_cast<size_t> (size * 2 - limit) * sizeof (float));
  802. }
  803. } // namespace juce::dsp