The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

995 lines
34KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE 7 technical preview.
  4. Copyright (c) 2022 - Raw Material Software Limited
  5. You may use this code under the terms of the GPL v3
  6. (see www.gnu.org/licenses).
  7. For the technical preview this file cannot be licensed commercially.
  8. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  9. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  10. DISCLAIMED.
  11. ==============================================================================
  12. */
  13. namespace juce
  14. {
  15. namespace dsp
  16. {
  17. struct FFT::Instance
  18. {
  19. virtual ~Instance() = default;
  20. virtual void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept = 0;
  21. virtual void performRealOnlyForwardTransform (float*, bool) const noexcept = 0;
  22. virtual void performRealOnlyInverseTransform (float*) const noexcept = 0;
  23. };
  24. struct FFT::Engine
  25. {
  26. Engine (int priorityToUse) : enginePriority (priorityToUse)
  27. {
  28. auto& list = getEngines();
  29. list.add (this);
  30. std::sort (list.begin(), list.end(), [] (Engine* a, Engine* b) { return b->enginePriority < a->enginePriority; });
  31. }
  32. virtual ~Engine() = default;
  33. virtual FFT::Instance* create (int order) const = 0;
  34. //==============================================================================
  35. static FFT::Instance* createBestEngineForPlatform (int order)
  36. {
  37. for (auto* engine : getEngines())
  38. if (auto* instance = engine->create (order))
  39. return instance;
  40. jassertfalse; // This should never happen as the fallback engine should always work!
  41. return nullptr;
  42. }
  43. private:
  44. static Array<Engine*>& getEngines()
  45. {
  46. static Array<Engine*> engines;
  47. return engines;
  48. }
  49. int enginePriority; // used so that faster engines have priority over slower ones
  50. };
  51. template <typename InstanceToUse>
  52. struct FFT::EngineImpl : public FFT::Engine
  53. {
  54. EngineImpl() : FFT::Engine (InstanceToUse::priority) {}
  55. FFT::Instance* create (int order) const override { return InstanceToUse::create (order); }
  56. };
  57. //==============================================================================
  58. //==============================================================================
  59. struct FFTFallback : public FFT::Instance
  60. {
  61. // this should have the least priority of all engines
  62. static constexpr int priority = -1;
  63. static FFTFallback* create (int order)
  64. {
  65. return new FFTFallback (order);
  66. }
  67. FFTFallback (int order)
  68. {
  69. configForward.reset (new FFTConfig (1 << order, false));
  70. configInverse.reset (new FFTConfig (1 << order, true));
  71. size = 1 << order;
  72. }
  73. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  74. {
  75. if (size == 1)
  76. {
  77. *output = *input;
  78. return;
  79. }
  80. const SpinLock::ScopedLockType sl(processLock);
  81. jassert (configForward != nullptr);
  82. if (inverse)
  83. {
  84. configInverse->perform (input, output);
  85. const float scaleFactor = 1.0f / (float) size;
  86. for (int i = 0; i < size; ++i)
  87. output[i] *= scaleFactor;
  88. }
  89. else
  90. {
  91. configForward->perform (input, output);
  92. }
  93. }
  94. const size_t maxFFTScratchSpaceToAlloca = 256 * 1024;
  95. void performRealOnlyForwardTransform (float* d, bool) const noexcept override
  96. {
  97. if (size == 1)
  98. return;
  99. const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);
  100. if (scratchSize < maxFFTScratchSpaceToAlloca)
  101. {
  102. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255)
  103. performRealOnlyForwardTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
  104. JUCE_END_IGNORE_WARNINGS_MSVC
  105. }
  106. else
  107. {
  108. HeapBlock<char> heapSpace (scratchSize);
  109. performRealOnlyForwardTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
  110. }
  111. }
  112. void performRealOnlyInverseTransform (float* d) const noexcept override
  113. {
  114. if (size == 1)
  115. return;
  116. const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);
  117. if (scratchSize < maxFFTScratchSpaceToAlloca)
  118. {
  119. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255)
  120. performRealOnlyInverseTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
  121. JUCE_END_IGNORE_WARNINGS_MSVC
  122. }
  123. else
  124. {
  125. HeapBlock<char> heapSpace (scratchSize);
  126. performRealOnlyInverseTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
  127. }
  128. }
  129. void performRealOnlyForwardTransform (Complex<float>* scratch, float* d) const noexcept
  130. {
  131. for (int i = 0; i < size; ++i)
  132. scratch[i] = { d[i], 0 };
  133. perform (scratch, reinterpret_cast<Complex<float>*> (d), false);
  134. }
  135. void performRealOnlyInverseTransform (Complex<float>* scratch, float* d) const noexcept
  136. {
  137. auto* input = reinterpret_cast<Complex<float>*> (d);
  138. for (int i = size >> 1; i < size; ++i)
  139. input[i] = std::conj (input[size - i]);
  140. perform (input, scratch, true);
  141. for (int i = 0; i < size; ++i)
  142. {
  143. d[i] = scratch[i].real();
  144. d[i + size] = scratch[i].imag();
  145. }
  146. }
  147. //==============================================================================
  148. struct FFTConfig
  149. {
  150. FFTConfig (int sizeOfFFT, bool isInverse)
  151. : fftSize (sizeOfFFT), inverse (isInverse), twiddleTable ((size_t) sizeOfFFT)
  152. {
  153. auto inverseFactor = (inverse ? 2.0 : -2.0) * MathConstants<double>::pi / (double) fftSize;
  154. if (fftSize <= 4)
  155. {
  156. for (int i = 0; i < fftSize; ++i)
  157. {
  158. auto phase = i * inverseFactor;
  159. twiddleTable[i] = { (float) std::cos (phase),
  160. (float) std::sin (phase) };
  161. }
  162. }
  163. else
  164. {
  165. for (int i = 0; i < fftSize / 4; ++i)
  166. {
  167. auto phase = i * inverseFactor;
  168. twiddleTable[i] = { (float) std::cos (phase),
  169. (float) std::sin (phase) };
  170. }
  171. for (int i = fftSize / 4; i < fftSize / 2; ++i)
  172. {
  173. auto other = twiddleTable[i - fftSize / 4];
  174. twiddleTable[i] = { inverse ? -other.imag() : other.imag(),
  175. inverse ? other.real() : -other.real() };
  176. }
  177. twiddleTable[fftSize / 2].real (-1.0f);
  178. twiddleTable[fftSize / 2].imag (0.0f);
  179. for (int i = fftSize / 2; i < fftSize; ++i)
  180. {
  181. auto index = fftSize / 2 - (i - fftSize / 2);
  182. twiddleTable[i] = conj(twiddleTable[index]);
  183. }
  184. }
  185. auto root = (int) std::sqrt ((double) fftSize);
  186. int divisor = 4, n = fftSize;
  187. for (int i = 0; i < numElementsInArray (factors); ++i)
  188. {
  189. while ((n % divisor) != 0)
  190. {
  191. if (divisor == 2) divisor = 3;
  192. else if (divisor == 4) divisor = 2;
  193. else divisor += 2;
  194. if (divisor > root)
  195. divisor = n;
  196. }
  197. n /= divisor;
  198. jassert (divisor == 1 || divisor == 2 || divisor == 4);
  199. factors[i].radix = divisor;
  200. factors[i].length = n;
  201. }
  202. }
  203. void perform (const Complex<float>* input, Complex<float>* output) const noexcept
  204. {
  205. perform (input, output, 1, 1, factors);
  206. }
  207. const int fftSize;
  208. const bool inverse;
  209. struct Factor { int radix, length; };
  210. Factor factors[32];
  211. HeapBlock<Complex<float>> twiddleTable;
  212. void perform (const Complex<float>* input, Complex<float>* output, int stride, int strideIn, const Factor* facs) const noexcept
  213. {
  214. auto factor = *facs++;
  215. auto* originalOutput = output;
  216. auto* outputEnd = output + factor.radix * factor.length;
  217. if (stride == 1 && factor.radix <= 5)
  218. {
  219. for (int i = 0; i < factor.radix; ++i)
  220. perform (input + stride * strideIn * i, output + i * factor.length, stride * factor.radix, strideIn, facs);
  221. butterfly (factor, output, stride);
  222. return;
  223. }
  224. if (factor.length == 1)
  225. {
  226. do
  227. {
  228. *output++ = *input;
  229. input += stride * strideIn;
  230. }
  231. while (output < outputEnd);
  232. }
  233. else
  234. {
  235. do
  236. {
  237. perform (input, output, stride * factor.radix, strideIn, facs);
  238. input += stride * strideIn;
  239. output += factor.length;
  240. }
  241. while (output < outputEnd);
  242. }
  243. butterfly (factor, originalOutput, stride);
  244. }
  245. void butterfly (const Factor factor, Complex<float>* data, int stride) const noexcept
  246. {
  247. switch (factor.radix)
  248. {
  249. case 1: break;
  250. case 2: butterfly2 (data, stride, factor.length); return;
  251. case 4: butterfly4 (data, stride, factor.length); return;
  252. default: jassertfalse; break;
  253. }
  254. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255)
  255. auto* scratch = static_cast<Complex<float>*> (alloca ((size_t) factor.radix * sizeof (Complex<float>)));
  256. JUCE_END_IGNORE_WARNINGS_MSVC
  257. for (int i = 0; i < factor.length; ++i)
  258. {
  259. for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
  260. {
  261. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6386)
  262. scratch[q1] = data[k];
  263. JUCE_END_IGNORE_WARNINGS_MSVC
  264. k += factor.length;
  265. }
  266. for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
  267. {
  268. int twiddleIndex = 0;
  269. data[k] = scratch[0];
  270. for (int q = 1; q < factor.radix; ++q)
  271. {
  272. twiddleIndex += stride * k;
  273. if (twiddleIndex >= fftSize)
  274. twiddleIndex -= fftSize;
  275. JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6385)
  276. data[k] += scratch[q] * twiddleTable[twiddleIndex];
  277. JUCE_END_IGNORE_WARNINGS_MSVC
  278. }
  279. k += factor.length;
  280. }
  281. }
  282. }
  283. void butterfly2 (Complex<float>* data, const int stride, const int length) const noexcept
  284. {
  285. auto* dataEnd = data + length;
  286. auto* tw = twiddleTable.getData();
  287. for (int i = length; --i >= 0;)
  288. {
  289. auto s = *dataEnd;
  290. s *= (*tw);
  291. tw += stride;
  292. *dataEnd++ = *data - s;
  293. *data++ += s;
  294. }
  295. }
  296. void butterfly4 (Complex<float>* data, const int stride, const int length) const noexcept
  297. {
  298. auto lengthX2 = length * 2;
  299. auto lengthX3 = length * 3;
  300. auto strideX2 = stride * 2;
  301. auto strideX3 = stride * 3;
  302. auto* twiddle1 = twiddleTable.getData();
  303. auto* twiddle2 = twiddle1;
  304. auto* twiddle3 = twiddle1;
  305. for (int i = length; --i >= 0;)
  306. {
  307. auto s0 = data[length] * *twiddle1;
  308. auto s1 = data[lengthX2] * *twiddle2;
  309. auto s2 = data[lengthX3] * *twiddle3;
  310. auto s3 = s0; s3 += s2;
  311. auto s4 = s0; s4 -= s2;
  312. auto s5 = *data; s5 -= s1;
  313. *data += s1;
  314. data[lengthX2] = *data;
  315. data[lengthX2] -= s3;
  316. twiddle1 += stride;
  317. twiddle2 += strideX2;
  318. twiddle3 += strideX3;
  319. *data += s3;
  320. if (inverse)
  321. {
  322. data[length] = { s5.real() - s4.imag(),
  323. s5.imag() + s4.real() };
  324. data[lengthX3] = { s5.real() + s4.imag(),
  325. s5.imag() - s4.real() };
  326. }
  327. else
  328. {
  329. data[length] = { s5.real() + s4.imag(),
  330. s5.imag() - s4.real() };
  331. data[lengthX3] = { s5.real() - s4.imag(),
  332. s5.imag() + s4.real() };
  333. }
  334. ++data;
  335. }
  336. }
  337. JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (FFTConfig)
  338. };
  339. //==============================================================================
  340. SpinLock processLock;
  341. std::unique_ptr<FFTConfig> configForward, configInverse;
  342. int size;
  343. };
  344. FFT::EngineImpl<FFTFallback> fftFallback;
  345. //==============================================================================
  346. //==============================================================================
  347. #if (JUCE_MAC || JUCE_IOS) && JUCE_USE_VDSP_FRAMEWORK
  348. struct AppleFFT : public FFT::Instance
  349. {
  350. static constexpr int priority = 5;
  351. static AppleFFT* create (int order)
  352. {
  353. return new AppleFFT (order);
  354. }
  355. AppleFFT (int orderToUse)
  356. : order (static_cast<vDSP_Length> (orderToUse)),
  357. fftSetup (vDSP_create_fftsetup (order, 2)),
  358. forwardNormalisation (0.5f),
  359. inverseNormalisation (1.0f / static_cast<float> (1 << order))
  360. {}
  361. ~AppleFFT() override
  362. {
  363. if (fftSetup != nullptr)
  364. {
  365. vDSP_destroy_fftsetup (fftSetup);
  366. fftSetup = nullptr;
  367. }
  368. }
  369. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  370. {
  371. auto size = (1 << order);
  372. DSPSplitComplex splitInput (toSplitComplex (const_cast<Complex<float>*> (input)));
  373. DSPSplitComplex splitOutput (toSplitComplex (output));
  374. vDSP_fft_zop (fftSetup, &splitInput, 2, &splitOutput, 2,
  375. order, inverse ? kFFTDirection_Inverse : kFFTDirection_Forward);
  376. float factor = (inverse ? inverseNormalisation : forwardNormalisation * 2.0f);
  377. vDSP_vsmul ((float*) output, 1, &factor, (float*) output, 1, static_cast<size_t> (size << 1));
  378. }
  379. void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
  380. {
  381. auto size = (1 << order);
  382. auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
  383. auto splitInOut (toSplitComplex (inout));
  384. inoutData[size] = 0.0f;
  385. vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Forward);
  386. vDSP_vsmul (inoutData, 1, &forwardNormalisation, inoutData, 1, static_cast<size_t> (size << 1));
  387. mirrorResult (inout, ignoreNegativeFreqs);
  388. }
  389. void performRealOnlyInverseTransform (float* inoutData) const noexcept override
  390. {
  391. auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
  392. auto size = (1 << order);
  393. auto splitInOut (toSplitComplex (inout));
  394. // Imaginary part of nyquist and DC frequencies are always zero
  395. // so Apple uses the imaginary part of the DC frequency to store
  396. // the real part of the nyquist frequency
  397. if (size != 1)
  398. inout[0] = Complex<float> (inout[0].real(), inout[size >> 1].real());
  399. vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Inverse);
  400. vDSP_vsmul (inoutData, 1, &inverseNormalisation, inoutData, 1, static_cast<size_t> (size << 1));
  401. vDSP_vclr (inoutData + size, 1, static_cast<size_t> (size));
  402. }
  403. private:
  404. //==============================================================================
  405. void mirrorResult (Complex<float>* out, bool ignoreNegativeFreqs) const noexcept
  406. {
  407. auto size = (1 << order);
  408. auto i = size >> 1;
  409. // Imaginary part of nyquist and DC frequencies are always zero
  410. // so Apple uses the imaginary part of the DC frequency to store
  411. // the real part of the nyquist frequency
  412. out[i++] = { out[0].imag(), 0.0 };
  413. out[0] = { out[0].real(), 0.0 };
  414. if (! ignoreNegativeFreqs)
  415. for (; i < size; ++i)
  416. out[i] = std::conj (out[size - i]);
  417. }
  418. static DSPSplitComplex toSplitComplex (Complex<float>* data) noexcept
  419. {
  420. // this assumes that Complex interleaves real and imaginary parts
  421. // and is tightly packed.
  422. return { reinterpret_cast<float*> (data),
  423. reinterpret_cast<float*> (data) + 1};
  424. }
  425. //==============================================================================
  426. vDSP_Length order;
  427. FFTSetup fftSetup;
  428. float forwardNormalisation, inverseNormalisation;
  429. };
  430. FFT::EngineImpl<AppleFFT> appleFFT;
  431. #endif
  432. //==============================================================================
  433. //==============================================================================
  434. #if JUCE_DSP_USE_SHARED_FFTW || JUCE_DSP_USE_STATIC_FFTW
  435. #if JUCE_DSP_USE_STATIC_FFTW
  436. extern "C"
  437. {
  438. void* fftwf_plan_dft_1d (int, void*, void*, int, int);
  439. void* fftwf_plan_dft_r2c_1d (int, void*, void*, int);
  440. void* fftwf_plan_dft_c2r_1d (int, void*, void*, int);
  441. void fftwf_destroy_plan (void*);
  442. void fftwf_execute_dft (void*, void*, void*);
  443. void fftwf_execute_dft_r2c (void*, void*, void*);
  444. void fftwf_execute_dft_c2r (void*, void*, void*);
  445. }
  446. #endif
  447. struct FFTWImpl : public FFT::Instance
  448. {
  449. #if JUCE_DSP_USE_STATIC_FFTW
  450. // if the JUCE developer has gone through the hassle of statically
  451. // linking in fftw, they probably want to use it
  452. static constexpr int priority = 10;
  453. #else
  454. static constexpr int priority = 3;
  455. #endif
  456. struct FFTWPlan;
  457. using FFTWPlanRef = FFTWPlan*;
  458. enum
  459. {
  460. measure = 0,
  461. unaligned = (1 << 1),
  462. estimate = (1 << 6)
  463. };
  464. struct Symbols
  465. {
  466. FFTWPlanRef (*plan_dft_fftw) (unsigned, Complex<float>*, Complex<float>*, int, unsigned);
  467. FFTWPlanRef (*plan_r2c_fftw) (unsigned, float*, Complex<float>*, unsigned);
  468. FFTWPlanRef (*plan_c2r_fftw) (unsigned, Complex<float>*, float*, unsigned);
  469. void (*destroy_fftw) (FFTWPlanRef);
  470. void (*execute_dft_fftw) (FFTWPlanRef, const Complex<float>*, Complex<float>*);
  471. void (*execute_r2c_fftw) (FFTWPlanRef, float*, Complex<float>*);
  472. void (*execute_c2r_fftw) (FFTWPlanRef, Complex<float>*, float*);
  473. #if JUCE_DSP_USE_STATIC_FFTW
  474. template <typename FuncPtr, typename ActualSymbolType>
  475. static bool symbol (FuncPtr& dst, ActualSymbolType sym)
  476. {
  477. dst = reinterpret_cast<FuncPtr> (sym);
  478. return true;
  479. }
  480. #else
  481. template <typename FuncPtr>
  482. static bool symbol (DynamicLibrary& lib, FuncPtr& dst, const char* name)
  483. {
  484. dst = reinterpret_cast<FuncPtr> (lib.getFunction (name));
  485. return (dst != nullptr);
  486. }
  487. #endif
  488. };
  489. static FFTWImpl* create (int order)
  490. {
  491. DynamicLibrary lib;
  492. #if ! JUCE_DSP_USE_STATIC_FFTW
  493. #if JUCE_MAC
  494. auto libName = "libfftw3f.dylib";
  495. #elif JUCE_WINDOWS
  496. auto libName = "libfftw3f.dll";
  497. #else
  498. auto libName = "libfftw3f.so";
  499. #endif
  500. if (lib.open (libName))
  501. #endif
  502. {
  503. Symbols symbols;
  504. #if JUCE_DSP_USE_STATIC_FFTW
  505. if (! Symbols::symbol (symbols.plan_dft_fftw, fftwf_plan_dft_1d)) return nullptr;
  506. if (! Symbols::symbol (symbols.plan_r2c_fftw, fftwf_plan_dft_r2c_1d)) return nullptr;
  507. if (! Symbols::symbol (symbols.plan_c2r_fftw, fftwf_plan_dft_c2r_1d)) return nullptr;
  508. if (! Symbols::symbol (symbols.destroy_fftw, fftwf_destroy_plan)) return nullptr;
  509. if (! Symbols::symbol (symbols.execute_dft_fftw, fftwf_execute_dft)) return nullptr;
  510. if (! Symbols::symbol (symbols.execute_r2c_fftw, fftwf_execute_dft_r2c)) return nullptr;
  511. if (! Symbols::symbol (symbols.execute_c2r_fftw, fftwf_execute_dft_c2r)) return nullptr;
  512. #else
  513. if (! Symbols::symbol (lib, symbols.plan_dft_fftw, "fftwf_plan_dft_1d")) return nullptr;
  514. if (! Symbols::symbol (lib, symbols.plan_r2c_fftw, "fftwf_plan_dft_r2c_1d")) return nullptr;
  515. if (! Symbols::symbol (lib, symbols.plan_c2r_fftw, "fftwf_plan_dft_c2r_1d")) return nullptr;
  516. if (! Symbols::symbol (lib, symbols.destroy_fftw, "fftwf_destroy_plan")) return nullptr;
  517. if (! Symbols::symbol (lib, symbols.execute_dft_fftw, "fftwf_execute_dft")) return nullptr;
  518. if (! Symbols::symbol (lib, symbols.execute_r2c_fftw, "fftwf_execute_dft_r2c")) return nullptr;
  519. if (! Symbols::symbol (lib, symbols.execute_c2r_fftw, "fftwf_execute_dft_c2r")) return nullptr;
  520. #endif
  521. return new FFTWImpl (static_cast<size_t> (order), std::move (lib), symbols);
  522. }
  523. return nullptr;
  524. }
  525. FFTWImpl (size_t orderToUse, DynamicLibrary&& libraryToUse, const Symbols& symbols)
  526. : fftwLibrary (std::move (libraryToUse)), fftw (symbols), order (static_cast<size_t> (orderToUse))
  527. {
  528. ScopedLock lock (getFFTWPlanLock());
  529. auto n = (1u << order);
  530. HeapBlock<Complex<float>> in (n), out (n);
  531. c2cForward = fftw.plan_dft_fftw (n, in.getData(), out.getData(), -1, unaligned | estimate);
  532. c2cInverse = fftw.plan_dft_fftw (n, in.getData(), out.getData(), +1, unaligned | estimate);
  533. r2c = fftw.plan_r2c_fftw (n, (float*) in.getData(), in.getData(), unaligned | estimate);
  534. c2r = fftw.plan_c2r_fftw (n, in.getData(), (float*) in.getData(), unaligned | estimate);
  535. }
  536. ~FFTWImpl() override
  537. {
  538. ScopedLock lock (getFFTWPlanLock());
  539. fftw.destroy_fftw (c2cForward);
  540. fftw.destroy_fftw (c2cInverse);
  541. fftw.destroy_fftw (r2c);
  542. fftw.destroy_fftw (c2r);
  543. }
  544. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  545. {
  546. if (inverse)
  547. {
  548. auto n = (1u << order);
  549. fftw.execute_dft_fftw (c2cInverse, input, output);
  550. FloatVectorOperations::multiply ((float*) output, 1.0f / static_cast<float> (n), (int) n << 1);
  551. }
  552. else
  553. {
  554. fftw.execute_dft_fftw (c2cForward, input, output);
  555. }
  556. }
  557. void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
  558. {
  559. if (order == 0)
  560. return;
  561. auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
  562. fftw.execute_r2c_fftw (r2c, inputOutputData, out);
  563. auto size = (1 << order);
  564. if (! ignoreNegativeFreqs)
  565. for (int i = size >> 1; i < size; ++i)
  566. out[i] = std::conj (out[size - i]);
  567. }
  568. void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
  569. {
  570. auto n = (1u << order);
  571. fftw.execute_c2r_fftw (c2r, (Complex<float>*) inputOutputData, inputOutputData);
  572. FloatVectorOperations::multiply ((float*) inputOutputData, 1.0f / static_cast<float> (n), (int) n);
  573. }
  574. //==============================================================================
  575. // fftw's plan_* and destroy_* methods are NOT thread safe. So we need to share
  576. // a lock between all instances of FFTWImpl
  577. static CriticalSection& getFFTWPlanLock() noexcept
  578. {
  579. static CriticalSection cs;
  580. return cs;
  581. }
  582. //==============================================================================
  583. DynamicLibrary fftwLibrary;
  584. Symbols fftw;
  585. size_t order;
  586. FFTWPlanRef c2cForward, c2cInverse, r2c, c2r;
  587. };
  588. FFT::EngineImpl<FFTWImpl> fftwEngine;
  589. #endif
  590. //==============================================================================
  591. //==============================================================================
  592. #if JUCE_DSP_USE_INTEL_MKL
  593. struct IntelFFT : public FFT::Instance
  594. {
  595. static constexpr int priority = 8;
  596. static bool succeeded (MKL_LONG status) noexcept { return status == 0; }
  597. static IntelFFT* create (int orderToUse)
  598. {
  599. DFTI_DESCRIPTOR_HANDLE mklc2c, mklc2r;
  600. if (DftiCreateDescriptor (&mklc2c, DFTI_SINGLE, DFTI_COMPLEX, 1, 1 << orderToUse) == 0)
  601. {
  602. if (succeeded (DftiSetValue (mklc2c, DFTI_PLACEMENT, DFTI_NOT_INPLACE))
  603. && succeeded (DftiSetValue (mklc2c, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
  604. && succeeded (DftiCommitDescriptor (mklc2c)))
  605. {
  606. if (succeeded (DftiCreateDescriptor (&mklc2r, DFTI_SINGLE, DFTI_REAL, 1, 1 << orderToUse)))
  607. {
  608. if (succeeded (DftiSetValue (mklc2r, DFTI_PLACEMENT, DFTI_INPLACE))
  609. && succeeded (DftiSetValue (mklc2r, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
  610. && succeeded (DftiCommitDescriptor (mklc2r)))
  611. {
  612. return new IntelFFT (static_cast<size_t> (orderToUse), mklc2c, mklc2r);
  613. }
  614. DftiFreeDescriptor (&mklc2r);
  615. }
  616. }
  617. DftiFreeDescriptor (&mklc2c);
  618. }
  619. return {};
  620. }
  621. IntelFFT (size_t orderToUse, DFTI_DESCRIPTOR_HANDLE c2cToUse, DFTI_DESCRIPTOR_HANDLE cr2ToUse)
  622. : order (orderToUse), c2c (c2cToUse), c2r (cr2ToUse)
  623. {}
  624. ~IntelFFT() override
  625. {
  626. DftiFreeDescriptor (&c2c);
  627. DftiFreeDescriptor (&c2r);
  628. }
  629. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  630. {
  631. if (inverse)
  632. DftiComputeBackward (c2c, (void*) input, output);
  633. else
  634. DftiComputeForward (c2c, (void*) input, output);
  635. }
  636. void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
  637. {
  638. if (order == 0)
  639. return;
  640. DftiComputeForward (c2r, inputOutputData);
  641. auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
  642. auto size = (1 << order);
  643. if (! ignoreNegativeFreqs)
  644. for (int i = size >> 1; i < size; ++i)
  645. out[i] = std::conj (out[size - i]);
  646. }
  647. void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
  648. {
  649. DftiComputeBackward (c2r, inputOutputData);
  650. }
  651. size_t order;
  652. DFTI_DESCRIPTOR_HANDLE c2c, c2r;
  653. };
  654. FFT::EngineImpl<IntelFFT> fftwEngine;
  655. #endif
  656. //==============================================================================
  657. //==============================================================================
  658. // Visual Studio should define no more than one of these, depending on the
  659. // setting at 'Project' > 'Properties' > 'Configuration Properties' > 'Intel
  660. // Performance Libraries' > 'Use Intel(R) IPP'
  661. #if _IPP_SEQUENTIAL_STATIC || _IPP_SEQUENTIAL_DYNAMIC || _IPP_PARALLEL_STATIC || _IPP_PARALLEL_DYNAMIC
  662. class IntelPerformancePrimitivesFFT : public FFT::Instance
  663. {
  664. public:
  665. static constexpr auto priority = 9;
  666. static IntelPerformancePrimitivesFFT* create (const int order)
  667. {
  668. auto complexContext = Context<ComplexTraits>::create (order);
  669. auto realContext = Context<RealTraits> ::create (order);
  670. if (complexContext.isValid() && realContext.isValid())
  671. return new IntelPerformancePrimitivesFFT (std::move (complexContext), std::move (realContext), order);
  672. return {};
  673. }
  674. void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
  675. {
  676. if (inverse)
  677. {
  678. ippsFFTInv_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
  679. reinterpret_cast<Ipp32fc*> (output),
  680. cplx.specPtr,
  681. cplx.workBuf.get());
  682. }
  683. else
  684. {
  685. ippsFFTFwd_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
  686. reinterpret_cast<Ipp32fc*> (output),
  687. cplx.specPtr,
  688. cplx.workBuf.get());
  689. }
  690. }
  691. void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
  692. {
  693. ippsFFTFwd_RToCCS_32f_I (inoutData, real.specPtr, real.workBuf.get());
  694. if (order == 0)
  695. return;
  696. auto* out = reinterpret_cast<Complex<float>*> (inoutData);
  697. const auto size = (1 << order);
  698. if (! ignoreNegativeFreqs)
  699. for (auto i = size >> 1; i < size; ++i)
  700. out[i] = std::conj (out[size - i]);
  701. }
  702. void performRealOnlyInverseTransform (float* inoutData) const noexcept override
  703. {
  704. ippsFFTInv_CCSToR_32f_I (inoutData, real.specPtr, real.workBuf.get());
  705. }
  706. private:
  707. static constexpr auto flag = IPP_FFT_DIV_INV_BY_N;
  708. static constexpr auto hint = ippAlgHintFast;
  709. struct IppFree
  710. {
  711. template <typename Ptr>
  712. void operator() (Ptr* ptr) const noexcept { ippsFree (ptr); }
  713. };
  714. using IppPtr = std::unique_ptr<Ipp8u[], IppFree>;
  715. template <typename Traits>
  716. struct Context
  717. {
  718. using SpecPtr = typename Traits::Spec*;
  719. static Context create (const int order)
  720. {
  721. int specSize = 0, initSize = 0, workSize = 0;
  722. if (Traits::getSize (order, flag, hint, &specSize, &initSize, &workSize) != ippStsNoErr)
  723. return {};
  724. const auto initBuf = IppPtr (ippsMalloc_8u (initSize));
  725. auto specBuf = IppPtr (ippsMalloc_8u (specSize));
  726. SpecPtr specPtr = nullptr;
  727. if (Traits::init (&specPtr, order, flag, hint, specBuf.get(), initBuf.get()) != ippStsNoErr)
  728. return {};
  729. return { std::move (specBuf), IppPtr (ippsMalloc_8u (workSize)), specPtr };
  730. }
  731. Context() noexcept = default;
  732. Context (IppPtr&& spec, IppPtr&& work, typename Traits::Spec* ptr) noexcept
  733. : specBuf (std::move (spec)), workBuf (std::move (work)), specPtr (ptr)
  734. {}
  735. bool isValid() const noexcept { return specPtr != nullptr; }
  736. IppPtr specBuf, workBuf;
  737. SpecPtr specPtr = nullptr;
  738. };
  739. struct ComplexTraits
  740. {
  741. static constexpr auto getSize = ippsFFTGetSize_C_32fc;
  742. static constexpr auto init = ippsFFTInit_C_32fc;
  743. using Spec = IppsFFTSpec_C_32fc;
  744. };
  745. struct RealTraits
  746. {
  747. static constexpr auto getSize = ippsFFTGetSize_R_32f;
  748. static constexpr auto init = ippsFFTInit_R_32f;
  749. using Spec = IppsFFTSpec_R_32f;
  750. };
  751. IntelPerformancePrimitivesFFT (Context<ComplexTraits>&& complexToUse,
  752. Context<RealTraits>&& realToUse,
  753. const int orderToUse)
  754. : cplx (std::move (complexToUse)),
  755. real (std::move (realToUse)),
  756. order (orderToUse)
  757. {}
  758. Context<ComplexTraits> cplx;
  759. Context<RealTraits> real;
  760. int order = 0;
  761. };
  762. FFT::EngineImpl<IntelPerformancePrimitivesFFT> intelPerformancePrimitivesFFT;
  763. #endif
  764. //==============================================================================
  765. //==============================================================================
  766. FFT::FFT (int order)
  767. : engine (FFT::Engine::createBestEngineForPlatform (order)),
  768. size (1 << order)
  769. {
  770. }
  771. FFT::FFT (FFT&&) noexcept = default;
  772. FFT& FFT::operator= (FFT&&) noexcept = default;
  773. FFT::~FFT() = default;
  774. void FFT::perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept
  775. {
  776. if (engine != nullptr)
  777. engine->perform (input, output, inverse);
  778. }
  779. void FFT::performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept
  780. {
  781. if (engine != nullptr)
  782. engine->performRealOnlyForwardTransform (inputOutputData, ignoreNegativeFreqs);
  783. }
  784. void FFT::performRealOnlyInverseTransform (float* inputOutputData) const noexcept
  785. {
  786. if (engine != nullptr)
  787. engine->performRealOnlyInverseTransform (inputOutputData);
  788. }
  789. void FFT::performFrequencyOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept
  790. {
  791. if (size == 1)
  792. return;
  793. performRealOnlyForwardTransform (inputOutputData, ignoreNegativeFreqs);
  794. auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
  795. const auto limit = ignoreNegativeFreqs ? (size / 2) + 1 : size;
  796. for (int i = 0; i < limit; ++i)
  797. inputOutputData[i] = std::abs (out[i]);
  798. zeromem (inputOutputData + limit, static_cast<size_t> (size * 2 - limit) * sizeof (float));
  799. }
  800. } // namespace dsp
  801. } // namespace juce