You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fir.hpp 3.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. #pragma once
  2. #include "dsp/common.hpp"
  3. #include <pffft.h>
  4. namespace rack {
  5. namespace dsp {
  6. /** Performs a direct sum convolution */
  7. inline float convolveNaive(const float *in, const float *kernel, int len) {
  8. float y = 0.f;
  9. for (int i = 0; i < len; i++) {
  10. y += in[len - 1 - i] * kernel[i];
  11. }
  12. return y;
  13. }
  14. /** Computes the impulse response of a boxcar lowpass filter */
  15. inline void boxcarLowpassIR(float *out, int len, float cutoff = 0.5f) {
  16. for (int i = 0; i < len; i++) {
  17. float t = i - (len - 1) / 2.f;
  18. out[i] = 2 * cutoff * sinc(2 * cutoff * t);
  19. }
  20. }
  21. struct RealTimeConvolver {
  22. // `kernelBlocks` number of contiguous FFT blocks of size `blockSize`
  23. // indexed by [i * blockSize*2 + j]
  24. float *kernelFfts = NULL;
  25. float *inputFfts = NULL;
  26. float *outputTail = NULL;
  27. float *tmpBlock = NULL;
  28. size_t blockSize;
  29. size_t kernelBlocks = 0;
  30. size_t inputPos = 0;
  31. PFFFT_Setup *pffft;
  32. /** `blockSize` is the size of each FFT block. It should be >=32 and a power of 2. */
  33. RealTimeConvolver(size_t blockSize) {
  34. this->blockSize = blockSize;
  35. pffft = pffft_new_setup(blockSize*2, PFFFT_REAL);
  36. outputTail = new float[blockSize];
  37. memset(outputTail, 0, blockSize * sizeof(float));
  38. tmpBlock = new float[blockSize*2];
  39. memset(tmpBlock, 0, blockSize*2 * sizeof(float));
  40. }
  41. ~RealTimeConvolver() {
  42. setKernel(NULL, 0);
  43. delete[] outputTail;
  44. delete[] tmpBlock;
  45. pffft_destroy_setup(pffft);
  46. }
  47. void setKernel(const float *kernel, size_t length) {
  48. // Clear existing kernel
  49. if (kernelFfts) {
  50. pffft_aligned_free(kernelFfts);
  51. kernelFfts = NULL;
  52. }
  53. if (inputFfts) {
  54. pffft_aligned_free(inputFfts);
  55. inputFfts = NULL;
  56. }
  57. kernelBlocks = 0;
  58. inputPos = 0;
  59. if (kernel && length > 0) {
  60. // Round up to the nearest factor of `blockSize`
  61. kernelBlocks = (length - 1) / blockSize + 1;
  62. // Allocate blocks
  63. kernelFfts = (float*) pffft_aligned_malloc(sizeof(float) * blockSize*2 * kernelBlocks);
  64. inputFfts = (float*) pffft_aligned_malloc(sizeof(float) * blockSize*2 * kernelBlocks);
  65. memset(inputFfts, 0, sizeof(float) * blockSize*2 * kernelBlocks);
  66. for (size_t i = 0; i < kernelBlocks; i++) {
  67. // Pad each block with zeros
  68. memset(tmpBlock, 0, sizeof(float) * blockSize*2);
  69. size_t len = std::min((int) blockSize, (int) (length - i*blockSize));
  70. memcpy(tmpBlock, &kernel[i*blockSize], sizeof(float)*len);
  71. // Compute fft
  72. pffft_transform(pffft, tmpBlock, &kernelFfts[blockSize*2 * i], NULL, PFFFT_FORWARD);
  73. }
  74. }
  75. }
  76. /** Applies reverb to input
  77. input and output must be of size `blockSize`
  78. */
  79. void processBlock(const float *input, float *output) {
  80. if (kernelBlocks == 0) {
  81. memset(output, 0, sizeof(float) * blockSize);
  82. return;
  83. }
  84. // Step input position
  85. inputPos = (inputPos + 1) % kernelBlocks;
  86. // Pad block with zeros
  87. memset(tmpBlock, 0, sizeof(float) * blockSize*2);
  88. memcpy(tmpBlock, input, sizeof(float) * blockSize);
  89. // Compute input fft
  90. pffft_transform(pffft, tmpBlock, &inputFfts[blockSize*2 * inputPos], NULL, PFFFT_FORWARD);
  91. // Create output fft
  92. memset(tmpBlock, 0, sizeof(float) * blockSize*2);
  93. // convolve input fft by kernel fft
  94. // Note: This is the CPU bottleneck loop
  95. for (size_t i = 0; i < kernelBlocks; i++) {
  96. size_t pos = (inputPos - i + kernelBlocks) % kernelBlocks;
  97. pffft_zconvolve_accumulate(pffft, &kernelFfts[blockSize*2 * i], &inputFfts[blockSize*2 * pos], tmpBlock, 1.f);
  98. }
  99. // Compute output
  100. pffft_transform(pffft, tmpBlock, tmpBlock, NULL, PFFFT_BACKWARD);
  101. // Add block tail from last output block
  102. for (size_t i = 0; i < blockSize; i++) {
  103. tmpBlock[i] += outputTail[i];
  104. }
  105. // Copy output block to output
  106. float scale = 1.f / (blockSize*2);
  107. for (size_t i = 0; i < blockSize; i++) {
  108. // Scale based on FFT
  109. output[i] = tmpBlock[i] * scale;
  110. }
  111. // Set tail
  112. for (size_t i = 0; i < blockSize; i++) {
  113. outputTail[i] = tmpBlock[i + blockSize];
  114. }
  115. }
  116. };
  117. } // namespace dsp
  118. } // namespace rack