You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1435 lines
44KB

  1. /*
  2. * Copyright (c) 2018 The FFmpeg Project
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <float.h>
  21. #include "libavutil/audio_fifo.h"
  22. #include "libavutil/avstring.h"
  23. #include "libavutil/channel_layout.h"
  24. #include "libavutil/opt.h"
  25. #include "libavcodec/avfft.h"
  26. #include "avfilter.h"
  27. #include "audio.h"
  28. #include "formats.h"
  29. #include "filters.h"
/* ln(10) / 10, so that exp(x * C) equals 10^(x / 10). */
#define C (M_LN10 * 0.1)
/* Weight of the previous per-bin prior when it is updated in process_frame(). */
#define RATIO 0.98
/* Complementary weight (1 - RATIO) of the current frame's contribution;
 * also the value the priors are initialised to in config_input(). */
#define RRATIO (1.0 - RATIO)
  33. enum OutModes {
  34. IN_MODE,
  35. OUT_MODE,
  36. NOISE_MODE,
  37. NB_MODES
  38. };
  39. enum NoiseType {
  40. WHITE_NOISE,
  41. VINYL_NOISE,
  42. SHELLAC_NOISE,
  43. CUSTOM_NOISE,
  44. NB_NOISE
  45. };
/*
 * Per-channel denoiser state. config_input() allocates one instance per
 * input channel and sizes the heap arrays as noted on each field.
 */
typedef struct DeNoiseChannel {
    int band_noise[15];             /* per-band noise values; filled in config_input() from the noise type */
    double noise_band_auto_var[15]; /* set in set_band_parameters(); rescaled by process_frame() when tracking noise */
    double noise_band_sample[15];   /* calculate_sfm() mean (sfm_results[0]) per noise band */
    double *amt;                    /* bin_count entries; band_amt expanded back to bins */
    double *band_amt;               /* number_of_bands entries */
    double *band_excit;             /* number_of_bands entries */
    double *gain;                   /* bin_count entries; per-bin gain */
    double *prior;                  /* bin_count entries; initialised to RRATIO */
    double *prior_band_excit;       /* number_of_bands entries */
    double *clean_data;             /* bin_count entries; noisy_data scaled by the gain */
    double *noisy_data;             /* bin_count entries; per-bin squared magnitude */
    double *out_samples;            /* buffer_length entries */
    double *spread_function;        /* number_of_bands * number_of_bands entries, row after row */
    double *abs_var;                /* bin_count entries; max_var * rel_var, floored at 1.0 */
    double *rel_var;                /* bin_count entries; set in set_band_parameters() */
    double *min_abs_var;            /* bin_count entries; gain_scale * abs_var */
    FFTComplex *fft_data;           /* fft_length2 + 1 entries */
    FFTContext *fft, *ifft;         /* both created with av_fft_init(av_log2(fft_length2), 0 / 1) */
    /* Accumulators: cleared by init_sample_noise(), summed by
     * sample_noise_block(), normalised by finish_sample_noise(). */
    double noise_band_norm[15];
    double noise_band_avr[15];
    double noise_band_avi[15];
    double noise_band_var[15];
    double sfm_threshold;           /* set to 0.8 in config_input() */
    double sfm_alpha;               /* set to 0.05 in config_input() */
    double sfm_results[3];          /* written by calculate_sfm(): mean, log(mean), log(mean) - mean(log) */
    int sfm_fail_flags[512];        /* indexed with block_count & 0x1FF in process_frame() */
    int sfm_fail_total;             /* incremented whenever a flag is raised */
} DeNoiseChannel;
/*
 * Filter private context: user options, the geometry derived from the
 * sample rate in config_input(), and tables shared by all channels.
 */
typedef struct AudioFFTDeNoiseContext {
    const AVClass *class;
    /* User options (see afftdn_options). */
    float noise_reduction;          /* "nr" */
    float noise_floor;              /* "nf"; may be overwritten by set_noise_profile() when track_noise is set */
    int noise_type;                 /* "nt", one of enum NoiseType */
    char *band_noise_str;           /* "bn", parsed by read_custom_noise() */
    float residual_floor;           /* "rf" */
    int track_noise;                /* "tn" */
    int track_residual;             /* "tr" */
    int output_mode;                /* "om", one of enum OutModes */
    /* Values last applied by set_parameters(). */
    float last_residual_floor;
    float last_noise_floor;
    float last_noise_reduction;
    float last_noise_balance;
    int64_t block_count;            /* its low 9 bits index sfm_fail_flags in process_frame() */
    int64_t pts;                    /* reset to AV_NOPTS_VALUE in config_input() */
    int channels;
    int sample_noise;
    int sample_noise_start;
    int sample_noise_end;
    float sample_rate;
    /* Geometry computed in config_input(). */
    int buffer_length;              /* 2 * fft_length */
    int fft_length;                 /* 2 * fft_length2 */
    int fft_length2;                /* power of two derived from window_length */
    int bin_count;                  /* fft_length2 + 1 */
    int window_length;              /* 3 * sample_advance */
    int sample_advance;             /* sample_rate / 80 */
    int number_of_bands;            /* bin2band[fft_length2] + 1 */
    int band_centre[15];            /* starts at 80, each entry about 1.5 times the previous, rounded */
    int *bin2band;                  /* bin_count entries; band index of every bin */
    double *window;                 /* window_length entries */
    double *band_alpha;             /* number_of_bands entries */
    double *band_beta;              /* number_of_bands entries; 1 - band_alpha where set */
    DeNoiseChannel *dnch;           /* one entry per channel */
    double max_gain;                /* set in set_parameters() */
    double max_var;                 /* set in set_parameters() */
    double gain_scale;              /* 1 / max_gain^2 */
    double window_weight;
    double floor;
    double sample_floor;
    double auto_floor;
    int noise_band_edge[17];        /* [0..15] bin index of band edges; [16] holds the band count */
    int noise_band_count;
    /* Scratch for the 5-coefficient fit over the 15 bands. */
    double matrix_a[25];            /* 5x5, factored in place by factor() */
    double vector_b[5];             /* right-hand side, overwritten with the solution by solve() */
    double matrix_b[75];            /* 5 rows of 15 powers k^j */
    double matrix_c[75];            /* 15 rows of 5 powers j^k */
    AVAudioFifo *fifo;
} AudioFFTDeNoiseContext;
/* Byte offset of an option field inside the private context. */
#define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
/* Flags for options without the runtime flag. */
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
/* Same flags plus AV_OPT_FLAG_RUNTIME_PARAM. */
#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
/*
 * Option table. Entries of type AV_OPT_TYPE_CONST are the named values of
 * the preceding option that shares their unit string ("type" or "mode").
 */
static const AVOption afftdn_options[] = {
    { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12}, .01, 97, AFR },
    { "nf", "set the noise floor", OFFSET(noise_floor), AV_OPT_TYPE_FLOAT, {.dbl =-50}, -80,-20, AFR },
    { "nt", "set the noise type", OFFSET(noise_type), AV_OPT_TYPE_INT, {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, "type" },
    { "w", "white noise", 0, AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE}, 0, 0, AF, "type" },
    { "v", "vinyl noise", 0, AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE}, 0, 0, AF, "type" },
    { "s", "shellac noise", 0, AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0, 0, AF, "type" },
    { "c", "custom noise", 0, AV_OPT_TYPE_CONST, {.i64 = CUSTOM_NOISE}, 0, 0, AF, "type" },
    { "bn", "set the custom bands noise", OFFSET(band_noise_str), AV_OPT_TYPE_STRING, {.str = 0}, 0, 0, AF },
    { "rf", "set the residual floor", OFFSET(residual_floor), AV_OPT_TYPE_FLOAT, {.dbl =-38}, -80,-20, AFR },
    { "tn", "track noise", OFFSET(track_noise), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "tr", "track residual", OFFSET(track_residual), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AFR },
    { "om", "set output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, "mode" },
    { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, "mode" },
    { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, "mode" },
    { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE}, 0, 0, AFR, "mode" },
    { NULL }
};
AVFILTER_DEFINE_CLASS(afftdn);
  146. static int get_band_noise(AudioFFTDeNoiseContext *s,
  147. int band, double a,
  148. double b, double c)
  149. {
  150. double d1, d2, d3;
  151. d1 = a / s->band_centre[band];
  152. d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
  153. d2 = b / s->band_centre[band];
  154. d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
  155. d3 = s->band_centre[band] / c;
  156. d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
  157. return lrint(-d1 + d2 - d3);
  158. }
  159. static void factor(double *array, int size)
  160. {
  161. for (int i = 0; i < size - 1; i++) {
  162. for (int j = i + 1; j < size; j++) {
  163. double d = array[j + i * size] / array[i + i * size];
  164. array[j + i * size] = d;
  165. for (int k = i + 1; k < size; k++) {
  166. array[j + k * size] -= d * array[i + k * size];
  167. }
  168. }
  169. }
  170. }
  171. static void solve(double *matrix, double *vector, int size)
  172. {
  173. for (int i = 0; i < size - 1; i++) {
  174. for (int j = i + 1; j < size; j++) {
  175. double d = matrix[j + i * size];
  176. vector[j] -= d * vector[i];
  177. }
  178. }
  179. vector[size - 1] /= matrix[size * size - 1];
  180. for (int i = size - 2; i >= 0; i--) {
  181. double d = vector[i];
  182. for (int j = i + 1; j < size; j++)
  183. d -= matrix[i + j * size] * vector[j];
  184. vector[i] = d / matrix[i + i * size];
  185. }
  186. }
  187. static int process_get_band_noise(AudioFFTDeNoiseContext *s,
  188. DeNoiseChannel *dnch,
  189. int band)
  190. {
  191. double product, sum, f;
  192. int i = 0;
  193. if (band < 15)
  194. return dnch->band_noise[band];
  195. for (int j = 0; j < 5; j++) {
  196. sum = 0.0;
  197. for (int k = 0; k < 15; k++)
  198. sum += s->matrix_b[i++] * dnch->band_noise[k];
  199. s->vector_b[j] = sum;
  200. }
  201. solve(s->matrix_a, s->vector_b, 5);
  202. f = (0.5 * s->sample_rate) / s->band_centre[14];
  203. f = 15.0 + log(f / 1.5) / log(1.5);
  204. sum = 0.0;
  205. product = 1.0;
  206. for (int j = 0; j < 5; j++) {
  207. sum += product * s->vector_b[j];
  208. product *= f;
  209. }
  210. return lrint(sum);
  211. }
  212. static void calculate_sfm(AudioFFTDeNoiseContext *s,
  213. DeNoiseChannel *dnch,
  214. int start, int end)
  215. {
  216. double d1 = 0.0, d2 = 1.0;
  217. int i = 0, j = 0;
  218. for (int k = start; k < end; k++) {
  219. if (dnch->noisy_data[k] > s->sample_floor) {
  220. j++;
  221. d1 += dnch->noisy_data[k];
  222. d2 *= dnch->noisy_data[k];
  223. if (d2 > 1.0E100) {
  224. d2 *= 1.0E-100;
  225. i++;
  226. } else if (d2 < 1.0E-100) {
  227. d2 *= 1.0E100;
  228. i--;
  229. }
  230. }
  231. }
  232. if (j > 1) {
  233. d1 /= j;
  234. dnch->sfm_results[0] = d1;
  235. d2 = log(d2) + 230.2585 * i;
  236. d2 /= j;
  237. d1 = log(d1);
  238. dnch->sfm_results[1] = d1;
  239. dnch->sfm_results[2] = d1 - d2;
  240. } else {
  241. dnch->sfm_results[0] = s->auto_floor;
  242. dnch->sfm_results[1] = dnch->sfm_threshold;
  243. dnch->sfm_results[2] = dnch->sfm_threshold;
  244. }
  245. }
  246. static double limit_gain(double a, double b)
  247. {
  248. if (a > 1.0)
  249. return (b * a - 1.0) / (b + a - 2.0);
  250. if (a < 1.0)
  251. return (b * a - 2.0 * a + 1.0) / (b - a);
  252. return 1.0;
  253. }
  254. static void process_frame(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
  255. FFTComplex *fft_data,
  256. double *prior, double *prior_band_excit, int track_noise)
  257. {
  258. double d1, d2, d3, gain;
  259. int n, i1;
  260. d1 = fft_data[0].re * fft_data[0].re;
  261. dnch->noisy_data[0] = d1;
  262. d2 = d1 / dnch->abs_var[0];
  263. d3 = RATIO * prior[0] + RRATIO * fmax(d2 - 1.0, 0.0);
  264. gain = d3 / (1.0 + d3);
  265. gain *= (gain + M_PI_4 / fmax(d2, 1.0E-6));
  266. prior[0] = (d2 * gain);
  267. dnch->clean_data[0] = (d1 * gain);
  268. gain = sqrt(gain);
  269. dnch->gain[0] = gain;
  270. n = 0;
  271. for (int i = 1; i < s->fft_length2; i++) {
  272. d1 = fft_data[i].re * fft_data[i].re + fft_data[i].im * fft_data[i].im;
  273. if (d1 > s->sample_floor)
  274. n = i;
  275. dnch->noisy_data[i] = d1;
  276. d2 = d1 / dnch->abs_var[i];
  277. d3 = RATIO * prior[i] + RRATIO * fmax(d2 - 1.0, 0.0);
  278. gain = d3 / (1.0 + d3);
  279. gain *= (gain + M_PI_4 / fmax(d2, 1.0E-6));
  280. prior[i] = d2 * gain;
  281. dnch->clean_data[i] = d1 * gain;
  282. gain = sqrt(gain);
  283. dnch->gain[i] = gain;
  284. }
  285. d1 = fft_data[0].im * fft_data[0].im;
  286. if (d1 > s->sample_floor)
  287. n = s->fft_length2;
  288. dnch->noisy_data[s->fft_length2] = d1;
  289. d2 = d1 / dnch->abs_var[s->fft_length2];
  290. d3 = RATIO * prior[s->fft_length2] + RRATIO * fmax(d2 - 1.0, 0.0);
  291. gain = d3 / (1.0 + d3);
  292. gain *= gain + M_PI_4 / fmax(d2, 1.0E-6);
  293. prior[s->fft_length2] = d2 * gain;
  294. dnch->clean_data[s->fft_length2] = d1 * gain;
  295. gain = sqrt(gain);
  296. dnch->gain[s->fft_length2] = gain;
  297. if (n > s->fft_length2 - 2) {
  298. n = s->bin_count;
  299. i1 = s->noise_band_count;
  300. } else {
  301. i1 = 0;
  302. for (int i = 0; i <= s->noise_band_count; i++) {
  303. if (n > 1.1 * s->noise_band_edge[i]) {
  304. i1 = i;
  305. }
  306. }
  307. }
  308. if (track_noise && (i1 > s->noise_band_count / 2)) {
  309. int j = FFMIN(n, s->noise_band_edge[i1]);
  310. int m = 3, k;
  311. for (k = i1 - 1; k >= 0; k--) {
  312. int i = s->noise_band_edge[k];
  313. calculate_sfm(s, dnch, i, j);
  314. dnch->noise_band_sample[k] = dnch->sfm_results[0];
  315. if (dnch->sfm_results[2] + 0.013 * m * fmax(0.0, dnch->sfm_results[1] - 20.53) >= dnch->sfm_threshold) {
  316. break;
  317. }
  318. j = i;
  319. m++;
  320. }
  321. if (k < i1 - 1) {
  322. double sum = 0.0, min, max;
  323. int i;
  324. for (i = i1 - 1; i > k; i--) {
  325. min = log(dnch->noise_band_sample[i] / dnch->noise_band_auto_var[i]);
  326. sum += min;
  327. }
  328. i = i1 - k - 1;
  329. if (i < 5) {
  330. min = 3.0E-4 * i * i;
  331. } else {
  332. min = 3.0E-4 * (8 * i - 16);
  333. }
  334. if (i < 3) {
  335. max = 2.0E-4 * i * i;
  336. } else {
  337. max = 2.0E-4 * (4 * i - 4);
  338. }
  339. if (s->track_residual) {
  340. if (s->last_noise_floor > s->last_residual_floor + 9) {
  341. min *= 0.5;
  342. max *= 0.75;
  343. } else if (s->last_noise_floor > s->last_residual_floor + 6) {
  344. min *= 0.4;
  345. max *= 1.0;
  346. } else if (s->last_noise_floor > s->last_residual_floor + 4) {
  347. min *= 0.3;
  348. max *= 1.3;
  349. } else if (s->last_noise_floor > s->last_residual_floor + 2) {
  350. min *= 0.2;
  351. max *= 1.6;
  352. } else if (s->last_noise_floor > s->last_residual_floor) {
  353. min *= 0.1;
  354. max *= 2.0;
  355. } else {
  356. min = 0.0;
  357. max *= 2.5;
  358. }
  359. }
  360. sum = av_clipd(sum, -min, max);
  361. sum = exp(sum);
  362. for (int i = 0; i < 15; i++)
  363. dnch->noise_band_auto_var[i] *= sum;
  364. } else if (dnch->sfm_results[2] >= dnch->sfm_threshold) {
  365. dnch->sfm_fail_flags[s->block_count & 0x1FF] = 1;
  366. dnch->sfm_fail_total += 1;
  367. }
  368. }
  369. for (int i = 0; i < s->number_of_bands; i++) {
  370. dnch->band_excit[i] = 0.0;
  371. dnch->band_amt[i] = 0.0;
  372. }
  373. for (int i = 0; i < s->bin_count; i++) {
  374. dnch->band_excit[s->bin2band[i]] += dnch->clean_data[i];
  375. }
  376. for (int i = 0; i < s->number_of_bands; i++) {
  377. dnch->band_excit[i] = fmax(dnch->band_excit[i],
  378. s->band_alpha[i] * dnch->band_excit[i] +
  379. s->band_beta[i] * prior_band_excit[i]);
  380. prior_band_excit[i] = dnch->band_excit[i];
  381. }
  382. for (int j = 0, i = 0; j < s->number_of_bands; j++) {
  383. for (int k = 0; k < s->number_of_bands; k++) {
  384. dnch->band_amt[j] += dnch->spread_function[i++] * dnch->band_excit[k];
  385. }
  386. }
  387. for (int i = 0; i < s->bin_count; i++)
  388. dnch->amt[i] = dnch->band_amt[s->bin2band[i]];
  389. if (dnch->amt[0] > dnch->abs_var[0]) {
  390. dnch->gain[0] = 1.0;
  391. } else if (dnch->amt[0] > dnch->min_abs_var[0]) {
  392. double limit = sqrt(dnch->abs_var[0] / dnch->amt[0]);
  393. dnch->gain[0] = limit_gain(dnch->gain[0], limit);
  394. } else {
  395. dnch->gain[0] = limit_gain(dnch->gain[0], s->max_gain);
  396. }
  397. if (dnch->amt[s->fft_length2] > dnch->abs_var[s->fft_length2]) {
  398. dnch->gain[s->fft_length2] = 1.0;
  399. } else if (dnch->amt[s->fft_length2] > dnch->min_abs_var[s->fft_length2]) {
  400. double limit = sqrt(dnch->abs_var[s->fft_length2] / dnch->amt[s->fft_length2]);
  401. dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], limit);
  402. } else {
  403. dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], s->max_gain);
  404. }
  405. for (int i = 1; i < s->fft_length2; i++) {
  406. if (dnch->amt[i] > dnch->abs_var[i]) {
  407. dnch->gain[i] = 1.0;
  408. } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
  409. double limit = sqrt(dnch->abs_var[i] / dnch->amt[i]);
  410. dnch->gain[i] = limit_gain(dnch->gain[i], limit);
  411. } else {
  412. dnch->gain[i] = limit_gain(dnch->gain[i], s->max_gain);
  413. }
  414. }
  415. gain = dnch->gain[0];
  416. dnch->clean_data[0] = (gain * gain * dnch->noisy_data[0]);
  417. fft_data[0].re *= gain;
  418. gain = dnch->gain[s->fft_length2];
  419. dnch->clean_data[s->fft_length2] = (gain * gain * dnch->noisy_data[s->fft_length2]);
  420. fft_data[0].im *= gain;
  421. for (int i = 1; i < s->fft_length2; i++) {
  422. gain = dnch->gain[i];
  423. dnch->clean_data[i] = (gain * gain * dnch->noisy_data[i]);
  424. fft_data[i].re *= gain;
  425. fft_data[i].im *= gain;
  426. }
  427. }
  428. static double freq2bark(double x)
  429. {
  430. double d = x / 7500.0;
  431. return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
  432. }
  433. static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
  434. {
  435. if (band == -1)
  436. return lrint(s->band_centre[0] / 1.5);
  437. return s->band_centre[band];
  438. }
  439. static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
  440. {
  441. int i;
  442. if (band == 15) {
  443. i = lrint(s->band_centre[14] * 1.224745);
  444. } else {
  445. i = lrint(s->band_centre[band] / 1.224745);
  446. }
  447. return FFMIN(i, s->sample_rate / 2);
  448. }
  449. static void set_band_parameters(AudioFFTDeNoiseContext *s,
  450. DeNoiseChannel *dnch)
  451. {
  452. double band_noise, d2, d3, d4, d5;
  453. int i = 0, j = 0, k = 0;
  454. d5 = 0.0;
  455. band_noise = process_get_band_noise(s, dnch, 0);
  456. for (int m = j; m <= s->fft_length2; m++) {
  457. if (m == j) {
  458. i = j;
  459. d5 = band_noise;
  460. if (k == 15) {
  461. j = s->bin_count;
  462. } else {
  463. j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
  464. }
  465. d2 = j - i;
  466. band_noise = process_get_band_noise(s, dnch, k);
  467. k++;
  468. }
  469. d3 = (j - m) / d2;
  470. d4 = (m - i) / d2;
  471. dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
  472. }
  473. dnch->rel_var[s->fft_length2] = exp(band_noise * C);
  474. for (i = 0; i < 15; i++)
  475. dnch->noise_band_auto_var[i] = s->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
  476. for (i = 0; i <= s->fft_length2; i++) {
  477. dnch->abs_var[i] = fmax(s->max_var * dnch->rel_var[i], 1.0);
  478. dnch->min_abs_var[i] = s->gain_scale * dnch->abs_var[i];
  479. }
  480. }
  481. static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
  482. {
  483. DeNoiseChannel *dnch = &s->dnch[ch];
  484. char *p, *arg, *saveptr = NULL;
  485. int i, ret, band_noise[15] = { 0 };
  486. if (!s->band_noise_str)
  487. return;
  488. p = av_strdup(s->band_noise_str);
  489. if (!p)
  490. return;
  491. for (i = 0; i < 15; i++) {
  492. if (!(arg = av_strtok(p, "| ", &saveptr)))
  493. break;
  494. p = NULL;
  495. ret = av_sscanf(arg, "%d", &band_noise[i]);
  496. if (ret != 1) {
  497. av_log(s, AV_LOG_ERROR, "Custom band noise must be integer.\n");
  498. break;
  499. }
  500. band_noise[i] = av_clip(band_noise[i], -24, 24);
  501. }
  502. av_free(p);
  503. memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
  504. }
  505. static void set_parameters(AudioFFTDeNoiseContext *s)
  506. {
  507. if (s->last_noise_floor != s->noise_floor)
  508. s->last_noise_floor = s->noise_floor;
  509. if (s->track_residual)
  510. s->last_noise_floor = fmaxf(s->last_noise_floor, s->residual_floor);
  511. s->max_var = s->floor * exp((100.0 + s->last_noise_floor) * C);
  512. if (s->track_residual) {
  513. s->last_residual_floor = s->residual_floor;
  514. s->last_noise_reduction = fmax(s->last_noise_floor - s->last_residual_floor, 0);
  515. s->max_gain = exp(s->last_noise_reduction * (0.5 * C));
  516. } else if (s->noise_reduction != s->last_noise_reduction) {
  517. s->last_noise_reduction = s->noise_reduction;
  518. s->last_residual_floor = av_clipf(s->last_noise_floor - s->last_noise_reduction, -80, -20);
  519. s->max_gain = exp(s->last_noise_reduction * (0.5 * C));
  520. }
  521. s->gain_scale = 1.0 / (s->max_gain * s->max_gain);
  522. for (int ch = 0; ch < s->channels; ch++) {
  523. DeNoiseChannel *dnch = &s->dnch[ch];
  524. set_band_parameters(s, dnch);
  525. }
  526. }
/*
 * Configure the filter for the input link: derive the FFT geometry from
 * the sample rate, build the shared tables, and allocate and initialise
 * the per-channel state.
 *
 * Returns 0 on success, AVERROR(ENOMEM) when an allocation fails, or
 * AVERROR_BUG for an unknown noise type. Nothing is freed here on
 * failure. NOTE(review): presumably the filter's uninit callback releases
 * partially allocated state; confirm.
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    AudioFFTDeNoiseContext *s = ctx->priv;
    double wscale, sar, sum, sdiv;
    int i, j, k, m, n;
    s->dnch = av_calloc(inlink->channels, sizeof(*s->dnch));
    if (!s->dnch)
        return AVERROR(ENOMEM);
    s->pts = AV_NOPTS_VALUE;
    s->channels = inlink->channels;
    s->sample_rate = inlink->sample_rate;
    /* Geometry: hop of 1/80 s, window of three hops, fft_length2 the next
     * power of two above the window length. */
    s->sample_advance = s->sample_rate / 80;
    s->window_length = 3 * s->sample_advance;
    s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
    s->fft_length = s->fft_length2 * 2;
    s->buffer_length = s->fft_length * 2;
    s->bin_count = s->fft_length2 + 1;
    /* Band centres: start at 80, grow by about 1.5x per band, then round
     * to a step that depends on the magnitude. */
    s->band_centre[0] = 80;
    for (i = 1; i < 15; i++) {
        s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
        if (s->band_centre[i] < 1000) {
            s->band_centre[i] = 10 * (s->band_centre[i] / 10);
        } else if (s->band_centre[i] < 5000) {
            s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
        } else if (s->band_centre[i] < 15000) {
            s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
        } else {
            s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
        }
    }
    /* matrix_a[j + k * 5] is the sum over m = 0..14 of m^(j + k); it is
     * factored once here and reused by every solve() call. */
    for (j = 0; j < 5; j++) {
        for (k = 0; k < 5; k++) {
            s->matrix_a[j + k * 5] = 0.0;
            for (m = 0; m < 15; m++)
                s->matrix_a[j + k * 5] += pow(m, j + k);
        }
    }
    factor(s->matrix_a, 5);
    /* matrix_b: 5 rows of 15 powers k^j; matrix_c: 15 rows of 5 powers j^k. */
    i = 0;
    for (j = 0; j < 5; j++)
        for (k = 0; k < 15; k++)
            s->matrix_b[i++] = pow(k, j);
    i = 0;
    for (j = 0; j < 15; j++)
        for (k = 0; k < 5; k++)
            s->matrix_c[i++] = pow(j, k);
    s->window = av_calloc(s->window_length, sizeof(*s->window));
    s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
    if (!s->window || !s->bin2band)
        return AVERROR(ENOMEM);
    /* Map every bin to a band via freq2bark(), scaled by sample_rate / 17640. */
    sdiv = s->sample_rate / 17640.0;
    for (i = 0; i <= s->fft_length2; i++)
        s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
    s->number_of_bands = s->bin2band[s->fft_length2] + 1;
    s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
    s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
    if (!s->band_alpha || !s->band_beta)
        return AVERROR(ENOMEM);
    /* First pass over the channels: band noise, SFM state and allocations. */
    for (int ch = 0; ch < inlink->channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        switch (s->noise_type) {
        case WHITE_NOISE:
            for (i = 0; i < 15; i++)
                dnch->band_noise[i] = 0;
            break;
        case VINYL_NOISE:
            for (i = 0; i < 15; i++)
                dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0) + FFMAX(i - 7, 0);
            break;
        case SHELLAC_NOISE:
            for (i = 0; i < 15; i++)
                dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10) + FFMAX(i - 12, -5);
            break;
        case CUSTOM_NOISE:
            read_custom_noise(s, ch);
            break;
        default:
            return AVERROR_BUG;
        }
        dnch->sfm_threshold = 0.8;
        dnch->sfm_alpha = 0.05;
        for (i = 0; i < 512; i++)
            dnch->sfm_fail_flags[i] = 0;
        dnch->sfm_fail_total = 0;
        /* Pre-set every j-th failure flag; j comes from the threshold. */
        j = FFMAX((int)(10.0 * (1.3 - dnch->sfm_threshold)), 1);
        for (i = 0; i < 512; i += j) {
            dnch->sfm_fail_flags[i] = 1;
            dnch->sfm_fail_total += 1;
        }
        dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
        dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
        dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
        dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
        dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
        dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
        dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
        dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
        dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
        dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
        dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
        dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
        dnch->fft_data = av_calloc(s->fft_length2 + 1, sizeof(*dnch->fft_data));
        dnch->fft = av_fft_init(av_log2(s->fft_length2), 0);
        dnch->ifft = av_fft_init(av_log2(s->fft_length2), 1);
        dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
                                          sizeof(*dnch->spread_function));
        /* All results are checked together after every call was made. */
        if (!dnch->amt ||
            !dnch->band_amt ||
            !dnch->band_excit ||
            !dnch->gain ||
            !dnch->prior ||
            !dnch->prior_band_excit ||
            !dnch->clean_data ||
            !dnch->noisy_data ||
            !dnch->out_samples ||
            !dnch->fft_data ||
            !dnch->abs_var ||
            !dnch->rel_var ||
            !dnch->min_abs_var ||
            !dnch->spread_function ||
            !dnch->fft ||
            !dnch->ifft)
            return AVERROR(ENOMEM);
    }
    /* Second pass over the channels: spreading function and initial state. */
    for (int ch = 0; ch < inlink->channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
        double *prior_band_excit = dnch->prior_band_excit;
        double *prior = dnch->prior;
        double min, max;
        double p1, p2;
        p1 = pow(0.1, 2.5 / sdiv);
        p2 = pow(0.1, 1.0 / sdiv);
        /* Row m, column n: decays with p2 per band when n < m and with p1
         * per band when n > m; 1.0 on the diagonal. */
        j = 0;
        for (m = 0; m < s->number_of_bands; m++) {
            for (n = 0; n < s->number_of_bands; n++) {
                if (n < m) {
                    dnch->spread_function[j++] = pow(p2, m - n);
                } else if (n > m) {
                    dnch->spread_function[j++] = pow(p1, n - m);
                } else {
                    dnch->spread_function[j++] = 1.0;
                }
            }
        }
        for (m = 0; m < s->number_of_bands; m++) {
            dnch->band_excit[m] = 0.0;
            prior_band_excit[m] = 0.0;
        }
        /* band_excit temporarily holds the number of bins per band;
         * prior_band_excit then holds that count spread across bands. */
        for (m = 0; m <= s->fft_length2; m++)
            dnch->band_excit[s->bin2band[m]] += 1.0;
        j = 0;
        for (m = 0; m < s->number_of_bands; m++) {
            for (n = 0; n < s->number_of_bands; n++)
                prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
        }
        min = pow(0.1, 2.5);
        max = pow(0.1, 1.0);
        /* band_excit now becomes a per-band target, clipped to [min, max]. */
        for (int i = 0; i < s->number_of_bands; i++) {
            if (i < lrint(12.0 * sdiv)) {
                dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
            } else {
                dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
            }
            dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
        }
        for (int i = 0; i <= s->fft_length2; i++)
            prior[i] = RRATIO;
        for (int i = 0; i < s->buffer_length; i++)
            dnch->out_samples[i] = 0;
        /* Scale each row of the spreading function by target / spread count. */
        j = 0;
        for (int i = 0; i < s->number_of_bands; i++)
            for (int k = 0; k < s->number_of_bands; k++)
                dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
    }
    /* band_alpha / band_beta: set for band j when the first bin of a
     * higher band (or the last bin) is reached. */
    j = 0;
    sar = s->sample_advance / s->sample_rate;
    for (int i = 0; i <= s->fft_length2; i++) {
        if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
            double d6 = (i - 1) * s->sample_rate / s->fft_length;
            double d7 = fmin(0.008 + 2.2 / d6, 0.03);
            s->band_alpha[j] = exp(-sar / d7);
            s->band_beta[j] = 1.0 - s->band_alpha[j];
            j = s->bin2band[i];
        }
    }
    /* Window: scaled sin^2 over window_length samples; sum collects the
     * squared window values. */
    wscale = sqrt(16.0 / (9.0 * s->fft_length));
    sum = 0.0;
    for (int i = 0; i < s->window_length; i++) {
        double d10 = sin(i * M_PI / s->window_length);
        d10 *= wscale * d10;
        s->window[i] = d10;
        sum += d10 * d10;
    }
    s->window_weight = 0.5 * sum;
    /* Reference levels; sample_floor and auto_floor are fixed multiples of floor. */
    s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
    s->sample_floor = s->floor * exp(4.144600506562284);
    s->auto_floor = s->floor * exp(6.907667510937141);
    set_parameters(s);
    /* Noise band edges as bin indices, capped at fft_length2. i counts the
     * edges that exceed 1.1 times the previous edge; the running count is
     * kept in noise_band_edge[16]. */
    s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
    i = 0;
    for (int j = 1; j < 16; j++) {
        s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
        if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
            i++;
        s->noise_band_edge[16] = i;
    }
    s->noise_band_count = s->noise_band_edge[16];
    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->fft_length);
    if (!s->fifo)
        return AVERROR(ENOMEM);
    return 0;
}
  740. static void preprocess(FFTComplex *in, int len)
  741. {
  742. double d1, d2, d3, d4, d5, d6, d7, d8, d9, d10;
  743. int n, i, k;
  744. d5 = 2.0 * M_PI / len;
  745. d8 = sin(0.5 * d5);
  746. d8 = -2.0 * d8 * d8;
  747. d7 = sin(d5);
  748. d9 = 1.0 + d8;
  749. d6 = d7;
  750. n = len / 2;
  751. for (i = 1; i < len / 4; i++) {
  752. k = n - i;
  753. d2 = 0.5 * (in[i].re + in[k].re);
  754. d1 = 0.5 * (in[i].im - in[k].im);
  755. d4 = 0.5 * (in[i].im + in[k].im);
  756. d3 = 0.5 * (in[k].re - in[i].re);
  757. in[i].re = d2 + d9 * d4 + d6 * d3;
  758. in[i].im = d1 + d9 * d3 - d6 * d4;
  759. in[k].re = d2 - d9 * d4 - d6 * d3;
  760. in[k].im = -d1 + d9 * d3 - d6 * d4;
  761. d10 = d9;
  762. d9 += d9 * d8 - d6 * d7;
  763. d6 += d6 * d8 + d10 * d7;
  764. }
  765. d2 = in[0].re;
  766. in[0].re = d2 + in[0].im;
  767. in[0].im = d2 - in[0].im;
  768. }
  769. static void postprocess(FFTComplex *in, int len)
  770. {
  771. double d1, d2, d3, d4, d5, d6, d7, d8, d9, d10;
  772. int n, i, k;
  773. d5 = 2.0 * M_PI / len;
  774. d8 = sin(0.5 * d5);
  775. d8 = -2.0 * d8 * d8;
  776. d7 = sin(d5);
  777. d9 = 1.0 + d8;
  778. d6 = d7;
  779. n = len / 2;
  780. for (i = 1; i < len / 4; i++) {
  781. k = n - i;
  782. d2 = 0.5 * (in[i].re + in[k].re);
  783. d1 = 0.5 * (in[i].im - in[k].im);
  784. d4 = 0.5 * (in[i].re - in[k].re);
  785. d3 = 0.5 * (in[i].im + in[k].im);
  786. in[i].re = d2 - d9 * d3 - d6 * d4;
  787. in[i].im = d1 + d9 * d4 - d6 * d3;
  788. in[k].re = d2 + d9 * d3 + d6 * d4;
  789. in[k].im = -d1 + d9 * d4 - d6 * d3;
  790. d10 = d9;
  791. d9 += d9 * d8 - d6 * d7;
  792. d6 += d6 * d8 + d10 * d7;
  793. }
  794. d2 = in[0].re;
  795. in[0].re = 0.5 * (d2 + in[0].im);
  796. in[0].im = 0.5 * (d2 - in[0].im);
  797. }
  798. static void init_sample_noise(DeNoiseChannel *dnch)
  799. {
  800. for (int i = 0; i < 15; i++) {
  801. dnch->noise_band_norm[i] = 0.0;
  802. dnch->noise_band_avr[i] = 0.0;
  803. dnch->noise_band_avi[i] = 0.0;
  804. dnch->noise_band_var[i] = 0.0;
  805. }
  806. }
  807. static void sample_noise_block(AudioFFTDeNoiseContext *s,
  808. DeNoiseChannel *dnch,
  809. AVFrame *in, int ch)
  810. {
  811. float *src = (float *)in->extended_data[ch];
  812. double mag2, var = 0.0, avr = 0.0, avi = 0.0;
  813. int edge, j, k, n, edgemax;
  814. for (int i = 0; i < s->window_length; i++) {
  815. dnch->fft_data[i].re = s->window[i] * src[i] * (1LL << 24);
  816. dnch->fft_data[i].im = 0.0;
  817. }
  818. for (int i = s->window_length; i < s->fft_length2; i++) {
  819. dnch->fft_data[i].re = 0.0;
  820. dnch->fft_data[i].im = 0.0;
  821. }
  822. av_fft_permute(dnch->fft, dnch->fft_data);
  823. av_fft_calc(dnch->fft, dnch->fft_data);
  824. preprocess(dnch->fft_data, s->fft_length);
  825. edge = s->noise_band_edge[0];
  826. j = edge;
  827. k = 0;
  828. n = j;
  829. edgemax = fmin(s->fft_length2, s->noise_band_edge[15]);
  830. dnch->fft_data[s->fft_length2].re = dnch->fft_data[0].im;
  831. dnch->fft_data[0].im = 0.0;
  832. dnch->fft_data[s->fft_length2].im = 0.0;
  833. for (int i = j; i <= edgemax; i++) {
  834. if ((i == j) && (i < edgemax)) {
  835. if (j > edge) {
  836. dnch->noise_band_norm[k - 1] += j - edge;
  837. dnch->noise_band_avr[k - 1] += avr;
  838. dnch->noise_band_avi[k - 1] += avi;
  839. dnch->noise_band_var[k - 1] += var;
  840. }
  841. k++;
  842. edge = j;
  843. j = s->noise_band_edge[k];
  844. if (k == 15) {
  845. j++;
  846. }
  847. var = 0.0;
  848. avr = 0.0;
  849. avi = 0.0;
  850. }
  851. avr += dnch->fft_data[n].re;
  852. avi += dnch->fft_data[n].im;
  853. mag2 = dnch->fft_data[n].re * dnch->fft_data[n].re +
  854. dnch->fft_data[n].im * dnch->fft_data[n].im;
  855. mag2 = fmax(mag2, s->sample_floor);
  856. dnch->noisy_data[i] = mag2;
  857. var += mag2;
  858. n++;
  859. }
  860. dnch->noise_band_norm[k - 1] += j - edge;
  861. dnch->noise_band_avr[k - 1] += avr;
  862. dnch->noise_band_avi[k - 1] += avi;
  863. dnch->noise_band_var[k - 1] += var;
  864. }
  865. static void finish_sample_noise(AudioFFTDeNoiseContext *s,
  866. DeNoiseChannel *dnch,
  867. double *sample_noise)
  868. {
  869. for (int i = 0; i < s->noise_band_count; i++) {
  870. dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
  871. dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
  872. dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
  873. dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
  874. dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
  875. dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
  876. sample_noise[i] = (1.0 / C) * log(dnch->noise_band_var[i] / s->floor) - 100.0;
  877. }
  878. if (s->noise_band_count < 15) {
  879. for (int i = s->noise_band_count; i < 15; i++)
  880. sample_noise[i] = sample_noise[i - 1];
  881. }
  882. }
  883. static void set_noise_profile(AudioFFTDeNoiseContext *s,
  884. DeNoiseChannel *dnch,
  885. double *sample_noise,
  886. int new_profile)
  887. {
  888. int new_band_noise[15];
  889. double temp[15];
  890. double sum = 0.0, d1;
  891. float new_noise_floor;
  892. int i, n;
  893. for (int m = 0; m < 15; m++)
  894. temp[m] = sample_noise[m];
  895. if (new_profile) {
  896. i = 0;
  897. for (int m = 0; m < 5; m++) {
  898. sum = 0.0;
  899. for (n = 0; n < 15; n++)
  900. sum += s->matrix_b[i++] * temp[n];
  901. s->vector_b[m] = sum;
  902. }
  903. solve(s->matrix_a, s->vector_b, 5);
  904. i = 0;
  905. for (int m = 0; m < 15; m++) {
  906. sum = 0.0;
  907. for (n = 0; n < 5; n++)
  908. sum += s->matrix_c[i++] * s->vector_b[n];
  909. temp[m] = sum;
  910. }
  911. }
  912. sum = 0.0;
  913. for (int m = 0; m < 15; m++)
  914. sum += temp[m];
  915. d1 = (int)(sum / 15.0 - 0.5);
  916. if (!new_profile)
  917. i = lrint(temp[7] - d1);
  918. for (d1 -= dnch->band_noise[7] - i; d1 > -20.0; d1 -= 1.0)
  919. ;
  920. for (int m = 0; m < 15; m++)
  921. temp[m] -= d1;
  922. new_noise_floor = d1 + 2.5;
  923. if (new_profile) {
  924. av_log(s, AV_LOG_INFO, "bn=");
  925. for (int m = 0; m < 15; m++) {
  926. new_band_noise[m] = lrint(temp[m]);
  927. new_band_noise[m] = av_clip(new_band_noise[m], -24, 24);
  928. av_log(s, AV_LOG_INFO, "%d ", new_band_noise[m]);
  929. }
  930. av_log(s, AV_LOG_INFO, "\n");
  931. memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
  932. }
  933. if (s->track_noise)
  934. s->noise_floor = new_noise_floor;
  935. }
  936. typedef struct ThreadData {
  937. AVFrame *in;
  938. } ThreadData;
  939. static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
  940. {
  941. AudioFFTDeNoiseContext *s = ctx->priv;
  942. ThreadData *td = arg;
  943. AVFrame *in = td->in;
  944. const int start = (in->channels * jobnr) / nb_jobs;
  945. const int end = (in->channels * (jobnr+1)) / nb_jobs;
  946. for (int ch = start; ch < end; ch++) {
  947. DeNoiseChannel *dnch = &s->dnch[ch];
  948. const float *src = (const float *)in->extended_data[ch];
  949. double *dst = dnch->out_samples;
  950. if (s->track_noise) {
  951. int i = s->block_count & 0x1FF;
  952. if (dnch->sfm_fail_flags[i])
  953. dnch->sfm_fail_total--;
  954. dnch->sfm_fail_flags[i] = 0;
  955. dnch->sfm_threshold *= 1.0 - dnch->sfm_alpha;
  956. dnch->sfm_threshold += dnch->sfm_alpha * (0.5 + (1.0 / 640) * dnch->sfm_fail_total);
  957. }
  958. for (int m = 0; m < s->window_length; m++) {
  959. dnch->fft_data[m].re = s->window[m] * src[m] * (1LL << 24);
  960. dnch->fft_data[m].im = 0;
  961. }
  962. for (int m = s->window_length; m < s->fft_length2; m++) {
  963. dnch->fft_data[m].re = 0;
  964. dnch->fft_data[m].im = 0;
  965. }
  966. av_fft_permute(dnch->fft, dnch->fft_data);
  967. av_fft_calc(dnch->fft, dnch->fft_data);
  968. preprocess(dnch->fft_data, s->fft_length);
  969. process_frame(s, dnch, dnch->fft_data,
  970. dnch->prior,
  971. dnch->prior_band_excit,
  972. s->track_noise);
  973. postprocess(dnch->fft_data, s->fft_length);
  974. av_fft_permute(dnch->ifft, dnch->fft_data);
  975. av_fft_calc(dnch->ifft, dnch->fft_data);
  976. for (int m = 0; m < s->window_length; m++)
  977. dst[m] += s->window[m] * dnch->fft_data[m].re / (1LL << 24);
  978. }
  979. return 0;
  980. }
  981. static void get_auto_noise_levels(AudioFFTDeNoiseContext *s,
  982. DeNoiseChannel *dnch,
  983. double *levels)
  984. {
  985. if (s->noise_band_count > 0) {
  986. for (int i = 0; i < s->noise_band_count; i++) {
  987. levels[i] = (1.0 / C) * log(dnch->noise_band_auto_var[i] / s->floor) - 100.0;
  988. }
  989. if (s->noise_band_count < 15) {
  990. for (int i = s->noise_band_count; i < 15; i++)
  991. levels[i] = levels[i - 1];
  992. }
  993. } else {
  994. for (int i = 0; i < 15; i++) {
  995. levels[i] = -100.0;
  996. }
  997. }
  998. }
  999. static int output_frame(AVFilterLink *inlink)
  1000. {
  1001. AVFilterContext *ctx = inlink->dst;
  1002. AVFilterLink *outlink = ctx->outputs[0];
  1003. AudioFFTDeNoiseContext *s = ctx->priv;
  1004. AVFrame *out = NULL, *in = NULL;
  1005. ThreadData td;
  1006. int ret = 0;
  1007. in = ff_get_audio_buffer(outlink, s->window_length);
  1008. if (!in)
  1009. return AVERROR(ENOMEM);
  1010. ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, s->window_length);
  1011. if (ret < 0)
  1012. goto end;
  1013. if (s->track_noise) {
  1014. for (int ch = 0; ch < inlink->channels; ch++) {
  1015. DeNoiseChannel *dnch = &s->dnch[ch];
  1016. double levels[15];
  1017. get_auto_noise_levels(s, dnch, levels);
  1018. set_noise_profile(s, dnch, levels, 0);
  1019. }
  1020. if (s->noise_floor != s->last_noise_floor)
  1021. set_parameters(s);
  1022. }
  1023. if (s->sample_noise_start) {
  1024. for (int ch = 0; ch < inlink->channels; ch++) {
  1025. DeNoiseChannel *dnch = &s->dnch[ch];
  1026. init_sample_noise(dnch);
  1027. }
  1028. s->sample_noise_start = 0;
  1029. s->sample_noise = 1;
  1030. }
  1031. if (s->sample_noise) {
  1032. for (int ch = 0; ch < inlink->channels; ch++) {
  1033. DeNoiseChannel *dnch = &s->dnch[ch];
  1034. sample_noise_block(s, dnch, in, ch);
  1035. }
  1036. }
  1037. if (s->sample_noise_end) {
  1038. for (int ch = 0; ch < inlink->channels; ch++) {
  1039. DeNoiseChannel *dnch = &s->dnch[ch];
  1040. double sample_noise[15];
  1041. finish_sample_noise(s, dnch, sample_noise);
  1042. set_noise_profile(s, dnch, sample_noise, 1);
  1043. set_band_parameters(s, dnch);
  1044. }
  1045. s->sample_noise = 0;
  1046. s->sample_noise_end = 0;
  1047. }
  1048. s->block_count++;
  1049. td.in = in;
  1050. ctx->internal->execute(ctx, filter_channel, &td, NULL,
  1051. FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));
  1052. out = ff_get_audio_buffer(outlink, s->sample_advance);
  1053. if (!out) {
  1054. ret = AVERROR(ENOMEM);
  1055. goto end;
  1056. }
  1057. for (int ch = 0; ch < inlink->channels; ch++) {
  1058. DeNoiseChannel *dnch = &s->dnch[ch];
  1059. double *src = dnch->out_samples;
  1060. float *orig = (float *)in->extended_data[ch];
  1061. float *dst = (float *)out->extended_data[ch];
  1062. switch (s->output_mode) {
  1063. case IN_MODE:
  1064. for (int m = 0; m < s->sample_advance; m++)
  1065. dst[m] = orig[m];
  1066. break;
  1067. case OUT_MODE:
  1068. for (int m = 0; m < s->sample_advance; m++)
  1069. dst[m] = src[m];
  1070. break;
  1071. case NOISE_MODE:
  1072. for (int m = 0; m < s->sample_advance; m++)
  1073. dst[m] = orig[m] - src[m];
  1074. break;
  1075. default:
  1076. av_frame_free(&out);
  1077. ret = AVERROR_BUG;
  1078. goto end;
  1079. }
  1080. memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
  1081. memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
  1082. }
  1083. av_audio_fifo_drain(s->fifo, s->sample_advance);
  1084. out->pts = s->pts;
  1085. ret = ff_filter_frame(outlink, out);
  1086. if (ret < 0)
  1087. goto end;
  1088. s->pts += av_rescale_q(s->sample_advance, (AVRational){1, outlink->sample_rate}, outlink->time_base);
  1089. end:
  1090. av_frame_free(&in);
  1091. return ret;
  1092. }
  1093. static int activate(AVFilterContext *ctx)
  1094. {
  1095. AVFilterLink *inlink = ctx->inputs[0];
  1096. AVFilterLink *outlink = ctx->outputs[0];
  1097. AudioFFTDeNoiseContext *s = ctx->priv;
  1098. AVFrame *frame = NULL;
  1099. int ret;
  1100. FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
  1101. ret = ff_inlink_consume_frame(inlink, &frame);
  1102. if (ret < 0)
  1103. return ret;
  1104. if (ret > 0) {
  1105. if (s->pts == AV_NOPTS_VALUE)
  1106. s->pts = frame->pts;
  1107. ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
  1108. av_frame_free(&frame);
  1109. if (ret < 0)
  1110. return ret;
  1111. }
  1112. if (av_audio_fifo_size(s->fifo) >= s->window_length)
  1113. return output_frame(inlink);
  1114. FF_FILTER_FORWARD_STATUS(inlink, outlink);
  1115. if (ff_outlink_frame_wanted(outlink) &&
  1116. av_audio_fifo_size(s->fifo) < s->window_length) {
  1117. ff_inlink_request_frame(inlink);
  1118. return 0;
  1119. }
  1120. return FFERROR_NOT_READY;
  1121. }
  1122. static av_cold void uninit(AVFilterContext *ctx)
  1123. {
  1124. AudioFFTDeNoiseContext *s = ctx->priv;
  1125. av_freep(&s->window);
  1126. av_freep(&s->bin2band);
  1127. av_freep(&s->band_alpha);
  1128. av_freep(&s->band_beta);
  1129. if (s->dnch) {
  1130. for (int ch = 0; ch < s->channels; ch++) {
  1131. DeNoiseChannel *dnch = &s->dnch[ch];
  1132. av_freep(&dnch->amt);
  1133. av_freep(&dnch->band_amt);
  1134. av_freep(&dnch->band_excit);
  1135. av_freep(&dnch->gain);
  1136. av_freep(&dnch->prior);
  1137. av_freep(&dnch->prior_band_excit);
  1138. av_freep(&dnch->clean_data);
  1139. av_freep(&dnch->noisy_data);
  1140. av_freep(&dnch->out_samples);
  1141. av_freep(&dnch->spread_function);
  1142. av_freep(&dnch->abs_var);
  1143. av_freep(&dnch->rel_var);
  1144. av_freep(&dnch->min_abs_var);
  1145. av_freep(&dnch->fft_data);
  1146. av_fft_end(dnch->fft);
  1147. dnch->fft = NULL;
  1148. av_fft_end(dnch->ifft);
  1149. dnch->ifft = NULL;
  1150. }
  1151. av_freep(&s->dnch);
  1152. }
  1153. av_audio_fifo_free(s->fifo);
  1154. }
  1155. static int query_formats(AVFilterContext *ctx)
  1156. {
  1157. AVFilterFormats *formats = NULL;
  1158. AVFilterChannelLayouts *layouts = NULL;
  1159. static const enum AVSampleFormat sample_fmts[] = {
  1160. AV_SAMPLE_FMT_FLTP,
  1161. AV_SAMPLE_FMT_NONE
  1162. };
  1163. int ret;
  1164. formats = ff_make_format_list(sample_fmts);
  1165. if (!formats)
  1166. return AVERROR(ENOMEM);
  1167. ret = ff_set_common_formats(ctx, formats);
  1168. if (ret < 0)
  1169. return ret;
  1170. layouts = ff_all_channel_counts();
  1171. if (!layouts)
  1172. return AVERROR(ENOMEM);
  1173. ret = ff_set_common_channel_layouts(ctx, layouts);
  1174. if (ret < 0)
  1175. return ret;
  1176. formats = ff_all_samplerates();
  1177. return ff_set_common_samplerates(ctx, formats);
  1178. }
  1179. static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
  1180. char *res, int res_len, int flags)
  1181. {
  1182. AudioFFTDeNoiseContext *s = ctx->priv;
  1183. int need_reset = 0;
  1184. int ret = 0;
  1185. if (!strcmp(cmd, "sample_noise") ||
  1186. !strcmp(cmd, "sn")) {
  1187. if (!strcmp(args, "start")) {
  1188. s->sample_noise_start = 1;
  1189. s->sample_noise_end = 0;
  1190. } else if (!strcmp(args, "end") ||
  1191. !strcmp(args, "stop")) {
  1192. s->sample_noise_start = 0;
  1193. s->sample_noise_end = 1;
  1194. }
  1195. } else {
  1196. ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
  1197. if (ret < 0)
  1198. return ret;
  1199. need_reset = 1;
  1200. }
  1201. if (need_reset)
  1202. set_parameters(s);
  1203. return 0;
  1204. }
  1205. static const AVFilterPad inputs[] = {
  1206. {
  1207. .name = "default",
  1208. .type = AVMEDIA_TYPE_AUDIO,
  1209. .config_props = config_input,
  1210. },
  1211. { NULL }
  1212. };
  1213. static const AVFilterPad outputs[] = {
  1214. {
  1215. .name = "default",
  1216. .type = AVMEDIA_TYPE_AUDIO,
  1217. },
  1218. { NULL }
  1219. };
  1220. AVFilter ff_af_afftdn = {
  1221. .name = "afftdn",
  1222. .description = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
  1223. .query_formats = query_formats,
  1224. .priv_size = sizeof(AudioFFTDeNoiseContext),
  1225. .priv_class = &afftdn_class,
  1226. .activate = activate,
  1227. .uninit = uninit,
  1228. .inputs = inputs,
  1229. .outputs = outputs,
  1230. .process_command = process_command,
  1231. .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC |
  1232. AVFILTER_FLAG_SLICE_THREADS,
  1233. };