/* FFmpeg libavfilter FIR filter source (repository page boilerplate removed from listing). */
  1. /*
  2. * Copyright (c) 2017 Paul B Mahol
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * An arbitrary audio FIR filter
  23. */
  24. #include "libavutil/audio_fifo.h"
  25. #include "libavutil/common.h"
  26. #include "libavutil/float_dsp.h"
  27. #include "libavutil/opt.h"
  28. #include "libavcodec/avfft.h"
  29. #include "audio.h"
  30. #include "avfilter.h"
  31. #include "formats.h"
  32. #include "internal.h"
  33. #include "af_afir.h"
  34. static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t len)
  35. {
  36. int n;
  37. for (n = 0; n < len; n++) {
  38. const float cre = c[2 * n ];
  39. const float cim = c[2 * n + 1];
  40. const float tre = t[2 * n ];
  41. const float tim = t[2 * n + 1];
  42. sum[2 * n ] += tre * cre - tim * cim;
  43. sum[2 * n + 1] += tre * cim + tim * cre;
  44. }
  45. sum[2 * n] += t[2 * n] * c[2 * n];
  46. }
/**
 * Per-channel worker for the uniformly partitioned frequency-domain FIR
 * convolution; runs as one slice-threaded job per channel.
 *
 * @param ctx     filter context
 * @param arg     output AVFrame, or NULL when this pass only advances state
 * @param ch      channel index to process
 * @param nb_jobs unused
 * @return 0
 */
static int fir_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
{
    AudioFIRContext *s = ctx->priv;
    const float *src = (const float *)s->in[0]->extended_data[ch];
    /* s->buffer holds three rotating part_size slots per channel used for
     * overlap-add: index1 = pending tail to add onto, index2 = new tail,
     * s->index = completed slot to output. */
    int index1 = (s->index + 1) % 3;
    int index2 = (s->index + 2) % 3;
    float *sum = s->sum[ch];
    AVFrame *out = arg;
    float *block;
    float *dst;
    int n, i, j;

    memset(sum, 0, sizeof(*sum) * s->fft_length);
    block = s->block[ch] + s->part_index * s->block_size;
    memset(block, 0, sizeof(*block) * s->fft_length);

    /* Copy the fresh input into the upper half of the current partition
     * block while applying dry gain; the lower half stays zeroed (the
     * zero padding needed for linear, not circular, convolution). */
    s->fdsp->vector_fmul_scalar(block + s->part_size, src, s->dry_gain, FFALIGN(s->nb_samples, 4));
    emms_c();

    av_rdft_calc(s->rdft[ch], block);
    /* av_rdft_calc packs the real-only Nyquist bin into block[1]; move it
     * to its own slot so the complex MAC below treats all bins uniformly. */
    block[2 * s->part_size] = block[1];
    block[1] = 0;

    /* Frequency-domain delay line: accumulate each stored input partition
     * times the matching IR partition, walking backwards from the newest
     * (s->part_index) with wraparound. */
    j = s->part_index;
    for (i = 0; i < s->nb_partitions; i++) {
        const int coffset = i * s->coeff_size;
        /* one2many: a mono IR serves every channel from coeff index 0. */
        const FFTComplex *coeff = s->coeff[ch * !s->one2many] + coffset;

        block = s->block[ch] + j * s->block_size;
        s->fcmul_add(sum, block, (const float *)coeff, s->part_size);

        if (j == 0)
            j = s->nb_partitions;
        j--;
    }

    /* Re-pack the Nyquist bin before the inverse transform. */
    sum[1] = sum[2 * s->part_size];
    av_rdft_calc(s->irdft[ch], sum);

    /* Overlap-add: first half of the result adds onto the pending tail... */
    dst = (float *)s->buffer->extended_data[ch] + index1 * s->part_size;
    for (n = 0; n < s->part_size; n++) {
        dst[n] += sum[n];
    }

    /* ...and the second half becomes the new tail. */
    dst = (float *)s->buffer->extended_data[ch] + index2 * s->part_size;
    memcpy(dst, sum + s->part_size, s->part_size * sizeof(*dst));

    /* The completed slot is emitted with wet gain applied (skipped when
     * out is NULL, i.e. during the initial-latency pass). */
    dst = (float *)s->buffer->extended_data[ch] + s->index * s->part_size;
    if (out) {
        float *ptr = (float *)out->extended_data[ch];

        s->fdsp->vector_fmul_scalar(ptr, dst, s->wet_gain, FFALIGN(out->nb_samples, 4));
        emms_c();
    }

    return 0;
}
/**
 * Pull up to one partition of input from the FIFO, run the per-channel
 * FIR jobs, and (unless the initial latency partition is being skipped)
 * send the resulting frame downstream.
 *
 * @return 0 on success, negative AVERROR otherwise
 */
static int fir_frame(AudioFIRContext *s, AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFrame *out = NULL;
    int ret;

    s->nb_samples = FFMIN(s->part_size, av_audio_fifo_size(s->fifo[0]));

    /* want_skip: the very first processed partition is pure latency, so
     * no output frame is allocated for it. */
    if (!s->want_skip) {
        out = ff_get_audio_buffer(outlink, s->nb_samples);
        if (!out)
            return AVERROR(ENOMEM);
    }

    s->in[0] = ff_get_audio_buffer(ctx->inputs[0], s->nb_samples);
    if (!s->in[0]) {
        av_frame_free(&out);
        return AVERROR(ENOMEM);
    }

    /* Peek (not read): the samples are drained only after the jobs ran. */
    av_audio_fifo_peek(s->fifo[0], (void **)s->in[0]->extended_data, s->nb_samples);

    /* out may be NULL here; fir_channel handles that case. */
    ctx->internal->execute(ctx, fir_channel, out, NULL, outlink->channels);

    s->part_index = (s->part_index + 1) % s->nb_partitions;

    av_audio_fifo_drain(s->fifo[0], s->nb_samples);

    if (!s->want_skip) {
        out->pts = s->pts;
        if (s->pts != AV_NOPTS_VALUE)
            s->pts += av_rescale_q(out->nb_samples, (AVRational){1, outlink->sample_rate}, outlink->time_base);
    }

    /* Advance the 3-slot overlap-add buffer rotation. */
    s->index++;
    if (s->index == 3)
        s->index = 0;
    av_frame_free(&s->in[0]);

    if (s->want_skip == 1) {
        /* First partition: state advanced, nothing emitted. */
        s->want_skip = 0;
        ret = 0;
    } else {
        ret = ff_filter_frame(outlink, out);
    }

    return ret;
}
/**
 * Convert the buffered impulse response into per-partition
 * frequency-domain coefficients, and size/allocate all per-channel work
 * buffers.  Called once, after the IR input has reached EOF.
 *
 * @return 0 on success, negative AVERROR otherwise
 */
static int convert_coeffs(AVFilterContext *ctx)
{
    AudioFIRContext *s = ctx->priv;
    int i, ch, n, N;

    s->nb_taps = av_audio_fifo_size(s->fifo[1]);
    if (s->nb_taps <= 0)
        return AVERROR(EINVAL);

    /* n = smallest exponent with 2^n >= nb_taps (at least 2^4). */
    for (n = 4; (1 << n) < s->nb_taps; n++);
    /* Cap the transform order; IRs longer than 2^16 taps are handled by
     * splitting into several partitions of this size. */
    N = FFMIN(n, 16);
    s->ir_length = 1 << n;
    /* Real FFT of 2*part_size samples, +1 float for the unpacked Nyquist bin. */
    s->fft_length = (1 << (N + 1)) + 1;
    s->part_size = 1 << (N - 1);
    s->block_size = FFALIGN(s->fft_length, 32);
    s->coeff_size = FFALIGN(s->part_size + 1, 32);
    s->nb_partitions = (s->nb_taps + s->part_size - 1) / s->part_size;
    s->nb_coeffs = s->ir_length + s->nb_partitions;

    /* Per-channel accumulator for the frequency-domain MAC. */
    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
        s->sum[ch] = av_calloc(s->fft_length, sizeof(**s->sum));
        if (!s->sum[ch])
            return AVERROR(ENOMEM);
    }

    /* Per-IR-channel coefficients: one coeff_size slice per partition. */
    for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
        s->coeff[ch] = av_calloc(s->nb_partitions * s->coeff_size, sizeof(**s->coeff));
        if (!s->coeff[ch])
            return AVERROR(ENOMEM);
    }

    /* Per-channel frequency-domain delay line: one block per partition. */
    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
        s->block[ch] = av_calloc(s->nb_partitions * s->block_size, sizeof(**s->block));
        if (!s->block[ch])
            return AVERROR(ENOMEM);
    }

    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
        s->rdft[ch] = av_rdft_init(N, DFT_R2C);
        s->irdft[ch] = av_rdft_init(N, IDFT_C2R);
        if (!s->rdft[ch] || !s->irdft[ch])
            return AVERROR(ENOMEM);
    }

    s->in[1] = ff_get_audio_buffer(ctx->inputs[1], s->nb_taps);
    if (!s->in[1])
        return AVERROR(ENOMEM);

    /* Three part_size slots per channel for the overlap-add rotation. */
    s->buffer = ff_get_audio_buffer(ctx->inputs[0], s->part_size * 3);
    if (!s->buffer)
        return AVERROR(ENOMEM);

    av_audio_fifo_read(s->fifo[1], (void **)s->in[1]->extended_data, s->nb_taps);

    /* Optional auto gain: normalize by the summed absolute amplitude
     * across all IR channels. */
    if (s->again) {
        float power = 0;

        for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
            float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];

            for (i = 0; i < s->nb_taps; i++)
                power += FFABS(time[i]);
        }
        s->gain = sqrtf(1.f / (ctx->inputs[1]->channels * power)) / (sqrtf(ctx->inputs[1]->channels));
        for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
            float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];

            s->fdsp->vector_fmul_scalar(time, time, s->gain, FFALIGN(s->nb_taps, 4));
        }
    }

    for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
        float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];
        float *block = s->block[ch];
        FFTComplex *coeff = s->coeff[ch];

        /* Zero taps past the user-selected fraction of the IR length. */
        for (i = FFMAX(1, s->length * s->nb_taps); i < s->nb_taps; i++)
            time[i] = 0;

        for (i = 0; i < s->nb_partitions; i++) {
            const float scale = 1.f / s->part_size;
            const int toffset = i * s->part_size;
            const int coffset = i * s->coeff_size;
            const int boffset = s->part_size;
            const int remaining = s->nb_taps - (i * s->part_size);
            const int size = remaining >= s->part_size ? s->part_size : remaining;

            /* Zero-pad each IR partition into the upper half of the block,
             * mirroring the input layout used by fir_channel. */
            memset(block, 0, sizeof(*block) * s->fft_length);
            memcpy(block + boffset, time + toffset, size * sizeof(*block));

            /* NOTE(review): uses s->rdft[0] for every channel; safe as
             * long as all channels share identical FFT parameters (they
             * do — all are av_rdft_init(N, DFT_R2C)). */
            av_rdft_calc(s->rdft[0], block);

            /* Unpack av_rdft_calc's packed DC/Nyquist pair (block[0] /
             * block[1]) into separate purely-real coefficients; the
             * 1/part_size scale normalizes the FFT round trip. */
            coeff[coffset].re = block[0] * scale;
            coeff[coffset].im = 0;
            for (n = 1; n < s->part_size; n++) {
                coeff[coffset + n].re = block[2 * n] * scale;
                coeff[coffset + n].im = block[2 * n + 1] * scale;
            }
            coeff[coffset + s->part_size].re = block[1] * scale;
            coeff[coffset + s->part_size].im = 0;
        }
    }

    av_frame_free(&s->in[1]);
    av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", s->nb_taps);
    av_log(ctx, AV_LOG_DEBUG, "nb_partitions: %d\n", s->nb_partitions);
    av_log(ctx, AV_LOG_DEBUG, "partition size: %d\n", s->part_size);
    av_log(ctx, AV_LOG_DEBUG, "ir_length: %d\n", s->ir_length);

    s->have_coeffs = 1;

    return 0;
}
  220. static int read_ir(AVFilterLink *link, AVFrame *frame)
  221. {
  222. AVFilterContext *ctx = link->dst;
  223. AudioFIRContext *s = ctx->priv;
  224. int nb_taps, max_nb_taps, ret;
  225. ret = av_audio_fifo_write(s->fifo[1], (void **)frame->extended_data,
  226. frame->nb_samples);
  227. av_frame_free(&frame);
  228. if (ret < 0)
  229. return ret;
  230. nb_taps = av_audio_fifo_size(s->fifo[1]);
  231. max_nb_taps = MAX_IR_DURATION * ctx->outputs[0]->sample_rate;
  232. if (nb_taps > max_nb_taps) {
  233. av_log(ctx, AV_LOG_ERROR, "Too big number of coefficients: %d > %d.\n", nb_taps, max_nb_taps);
  234. return AVERROR(EINVAL);
  235. }
  236. return 0;
  237. }
  238. static int filter_frame(AVFilterLink *link, AVFrame *frame)
  239. {
  240. AVFilterContext *ctx = link->dst;
  241. AudioFIRContext *s = ctx->priv;
  242. AVFilterLink *outlink = ctx->outputs[0];
  243. int ret;
  244. ret = av_audio_fifo_write(s->fifo[0], (void **)frame->extended_data,
  245. frame->nb_samples);
  246. if (ret > 0 && s->pts == AV_NOPTS_VALUE)
  247. s->pts = frame->pts;
  248. av_frame_free(&frame);
  249. if (ret < 0)
  250. return ret;
  251. if (!s->have_coeffs && s->eof_coeffs) {
  252. ret = convert_coeffs(ctx);
  253. if (ret < 0)
  254. return ret;
  255. }
  256. if (s->have_coeffs) {
  257. while (av_audio_fifo_size(s->fifo[0]) >= s->part_size) {
  258. ret = fir_frame(s, outlink);
  259. if (ret < 0)
  260. return ret;
  261. }
  262. }
  263. return 0;
  264. }
/**
 * Output-side request handler.  Drives the IR input until it hits EOF
 * (coefficients can only be converted once the full IR is known), then
 * pulls from the main input.  On main-input EOF it pads the FIFO once
 * with part_size samples of silence to flush the filter latency, drains
 * everything left, then signals EOF downstream.
 */
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AudioFIRContext *s = ctx->priv;
    int ret;

    if (!s->eof_coeffs) {
        ret = ff_request_frame(ctx->inputs[1]);
        if (ret == AVERROR_EOF) {
            /* IR fully received; not an error for the output. */
            s->eof_coeffs = 1;
            ret = 0;
        }
        return ret;
    }
    ret = ff_request_frame(ctx->inputs[0]);
    if (ret == AVERROR_EOF && s->have_coeffs) {
        if (s->need_padding) {
            /* One partition of silence compensates for the initial
             * skipped (latency) partition. */
            AVFrame *silence = ff_get_audio_buffer(outlink, s->part_size);

            if (!silence)
                return AVERROR(ENOMEM);
            ret = av_audio_fifo_write(s->fifo[0], (void **)silence->extended_data,
                                      silence->nb_samples);
            av_frame_free(&silence);
            if (ret < 0)
                return ret;
            s->need_padding = 0;
        }

        while (av_audio_fifo_size(s->fifo[0]) > 0) {
            ret = fir_frame(s, outlink);
            if (ret < 0)
                return ret;
        }
        ret = AVERROR_EOF;
    }
    return ret;
}
  300. static int query_formats(AVFilterContext *ctx)
  301. {
  302. AVFilterFormats *formats;
  303. AVFilterChannelLayouts *layouts;
  304. static const enum AVSampleFormat sample_fmts[] = {
  305. AV_SAMPLE_FMT_FLTP,
  306. AV_SAMPLE_FMT_NONE
  307. };
  308. int ret, i;
  309. layouts = ff_all_channel_counts();
  310. if ((ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0)
  311. return ret;
  312. for (i = 0; i < 2; i++) {
  313. layouts = ff_all_channel_counts();
  314. if ((ret = ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts)) < 0)
  315. return ret;
  316. }
  317. formats = ff_make_format_list(sample_fmts);
  318. if ((ret = ff_set_common_formats(ctx, formats)) < 0)
  319. return ret;
  320. formats = ff_all_samplerates();
  321. return ff_set_common_samplerates(ctx, formats);
  322. }
  323. static int config_output(AVFilterLink *outlink)
  324. {
  325. AVFilterContext *ctx = outlink->src;
  326. AudioFIRContext *s = ctx->priv;
  327. if (ctx->inputs[0]->channels != ctx->inputs[1]->channels &&
  328. ctx->inputs[1]->channels != 1) {
  329. av_log(ctx, AV_LOG_ERROR,
  330. "Second input must have same number of channels as first input or "
  331. "exactly 1 channel.\n");
  332. return AVERROR(EINVAL);
  333. }
  334. s->one2many = ctx->inputs[1]->channels == 1;
  335. outlink->sample_rate = ctx->inputs[0]->sample_rate;
  336. outlink->time_base = ctx->inputs[0]->time_base;
  337. outlink->channel_layout = ctx->inputs[0]->channel_layout;
  338. outlink->channels = ctx->inputs[0]->channels;
  339. s->fifo[0] = av_audio_fifo_alloc(ctx->inputs[0]->format, ctx->inputs[0]->channels, 1024);
  340. s->fifo[1] = av_audio_fifo_alloc(ctx->inputs[1]->format, ctx->inputs[1]->channels, 1024);
  341. if (!s->fifo[0] || !s->fifo[1])
  342. return AVERROR(ENOMEM);
  343. s->sum = av_calloc(outlink->channels, sizeof(*s->sum));
  344. s->coeff = av_calloc(ctx->inputs[1]->channels, sizeof(*s->coeff));
  345. s->block = av_calloc(ctx->inputs[0]->channels, sizeof(*s->block));
  346. s->rdft = av_calloc(outlink->channels, sizeof(*s->rdft));
  347. s->irdft = av_calloc(outlink->channels, sizeof(*s->irdft));
  348. if (!s->sum || !s->coeff || !s->block || !s->rdft || !s->irdft)
  349. return AVERROR(ENOMEM);
  350. s->nb_channels = outlink->channels;
  351. s->nb_coef_channels = ctx->inputs[1]->channels;
  352. s->want_skip = 1;
  353. s->need_padding = 1;
  354. s->pts = AV_NOPTS_VALUE;
  355. return 0;
  356. }
  357. static av_cold void uninit(AVFilterContext *ctx)
  358. {
  359. AudioFIRContext *s = ctx->priv;
  360. int ch;
  361. if (s->sum) {
  362. for (ch = 0; ch < s->nb_channels; ch++) {
  363. av_freep(&s->sum[ch]);
  364. }
  365. }
  366. av_freep(&s->sum);
  367. if (s->coeff) {
  368. for (ch = 0; ch < s->nb_coef_channels; ch++) {
  369. av_freep(&s->coeff[ch]);
  370. }
  371. }
  372. av_freep(&s->coeff);
  373. if (s->block) {
  374. for (ch = 0; ch < s->nb_channels; ch++) {
  375. av_freep(&s->block[ch]);
  376. }
  377. }
  378. av_freep(&s->block);
  379. if (s->rdft) {
  380. for (ch = 0; ch < s->nb_channels; ch++) {
  381. av_rdft_end(s->rdft[ch]);
  382. }
  383. }
  384. av_freep(&s->rdft);
  385. if (s->irdft) {
  386. for (ch = 0; ch < s->nb_channels; ch++) {
  387. av_rdft_end(s->irdft[ch]);
  388. }
  389. }
  390. av_freep(&s->irdft);
  391. av_frame_free(&s->in[0]);
  392. av_frame_free(&s->in[1]);
  393. av_frame_free(&s->buffer);
  394. av_audio_fifo_free(s->fifo[0]);
  395. av_audio_fifo_free(s->fifo[1]);
  396. av_freep(&s->fdsp);
  397. }
  398. static av_cold int init(AVFilterContext *ctx)
  399. {
  400. AudioFIRContext *s = ctx->priv;
  401. s->fcmul_add = fcmul_add_c;
  402. s->fdsp = avpriv_float_dsp_alloc(0);
  403. if (!s->fdsp)
  404. return AVERROR(ENOMEM);
  405. if (ARCH_X86)
  406. ff_afir_init_x86(s);
  407. return 0;
  408. }
/* Two inputs: the audio to be filtered and the impulse-response stream. */
static const AVFilterPad afir_inputs[] = {
    {
        .name          = "main",
        .type          = AVMEDIA_TYPE_AUDIO,
        .filter_frame  = filter_frame,
    },{
        .name          = "ir",
        .type          = AVMEDIA_TYPE_AUDIO,
        .filter_frame  = read_ir,
    },
    { NULL }
};
/* Single audio output; request_frame drives both inputs. */
static const AVFilterPad afir_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_AUDIO,
        .config_props  = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
#define OFFSET(x) offsetof(AudioFIRContext, x)

/* User options: dry/wet mix gains, usable IR length fraction, auto gain. */
static const AVOption afir_options[] = {
    { "dry",    "set dry gain",     OFFSET(dry_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
    { "wet",    "set wet gain",     OFFSET(wet_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
    { "length", "set IR length",    OFFSET(length),   AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
    { "again",  "enable auto gain", OFFSET(again),    AV_OPT_TYPE_BOOL,  {.i64=1}, 0, 1, AF },
    { NULL }
};

AVFILTER_DEFINE_CLASS(afir);
/* Filter registration: slice-threaded so fir_channel runs per channel. */
AVFilter ff_af_afir = {
    .name          = "afir",
    .description   = NULL_IF_CONFIG_SMALL("Apply Finite Impulse Response filter with supplied coefficients in 2nd stream."),
    .priv_size     = sizeof(AudioFIRContext),
    .priv_class    = &afir_class,
    .query_formats = query_formats,
    .init          = init,
    .uninit        = uninit,
    .inputs        = afir_inputs,
    .outputs       = afir_outputs,
    .flags         = AVFILTER_FLAG_SLICE_THREADS,
};