You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

543 lines
16KB

  1. /*
  2. * Copyright (c) 2017 Paul B Mahol
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * An arbitrary audio FIR filter
  23. */
  24. #include "libavutil/audio_fifo.h"
  25. #include "libavutil/common.h"
  26. #include "libavutil/float_dsp.h"
  27. #include "libavutil/opt.h"
  28. #include "libavcodec/avfft.h"
  29. #include "audio.h"
  30. #include "avfilter.h"
  31. #include "formats.h"
  32. #include "internal.h"
  33. #include "af_afir.h"
  34. static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t len)
  35. {
  36. int n;
  37. for (n = 0; n < len; n++) {
  38. const float cre = c[2 * n ];
  39. const float cim = c[2 * n + 1];
  40. const float tre = t[2 * n ];
  41. const float tim = t[2 * n + 1];
  42. sum[2 * n ] += tre * cre - tim * cim;
  43. sum[2 * n + 1] += tre * cim + tim * cre;
  44. }
  45. sum[2 * n] += t[2 * n] * c[2 * n];
  46. }
/**
 * Filter the current chunk of samples for channel @p ch using uniformly
 * partitioned frequency-domain convolution with a 3-slot output ring
 * buffer.  Runs as one slice-threading job per channel; @p arg is the
 * output frame, or NULL while initial output is still being skipped.
 */
static int fir_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
{
    AudioFIRContext *s = ctx->priv;
    const float *src = (const float *)s->in[0]->extended_data[ch];
    int index1 = (s->index + 1) % 3;    /* ring slot that the first half adds into */
    int index2 = (s->index + 2) % 3;    /* ring slot overwritten by the second half */
    float *sum = s->sum[ch];
    AVFrame *out = arg;
    float *block;
    float *dst;
    int n, i, j;

    /* Accumulator for the spectral multiply-adds over all partitions. */
    memset(sum, 0, sizeof(*sum) * s->fft_length);
    block = s->block[ch] + s->part_index * s->block_size;
    memset(block, 0, sizeof(*block) * s->fft_length);

    /* Dry-gain-scaled input goes into the second half of the block; the
     * first half stays zero (zero-padded partition). */
    s->fdsp->vector_fmul_scalar(block + s->part_size, src, s->dry_gain, FFALIGN(s->nb_samples, 4));
    emms_c();

    /* Forward transform; av_rdft_calc packs the Nyquist-bin real value
     * into slot [1], so move it to its own slot at 2 * part_size. */
    av_rdft_calc(s->rdft[ch], block);
    block[2 * s->part_size] = block[1];
    block[1] = 0;

    /* Multiply stored input partitions against the coefficient
     * partitions, walking backwards from the newest.  With a mono IR
     * (one2many) every channel uses coefficient set 0. */
    j = s->part_index;

    for (i = 0; i < s->nb_partitions; i++) {
        const int coffset = i * s->coeff_size;
        const FFTComplex *coeff = s->coeff[ch * !s->one2many] + coffset;

        block = s->block[ch] + j * s->block_size;
        s->fcmul_add(sum, block, (const float *)coeff, s->part_size);
        if (j == 0)
            j = s->nb_partitions;
        j--;
    }

    /* Re-pack the Nyquist value for the inverse transform. */
    sum[1] = sum[2 * s->part_size];
    av_rdft_calc(s->irdft[ch], sum);

    /* Overlap-add: first half of the time-domain result adds into the
     * next ring slot, second half overwrites the slot after that. */
    dst = (float *)s->buffer->extended_data[ch] + index1 * s->part_size;
    for (n = 0; n < s->part_size; n++) {
        dst[n] += sum[n];
    }

    dst = (float *)s->buffer->extended_data[ch] + index2 * s->part_size;

    memcpy(dst, sum + s->part_size, s->part_size * sizeof(*dst));

    /* The current ring slot is now complete; emit it with wet gain and
     * the (optional) auto-gain normalization factor. */
    dst = (float *)s->buffer->extended_data[ch] + s->index * s->part_size;

    if (out) {
        float *ptr = (float *)out->extended_data[ch];
        s->fdsp->vector_fmul_scalar(ptr, dst, s->gain * s->wet_gain, FFALIGN(out->nb_samples, 4));
        emms_c();
    }

    return 0;
}
/**
 * Consume up to one partition of queued input, run the per-channel
 * convolution jobs, and push the resulting frame downstream — unless
 * the initial latency chunk is being skipped (want_skip), in which case
 * the samples are processed but no frame is emitted.
 */
static int fir_frame(AudioFIRContext *s, AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFrame *out = NULL;
    int ret;

    s->nb_samples = FFMIN(s->part_size, av_audio_fifo_size(s->fifo[0]));

    /* No output buffer needed while the first chunk is being skipped. */
    if (!s->want_skip) {
        out = ff_get_audio_buffer(outlink, s->nb_samples);
        if (!out)
            return AVERROR(ENOMEM);
    }

    s->in[0] = ff_get_audio_buffer(ctx->inputs[0], s->nb_samples);
    if (!s->in[0]) {
        av_frame_free(&out);
        return AVERROR(ENOMEM);
    }

    /* Peek (not read); the FIFO is drained explicitly below. */
    av_audio_fifo_peek(s->fifo[0], (void **)s->in[0]->extended_data, s->nb_samples);

    /* One slice job per channel. */
    ctx->internal->execute(ctx, fir_channel, out, NULL, outlink->channels);

    /* Advance the input-partition ring and drop the consumed samples. */
    s->part_index = (s->part_index + 1) % s->nb_partitions;

    av_audio_fifo_drain(s->fifo[0], s->nb_samples);

    if (!s->want_skip) {
        out->pts = s->pts;
        if (s->pts != AV_NOPTS_VALUE)
            s->pts += av_rescale_q(out->nb_samples, (AVRational){1, outlink->sample_rate}, outlink->time_base);
    }

    /* Advance the 3-slot output ring buffer index. */
    s->index++;
    if (s->index == 3)
        s->index = 0;
    av_frame_free(&s->in[0]);

    if (s->want_skip == 1) {
        /* First chunk: drop it (filter latency) instead of emitting. */
        s->want_skip = 0;
        ret = 0;
    } else {
        ret = ff_filter_frame(outlink, out);
    }

    return ret;
}
/**
 * Read the whole impulse response from the IR FIFO and convert it into
 * per-partition frequency-domain coefficients; also allocates all FFT,
 * accumulation and ring buffers.  Called once, after the IR input has
 * reached EOF.
 */
static int convert_coeffs(AVFilterContext *ctx)
{
    AudioFIRContext *s = ctx->priv;
    int i, ch, n, N;
    float power = 0;

    s->nb_taps = av_audio_fifo_size(s->fifo[1]);
    if (s->nb_taps <= 0)
        return AVERROR(EINVAL);

    /* n = ceil(log2(nb_taps)), minimum 4; N caps the rdft order at 16. */
    for (n = 4; (1 << n) < s->nb_taps; n++);
    N = FFMIN(n, 16);
    s->ir_length = 1 << n;
    s->fft_length = (1 << (N + 1)) + 1;         /* 2^(N+1) rdft points + unpacked Nyquist slot */
    s->part_size = 1 << (N - 1);
    s->block_size = FFALIGN(s->fft_length, 32); /* stride between stored partitions, aligned for SIMD */
    s->coeff_size = FFALIGN(s->part_size + 1, 32); /* complex coeffs per partition (DC..Nyquist), aligned */
    s->nb_partitions = (s->nb_taps + s->part_size - 1) / s->part_size;
    s->nb_coeffs = s->ir_length + s->nb_partitions;

    /* Per-channel spectral accumulators. */
    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
        s->sum[ch] = av_calloc(s->fft_length, sizeof(**s->sum));
        if (!s->sum[ch])
            return AVERROR(ENOMEM);
    }

    /* Per-IR-channel frequency-domain coefficients (one set per partition). */
    for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
        s->coeff[ch] = av_calloc(s->nb_partitions * s->coeff_size, sizeof(**s->coeff));
        if (!s->coeff[ch])
            return AVERROR(ENOMEM);
    }

    /* Per-channel history of transformed input partitions. */
    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
        s->block[ch] = av_calloc(s->nb_partitions * s->block_size, sizeof(**s->block));
        if (!s->block[ch])
            return AVERROR(ENOMEM);
    }

    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
        s->rdft[ch] = av_rdft_init(N, DFT_R2C);
        s->irdft[ch] = av_rdft_init(N, IDFT_C2R);
        if (!s->rdft[ch] || !s->irdft[ch])
            return AVERROR(ENOMEM);
    }

    s->in[1] = ff_get_audio_buffer(ctx->inputs[1], s->nb_taps);
    if (!s->in[1])
        return AVERROR(ENOMEM);

    /* 3-slot output overlap-add ring buffer (see fir_channel). */
    s->buffer = ff_get_audio_buffer(ctx->inputs[0], s->part_size * 3);
    if (!s->buffer)
        return AVERROR(ENOMEM);

    av_audio_fifo_read(s->fifo[1], (void **)s->in[1]->extended_data, s->nb_taps);

    for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
        float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];
        float *block = s->block[ch];
        FFTComplex *coeff = s->coeff[ch];

        /* Accumulate IR power across channels for the auto-gain factor. */
        power += s->fdsp->scalarproduct_float(time, time, s->nb_taps);

        /* Truncate the IR to the user-selected fraction of its length. */
        for (i = FFMAX(1, s->length * s->nb_taps); i < s->nb_taps; i++)
            time[i] = 0;

        for (i = 0; i < s->nb_partitions; i++) {
            const float scale = 1.f / s->part_size;     /* rdft normalization */
            const int toffset = i * s->part_size;
            const int coffset = i * s->coeff_size;
            const int boffset = s->part_size;           /* taps go into the 2nd half, 1st half zero */
            const int remaining = s->nb_taps - (i * s->part_size);
            const int size = remaining >= s->part_size ? s->part_size : remaining;

            memset(block, 0, sizeof(*block) * s->fft_length);
            memcpy(block + boffset, time + toffset, size * sizeof(*block));

            av_rdft_calc(s->rdft[0], block);

            /* Unpack the half-complex rdft layout (DC at [0], Nyquist
             * packed at [1]) into part_size + 1 explicit complex bins. */
            coeff[coffset].re = block[0] * scale;
            coeff[coffset].im = 0;
            for (n = 1; n < s->part_size; n++) {
                coeff[coffset + n].re = block[2 * n] * scale;
                coeff[coffset + n].im = block[2 * n + 1] * scale;
            }
            coeff[coffset + s->part_size].re = block[1] * scale;
            coeff[coffset + s->part_size].im = 0;
        }
    }

    av_frame_free(&s->in[1]);

    /* Auto gain: normalize by the RMS power of the IR. */
    s->gain = s->again ? 1.f / sqrtf(power / ctx->inputs[1]->channels) : 1.f;
    av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", s->nb_taps);
    av_log(ctx, AV_LOG_DEBUG, "nb_partitions: %d\n", s->nb_partitions);
    av_log(ctx, AV_LOG_DEBUG, "partition size: %d\n", s->part_size);
    av_log(ctx, AV_LOG_DEBUG, "ir_length: %d\n", s->ir_length);

    s->have_coeffs = 1;

    return 0;
}
  210. static int read_ir(AVFilterLink *link, AVFrame *frame)
  211. {
  212. AVFilterContext *ctx = link->dst;
  213. AudioFIRContext *s = ctx->priv;
  214. int nb_taps, max_nb_taps, ret;
  215. ret = av_audio_fifo_write(s->fifo[1], (void **)frame->extended_data,
  216. frame->nb_samples);
  217. av_frame_free(&frame);
  218. if (ret < 0)
  219. return ret;
  220. nb_taps = av_audio_fifo_size(s->fifo[1]);
  221. max_nb_taps = MAX_IR_DURATION * ctx->outputs[0]->sample_rate;
  222. if (nb_taps > max_nb_taps) {
  223. av_log(ctx, AV_LOG_ERROR, "Too big number of coefficients: %d > %d.\n", nb_taps, max_nb_taps);
  224. return AVERROR(EINVAL);
  225. }
  226. return 0;
  227. }
  228. static int filter_frame(AVFilterLink *link, AVFrame *frame)
  229. {
  230. AVFilterContext *ctx = link->dst;
  231. AudioFIRContext *s = ctx->priv;
  232. AVFilterLink *outlink = ctx->outputs[0];
  233. int ret;
  234. ret = av_audio_fifo_write(s->fifo[0], (void **)frame->extended_data,
  235. frame->nb_samples);
  236. if (ret > 0 && s->pts == AV_NOPTS_VALUE)
  237. s->pts = frame->pts;
  238. av_frame_free(&frame);
  239. if (ret < 0)
  240. return ret;
  241. if (!s->have_coeffs && s->eof_coeffs) {
  242. ret = convert_coeffs(ctx);
  243. if (ret < 0)
  244. return ret;
  245. }
  246. if (s->have_coeffs) {
  247. while (av_audio_fifo_size(s->fifo[0]) >= s->part_size) {
  248. ret = fir_frame(s, outlink);
  249. if (ret < 0)
  250. return ret;
  251. }
  252. }
  253. return 0;
  254. }
/**
 * Output-side pull: drain the IR input until it reaches EOF, then pull
 * from the main input.  When the main input ends, push one partition of
 * silence (once) so the convolution tail is flushed, then drain the
 * remaining FIFO contents before signalling EOF downstream.
 */
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AudioFIRContext *s = ctx->priv;
    int ret;

    /* The full impulse response must be buffered before filtering. */
    if (!s->eof_coeffs) {
        ret = ff_request_frame(ctx->inputs[1]);
        if (ret == AVERROR_EOF) {
            s->eof_coeffs = 1;
            ret = 0;
        }
        return ret;
    }
    ret = ff_request_frame(ctx->inputs[0]);
    if (ret == AVERROR_EOF && s->have_coeffs) {
        /* One-shot zero padding to push out the filter latency. */
        if (s->need_padding) {
            AVFrame *silence = ff_get_audio_buffer(outlink, s->part_size);

            if (!silence)
                return AVERROR(ENOMEM);
            ret = av_audio_fifo_write(s->fifo[0], (void **)silence->extended_data,
                                      silence->nb_samples);
            av_frame_free(&silence);
            if (ret < 0)
                return ret;
            s->need_padding = 0;
        }

        /* Flush everything still queued, then report EOF. */
        while (av_audio_fifo_size(s->fifo[0]) > 0) {
            ret = fir_frame(s, outlink);
            if (ret < 0)
                return ret;
        }
        ret = AVERROR_EOF;
    }
    return ret;
}
  290. static int query_formats(AVFilterContext *ctx)
  291. {
  292. AVFilterFormats *formats;
  293. AVFilterChannelLayouts *layouts;
  294. static const enum AVSampleFormat sample_fmts[] = {
  295. AV_SAMPLE_FMT_FLTP,
  296. AV_SAMPLE_FMT_NONE
  297. };
  298. int ret, i;
  299. layouts = ff_all_channel_counts();
  300. if ((ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0)
  301. return ret;
  302. for (i = 0; i < 2; i++) {
  303. layouts = ff_all_channel_counts();
  304. if ((ret = ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts)) < 0)
  305. return ret;
  306. }
  307. formats = ff_make_format_list(sample_fmts);
  308. if ((ret = ff_set_common_formats(ctx, formats)) < 0)
  309. return ret;
  310. formats = ff_all_samplerates();
  311. return ff_set_common_samplerates(ctx, formats);
  312. }
/**
 * Validate the channel setup and allocate the per-channel bookkeeping
 * arrays and input/IR FIFOs.  The output inherits sample rate, time
 * base and channel layout from the main input.
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AudioFIRContext *s = ctx->priv;

    /* The IR either matches the main input channel-for-channel or is a
     * single channel shared by all (one2many). */
    if (ctx->inputs[0]->channels != ctx->inputs[1]->channels &&
        ctx->inputs[1]->channels != 1) {
        av_log(ctx, AV_LOG_ERROR,
               "Second input must have same number of channels as first input or "
               "exactly 1 channel.\n");
        return AVERROR(EINVAL);
    }

    s->one2many = ctx->inputs[1]->channels == 1;
    outlink->sample_rate = ctx->inputs[0]->sample_rate;
    outlink->time_base = ctx->inputs[0]->time_base;
    outlink->channel_layout = ctx->inputs[0]->channel_layout;
    outlink->channels = ctx->inputs[0]->channels;

    /* FIFO sizes are initial hints; av_audio_fifo_write grows them. */
    s->fifo[0] = av_audio_fifo_alloc(ctx->inputs[0]->format, ctx->inputs[0]->channels, 1024);
    s->fifo[1] = av_audio_fifo_alloc(ctx->inputs[1]->format, ctx->inputs[1]->channels, 1024);
    if (!s->fifo[0] || !s->fifo[1])
        return AVERROR(ENOMEM);

    /* Per-channel pointer arrays; the buffers themselves are allocated
     * in convert_coeffs() once the partition sizes are known. */
    s->sum = av_calloc(outlink->channels, sizeof(*s->sum));
    s->coeff = av_calloc(ctx->inputs[1]->channels, sizeof(*s->coeff));
    s->block = av_calloc(ctx->inputs[0]->channels, sizeof(*s->block));
    s->rdft = av_calloc(outlink->channels, sizeof(*s->rdft));
    s->irdft = av_calloc(outlink->channels, sizeof(*s->irdft));
    if (!s->sum || !s->coeff || !s->block || !s->rdft || !s->irdft)
        return AVERROR(ENOMEM);

    s->nb_channels = outlink->channels;
    s->nb_coef_channels = ctx->inputs[1]->channels;
    s->want_skip = 1;       /* drop the first (latency) chunk */
    s->need_padding = 1;    /* flush the tail with silence at EOF */
    s->pts = AV_NOPTS_VALUE;

    return 0;
}
  347. static av_cold void uninit(AVFilterContext *ctx)
  348. {
  349. AudioFIRContext *s = ctx->priv;
  350. int ch;
  351. if (s->sum) {
  352. for (ch = 0; ch < s->nb_channels; ch++) {
  353. av_freep(&s->sum[ch]);
  354. }
  355. }
  356. av_freep(&s->sum);
  357. if (s->coeff) {
  358. for (ch = 0; ch < s->nb_coef_channels; ch++) {
  359. av_freep(&s->coeff[ch]);
  360. }
  361. }
  362. av_freep(&s->coeff);
  363. if (s->block) {
  364. for (ch = 0; ch < s->nb_channels; ch++) {
  365. av_freep(&s->block[ch]);
  366. }
  367. }
  368. av_freep(&s->block);
  369. if (s->rdft) {
  370. for (ch = 0; ch < s->nb_channels; ch++) {
  371. av_rdft_end(s->rdft[ch]);
  372. }
  373. }
  374. av_freep(&s->rdft);
  375. if (s->irdft) {
  376. for (ch = 0; ch < s->nb_channels; ch++) {
  377. av_rdft_end(s->irdft[ch]);
  378. }
  379. }
  380. av_freep(&s->irdft);
  381. av_frame_free(&s->in[0]);
  382. av_frame_free(&s->in[1]);
  383. av_frame_free(&s->buffer);
  384. av_audio_fifo_free(s->fifo[0]);
  385. av_audio_fifo_free(s->fifo[1]);
  386. av_freep(&s->fdsp);
  387. }
  388. static av_cold int init(AVFilterContext *ctx)
  389. {
  390. AudioFIRContext *s = ctx->priv;
  391. s->fcmul_add = fcmul_add_c;
  392. s->fdsp = avpriv_float_dsp_alloc(0);
  393. if (!s->fdsp)
  394. return AVERROR(ENOMEM);
  395. if (ARCH_X86)
  396. ff_afir_init_x86(s);
  397. return 0;
  398. }
static const AVFilterPad afir_inputs[] = {
    {
        /* Audio stream to be filtered. */
        .name         = "main",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
    },{
        /* Impulse-response (coefficient) stream. */
        .name         = "ir",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = read_ir,
    },
    { NULL }
};
static const AVFilterPad afir_outputs[] = {
    {
        /* Filtered audio; drives the pull side via request_frame. */
        .name          = "default",
        .type          = AVMEDIA_TYPE_AUDIO,
        .config_props  = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
#define OFFSET(x) offsetof(AudioFIRContext, x)

static const AVOption afir_options[] = {
    /* dry: gain applied to the input before the forward FFT */
    { "dry",    "set dry gain",     OFFSET(dry_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
    /* wet: gain applied to the filtered output */
    { "wet",    "set wet gain",     OFFSET(wet_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
    /* length: fraction of the supplied IR to use (the rest is zeroed) */
    { "length", "set IR length",    OFFSET(length),   AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
    /* again: normalize output by the IR power (auto gain) */
    { "again",  "enable auto gain", OFFSET(again),    AV_OPT_TYPE_BOOL,  {.i64=1}, 0, 1, AF },
    { NULL }
};
AVFILTER_DEFINE_CLASS(afir);

AVFilter ff_af_afir = {
    .name          = "afir",
    .description   = NULL_IF_CONFIG_SMALL("Apply Finite Impulse Response filter with supplied coefficients in 2nd stream."),
    .priv_size     = sizeof(AudioFIRContext),
    .priv_class    = &afir_class,
    .query_formats = query_formats,
    .init          = init,
    .uninit        = uninit,
    .inputs        = afir_inputs,
    .outputs       = afir_outputs,
    /* fir_channel runs as one slice job per channel. */
    .flags         = AVFILTER_FLAG_SLICE_THREADS,
};