You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

760 lines
23KB

  1. /*
  2. * Copyright (C) 2017 Paul B Mahol
  3. * Copyright (C) 2013-2015 Andreas Fuchs, Wolfgang Hrauda
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <math.h>
  21. #include "libavutil/avstring.h"
  22. #include "libavutil/channel_layout.h"
  23. #include "libavutil/float_dsp.h"
  24. #include "libavutil/intmath.h"
  25. #include "libavutil/opt.h"
  26. #include "libavcodec/avfft.h"
  27. #include "avfilter.h"
  28. #include "filters.h"
  29. #include "internal.h"
  30. #include "audio.h"
/* Processing mode for the 'type' option: direct time-domain convolution
 * or FFT-based (overlap-add) frequency-domain convolution. */
#define TIME_DOMAIN 0
#define FREQUENCY_DOMAIN 1

/* Layout of the HRIR side inputs ('hrir' option): one stereo stream per
 * mapped channel, or a single multichannel stream carrying all pairs. */
#define HRIR_STEREO 0
#define HRIR_MULTI 1
typedef struct HeadphoneContext {
    const AVClass *class;

    char *map;                      // 'map' option string: '|'-separated channel names
    int type;                       // TIME_DOMAIN or FREQUENCY_DOMAIN

    int lfe_channel;                // index of AV_CH_LOW_FREQUENCY in the main input, or < 0

    int have_hrirs;                 // set once convert_coeffs() has built the IR tables
    int eof_hrirs;                  // set once every HRIR input has hit EOF

    int ir_len;                     // longest queued IR length across all HRIR inputs, in samples
    int air_len;                    // ir_len rounded up to a power of two (and to 32 for SIMD)

    int nb_hrir_inputs;             // number of HRIR input pads (1 in HRIR_MULTI mode)

    int nb_irs;                     // number of channels successfully parsed from 'map'

    float gain;                     // 'gain' option, dB
    float lfe_gain, gain_lfe;       // 'lfe' option in dB, and its derived linear factor

    float *ringbuffer[2];           // per-ear history ring buffers (time-domain path)
    int write[2];                   // per-ear ring buffer write positions

    int buffer_length;              // ring buffer length, power of two
    int n_fft;                      // FFT size (frequency-domain path)
    int size;                       // 'size' option: samples consumed/produced per frame
    int hrir_fmt;                   // HRIR_STEREO or HRIR_MULTI

    float *data_ir[2];              // per-ear time-domain IRs, air_len floats per channel
    float *temp_src[2];             // per-ear scratch for the dot-product input
    FFTComplex *temp_fft[2];        // per-ear FFT input scratch
    FFTComplex *temp_afft[2];       // per-ear FFT accumulator scratch
    FFTContext *fft[2], *ifft[2];   // forward/inverse FFT contexts, one pair per ear
    FFTComplex *data_hrtf[2];       // per-ear frequency-domain HRTFs, n_fft bins per channel
    float (*scalarproduct_float)(const float *v1, const float *v2, int len);
    struct hrir_inputs {
        int     ir_len;             // queued IR length for this HRIR input
        int     eof;                // this HRIR input has reached EOF
    } hrir_in[64];
    uint64_t mapping[64];           // out_channel layout bit for each parsed map entry
} HeadphoneContext;
  67. static int parse_channel_name(const char *arg, uint64_t *rchannel)
  68. {
  69. uint64_t layout = av_get_channel_layout(arg);
  70. if (av_get_channel_layout_nb_channels(layout) != 1)
  71. return AVERROR(EINVAL);
  72. *rchannel = layout;
  73. return 0;
  74. }
  75. static void parse_map(AVFilterContext *ctx)
  76. {
  77. HeadphoneContext *s = ctx->priv;
  78. char *arg, *tokenizer, *p;
  79. uint64_t used_channels = 0;
  80. p = s->map;
  81. while ((arg = av_strtok(p, "|", &tokenizer))) {
  82. uint64_t out_channel;
  83. p = NULL;
  84. if (parse_channel_name(arg, &out_channel)) {
  85. av_log(ctx, AV_LOG_WARNING, "Failed to parse \'%s\' as channel name.\n", arg);
  86. continue;
  87. }
  88. if (used_channels & out_channel) {
  89. av_log(ctx, AV_LOG_WARNING, "Ignoring duplicate channel '%s'.\n", arg);
  90. continue;
  91. }
  92. used_channels |= out_channel;
  93. s->mapping[s->nb_irs] = out_channel;
  94. s->nb_irs++;
  95. }
  96. if (s->hrir_fmt == HRIR_MULTI)
  97. s->nb_hrir_inputs = 1;
  98. else
  99. s->nb_hrir_inputs = s->nb_irs;
  100. }
/* Per-frame job context shared by the two convolution worker jobs
 * (one job per output ear); each pointer indexes per-ear state by jobnr. */
typedef struct ThreadData {
    AVFrame *in, *out;          // input frame and stereo output frame
    int *write;                 // per-ear ring buffer write positions
    float **ir;                 // per-ear time-domain IR tables
    int *n_clippings;           // per-ear clipped-sample counters
    float **ringbuffer;         // per-ear history ring buffers
    float **temp_src;           // per-ear dot-product scratch (time domain)
    FFTComplex **temp_fft;      // per-ear FFT input scratch (frequency domain)
    FFTComplex **temp_afft;     // per-ear FFT accumulator scratch
} ThreadData;
/**
 * Time-domain convolution worker: renders one ear (jobnr 0 = left sample
 * of each stereo pair, jobnr 1 = right) of the output frame by direct
 * convolution of every input channel with its per-ear IR.
 */
static int headphone_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    HeadphoneContext *s = ctx->priv;
    ThreadData *td = arg;
    AVFrame *in = td->in, *out = td->out;
    int offset = jobnr;                        // interleaved stereo: job 0 writes even samples, job 1 odd
    int *write = &td->write[jobnr];
    const float *const ir = td->ir[jobnr];
    int *n_clippings = &td->n_clippings[jobnr];
    float *ringbuffer = td->ringbuffer[jobnr];
    float *temp_src = td->temp_src[jobnr];
    const int ir_len = s->ir_len;
    const int air_len = s->air_len;
    const float *src = (const float *)in->data[0];
    float *dst = (float *)out->data[0];
    const int in_channels = in->channels;
    const int buffer_length = s->buffer_length;
    // buffer_length is a power of two, so & modulo wraps ring indices
    const uint32_t modulo = (uint32_t)buffer_length - 1;
    float *buffer[64];
    int wr = *write;
    int read;
    int i, l;

    dst += offset;
    // one history ring per input channel, carved out of this ear's ringbuffer
    for (l = 0; l < in_channels; l++) {
        buffer[l] = ringbuffer + l * buffer_length;
    }

    for (i = 0; i < in->nb_samples; i++) {
        const float *cur_ir = ir;

        *dst = 0;
        // push the current sample of every channel into its history ring
        for (l = 0; l < in_channels; l++) {
            *(buffer[l] + wr) = src[l];
        }

        for (l = 0; l < in_channels; cur_ir += air_len, l++) {
            const float *const bptr = buffer[l];

            if (l == s->lfe_channel) {
                // LFE is mixed straight through with its own gain, no HRIR
                *dst += *(buffer[s->lfe_channel] + wr) * s->gain_lfe;
                continue;
            }

            // oldest sample still covered by the IR
            read = (wr - (ir_len - 1)) & modulo;

            if (read + ir_len < buffer_length) {
                // contiguous: one copy of the ir_len-sample window
                memcpy(temp_src, bptr + read, ir_len * sizeof(*temp_src));
            } else {
                // window wraps around the ring: copy tail then head.
                // NOTE(review): this path mixes air_len and ir_len
                // (air_len - (read % ir_len)) — looks inconsistent with the
                // contiguous branch; verify against upstream before changing.
                int len = FFMIN(air_len - (read % ir_len), buffer_length - read);

                memcpy(temp_src, bptr + read, len * sizeof(*temp_src));
                memcpy(temp_src + len, bptr, (air_len - len) * sizeof(*temp_src));
            }

            // length padded to a multiple of 32 to match SIMD dot products
            dst[0] += s->scalarproduct_float(cur_ir, temp_src, FFALIGN(ir_len, 32));
        }

        if (fabsf(dst[0]) > 1)
            n_clippings[0]++;

        dst += 2;             // next stereo frame, same ear
        src += in_channels;
        wr = (wr + 1) & modulo;
    }

    *write = wr;

    return 0;
}
/**
 * Frequency-domain convolution worker: renders one ear of the output frame
 * via FFT multiply-accumulate with the precomputed HRTFs, using the ring
 * buffer to carry the overlap-add tail (ir_len - 1 samples) between frames.
 */
static int headphone_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    HeadphoneContext *s = ctx->priv;
    ThreadData *td = arg;
    AVFrame *in = td->in, *out = td->out;
    int offset = jobnr;                        // job 0 writes even (left) samples, job 1 odd (right)
    int *write = &td->write[jobnr];
    FFTComplex *hrtf = s->data_hrtf[jobnr];
    int *n_clippings = &td->n_clippings[jobnr];
    float *ringbuffer = td->ringbuffer[jobnr];
    const int ir_len = s->ir_len;
    const float *src = (const float *)in->data[0];
    float *dst = (float *)out->data[0];
    const int in_channels = in->channels;
    const int buffer_length = s->buffer_length;
    // buffer_length is a power of two, so & modulo wraps ring indices
    const uint32_t modulo = (uint32_t)buffer_length - 1;
    FFTComplex *fft_in = s->temp_fft[jobnr];
    FFTComplex *fft_acc = s->temp_afft[jobnr];
    FFTContext *ifft = s->ifft[jobnr];
    FFTContext *fft = s->fft[jobnr];
    const int n_fft = s->n_fft;
    const float fft_scale = 1.0f / s->n_fft;   // av_fft is unnormalized; scale after inverse
    FFTComplex *hrtf_offset;
    int wr = *write;
    int n_read;
    int i, j;

    dst += offset;

    // seed the output with the overlap tail left by the previous frame,
    // clearing the ring slots as they are consumed
    n_read = FFMIN(ir_len, in->nb_samples);
    for (j = 0; j < n_read; j++) {
        dst[2 * j] = ringbuffer[wr];
        ringbuffer[wr] = 0.0;
        wr = (wr + 1) & modulo;
    }

    for (j = n_read; j < in->nb_samples; j++) {
        dst[2 * j] = 0;
    }

    memset(fft_acc, 0, sizeof(FFTComplex) * n_fft);

    for (i = 0; i < in_channels; i++) {
        if (i == s->lfe_channel) {
            // LFE is mixed straight through with its own gain, no HRTF
            for (j = 0; j < in->nb_samples; j++) {
                dst[2 * j] += src[i + j * in_channels] * s->gain_lfe;
            }
            continue;
        }

        offset = i * n_fft;                    // this channel's HRTF block
        hrtf_offset = hrtf + offset;

        // zero-padded FFT of this channel's samples
        memset(fft_in, 0, sizeof(FFTComplex) * n_fft);

        for (j = 0; j < in->nb_samples; j++) {
            fft_in[j].re = src[j * in_channels + i];
        }

        av_fft_permute(fft, fft_in);
        av_fft_calc(fft, fft_in);

        // accumulate complex product signal * HRTF across all channels
        for (j = 0; j < n_fft; j++) {
            const FFTComplex *hcomplex = hrtf_offset + j;
            const float re = fft_in[j].re;
            const float im = fft_in[j].im;

            fft_acc[j].re += re * hcomplex->re - im * hcomplex->im;
            fft_acc[j].im += re * hcomplex->im + im * hcomplex->re;
        }
    }

    // single inverse FFT of the summed spectrum
    av_fft_permute(ifft, fft_acc);
    av_fft_calc(ifft, fft_acc);

    for (j = 0; j < in->nb_samples; j++) {
        dst[2 * j] += fft_acc[j].re * fft_scale;
        if (fabsf(dst[2 * j]) > 1)
            n_clippings[0]++;
    }

    // stash the convolution tail for overlap-add into the next frame
    for (j = 0; j < ir_len - 1; j++) {
        int write_pos = (wr + j) & modulo;

        *(ringbuffer + write_pos) += fft_acc[in->nb_samples + j].re * fft_scale;
    }

    *write = wr;

    return 0;
}
  242. static int check_ir(AVFilterLink *inlink, int input_number)
  243. {
  244. AVFilterContext *ctx = inlink->dst;
  245. HeadphoneContext *s = ctx->priv;
  246. int ir_len, max_ir_len;
  247. ir_len = ff_inlink_queued_samples(inlink);
  248. max_ir_len = 65536;
  249. if (ir_len > max_ir_len) {
  250. av_log(ctx, AV_LOG_ERROR, "Too big length of IRs: %d > %d.\n", ir_len, max_ir_len);
  251. return AVERROR(EINVAL);
  252. }
  253. s->hrir_in[input_number].ir_len = ir_len;
  254. s->ir_len = FFMAX(ir_len, s->ir_len);
  255. return 0;
  256. }
  257. static int headphone_frame(HeadphoneContext *s, AVFrame *in, AVFilterLink *outlink)
  258. {
  259. AVFilterContext *ctx = outlink->src;
  260. int n_clippings[2] = { 0 };
  261. ThreadData td;
  262. AVFrame *out;
  263. out = ff_get_audio_buffer(outlink, in->nb_samples);
  264. if (!out) {
  265. av_frame_free(&in);
  266. return AVERROR(ENOMEM);
  267. }
  268. out->pts = in->pts;
  269. td.in = in; td.out = out; td.write = s->write;
  270. td.ir = s->data_ir; td.n_clippings = n_clippings;
  271. td.ringbuffer = s->ringbuffer; td.temp_src = s->temp_src;
  272. td.temp_fft = s->temp_fft;
  273. td.temp_afft = s->temp_afft;
  274. if (s->type == TIME_DOMAIN) {
  275. ctx->internal->execute(ctx, headphone_convolute, &td, NULL, 2);
  276. } else {
  277. ctx->internal->execute(ctx, headphone_fast_convolute, &td, NULL, 2);
  278. }
  279. emms_c();
  280. if (n_clippings[0] + n_clippings[1] > 0) {
  281. av_log(ctx, AV_LOG_WARNING, "%d of %d samples clipped. Please reduce gain.\n",
  282. n_clippings[0] + n_clippings[1], out->nb_samples * 2);
  283. }
  284. av_frame_free(&in);
  285. return ff_filter_frame(outlink, out);
  286. }
/**
 * Consume the queued HRIR samples from every side input and build the
 * per-ear convolution tables: reversed time-domain IRs (TIME_DOMAIN) or
 * forward-FFT'd HRTFs (FREQUENCY_DOMAIN). Also allocates all per-ear
 * buffers and FFT contexts. Called once, when all HRIR inputs hit EOF.
 *
 * @return 0 on success, negative AVERROR on allocation or consume failure
 */
static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
{
    struct HeadphoneContext *s = ctx->priv;
    const int ir_len = s->ir_len;
    int nb_input_channels = ctx->inputs[0]->channels;
    // 'gain' in dB minus 3 dB per input channel, converted to linear
    float gain_lin = expf((s->gain - 3 * nb_input_channels) / 20 * M_LN10);
    AVFrame *frame;
    int ret = 0;
    int n_fft;
    int i, j, k;

    // next power of two >= ir_len (and a multiple of 32 for the SIMD dot product)
    s->air_len = 1 << (32 - ff_clz(ir_len));
    if (s->type == TIME_DOMAIN) {
        s->air_len = FFALIGN(s->air_len, 32);
    }
    s->buffer_length = 1 << (32 - ff_clz(s->air_len));
    // FFT must hold one frame plus the IR without circular aliasing
    s->n_fft = n_fft = 1 << (32 - ff_clz(ir_len + s->size));

    if (s->type == FREQUENCY_DOMAIN) {
        s->fft[0] = av_fft_init(av_log2(s->n_fft), 0);
        s->fft[1] = av_fft_init(av_log2(s->n_fft), 0);
        s->ifft[0] = av_fft_init(av_log2(s->n_fft), 1);
        s->ifft[1] = av_fft_init(av_log2(s->n_fft), 1);

        if (!s->fft[0] || !s->fft[1] || !s->ifft[0] || !s->ifft[1]) {
            av_log(ctx, AV_LOG_ERROR, "Unable to create FFT contexts of size %d.\n", s->n_fft);
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    if (s->type == TIME_DOMAIN) {
        // one history ring per input channel and ear
        s->ringbuffer[0] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
        s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
    } else {
        // frequency domain only needs the overlap-add tail per ear
        s->ringbuffer[0] = av_calloc(s->buffer_length, sizeof(float));
        s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float));
        s->temp_fft[0] = av_calloc(s->n_fft, sizeof(FFTComplex));
        s->temp_fft[1] = av_calloc(s->n_fft, sizeof(FFTComplex));
        s->temp_afft[0] = av_calloc(s->n_fft, sizeof(FFTComplex));
        s->temp_afft[1] = av_calloc(s->n_fft, sizeof(FFTComplex));
        if (!s->temp_fft[0] || !s->temp_fft[1] ||
            !s->temp_afft[0] || !s->temp_afft[1]) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    if (!s->ringbuffer[0] || !s->ringbuffer[1]) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    if (s->type == TIME_DOMAIN) {
        s->temp_src[0] = av_calloc(s->air_len, sizeof(float));
        s->temp_src[1] = av_calloc(s->air_len, sizeof(float));

        s->data_ir[0] = av_calloc(nb_input_channels * s->air_len, sizeof(*s->data_ir[0]));
        s->data_ir[1] = av_calloc(nb_input_channels * s->air_len, sizeof(*s->data_ir[1]));
        if (!s->data_ir[0] || !s->data_ir[1] || !s->temp_src[0] || !s->temp_src[1]) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    } else {
        s->data_hrtf[0] = av_calloc(n_fft, sizeof(*s->data_hrtf[0]) * nb_input_channels);
        s->data_hrtf[1] = av_calloc(n_fft, sizeof(*s->data_hrtf[1]) * nb_input_channels);
        if (!s->data_hrtf[0] || !s->data_hrtf[1]) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    // drain each HRIR input; frame is freed by the loop increment each pass
    // (activate() guarantees len samples are queued, so consume should succeed)
    for (i = 0; i < s->nb_hrir_inputs; av_frame_free(&frame), i++) {
        int len = s->hrir_in[i].ir_len;
        float *ptr;

        ret = ff_inlink_consume_samples(ctx->inputs[i + 1], len, len, &frame);
        if (ret < 0)
            goto fail;
        ptr = (float *)frame->extended_data[0];

        if (s->hrir_fmt == HRIR_STEREO) {
            // stereo HRIR: input i carries the L/R pair for mapping[i]
            int idx = av_get_channel_layout_channel_index(inlink->channel_layout,
                                                          s->mapping[i]);
            if (idx < 0)
                continue;   // mapped channel absent from the main input layout
            if (s->type == TIME_DOMAIN) {
                float *data_ir_l = s->data_ir[0] + idx * s->air_len;
                float *data_ir_r = s->data_ir[1] + idx * s->air_len;

                // IRs are stored time-reversed so the dot product is a convolution
                for (j = 0; j < len; j++) {
                    data_ir_l[j] = ptr[len * 2 - j * 2 - 2] * gain_lin;
                    data_ir_r[j] = ptr[len * 2 - j * 2 - 1] * gain_lin;
                }
            } else {
                FFTComplex *fft_in_l = s->data_hrtf[0] + idx * n_fft;
                FFTComplex *fft_in_r = s->data_hrtf[1] + idx * n_fft;

                for (j = 0; j < len; j++) {
                    fft_in_l[j].re = ptr[j * 2    ] * gain_lin;
                    fft_in_r[j].re = ptr[j * 2 + 1] * gain_lin;
                }

                // pre-transform to HRTFs once; the tail of each n_fft block stays zero
                av_fft_permute(s->fft[0], fft_in_l);
                av_fft_calc(s->fft[0], fft_in_l);
                av_fft_permute(s->fft[0], fft_in_r);
                av_fft_calc(s->fft[0], fft_in_r);
            }
        } else {
            // multichannel HRIR: channel pair (2k, 2k+1) belongs to mapping[k]
            int I, N = ctx->inputs[1]->channels;

            for (k = 0; k < N / 2; k++) {
                int idx = av_get_channel_layout_channel_index(inlink->channel_layout,
                                                              s->mapping[k]);
                if (idx < 0)
                    continue;

                I = k * 2;
                if (s->type == TIME_DOMAIN) {
                    float *data_ir_l = s->data_ir[0] + idx * s->air_len;
                    float *data_ir_r = s->data_ir[1] + idx * s->air_len;

                    // time-reversed, interleaved stride N
                    for (j = 0; j < len; j++) {
                        data_ir_l[j] = ptr[len * N - j * N - N + I    ] * gain_lin;
                        data_ir_r[j] = ptr[len * N - j * N - N + I + 1] * gain_lin;
                    }
                } else {
                    FFTComplex *fft_in_l = s->data_hrtf[0] + idx * n_fft;
                    FFTComplex *fft_in_r = s->data_hrtf[1] + idx * n_fft;

                    for (j = 0; j < len; j++) {
                        fft_in_l[j].re = ptr[j * N + I    ] * gain_lin;
                        fft_in_r[j].re = ptr[j * N + I + 1] * gain_lin;
                    }

                    av_fft_permute(s->fft[0], fft_in_l);
                    av_fft_calc(s->fft[0], fft_in_l);
                    av_fft_permute(s->fft[0], fft_in_r);
                    av_fft_calc(s->fft[0], fft_in_r);
                }
            }
        }
    }

    s->have_hrirs = 1;

fail:
    return ret;
}
/**
 * Filter activation callback (ff_inlink/ff_outlink protocol).
 *
 * Phase 1: wait until every HRIR side input has reached EOF, then build
 * the convolution tables via convert_coeffs(). Phase 2: consume fixed-size
 * chunks of s->size samples from the main input and emit convolved frames.
 */
static int activate(AVFilterContext *ctx)
{
    HeadphoneContext *s = ctx->priv;
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *in = NULL;
    int i, ret;

    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
    if (!s->eof_hrirs) {
        int eof = 1;
        for (i = 0; i < s->nb_hrir_inputs; i++) {
            AVFilterLink *input = ctx->inputs[i + 1];

            if (s->hrir_in[i].eof)
                continue;

            // track the longest IR seen so far on this input
            if ((ret = check_ir(input, i)) < 0)
                return ret;

            if (ff_outlink_get_status(input) == AVERROR_EOF) {
                // EOF with zero samples means the HRIR stream was empty
                if (!ff_inlink_queued_samples(input)) {
                    av_log(ctx, AV_LOG_ERROR, "No samples provided for "
                           "HRIR stream %d.\n", i);
                    return AVERROR_INVALIDDATA;
                }
                s->hrir_in[i].eof = 1;
            } else {
                // keep pulling HRIR data while downstream wants output
                if (ff_outlink_frame_wanted(ctx->outputs[0]))
                    ff_inlink_request_frame(input);
                eof = 0;
            }
        }
        if (!eof)
            return 0;          // still waiting on at least one HRIR input
        s->eof_hrirs = 1;

        ret = convert_coeffs(ctx, inlink);
        if (ret < 0)
            return ret;
    } else if (!s->have_hrirs)
        return AVERROR_EOF;    // coefficient setup failed earlier; nothing to do

    // fixed-size chunks: min == max == s->size samples per frame
    if ((ret = ff_inlink_consume_samples(ctx->inputs[0], s->size, s->size, &in)) > 0) {
        ret = headphone_frame(s, in, outlink);
        if (ret < 0)
            return ret;
    }

    if (ret < 0)
        return ret;

    FF_FILTER_FORWARD_STATUS(ctx->inputs[0], ctx->outputs[0]);
    if (ff_outlink_frame_wanted(ctx->outputs[0]))
        ff_inlink_request_frame(ctx->inputs[0]);

    return 0;
}
  465. static int query_formats(AVFilterContext *ctx)
  466. {
  467. struct HeadphoneContext *s = ctx->priv;
  468. AVFilterFormats *formats = NULL;
  469. AVFilterChannelLayouts *layouts = NULL;
  470. AVFilterChannelLayouts *stereo_layout = NULL;
  471. AVFilterChannelLayouts *hrir_layouts = NULL;
  472. int ret, i;
  473. ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
  474. if (ret)
  475. return ret;
  476. ret = ff_set_common_formats(ctx, formats);
  477. if (ret)
  478. return ret;
  479. layouts = ff_all_channel_layouts();
  480. if (!layouts)
  481. return AVERROR(ENOMEM);
  482. ret = ff_channel_layouts_ref(layouts, &ctx->inputs[0]->outcfg.channel_layouts);
  483. if (ret)
  484. return ret;
  485. ret = ff_add_channel_layout(&stereo_layout, AV_CH_LAYOUT_STEREO);
  486. if (ret)
  487. return ret;
  488. ret = ff_channel_layouts_ref(stereo_layout, &ctx->outputs[0]->incfg.channel_layouts);
  489. if (ret)
  490. return ret;
  491. if (s->hrir_fmt == HRIR_MULTI) {
  492. hrir_layouts = ff_all_channel_counts();
  493. if (!hrir_layouts)
  494. return AVERROR(ENOMEM);
  495. ret = ff_channel_layouts_ref(hrir_layouts, &ctx->inputs[1]->outcfg.channel_layouts);
  496. if (ret)
  497. return ret;
  498. } else {
  499. for (i = 1; i <= s->nb_hrir_inputs; i++) {
  500. ret = ff_channel_layouts_ref(stereo_layout, &ctx->inputs[i]->outcfg.channel_layouts);
  501. if (ret)
  502. return ret;
  503. }
  504. }
  505. formats = ff_all_samplerates();
  506. if (!formats)
  507. return AVERROR(ENOMEM);
  508. return ff_set_common_samplerates(ctx, formats);
  509. }
  510. static int config_input(AVFilterLink *inlink)
  511. {
  512. AVFilterContext *ctx = inlink->dst;
  513. HeadphoneContext *s = ctx->priv;
  514. if (s->nb_irs < inlink->channels) {
  515. av_log(ctx, AV_LOG_ERROR, "Number of HRIRs must be >= %d.\n", inlink->channels);
  516. return AVERROR(EINVAL);
  517. }
  518. s->lfe_channel = av_get_channel_layout_channel_index(inlink->channel_layout,
  519. AV_CH_LOW_FREQUENCY);
  520. return 0;
  521. }
  522. static av_cold int init(AVFilterContext *ctx)
  523. {
  524. HeadphoneContext *s = ctx->priv;
  525. int i, ret;
  526. AVFilterPad pad = {
  527. .name = "in0",
  528. .type = AVMEDIA_TYPE_AUDIO,
  529. .config_props = config_input,
  530. };
  531. if ((ret = ff_insert_inpad(ctx, 0, &pad)) < 0)
  532. return ret;
  533. if (!s->map) {
  534. av_log(ctx, AV_LOG_ERROR, "Valid mapping must be set.\n");
  535. return AVERROR(EINVAL);
  536. }
  537. parse_map(ctx);
  538. for (i = 0; i < s->nb_hrir_inputs; i++) {
  539. char *name = av_asprintf("hrir%d", i);
  540. AVFilterPad pad = {
  541. .name = name,
  542. .type = AVMEDIA_TYPE_AUDIO,
  543. };
  544. if (!name)
  545. return AVERROR(ENOMEM);
  546. if ((ret = ff_insert_inpad(ctx, i + 1, &pad)) < 0) {
  547. av_freep(&pad.name);
  548. return ret;
  549. }
  550. }
  551. if (s->type == TIME_DOMAIN) {
  552. AVFloatDSPContext *fdsp = avpriv_float_dsp_alloc(0);
  553. if (!fdsp)
  554. return AVERROR(ENOMEM);
  555. s->scalarproduct_float = fdsp->scalarproduct_float;
  556. av_free(fdsp);
  557. }
  558. return 0;
  559. }
  560. static int config_output(AVFilterLink *outlink)
  561. {
  562. AVFilterContext *ctx = outlink->src;
  563. HeadphoneContext *s = ctx->priv;
  564. AVFilterLink *inlink = ctx->inputs[0];
  565. if (s->hrir_fmt == HRIR_MULTI) {
  566. AVFilterLink *hrir_link = ctx->inputs[1];
  567. if (hrir_link->channels < inlink->channels * 2) {
  568. av_log(ctx, AV_LOG_ERROR, "Number of channels in HRIR stream must be >= %d.\n", inlink->channels * 2);
  569. return AVERROR(EINVAL);
  570. }
  571. }
  572. s->gain_lfe = expf((s->gain - 3 * inlink->channels + s->lfe_gain) / 20 * M_LN10);
  573. return 0;
  574. }
  575. static av_cold void uninit(AVFilterContext *ctx)
  576. {
  577. HeadphoneContext *s = ctx->priv;
  578. av_fft_end(s->ifft[0]);
  579. av_fft_end(s->ifft[1]);
  580. av_fft_end(s->fft[0]);
  581. av_fft_end(s->fft[1]);
  582. av_freep(&s->data_ir[0]);
  583. av_freep(&s->data_ir[1]);
  584. av_freep(&s->ringbuffer[0]);
  585. av_freep(&s->ringbuffer[1]);
  586. av_freep(&s->temp_src[0]);
  587. av_freep(&s->temp_src[1]);
  588. av_freep(&s->temp_fft[0]);
  589. av_freep(&s->temp_fft[1]);
  590. av_freep(&s->temp_afft[0]);
  591. av_freep(&s->temp_afft[1]);
  592. av_freep(&s->data_hrtf[0]);
  593. av_freep(&s->data_hrtf[1]);
  594. for (unsigned i = 1; i < ctx->nb_inputs; i++)
  595. av_freep(&ctx->input_pads[i].name);
  596. }
#define OFFSET(x) offsetof(HeadphoneContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

/* User-facing options; defaults: frequency-domain processing, stereo HRIRs,
 * 1024-sample frames, 0 dB gain. */
static const AVOption headphone_options[] = {
    { "map",       "set channels convolution mappings",  OFFSET(map),      AV_OPT_TYPE_STRING, {.str=NULL},            .flags = FLAGS },
    { "gain",      "set gain in dB",                     OFFSET(gain),     AV_OPT_TYPE_FLOAT,  {.dbl=0},     -20,  40, .flags = FLAGS },
    { "lfe",       "set lfe gain in dB",                 OFFSET(lfe_gain), AV_OPT_TYPE_FLOAT,  {.dbl=0},     -20,  40, .flags = FLAGS },
    { "type",      "set processing",                     OFFSET(type),     AV_OPT_TYPE_INT,    {.i64=1},       0,   1, .flags = FLAGS, "type" },
    { "time",      "time domain",                        0,                AV_OPT_TYPE_CONST,  {.i64=0},       0,   0, .flags = FLAGS, "type" },
    { "freq",      "frequency domain",                   0,                AV_OPT_TYPE_CONST,  {.i64=1},       0,   0, .flags = FLAGS, "type" },
    { "size",      "set frame size",                     OFFSET(size),     AV_OPT_TYPE_INT,    {.i64=1024}, 1024, 96000, .flags = FLAGS },
    { "hrir",      "set hrir format",                    OFFSET(hrir_fmt), AV_OPT_TYPE_INT,    {.i64=HRIR_STEREO}, 0, 1, .flags = FLAGS, "hrir" },
    { "stereo",    "hrir files have exactly 2 channels", 0,                AV_OPT_TYPE_CONST,  {.i64=HRIR_STEREO}, 0, 0, .flags = FLAGS, "hrir" },
    { "multich",   "single multichannel hrir file",      0,                AV_OPT_TYPE_CONST,  {.i64=HRIR_MULTI},  0, 0, .flags = FLAGS, "hrir" },
    { NULL }
};
AVFILTER_DEFINE_CLASS(headphone);

/* Single stereo output; channel-count validation and LFE gain derivation
 * happen in config_output. */
static const AVFilterPad outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_AUDIO,
        .config_props  = config_output,
    },
    { NULL }
};
/* Filter registration: inputs are created dynamically in init()
 * (hence .inputs = NULL plus AVFILTER_FLAG_DYNAMIC_INPUTS), and the two
 * per-ear convolution jobs run via slice threading. */
AVFilter ff_af_headphone = {
    .name          = "headphone",
    .description   = NULL_IF_CONFIG_SMALL("Apply headphone binaural spatialization with HRTFs in additional streams."),
    .priv_size     = sizeof(HeadphoneContext),
    .priv_class    = &headphone_class,
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .activate      = activate,
    .inputs        = NULL,
    .outputs       = outputs,
    .flags         = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_DYNAMIC_INPUTS,
};