You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

543 lines
18KB

  1. /*
  2. * Copyright (c) 2016 Paul B Mahol
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * SpectrumSynth filter
  23. * @todo support float pixel format
  24. */
  25. #include "libavcodec/avfft.h"
  26. #include "libavutil/avassert.h"
  27. #include "libavutil/channel_layout.h"
  28. #include "libavutil/ffmath.h"
  29. #include "libavutil/opt.h"
  30. #include "libavutil/parseutils.h"
  31. #include "avfilter.h"
  32. #include "formats.h"
  33. #include "audio.h"
  34. #include "video.h"
  35. #include "filters.h"
  36. #include "internal.h"
  37. #include "window_func.h"
  /** How a magnitude pixel value is converted into a bin amplitude. */
  enum MagnitudeScale {
      LINEAR,     ///< normalized pixel value is used directly as the amplitude
      LOG,        ///< normalized pixel value selects a power of ten (see the read*_fft_bin helpers)
      NB_SCALES   ///< number of scales; upper bound for the "scale" option
  };

  /** Which picture columns (or rows) of an input frame pair are consumed. */
  enum SlideMode {
      REPLACE,    ///< consume one position per frame pair, advancing and wrapping around
      SCROLL,     ///< consume only the last position (xend - 1)
      FULLFRAME,  ///< consume every position of the frame pair
      RSCROLL,    ///< consume only the first position (0)
      NB_SLIDES   ///< number of slide modes; upper bound for the "slide" option
  };

  /** Picture axis along which the frequency bins are laid out. */
  enum Orientation {
      VERTICAL,        ///< bins run along the picture height, positions along the width
      HORIZONTAL,      ///< bins run along the picture width, positions along the height
      NB_ORIENTATIONS  ///< number of orientations; upper bound for the "orientation" option
  };
  41. typedef struct SpectrumSynthContext {
  42. const AVClass *class;
  43. int sample_rate;
  44. int channels;
  45. int scale;
  46. int sliding;
  47. int win_func;
  48. float overlap;
  49. int orientation;
  50. AVFrame *magnitude, *phase;
  51. FFTContext *fft; ///< Fast Fourier Transform context
  52. int fft_bits; ///< number of bits (FFT window size = 1<<fft_bits)
  53. FFTComplex **fft_data; ///< bins holder for each (displayed) channels
  54. int win_size;
  55. int size;
  56. int nb_freq;
  57. int hop_size;
  58. int start, end;
  59. int xpos;
  60. int xend;
  61. int64_t pts;
  62. float factor;
  63. AVFrame *buffer;
  64. float *window_func_lut; ///< Window function LUT
  65. } SpectrumSynthContext;
  66. #define OFFSET(x) offsetof(SpectrumSynthContext, x)
  67. #define A AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
  68. #define V AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  69. static const AVOption spectrumsynth_options[] = {
  70. { "sample_rate", "set sample rate", OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = 44100}, 15, INT_MAX, A },
  71. { "channels", "set channels", OFFSET(channels), AV_OPT_TYPE_INT, {.i64 = 1}, 1, 8, A },
  72. { "scale", "set input amplitude scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64 = LOG}, 0, NB_SCALES-1, V, "scale" },
  73. { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, V, "scale" },
  74. { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG}, 0, 0, V, "scale" },
  75. { "slide", "set input sliding mode", OFFSET(sliding), AV_OPT_TYPE_INT, {.i64 = FULLFRAME}, 0, NB_SLIDES-1, V, "slide" },
  76. { "replace", "consume old columns with new", 0, AV_OPT_TYPE_CONST, {.i64=REPLACE}, 0, 0, V, "slide" },
  77. { "scroll", "consume only most right column", 0, AV_OPT_TYPE_CONST, {.i64=SCROLL}, 0, 0, V, "slide" },
  78. { "fullframe", "consume full frames", 0, AV_OPT_TYPE_CONST, {.i64=FULLFRAME}, 0, 0, V, "slide" },
  79. { "rscroll", "consume only most left column", 0, AV_OPT_TYPE_CONST, {.i64=RSCROLL}, 0, 0, V, "slide" },
  80. { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = 0}, 0, NB_WFUNC-1, A, "win_func" },
  81. { "rect", "Rectangular", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT}, 0, 0, A, "win_func" },
  82. { "bartlett", "Bartlett", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, A, "win_func" },
  83. { "hann", "Hann", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING}, 0, 0, A, "win_func" },
  84. { "hanning", "Hanning", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING}, 0, 0, A, "win_func" },
  85. { "hamming", "Hamming", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING}, 0, 0, A, "win_func" },
  86. { "sine", "Sine", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE}, 0, 0, A, "win_func" },
  87. { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, A },
  88. { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, V, "orientation" },
  89. { "vertical", NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL}, 0, 0, V, "orientation" },
  90. { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, V, "orientation" },
  91. { NULL }
  92. };
  93. AVFILTER_DEFINE_CLASS(spectrumsynth);
  94. static int query_formats(AVFilterContext *ctx)
  95. {
  96. SpectrumSynthContext *s = ctx->priv;
  97. AVFilterFormats *formats = NULL;
  98. AVFilterChannelLayouts *layout = NULL;
  99. AVFilterLink *magnitude = ctx->inputs[0];
  100. AVFilterLink *phase = ctx->inputs[1];
  101. AVFilterLink *outlink = ctx->outputs[0];
  102. static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
  103. static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16,
  104. AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
  105. AV_PIX_FMT_YUV444P16, AV_PIX_FMT_NONE };
  106. int ret, sample_rates[] = { 48000, -1 };
  107. formats = ff_make_format_list(sample_fmts);
  108. if ((ret = ff_formats_ref (formats, &outlink->incfg.formats )) < 0 ||
  109. (ret = ff_add_channel_layout (&layout, FF_COUNT2LAYOUT(s->channels))) < 0 ||
  110. (ret = ff_channel_layouts_ref (layout , &outlink->incfg.channel_layouts)) < 0)
  111. return ret;
  112. sample_rates[0] = s->sample_rate;
  113. formats = ff_make_format_list(sample_rates);
  114. if (!formats)
  115. return AVERROR(ENOMEM);
  116. if ((ret = ff_formats_ref(formats, &outlink->incfg.samplerates)) < 0)
  117. return ret;
  118. formats = ff_make_format_list(pix_fmts);
  119. if (!formats)
  120. return AVERROR(ENOMEM);
  121. if ((ret = ff_formats_ref(formats, &magnitude->outcfg.formats)) < 0)
  122. return ret;
  123. formats = ff_make_format_list(pix_fmts);
  124. if (!formats)
  125. return AVERROR(ENOMEM);
  126. if ((ret = ff_formats_ref(formats, &phase->outcfg.formats)) < 0)
  127. return ret;
  128. return 0;
  129. }
  130. static int config_output(AVFilterLink *outlink)
  131. {
  132. AVFilterContext *ctx = outlink->src;
  133. SpectrumSynthContext *s = ctx->priv;
  134. int width = ctx->inputs[0]->w;
  135. int height = ctx->inputs[0]->h;
  136. AVRational time_base = ctx->inputs[0]->time_base;
  137. AVRational frame_rate = ctx->inputs[0]->frame_rate;
  138. int i, ch, fft_bits;
  139. float factor, overlap;
  140. outlink->sample_rate = s->sample_rate;
  141. outlink->time_base = (AVRational){1, s->sample_rate};
  142. if (width != ctx->inputs[1]->w ||
  143. height != ctx->inputs[1]->h) {
  144. av_log(ctx, AV_LOG_ERROR,
  145. "Magnitude and Phase sizes differ (%dx%d vs %dx%d).\n",
  146. width, height,
  147. ctx->inputs[1]->w, ctx->inputs[1]->h);
  148. return AVERROR_INVALIDDATA;
  149. } else if (av_cmp_q(time_base, ctx->inputs[1]->time_base) != 0) {
  150. av_log(ctx, AV_LOG_ERROR,
  151. "Magnitude and Phase time bases differ (%d/%d vs %d/%d).\n",
  152. time_base.num, time_base.den,
  153. ctx->inputs[1]->time_base.num,
  154. ctx->inputs[1]->time_base.den);
  155. return AVERROR_INVALIDDATA;
  156. } else if (av_cmp_q(frame_rate, ctx->inputs[1]->frame_rate) != 0) {
  157. av_log(ctx, AV_LOG_ERROR,
  158. "Magnitude and Phase framerates differ (%d/%d vs %d/%d).\n",
  159. frame_rate.num, frame_rate.den,
  160. ctx->inputs[1]->frame_rate.num,
  161. ctx->inputs[1]->frame_rate.den);
  162. return AVERROR_INVALIDDATA;
  163. }
  164. s->size = s->orientation == VERTICAL ? height / s->channels : width / s->channels;
  165. s->xend = s->orientation == VERTICAL ? width : height;
  166. for (fft_bits = 1; 1 << fft_bits < 2 * s->size; fft_bits++);
  167. s->win_size = 1 << fft_bits;
  168. s->nb_freq = 1 << (fft_bits - 1);
  169. s->fft = av_fft_init(fft_bits, 1);
  170. if (!s->fft) {
  171. av_log(ctx, AV_LOG_ERROR, "Unable to create FFT context. "
  172. "The window size might be too high.\n");
  173. return AVERROR(EINVAL);
  174. }
  175. s->fft_data = av_calloc(s->channels, sizeof(*s->fft_data));
  176. if (!s->fft_data)
  177. return AVERROR(ENOMEM);
  178. for (ch = 0; ch < s->channels; ch++) {
  179. s->fft_data[ch] = av_calloc(s->win_size, sizeof(**s->fft_data));
  180. if (!s->fft_data[ch])
  181. return AVERROR(ENOMEM);
  182. }
  183. s->buffer = ff_get_audio_buffer(outlink, s->win_size * 2);
  184. if (!s->buffer)
  185. return AVERROR(ENOMEM);
  186. /* pre-calc windowing function */
  187. s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size,
  188. sizeof(*s->window_func_lut));
  189. if (!s->window_func_lut)
  190. return AVERROR(ENOMEM);
  191. generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap);
  192. if (s->overlap == 1)
  193. s->overlap = overlap;
  194. s->hop_size = (1 - s->overlap) * s->win_size;
  195. for (factor = 0, i = 0; i < s->win_size; i++) {
  196. factor += s->window_func_lut[i] * s->window_func_lut[i];
  197. }
  198. s->factor = (factor / s->win_size) / FFMAX(1 / (1 - s->overlap) - 1, 1);
  199. return 0;
  200. }
  201. static void read16_fft_bin(SpectrumSynthContext *s,
  202. int x, int y, int f, int ch)
  203. {
  204. const int m_linesize = s->magnitude->linesize[0];
  205. const int p_linesize = s->phase->linesize[0];
  206. const uint16_t *m = (uint16_t *)(s->magnitude->data[0] + y * m_linesize);
  207. const uint16_t *p = (uint16_t *)(s->phase->data[0] + y * p_linesize);
  208. float magnitude, phase;
  209. switch (s->scale) {
  210. case LINEAR:
  211. magnitude = m[x] / (double)UINT16_MAX;
  212. break;
  213. case LOG:
  214. magnitude = ff_exp10(((m[x] / (double)UINT16_MAX) - 1.) * 6.);
  215. break;
  216. default:
  217. av_assert0(0);
  218. }
  219. phase = ((p[x] / (double)UINT16_MAX) * 2. - 1.) * M_PI;
  220. s->fft_data[ch][f].re = magnitude * cos(phase);
  221. s->fft_data[ch][f].im = magnitude * sin(phase);
  222. }
  223. static void read8_fft_bin(SpectrumSynthContext *s,
  224. int x, int y, int f, int ch)
  225. {
  226. const int m_linesize = s->magnitude->linesize[0];
  227. const int p_linesize = s->phase->linesize[0];
  228. const uint8_t *m = (uint8_t *)(s->magnitude->data[0] + y * m_linesize);
  229. const uint8_t *p = (uint8_t *)(s->phase->data[0] + y * p_linesize);
  230. float magnitude, phase;
  231. switch (s->scale) {
  232. case LINEAR:
  233. magnitude = m[x] / (double)UINT8_MAX;
  234. break;
  235. case LOG:
  236. magnitude = ff_exp10(((m[x] / (double)UINT8_MAX) - 1.) * 6.);
  237. break;
  238. default:
  239. av_assert0(0);
  240. }
  241. phase = ((p[x] / (double)UINT8_MAX) * 2. - 1.) * M_PI;
  242. s->fft_data[ch][f].re = magnitude * cos(phase);
  243. s->fft_data[ch][f].im = magnitude * sin(phase);
  244. }
  245. static void read_fft_data(AVFilterContext *ctx, int x, int h, int ch)
  246. {
  247. SpectrumSynthContext *s = ctx->priv;
  248. AVFilterLink *inlink = ctx->inputs[0];
  249. int start = h * (s->channels - ch) - 1;
  250. int end = h * (s->channels - ch - 1);
  251. int y, f;
  252. switch (s->orientation) {
  253. case VERTICAL:
  254. switch (inlink->format) {
  255. case AV_PIX_FMT_YUV444P16:
  256. case AV_PIX_FMT_GRAY16:
  257. for (y = start, f = 0; y >= end; y--, f++) {
  258. read16_fft_bin(s, x, y, f, ch);
  259. }
  260. break;
  261. case AV_PIX_FMT_YUVJ444P:
  262. case AV_PIX_FMT_YUV444P:
  263. case AV_PIX_FMT_GRAY8:
  264. for (y = start, f = 0; y >= end; y--, f++) {
  265. read8_fft_bin(s, x, y, f, ch);
  266. }
  267. break;
  268. }
  269. break;
  270. case HORIZONTAL:
  271. switch (inlink->format) {
  272. case AV_PIX_FMT_YUV444P16:
  273. case AV_PIX_FMT_GRAY16:
  274. for (y = end, f = 0; y <= start; y++, f++) {
  275. read16_fft_bin(s, y, x, f, ch);
  276. }
  277. break;
  278. case AV_PIX_FMT_YUVJ444P:
  279. case AV_PIX_FMT_YUV444P:
  280. case AV_PIX_FMT_GRAY8:
  281. for (y = end, f = 0; y <= start; y++, f++) {
  282. read8_fft_bin(s, y, x, f, ch);
  283. }
  284. break;
  285. }
  286. break;
  287. }
  288. }
  289. static void synth_window(AVFilterContext *ctx, int x)
  290. {
  291. SpectrumSynthContext *s = ctx->priv;
  292. const int h = s->size;
  293. int nb = s->win_size;
  294. int y, f, ch;
  295. for (ch = 0; ch < s->channels; ch++) {
  296. read_fft_data(ctx, x, h, ch);
  297. for (y = h; y <= s->nb_freq; y++) {
  298. s->fft_data[ch][y].re = 0;
  299. s->fft_data[ch][y].im = 0;
  300. }
  301. for (y = s->nb_freq + 1, f = s->nb_freq - 1; y < nb; y++, f--) {
  302. s->fft_data[ch][y].re = s->fft_data[ch][f].re;
  303. s->fft_data[ch][y].im = -s->fft_data[ch][f].im;
  304. }
  305. av_fft_permute(s->fft, s->fft_data[ch]);
  306. av_fft_calc(s->fft, s->fft_data[ch]);
  307. }
  308. }
  309. static int try_push_frame(AVFilterContext *ctx, int x)
  310. {
  311. SpectrumSynthContext *s = ctx->priv;
  312. AVFilterLink *outlink = ctx->outputs[0];
  313. const float factor = s->factor;
  314. int ch, n, i, ret;
  315. int start, end;
  316. AVFrame *out;
  317. synth_window(ctx, x);
  318. for (ch = 0; ch < s->channels; ch++) {
  319. float *buf = (float *)s->buffer->extended_data[ch];
  320. int j, k;
  321. start = s->start;
  322. end = s->end;
  323. k = end;
  324. for (i = 0, j = start; j < k && i < s->win_size; i++, j++) {
  325. buf[j] += s->fft_data[ch][i].re;
  326. }
  327. for (; i < s->win_size; i++, j++) {
  328. buf[j] = s->fft_data[ch][i].re;
  329. }
  330. start += s->hop_size;
  331. end = j;
  332. if (start >= s->win_size) {
  333. start -= s->win_size;
  334. end -= s->win_size;
  335. if (ch == s->channels - 1) {
  336. float *dst;
  337. int c;
  338. out = ff_get_audio_buffer(outlink, s->win_size);
  339. if (!out) {
  340. av_frame_free(&s->magnitude);
  341. av_frame_free(&s->phase);
  342. return AVERROR(ENOMEM);
  343. }
  344. out->pts = s->pts;
  345. s->pts += s->win_size;
  346. for (c = 0; c < s->channels; c++) {
  347. dst = (float *)out->extended_data[c];
  348. buf = (float *)s->buffer->extended_data[c];
  349. for (n = 0; n < s->win_size; n++) {
  350. dst[n] = buf[n] * factor;
  351. }
  352. memmove(buf, buf + s->win_size, s->win_size * 4);
  353. }
  354. ret = ff_filter_frame(outlink, out);
  355. if (ret < 0)
  356. return ret;
  357. }
  358. }
  359. }
  360. s->start = start;
  361. s->end = end;
  362. return 0;
  363. }
  364. static int try_push_frames(AVFilterContext *ctx)
  365. {
  366. SpectrumSynthContext *s = ctx->priv;
  367. int ret, x;
  368. if (!(s->magnitude && s->phase))
  369. return 0;
  370. switch (s->sliding) {
  371. case REPLACE:
  372. ret = try_push_frame(ctx, s->xpos);
  373. s->xpos++;
  374. if (s->xpos >= s->xend)
  375. s->xpos = 0;
  376. break;
  377. case SCROLL:
  378. s->xpos = s->xend - 1;
  379. ret = try_push_frame(ctx, s->xpos);
  380. break;
  381. case RSCROLL:
  382. s->xpos = 0;
  383. ret = try_push_frame(ctx, s->xpos);
  384. break;
  385. case FULLFRAME:
  386. for (x = 0; x < s->xend; x++) {
  387. ret = try_push_frame(ctx, x);
  388. if (ret < 0)
  389. break;
  390. }
  391. break;
  392. default:
  393. av_assert0(0);
  394. }
  395. av_frame_free(&s->magnitude);
  396. av_frame_free(&s->phase);
  397. return ret;
  398. }
  399. static int activate(AVFilterContext *ctx)
  400. {
  401. SpectrumSynthContext *s = ctx->priv;
  402. AVFrame **staging[2] = { &s->magnitude, &s->phase };
  403. int64_t pts;
  404. int i, ret;
  405. FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
  406. for (i = 0; i < 2; i++) {
  407. if (*staging[i])
  408. continue;
  409. ret = ff_inlink_consume_frame(ctx->inputs[i], staging[i]);
  410. if (ret < 0)
  411. return ret;
  412. if (ret) {
  413. ff_filter_set_ready(ctx, 10);
  414. return try_push_frames(ctx);
  415. }
  416. }
  417. for (i = 0; i < 2; i++) {
  418. if (ff_inlink_acknowledge_status(ctx->inputs[i], &ret, &pts)) {
  419. ff_outlink_set_status(ctx->outputs[0], ret, pts);
  420. ff_inlink_set_status(ctx->inputs[1 - i], ret);
  421. return 0;
  422. }
  423. }
  424. if (ff_outlink_frame_wanted(ctx->outputs[0])) {
  425. for (i = 0; i < 2; i++) {
  426. if (!*staging[i])
  427. ff_inlink_request_frame(ctx->inputs[i]);
  428. }
  429. }
  430. return FFERROR_NOT_READY;
  431. }
  432. static av_cold void uninit(AVFilterContext *ctx)
  433. {
  434. SpectrumSynthContext *s = ctx->priv;
  435. int i;
  436. av_frame_free(&s->magnitude);
  437. av_frame_free(&s->phase);
  438. av_frame_free(&s->buffer);
  439. av_fft_end(s->fft);
  440. if (s->fft_data) {
  441. for (i = 0; i < s->channels; i++)
  442. av_freep(&s->fft_data[i]);
  443. }
  444. av_freep(&s->fft_data);
  445. av_freep(&s->window_func_lut);
  446. }
  447. static const AVFilterPad spectrumsynth_inputs[] = {
  448. {
  449. .name = "magnitude",
  450. .type = AVMEDIA_TYPE_VIDEO,
  451. },
  452. {
  453. .name = "phase",
  454. .type = AVMEDIA_TYPE_VIDEO,
  455. },
  456. { NULL }
  457. };
  458. static const AVFilterPad spectrumsynth_outputs[] = {
  459. {
  460. .name = "default",
  461. .type = AVMEDIA_TYPE_AUDIO,
  462. .config_props = config_output,
  463. },
  464. { NULL }
  465. };
  466. AVFilter ff_vaf_spectrumsynth = {
  467. .name = "spectrumsynth",
  468. .description = NULL_IF_CONFIG_SMALL("Convert input spectrum videos to audio output."),
  469. .uninit = uninit,
  470. .query_formats = query_formats,
  471. .activate = activate,
  472. .priv_size = sizeof(SpectrumSynthContext),
  473. .inputs = spectrumsynth_inputs,
  474. .outputs = spectrumsynth_outputs,
  475. .priv_class = &spectrumsynth_class,
  476. };