You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

438 lines
15KB

  1. /*
  2. * Copyright (c) 2015 Paul B Mahol
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/avassert.h"
  21. #include "libavutil/opt.h"
  22. #include "libavutil/parseutils.h"
  23. #include "avfilter.h"
  24. #include "filters.h"
  25. #include "formats.h"
  26. #include "audio.h"
  27. #include "video.h"
  28. #include "internal.h"
  29. enum DisplayScale { LINEAR, SQRT, CBRT, LOG, RLOG, NB_SCALES };
  30. enum AmplitudeScale { ALINEAR, ALOG, NB_ASCALES };
  31. enum SlideMode { REPLACE, SCROLL, NB_SLIDES };
  32. enum DisplayMode { SINGLE, SEPARATE, NB_DMODES };
  33. enum HistogramMode { ACCUMULATE, CURRENT, NB_HMODES };
  34. typedef struct AudioHistogramContext {
  35. const AVClass *class;
  36. AVFrame *out;
  37. int w, h;
  38. AVRational frame_rate;
  39. uint64_t *achistogram;
  40. uint64_t *shistogram;
  41. int ascale;
  42. int scale;
  43. float phisto;
  44. int histogram_h;
  45. int apos;
  46. int ypos;
  47. int slide;
  48. int dmode;
  49. int dchannels;
  50. int count;
  51. int frame_count;
  52. float *combine_buffer;
  53. AVFrame *in[101];
  54. int first;
  55. int nb_samples;
  56. } AudioHistogramContext;
  57. #define OFFSET(x) offsetof(AudioHistogramContext, x)
  58. #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  59. static const AVOption ahistogram_options[] = {
  60. { "dmode", "set method to display channels", OFFSET(dmode), AV_OPT_TYPE_INT, {.i64=SINGLE}, 0, NB_DMODES-1, FLAGS, "dmode" },
  61. { "single", "all channels use single histogram", 0, AV_OPT_TYPE_CONST, {.i64=SINGLE}, 0, 0, FLAGS, "dmode" },
  62. { "separate", "each channel have own histogram", 0, AV_OPT_TYPE_CONST, {.i64=SEPARATE}, 0, 0, FLAGS, "dmode" },
  63. { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, INT_MAX, FLAGS },
  64. { "r", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, INT_MAX, FLAGS },
  65. { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, FLAGS },
  66. { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, FLAGS },
  67. { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=LOG}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
  68. { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG}, 0, 0, FLAGS, "scale" },
  69. { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT}, 0, 0, FLAGS, "scale" },
  70. { "cbrt", "cubic root", 0, AV_OPT_TYPE_CONST, {.i64=CBRT}, 0, 0, FLAGS, "scale" },
  71. { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
  72. { "rlog", "reverse logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=RLOG}, 0, 0, FLAGS, "scale" },
  73. { "ascale", "set amplitude scale", OFFSET(ascale), AV_OPT_TYPE_INT, {.i64=ALOG}, LINEAR, NB_ASCALES-1, FLAGS, "ascale" },
  74. { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=ALOG}, 0, 0, FLAGS, "ascale" },
  75. { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=ALINEAR}, 0, 0, FLAGS, "ascale" },
  76. { "acount", "how much frames to accumulate", OFFSET(count), AV_OPT_TYPE_INT, {.i64=1}, -1, 100, FLAGS },
  77. { "rheight", "set histogram ratio of window height", OFFSET(phisto), AV_OPT_TYPE_FLOAT, {.dbl=0.10}, 0, 1, FLAGS },
  78. { "slide", "set sonogram sliding", OFFSET(slide), AV_OPT_TYPE_INT, {.i64=REPLACE}, 0, NB_SLIDES-1, FLAGS, "slide" },
  79. { "replace", "replace old rows with new", 0, AV_OPT_TYPE_CONST, {.i64=REPLACE}, 0, 0, FLAGS, "slide" },
  80. { "scroll", "scroll from top to bottom", 0, AV_OPT_TYPE_CONST, {.i64=SCROLL}, 0, 0, FLAGS, "slide" },
  81. { NULL }
  82. };
  83. AVFILTER_DEFINE_CLASS(ahistogram);
  84. static int query_formats(AVFilterContext *ctx)
  85. {
  86. AVFilterFormats *formats = NULL;
  87. AVFilterChannelLayouts *layouts = NULL;
  88. AVFilterLink *inlink = ctx->inputs[0];
  89. AVFilterLink *outlink = ctx->outputs[0];
  90. static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
  91. static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE };
  92. int ret = AVERROR(EINVAL);
  93. formats = ff_make_format_list(sample_fmts);
  94. if ((ret = ff_formats_ref (formats, &inlink->outcfg.formats )) < 0 ||
  95. (layouts = ff_all_channel_counts()) == NULL ||
  96. (ret = ff_channel_layouts_ref (layouts, &inlink->outcfg.channel_layouts)) < 0)
  97. return ret;
  98. formats = ff_all_samplerates();
  99. if ((ret = ff_formats_ref(formats, &inlink->outcfg.samplerates)) < 0)
  100. return ret;
  101. formats = ff_make_format_list(pix_fmts);
  102. if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) < 0)
  103. return ret;
  104. return 0;
  105. }
  106. static int config_input(AVFilterLink *inlink)
  107. {
  108. AVFilterContext *ctx = inlink->dst;
  109. AudioHistogramContext *s = ctx->priv;
  110. s->nb_samples = FFMAX(1, av_rescale(inlink->sample_rate, s->frame_rate.den, s->frame_rate.num));
  111. s->dchannels = s->dmode == SINGLE ? 1 : inlink->channels;
  112. s->shistogram = av_calloc(s->w, s->dchannels * sizeof(*s->shistogram));
  113. if (!s->shistogram)
  114. return AVERROR(ENOMEM);
  115. s->achistogram = av_calloc(s->w, s->dchannels * sizeof(*s->achistogram));
  116. if (!s->achistogram)
  117. return AVERROR(ENOMEM);
  118. return 0;
  119. }
  120. static int config_output(AVFilterLink *outlink)
  121. {
  122. AudioHistogramContext *s = outlink->src->priv;
  123. outlink->w = s->w;
  124. outlink->h = s->h;
  125. outlink->sample_aspect_ratio = (AVRational){1,1};
  126. outlink->frame_rate = s->frame_rate;
  127. s->histogram_h = s->h * s->phisto;
  128. s->ypos = s->h * s->phisto;
  129. if (s->dmode == SEPARATE) {
  130. s->combine_buffer = av_malloc_array(outlink->w * 3, sizeof(*s->combine_buffer));
  131. if (!s->combine_buffer)
  132. return AVERROR(ENOMEM);
  133. }
  134. return 0;
  135. }
  136. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  137. {
  138. AVFilterContext *ctx = inlink->dst;
  139. AVFilterLink *outlink = ctx->outputs[0];
  140. AudioHistogramContext *s = ctx->priv;
  141. const int H = s->histogram_h;
  142. const int w = s->w;
  143. int c, y, n, p, bin;
  144. uint64_t acmax = 1;
  145. AVFrame *clone;
  146. if (!s->out || s->out->width != outlink->w ||
  147. s->out->height != outlink->h) {
  148. av_frame_free(&s->out);
  149. s->out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  150. if (!s->out) {
  151. av_frame_free(&in);
  152. return AVERROR(ENOMEM);
  153. }
  154. for (n = H; n < s->h; n++) {
  155. memset(s->out->data[0] + n * s->out->linesize[0], 0, w);
  156. memset(s->out->data[1] + n * s->out->linesize[0], 127, w);
  157. memset(s->out->data[2] + n * s->out->linesize[0], 127, w);
  158. memset(s->out->data[3] + n * s->out->linesize[0], 0, w);
  159. }
  160. }
  161. if (s->dmode == SEPARATE) {
  162. for (y = 0; y < w; y++) {
  163. s->combine_buffer[3 * y ] = 0;
  164. s->combine_buffer[3 * y + 1] = 127.5;
  165. s->combine_buffer[3 * y + 2] = 127.5;
  166. }
  167. }
  168. for (n = 0; n < H; n++) {
  169. memset(s->out->data[0] + n * s->out->linesize[0], 0, w);
  170. memset(s->out->data[1] + n * s->out->linesize[0], 127, w);
  171. memset(s->out->data[2] + n * s->out->linesize[0], 127, w);
  172. memset(s->out->data[3] + n * s->out->linesize[0], 0, w);
  173. }
  174. s->out->pts = in->pts;
  175. s->first = s->frame_count;
  176. switch (s->ascale) {
  177. case ALINEAR:
  178. for (c = 0; c < inlink->channels; c++) {
  179. const float *src = (const float *)in->extended_data[c];
  180. uint64_t *achistogram = &s->achistogram[(s->dmode == SINGLE ? 0: c) * w];
  181. for (n = 0; n < in->nb_samples; n++) {
  182. bin = lrint(av_clipf(fabsf(src[n]), 0, 1) * (w - 1));
  183. achistogram[bin]++;
  184. }
  185. if (s->in[s->first] && s->count >= 0) {
  186. uint64_t *shistogram = &s->shistogram[(s->dmode == SINGLE ? 0: c) * w];
  187. const float *src2 = (const float *)s->in[s->first]->extended_data[c];
  188. for (n = 0; n < in->nb_samples; n++) {
  189. bin = lrint(av_clipf(fabsf(src2[n]), 0, 1) * (w - 1));
  190. shistogram[bin]++;
  191. }
  192. }
  193. }
  194. break;
  195. case ALOG:
  196. for (c = 0; c < inlink->channels; c++) {
  197. const float *src = (const float *)in->extended_data[c];
  198. uint64_t *achistogram = &s->achistogram[(s->dmode == SINGLE ? 0: c) * w];
  199. for (n = 0; n < in->nb_samples; n++) {
  200. bin = lrint(av_clipf(1 + log10(fabsf(src[n])) / 6, 0, 1) * (w - 1));
  201. achistogram[bin]++;
  202. }
  203. if (s->in[s->first] && s->count >= 0) {
  204. uint64_t *shistogram = &s->shistogram[(s->dmode == SINGLE ? 0: c) * w];
  205. const float *src2 = (const float *)s->in[s->first]->extended_data[c];
  206. for (n = 0; n < in->nb_samples; n++) {
  207. bin = lrint(av_clipf(1 + log10(fabsf(src2[n])) / 6, 0, 1) * (w - 1));
  208. shistogram[bin]++;
  209. }
  210. }
  211. }
  212. break;
  213. }
  214. av_frame_free(&s->in[s->frame_count]);
  215. s->in[s->frame_count] = in;
  216. s->frame_count++;
  217. if (s->frame_count > s->count)
  218. s->frame_count = 0;
  219. for (n = 0; n < w * s->dchannels; n++) {
  220. acmax = FFMAX(s->achistogram[n] - s->shistogram[n], acmax);
  221. }
  222. for (c = 0; c < s->dchannels; c++) {
  223. uint64_t *shistogram = &s->shistogram[c * w];
  224. uint64_t *achistogram = &s->achistogram[c * w];
  225. float yf, uf, vf;
  226. if (s->dmode == SEPARATE) {
  227. yf = 256.0f / s->dchannels;
  228. uf = yf * M_PI;
  229. vf = yf * M_PI;
  230. uf *= 0.5 * sin((2 * M_PI * c) / s->dchannels);
  231. vf *= 0.5 * cos((2 * M_PI * c) / s->dchannels);
  232. }
  233. for (n = 0; n < w; n++) {
  234. double a, aa;
  235. int h;
  236. a = achistogram[n] - shistogram[n];
  237. switch (s->scale) {
  238. case LINEAR:
  239. aa = a / (double)acmax;
  240. break;
  241. case SQRT:
  242. aa = sqrt(a) / sqrt(acmax);
  243. break;
  244. case CBRT:
  245. aa = cbrt(a) / cbrt(acmax);
  246. break;
  247. case LOG:
  248. aa = log2(a + 1) / log2(acmax + 1);
  249. break;
  250. case RLOG:
  251. aa = 1. - log2(a + 1) / log2(acmax + 1);
  252. if (aa == 1.)
  253. aa = 0;
  254. break;
  255. default:
  256. av_assert0(0);
  257. }
  258. h = aa * (H - 1);
  259. if (s->dmode == SINGLE) {
  260. for (y = H - h; y < H; y++) {
  261. s->out->data[0][y * s->out->linesize[0] + n] = 255;
  262. s->out->data[3][y * s->out->linesize[0] + n] = 255;
  263. }
  264. if (s->h - H > 0) {
  265. h = aa * 255;
  266. s->out->data[0][s->ypos * s->out->linesize[0] + n] = h;
  267. s->out->data[1][s->ypos * s->out->linesize[1] + n] = 127;
  268. s->out->data[2][s->ypos * s->out->linesize[2] + n] = 127;
  269. s->out->data[3][s->ypos * s->out->linesize[3] + n] = 255;
  270. }
  271. } else if (s->dmode == SEPARATE) {
  272. float *out = &s->combine_buffer[3 * n];
  273. int old;
  274. old = s->out->data[0][(H - h) * s->out->linesize[0] + n];
  275. for (y = H - h; y < H; y++) {
  276. if (s->out->data[0][y * s->out->linesize[0] + n] != old)
  277. break;
  278. old = s->out->data[0][y * s->out->linesize[0] + n];
  279. s->out->data[0][y * s->out->linesize[0] + n] = yf;
  280. s->out->data[1][y * s->out->linesize[1] + n] = 128+uf;
  281. s->out->data[2][y * s->out->linesize[2] + n] = 128+vf;
  282. s->out->data[3][y * s->out->linesize[3] + n] = 255;
  283. }
  284. out[0] += aa * yf;
  285. out[1] += aa * uf;
  286. out[2] += aa * vf;
  287. }
  288. }
  289. }
  290. if (s->h - H > 0) {
  291. if (s->dmode == SEPARATE) {
  292. for (n = 0; n < w; n++) {
  293. float *cb = &s->combine_buffer[3 * n];
  294. s->out->data[0][s->ypos * s->out->linesize[0] + n] = cb[0];
  295. s->out->data[1][s->ypos * s->out->linesize[1] + n] = cb[1];
  296. s->out->data[2][s->ypos * s->out->linesize[2] + n] = cb[2];
  297. s->out->data[3][s->ypos * s->out->linesize[3] + n] = 255;
  298. }
  299. }
  300. if (s->slide == SCROLL) {
  301. for (p = 0; p < 4; p++) {
  302. for (y = s->h; y >= H + 1; y--) {
  303. memmove(s->out->data[p] + (y ) * s->out->linesize[p],
  304. s->out->data[p] + (y-1) * s->out->linesize[p], w);
  305. }
  306. }
  307. }
  308. s->ypos++;
  309. if (s->slide == SCROLL || s->ypos >= s->h)
  310. s->ypos = H;
  311. }
  312. clone = av_frame_clone(s->out);
  313. if (!clone)
  314. return AVERROR(ENOMEM);
  315. return ff_filter_frame(outlink, clone);
  316. }
  317. static int activate(AVFilterContext *ctx)
  318. {
  319. AVFilterLink *inlink = ctx->inputs[0];
  320. AVFilterLink *outlink = ctx->outputs[0];
  321. AudioHistogramContext *s = ctx->priv;
  322. AVFrame *in;
  323. int ret;
  324. FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
  325. ret = ff_inlink_consume_samples(inlink, s->nb_samples, s->nb_samples, &in);
  326. if (ret < 0)
  327. return ret;
  328. if (ret > 0)
  329. return filter_frame(inlink, in);
  330. FF_FILTER_FORWARD_STATUS(inlink, outlink);
  331. FF_FILTER_FORWARD_WANTED(outlink, inlink);
  332. return FFERROR_NOT_READY;
  333. }
  334. static av_cold void uninit(AVFilterContext *ctx)
  335. {
  336. AudioHistogramContext *s = ctx->priv;
  337. int i;
  338. av_frame_free(&s->out);
  339. av_freep(&s->shistogram);
  340. av_freep(&s->achistogram);
  341. av_freep(&s->combine_buffer);
  342. for (i = 0; i < 101; i++)
  343. av_frame_free(&s->in[i]);
  344. }
  345. static const AVFilterPad ahistogram_inputs[] = {
  346. {
  347. .name = "default",
  348. .type = AVMEDIA_TYPE_AUDIO,
  349. .config_props = config_input,
  350. },
  351. { NULL }
  352. };
  353. static const AVFilterPad ahistogram_outputs[] = {
  354. {
  355. .name = "default",
  356. .type = AVMEDIA_TYPE_VIDEO,
  357. .config_props = config_output,
  358. },
  359. { NULL }
  360. };
  361. AVFilter ff_avf_ahistogram = {
  362. .name = "ahistogram",
  363. .description = NULL_IF_CONFIG_SMALL("Convert input audio to histogram video output."),
  364. .uninit = uninit,
  365. .query_formats = query_formats,
  366. .priv_size = sizeof(AudioHistogramContext),
  367. .activate = activate,
  368. .inputs = ahistogram_inputs,
  369. .outputs = ahistogram_outputs,
  370. .priv_class = &ahistogram_class,
  371. };