You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

398 lines
14KB

  1. /*
  2. * Copyright (c) 2012 Fredrik Mellbin
  3. * Copyright (c) 2013 Clément Bœsch
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/opt.h"
  22. #include "libavutil/pixdesc.h"
  23. #include "libavutil/timestamp.h"
  24. #include "avfilter.h"
  25. #include "internal.h"
  26. #define INPUT_MAIN 0
  27. #define INPUT_CLEANSRC 1
  28. struct qitem {
  29. AVFrame *frame;
  30. int64_t maxbdiff;
  31. int64_t totdiff;
  32. };
  33. typedef struct {
  34. const AVClass *class;
  35. struct qitem *queue; ///< window of cycle frames and the associated data diff
  36. int fid; ///< current frame id in the queue
  37. int filled; ///< 1 if the queue is filled, 0 otherwise
  38. AVFrame *last; ///< last frame from the previous queue
  39. AVFrame **clean_src; ///< frame queue for the clean source
  40. int got_frame[2]; ///< frame request flag for each input stream
  41. double ts_unit; ///< timestamp units for the output frames
  42. uint32_t eof; ///< bitmask for end of stream
  43. int hsub, vsub; ///< chroma subsampling values
  44. int depth;
  45. int nxblocks, nyblocks;
  46. int bdiffsize;
  47. int64_t *bdiffs;
  48. /* options */
  49. int cycle;
  50. double dupthresh_flt;
  51. double scthresh_flt;
  52. int64_t dupthresh;
  53. int64_t scthresh;
  54. int blockx, blocky;
  55. int ppsrc;
  56. int chroma;
  57. } DecimateContext;
  58. #define OFFSET(x) offsetof(DecimateContext, x)
  59. #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
  60. static const AVOption decimate_options[] = {
  61. { "cycle", "set the number of frame from which one will be dropped", OFFSET(cycle), AV_OPT_TYPE_INT, {.i64 = 5}, 2, 25, FLAGS },
  62. { "dupthresh", "set duplicate threshold", OFFSET(dupthresh_flt), AV_OPT_TYPE_DOUBLE, {.dbl = 1.1}, 0, 100, FLAGS },
  63. { "scthresh", "set scene change threshold", OFFSET(scthresh_flt), AV_OPT_TYPE_DOUBLE, {.dbl = 15.0}, 0, 100, FLAGS },
  64. { "blockx", "set the size of the x-axis blocks used during metric calculations", OFFSET(blockx), AV_OPT_TYPE_INT, {.i64 = 32}, 4, 1<<9, FLAGS },
  65. { "blocky", "set the size of the y-axis blocks used during metric calculations", OFFSET(blocky), AV_OPT_TYPE_INT, {.i64 = 32}, 4, 1<<9, FLAGS },
  66. { "ppsrc", "mark main input as a pre-processed input and activate clean source input stream", OFFSET(ppsrc), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
  67. { "chroma", "set whether or not chroma is considered in the metric calculations", OFFSET(chroma), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
  68. { NULL }
  69. };
  70. AVFILTER_DEFINE_CLASS(decimate);
  71. static void calc_diffs(const DecimateContext *dm, struct qitem *q,
  72. const AVFrame *f1, const AVFrame *f2)
  73. {
  74. int64_t maxdiff = -1;
  75. int64_t *bdiffs = dm->bdiffs;
  76. int plane, i, j;
  77. memset(bdiffs, 0, dm->bdiffsize * sizeof(*bdiffs));
  78. for (plane = 0; plane < (dm->chroma ? 3 : 1); plane++) {
  79. int x, y, xl;
  80. const int linesize1 = f1->linesize[plane];
  81. const int linesize2 = f2->linesize[plane];
  82. const uint8_t *f1p = f1->data[plane];
  83. const uint8_t *f2p = f2->data[plane];
  84. int width = plane ? FF_CEIL_RSHIFT(f1->width, dm->hsub) : f1->width;
  85. int height = plane ? FF_CEIL_RSHIFT(f1->height, dm->vsub) : f1->height;
  86. int hblockx = dm->blockx / 2;
  87. int hblocky = dm->blocky / 2;
  88. if (plane) {
  89. hblockx >>= dm->hsub;
  90. hblocky >>= dm->vsub;
  91. }
  92. for (y = 0; y < height; y++) {
  93. int ydest = y / hblocky;
  94. int xdest = 0;
  95. #define CALC_DIFF(nbits) do { \
  96. for (x = 0; x < width; x += hblockx) { \
  97. int64_t acc = 0; \
  98. int m = FFMIN(width, x + hblockx); \
  99. for (xl = x; xl < m; xl++) \
  100. acc += abs(((const uint##nbits##_t *)f1p)[xl] - \
  101. ((const uint##nbits##_t *)f2p)[xl]); \
  102. bdiffs[ydest * dm->nxblocks + xdest] += acc; \
  103. xdest++; \
  104. } \
  105. } while (0)
  106. if (dm->depth == 8) CALC_DIFF(8);
  107. else CALC_DIFF(16);
  108. f1p += linesize1;
  109. f2p += linesize2;
  110. }
  111. }
  112. for (i = 0; i < dm->nyblocks - 1; i++) {
  113. for (j = 0; j < dm->nxblocks - 1; j++) {
  114. int64_t tmp = bdiffs[ i * dm->nxblocks + j ]
  115. + bdiffs[ i * dm->nxblocks + j + 1]
  116. + bdiffs[(i + 1) * dm->nxblocks + j ]
  117. + bdiffs[(i + 1) * dm->nxblocks + j + 1];
  118. if (tmp > maxdiff)
  119. maxdiff = tmp;
  120. }
  121. }
  122. q->totdiff = 0;
  123. for (i = 0; i < dm->bdiffsize; i++)
  124. q->totdiff += bdiffs[i];
  125. q->maxbdiff = maxdiff;
  126. }
  127. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  128. {
  129. int scpos = -1, duppos = -1;
  130. int drop = INT_MIN, i, lowest = 0, ret;
  131. AVFilterContext *ctx = inlink->dst;
  132. AVFilterLink *outlink = ctx->outputs[0];
  133. DecimateContext *dm = ctx->priv;
  134. AVFrame *prv;
  135. /* update frames queue(s) */
  136. if (FF_INLINK_IDX(inlink) == INPUT_MAIN) {
  137. dm->queue[dm->fid].frame = in;
  138. dm->got_frame[INPUT_MAIN] = 1;
  139. } else {
  140. dm->clean_src[dm->fid] = in;
  141. dm->got_frame[INPUT_CLEANSRC] = 1;
  142. }
  143. if (!dm->got_frame[INPUT_MAIN] || (dm->ppsrc && !dm->got_frame[INPUT_CLEANSRC]))
  144. return 0;
  145. dm->got_frame[INPUT_MAIN] = dm->got_frame[INPUT_CLEANSRC] = 0;
  146. if (in) {
  147. /* update frame metrics */
  148. prv = dm->fid ? dm->queue[dm->fid - 1].frame : dm->last;
  149. if (!prv)
  150. prv = in;
  151. calc_diffs(dm, &dm->queue[dm->fid], prv, in);
  152. if (++dm->fid != dm->cycle)
  153. return 0;
  154. av_frame_free(&dm->last);
  155. dm->last = av_frame_clone(in);
  156. dm->fid = 0;
  157. /* we have a complete cycle, select the frame to drop */
  158. lowest = 0;
  159. for (i = 0; i < dm->cycle; i++) {
  160. if (dm->queue[i].totdiff > dm->scthresh)
  161. scpos = i;
  162. if (dm->queue[i].maxbdiff < dm->queue[lowest].maxbdiff)
  163. lowest = i;
  164. }
  165. if (dm->queue[lowest].maxbdiff < dm->dupthresh)
  166. duppos = lowest;
  167. drop = scpos >= 0 && duppos < 0 ? scpos : lowest;
  168. }
  169. /* metrics debug */
  170. if (av_log_get_level() >= AV_LOG_DEBUG) {
  171. av_log(ctx, AV_LOG_DEBUG, "1/%d frame drop:\n", dm->cycle);
  172. for (i = 0; i < dm->cycle && dm->queue[i].frame; i++) {
  173. av_log(ctx, AV_LOG_DEBUG," #%d: totdiff=%08"PRIx64" maxbdiff=%08"PRIx64"%s%s%s%s\n",
  174. i + 1, dm->queue[i].totdiff, dm->queue[i].maxbdiff,
  175. i == scpos ? " sc" : "",
  176. i == duppos ? " dup" : "",
  177. i == lowest ? " lowest" : "",
  178. i == drop ? " [DROP]" : "");
  179. }
  180. }
  181. /* push all frames except the drop */
  182. ret = 0;
  183. for (i = 0; i < dm->cycle && dm->queue[i].frame; i++) {
  184. if (i == drop) {
  185. if (dm->ppsrc)
  186. av_frame_free(&dm->clean_src[i]);
  187. av_frame_free(&dm->queue[i].frame);
  188. } else {
  189. AVFrame *frame = dm->queue[i].frame;
  190. if (dm->ppsrc) {
  191. av_frame_free(&frame);
  192. frame = dm->clean_src[i];
  193. }
  194. frame->pts = outlink->frame_count * dm->ts_unit;
  195. ret = ff_filter_frame(outlink, frame);
  196. if (ret < 0)
  197. break;
  198. }
  199. }
  200. return ret;
  201. }
  202. static int config_input(AVFilterLink *inlink)
  203. {
  204. int max_value;
  205. AVFilterContext *ctx = inlink->dst;
  206. DecimateContext *dm = ctx->priv;
  207. const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
  208. const int w = inlink->w;
  209. const int h = inlink->h;
  210. dm->hsub = pix_desc->log2_chroma_w;
  211. dm->vsub = pix_desc->log2_chroma_h;
  212. dm->depth = pix_desc->comp[0].depth_minus1 + 1;
  213. max_value = (1 << dm->depth) - 1;
  214. dm->scthresh = (int64_t)(((int64_t)max_value * w * h * dm->scthresh_flt) / 100);
  215. dm->dupthresh = (int64_t)(((int64_t)max_value * dm->blockx * dm->blocky * dm->dupthresh_flt) / 100);
  216. dm->nxblocks = (w + dm->blockx/2 - 1) / (dm->blockx/2);
  217. dm->nyblocks = (h + dm->blocky/2 - 1) / (dm->blocky/2);
  218. dm->bdiffsize = dm->nxblocks * dm->nyblocks;
  219. dm->bdiffs = av_malloc(dm->bdiffsize * sizeof(*dm->bdiffs));
  220. dm->queue = av_calloc(dm->cycle, sizeof(*dm->queue));
  221. if (!dm->bdiffs || !dm->queue)
  222. return AVERROR(ENOMEM);
  223. if (dm->ppsrc) {
  224. dm->clean_src = av_calloc(dm->cycle, sizeof(*dm->clean_src));
  225. if (!dm->clean_src)
  226. return AVERROR(ENOMEM);
  227. }
  228. return 0;
  229. }
  230. static av_cold int decimate_init(AVFilterContext *ctx)
  231. {
  232. const DecimateContext *dm = ctx->priv;
  233. AVFilterPad pad = {
  234. .name = av_strdup("main"),
  235. .type = AVMEDIA_TYPE_VIDEO,
  236. .filter_frame = filter_frame,
  237. .config_props = config_input,
  238. };
  239. if (!pad.name)
  240. return AVERROR(ENOMEM);
  241. ff_insert_inpad(ctx, INPUT_MAIN, &pad);
  242. if (dm->ppsrc) {
  243. pad.name = av_strdup("clean_src");
  244. pad.config_props = NULL;
  245. if (!pad.name)
  246. return AVERROR(ENOMEM);
  247. ff_insert_inpad(ctx, INPUT_CLEANSRC, &pad);
  248. }
  249. if ((dm->blockx & (dm->blockx - 1)) ||
  250. (dm->blocky & (dm->blocky - 1))) {
  251. av_log(ctx, AV_LOG_ERROR, "blockx and blocky settings must be power of two\n");
  252. return AVERROR(EINVAL);
  253. }
  254. return 0;
  255. }
  256. static av_cold void decimate_uninit(AVFilterContext *ctx)
  257. {
  258. int i;
  259. DecimateContext *dm = ctx->priv;
  260. av_frame_free(&dm->last);
  261. av_freep(&dm->bdiffs);
  262. av_freep(&dm->queue);
  263. av_freep(&dm->clean_src);
  264. for (i = 0; i < ctx->nb_inputs; i++)
  265. av_freep(&ctx->input_pads[i].name);
  266. }
  267. static int request_inlink(AVFilterContext *ctx, int lid)
  268. {
  269. int ret = 0;
  270. DecimateContext *dm = ctx->priv;
  271. if (!dm->got_frame[lid]) {
  272. AVFilterLink *inlink = ctx->inputs[lid];
  273. ret = ff_request_frame(inlink);
  274. if (ret == AVERROR_EOF) { // flushing
  275. dm->eof |= 1 << lid;
  276. ret = filter_frame(inlink, NULL);
  277. }
  278. }
  279. return ret;
  280. }
  281. static int request_frame(AVFilterLink *outlink)
  282. {
  283. int ret;
  284. AVFilterContext *ctx = outlink->src;
  285. DecimateContext *dm = ctx->priv;
  286. const uint32_t eof_mask = 1<<INPUT_MAIN | dm->ppsrc<<INPUT_CLEANSRC;
  287. if ((dm->eof & eof_mask) == eof_mask) // flush done?
  288. return AVERROR_EOF;
  289. if ((ret = request_inlink(ctx, INPUT_MAIN)) < 0)
  290. return ret;
  291. if (dm->ppsrc && (ret = request_inlink(ctx, INPUT_CLEANSRC)) < 0)
  292. return ret;
  293. return 0;
  294. }
  295. static int query_formats(AVFilterContext *ctx)
  296. {
  297. static const enum AVPixelFormat pix_fmts[] = {
  298. #define PF_NOALPHA(suf) AV_PIX_FMT_YUV420##suf, AV_PIX_FMT_YUV422##suf, AV_PIX_FMT_YUV444##suf
  299. #define PF_ALPHA(suf) AV_PIX_FMT_YUVA420##suf, AV_PIX_FMT_YUVA422##suf, AV_PIX_FMT_YUVA444##suf
  300. #define PF(suf) PF_NOALPHA(suf), PF_ALPHA(suf)
  301. PF(P), PF(P9), PF(P10), PF_NOALPHA(P12), PF_NOALPHA(P14), PF(P16),
  302. AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
  303. AV_PIX_FMT_GRAY8,
  304. AV_PIX_FMT_NONE
  305. };
  306. ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
  307. return 0;
  308. }
  309. static int config_output(AVFilterLink *outlink)
  310. {
  311. AVFilterContext *ctx = outlink->src;
  312. DecimateContext *dm = ctx->priv;
  313. const AVFilterLink *inlink =
  314. ctx->inputs[dm->ppsrc ? INPUT_CLEANSRC : INPUT_MAIN];
  315. AVRational fps = inlink->frame_rate;
  316. if (!fps.num || !fps.den) {
  317. av_log(ctx, AV_LOG_ERROR, "The input needs a constant frame rate; "
  318. "current rate of %d/%d is invalid\n", fps.num, fps.den);
  319. return AVERROR(EINVAL);
  320. }
  321. fps = av_mul_q(fps, (AVRational){dm->cycle - 1, dm->cycle});
  322. av_log(ctx, AV_LOG_VERBOSE, "FPS: %d/%d -> %d/%d\n",
  323. inlink->frame_rate.num, inlink->frame_rate.den, fps.num, fps.den);
  324. outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
  325. outlink->time_base = inlink->time_base;
  326. outlink->frame_rate = fps;
  327. outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
  328. outlink->w = inlink->w;
  329. outlink->h = inlink->h;
  330. dm->ts_unit = av_q2d(av_inv_q(av_mul_q(fps, outlink->time_base)));
  331. return 0;
  332. }
  333. static const AVFilterPad decimate_outputs[] = {
  334. {
  335. .name = "default",
  336. .type = AVMEDIA_TYPE_VIDEO,
  337. .request_frame = request_frame,
  338. .config_props = config_output,
  339. },
  340. { NULL }
  341. };
  342. AVFilter avfilter_vf_decimate = {
  343. .name = "decimate",
  344. .description = NULL_IF_CONFIG_SMALL("Decimate frames (post field matching filter)."),
  345. .init = decimate_init,
  346. .uninit = decimate_uninit,
  347. .priv_size = sizeof(DecimateContext),
  348. .query_formats = query_formats,
  349. .outputs = decimate_outputs,
  350. .priv_class = &decimate_class,
  351. .flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
  352. };