/*
 * Copyright (c) 2011 Smartjog S.A.S, Clément Bœsch <clement.boesch@smartjog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Potential thumbnail lookup filter to reduce the risk of an inappropriate
 * selection (such as a black frame) we could get with an absolute seek.
 *
 * Simplified version of algorithm by Vadim Zaliva <lord@crocodile.org>.
 * @see http://notbrainsurgery.livejournal.com/29773.html
 */
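
/*
 * Usage note (added for illustration; not part of the original source):
 * the filter is registered as "thumbnail" and exposes the "n" batch-size
 * option defined below, so a typical command-line invocation would look
 * roughly like:
 *
 *   ffmpeg -i input.mp4 -vf thumbnail=n=100 -frames:v 1 thumb.png
 *
 * which selects one representative frame per batch of 100 input frames
 * and writes the first such frame to thumb.png.
 */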

#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "internal.h"

#define HIST_SIZE (3*256)

struct thumb_frame {
    AVFrame *buf;               ///< cached frame
    int histogram[HIST_SIZE];   ///< RGB color distribution histogram of the frame
};

typedef struct ThumbContext {
    const AVClass *class;
    int n;                      ///< current frame
    int n_frames;               ///< number of frames for analysis
    struct thumb_frame *frames; ///< the n_frames frames
    AVRational tb;              ///< copy of the input timebase to ease access
    int planewidth[4];
    int planeheight[4];
} ThumbContext;

#define OFFSET(x) offsetof(ThumbContext, x)
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

static const AVOption thumbnail_options[] = {
    { "n", "set the frames batch size", OFFSET(n_frames), AV_OPT_TYPE_INT, {.i64=100}, 2, INT_MAX, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(thumbnail);

static av_cold int init(AVFilterContext *ctx)
{
    ThumbContext *s = ctx->priv;

    s->frames = av_calloc(s->n_frames, sizeof(*s->frames));
    if (!s->frames) {
        av_log(ctx, AV_LOG_ERROR,
               "Allocation failure, try to lower the number of frames\n");
        return AVERROR(ENOMEM);
    }
    av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", s->n_frames);
    return 0;
}

/**
 * @brief        Compute Sum-square deviation to estimate "closeness".
 * @param hist   color distribution histogram
 * @param median average color distribution histogram
 * @return       sum of squared errors
 */
static double frame_sum_square_err(const int *hist, const double *median)
{
    int i;
    double err, sum_sq_err = 0;

    for (i = 0; i < HIST_SIZE; i++) {
        err = median[i] - (double)hist[i];
        sum_sq_err += err*err;
    }
    return sum_sq_err;
}
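
/*
 * Illustrative example (added here, not in the original source): conceptually,
 * for two bins with hist = {2, 4} and median = {3.0, 3.0}, the sum of squared
 * errors is (3-2)^2 + (3-4)^2 = 2; smaller values mean the frame's histogram
 * is closer to the batch average computed in get_best_frame() below.
 */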

static AVFrame *get_best_frame(AVFilterContext *ctx)
{
    AVFrame *picref;
    ThumbContext *s = ctx->priv;
    int i, j, best_frame_idx = 0;
    int nb_frames = s->n;
    double avg_hist[HIST_SIZE] = {0}, sq_err, min_sq_err = -1;

    // average histogram of the N frames
    for (j = 0; j < FF_ARRAY_ELEMS(avg_hist); j++) {
        for (i = 0; i < nb_frames; i++)
            avg_hist[j] += (double)s->frames[i].histogram[j];
        avg_hist[j] /= nb_frames;
    }

    // find the frame closer to the average using the sum of squared errors
    for (i = 0; i < nb_frames; i++) {
        sq_err = frame_sum_square_err(s->frames[i].histogram, avg_hist);
        if (i == 0 || sq_err < min_sq_err)
            best_frame_idx = i, min_sq_err = sq_err;
    }

    // free and reset everything (except the best frame buffer)
    for (i = 0; i < nb_frames; i++) {
        memset(s->frames[i].histogram, 0, sizeof(s->frames[i].histogram));
        if (i != best_frame_idx)
            av_frame_free(&s->frames[i].buf);
    }
    s->n = 0;

    // raise the chosen one
    picref = s->frames[best_frame_idx].buf;
    av_log(ctx, AV_LOG_INFO, "frame id #%d (pts_time=%f) selected "
           "from a set of %d images\n", best_frame_idx,
           picref->pts * av_q2d(s->tb), nb_frames);
    s->frames[best_frame_idx].buf = NULL;

    return picref;
}

static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
{
    int i, j;
    AVFilterContext *ctx = inlink->dst;
    ThumbContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    int *hist = s->frames[s->n].histogram;
    const uint8_t *p = frame->data[0];

    // keep a reference of each frame
    s->frames[s->n].buf = frame;

    // update current frame histogram
    switch (inlink->format) {
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
        for (j = 0; j < inlink->h; j++) {
            for (i = 0; i < inlink->w; i++) {
                hist[0*256 + p[i*3    ]]++;
                hist[1*256 + p[i*3 + 1]]++;
                hist[2*256 + p[i*3 + 2]]++;
            }
            p += frame->linesize[0];
        }
        break;
    case AV_PIX_FMT_RGB0:
    case AV_PIX_FMT_BGR0:
    case AV_PIX_FMT_RGBA:
    case AV_PIX_FMT_BGRA:
        for (j = 0; j < inlink->h; j++) {
            for (i = 0; i < inlink->w; i++) {
                hist[0*256 + p[i*4    ]]++;
                hist[1*256 + p[i*4 + 1]]++;
                hist[2*256 + p[i*4 + 2]]++;
            }
            p += frame->linesize[0];
        }
        break;
    case AV_PIX_FMT_0RGB:
    case AV_PIX_FMT_0BGR:
    case AV_PIX_FMT_ARGB:
    case AV_PIX_FMT_ABGR:
        for (j = 0; j < inlink->h; j++) {
            for (i = 0; i < inlink->w; i++) {
                hist[0*256 + p[i*4 + 1]]++;
                hist[1*256 + p[i*4 + 2]]++;
                hist[2*256 + p[i*4 + 3]]++;
            }
            p += frame->linesize[0];
        }
        break;
    default:
        for (int plane = 0; plane < 3; plane++) {
            const uint8_t *p = frame->data[plane];
            for (j = 0; j < s->planeheight[plane]; j++) {
                for (i = 0; i < s->planewidth[plane]; i++)
                    hist[256*plane + p[i]]++;
                p += frame->linesize[plane];
            }
        }
        break;
    }

    // no selection until the buffer of N frames is filled up
    s->n++;
    if (s->n < s->n_frames)
        return 0;

    return ff_filter_frame(outlink, get_best_frame(ctx));
}
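
/*
 * Note added for clarity (not in the original source): with the default
 * n=100, filter_frame() above emits one selected frame per batch of 100
 * input frames; a partially filled final batch is flushed at EOF by
 * request_frame() further below.
 */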

static av_cold void uninit(AVFilterContext *ctx)
{
    int i;
    ThumbContext *s = ctx->priv;
    for (i = 0; i < s->n_frames && s->frames && s->frames[i].buf; i++)
        av_frame_free(&s->frames[i].buf);
    av_freep(&s->frames);
}

static int request_frame(AVFilterLink *link)
{
    AVFilterContext *ctx = link->src;
    ThumbContext *s = ctx->priv;
    int ret = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF && s->n) {
        ret = ff_filter_frame(link, get_best_frame(ctx));
        if (ret < 0)
            return ret;
        ret = AVERROR_EOF;
    }
    if (ret < 0)
        return ret;
    return 0;
}

static int config_props(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    ThumbContext *s = ctx->priv;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);

    s->tb = inlink->time_base;
    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
    s->planeheight[0] = s->planeheight[3] = inlink->h;
    return 0;
}

static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_RGB24,   AV_PIX_FMT_BGR24,
        AV_PIX_FMT_RGBA,    AV_PIX_FMT_BGRA,
        AV_PIX_FMT_RGB0,    AV_PIX_FMT_BGR0,
        AV_PIX_FMT_ABGR,    AV_PIX_FMT_ARGB,
        AV_PIX_FMT_0BGR,    AV_PIX_FMT_0RGB,
        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
        AV_PIX_FMT_YUVJ411P,
        AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
        AV_PIX_FMT_NONE
    };
    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
    if (!fmts_list)
        return AVERROR(ENOMEM);
    return ff_set_common_formats(ctx, fmts_list);
}

static const AVFilterPad thumbnail_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_props,
        .filter_frame = filter_frame,
    },
    { NULL }
};

static const AVFilterPad thumbnail_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .request_frame = request_frame,
    },
    { NULL }
};

AVFilter ff_vf_thumbnail = {
    .name          = "thumbnail",
    .description   = NULL_IF_CONFIG_SMALL("Select the most representative frame in a given sequence of consecutive frames."),
    .priv_size     = sizeof(ThumbContext),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = thumbnail_inputs,
    .outputs       = thumbnail_outputs,
    .priv_class    = &thumbnail_class,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
};