You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

370 lines
11KB

  1. /*
  2. * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
  3. * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * Calculate VMAF Motion score.
  24. */
  25. #include "libavutil/opt.h"
  26. #include "libavutil/pixdesc.h"
  27. #include "avfilter.h"
  28. #include "drawutils.h"
  29. #include "formats.h"
  30. #include "internal.h"
  31. #include "vmaf_motion.h"
  /* Number of fractional bits used when the blur taps are converted to
   * fixed point (the taps get scaled by 1 << BIT_SHIFT). */
  #define BIT_SHIFT 15

  /* Symmetric 5-tap blur kernel in floating point; the taps add up to
   * approximately 1.0. */
  static const float FILTER_5[5] = {
      0.054488685, 0.244201342, 0.402619947, 0.244201342, 0.054488685,
  };
  40. typedef struct VMAFMotionContext {
  41. const AVClass *class;
  42. VMAFMotionData data;
  43. FILE *stats_file;
  44. char *stats_file_str;
  45. } VMAFMotionContext;
  46. #define OFFSET(x) offsetof(VMAFMotionContext, x)
  47. #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  48. static const AVOption vmafmotion_options[] = {
  49. {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
  50. { NULL }
  51. };
  52. AVFILTER_DEFINE_CLASS(vmafmotion);
  /**
   * Sum of absolute differences between two planes of 16-bit samples.
   *
   * @param img1         first sample of the first row of plane 1
   * @param img2         first sample of the first row of plane 2
   * @param w            number of samples compared in each row
   * @param h            number of rows compared
   * @param _img1_stride distance between consecutive rows of img1, in bytes;
   *                     may be negative
   * @param _img2_stride distance between consecutive rows of img2, in bytes;
   *                     may be negative
   * @return the sum of |img1 - img2| over all w*h positions, 0 if w or h is
   *         not positive
   */
  static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                            int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
  {
      /* Divide by a *signed* element size. Dividing a ptrdiff_t by the
       * unsigned result of sizeof converts a negative stride to a huge
       * unsigned value first and produces a bogus row step. */
      const ptrdiff_t img1_stride = _img1_stride / (ptrdiff_t) sizeof(*img1);
      const ptrdiff_t img2_stride = _img2_stride / (ptrdiff_t) sizeof(*img2);
      uint64_t sum = 0;
      int i, j;

      for (i = 0; i < h; i++) {
          /* Row pointers are derived per row, so no pointer is ever formed
           * for a row that is not actually read. */
          const uint16_t *row1 = img1 + i * img1_stride;
          const uint16_t *row2 = img2 + i * img2_stride;

          for (j = 0; j < w; j++) {
              /* uint16_t operands promote to int: the difference is signed */
              sum += abs(row1[j] - row2[j]);
          }
      }
      return sum;
  }
  69. static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
  70. uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
  71. ptrdiff_t _dst_stride)
  72. {
  73. ptrdiff_t src_stride = _src_stride / sizeof(*src);
  74. ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
  75. int radius = filt_w / 2;
  76. int borders_left = radius;
  77. int borders_right = w - (filt_w - radius);
  78. int i, j, k;
  79. int sum = 0;
  80. for (i = 0; i < h; i++) {
  81. for (j = 0; j < borders_left; j++) {
  82. sum = 0;
  83. for (k = 0; k < filt_w; k++) {
  84. int j_tap = FFABS(j - radius + k);
  85. if (j_tap >= w) {
  86. j_tap = w - (j_tap - w + 1);
  87. }
  88. sum += filter[k] * src[i * src_stride + j_tap];
  89. }
  90. dst[i * dst_stride + j] = sum >> BIT_SHIFT;
  91. }
  92. for (j = borders_left; j < borders_right; j++) {
  93. int sum = 0;
  94. for (k = 0; k < filt_w; k++) {
  95. sum += filter[k] * src[i * src_stride + j - radius + k];
  96. }
  97. dst[i * dst_stride + j] = sum >> BIT_SHIFT;
  98. }
  99. for (j = borders_right; j < w; j++) {
  100. sum = 0;
  101. for (k = 0; k < filt_w; k++) {
  102. int j_tap = FFABS(j - radius + k);
  103. if (j_tap >= w) {
  104. j_tap = w - (j_tap - w + 1);
  105. }
  106. sum += filter[k] * src[i * src_stride + j_tap];
  107. }
  108. dst[i * dst_stride + j] = sum >> BIT_SHIFT;
  109. }
  110. }
  111. }
  /* Map a vertical tap position onto [0, len): reflect once around either
   * edge, then clamp to 0 in case the plane has too few rows for a single
   * reflection to land inside it. */
  static int conv_y_mirror(int pos, int len)
  {
      if (pos < 0) {
          pos = -pos;
      }
      if (pos >= len) {
          pos = len - (pos - len + 1);
      }
      return pos < 0 ? 0 : pos;
  }

  /*
   * conv_y_fn(type, bits) defines convolution_y_<bits>bit(): the vertical pass
   * of the fixed-point blur for input samples stored as `type`.
   *
   * Each output sample is the sum of filt_w vertically neighbouring input
   * samples weighted by filter[], shifted right by `bits`. Taps falling above
   * the first or below the last row are mirrored back into the plane.
   *
   *   filter       fixed-point filter taps, filt_w entries
   *   filt_w       number of taps
   *   _src         input plane, reinterpreted as an array of `type`
   *   dst          output plane of 16-bit samples
   *   w, h         plane size in samples / rows
   *   _src_stride  distance between input rows in bytes; may be negative
   *   _dst_stride  distance between output rows in bytes; may be negative
   *
   * Strides are divided by a signed element size so that negative byte
   * strides stay negative, and both border limits are clamped to [0, h] so
   * that planes with fewer rows than the filter never index a row outside
   * the plane.
   */
  #define conv_y_fn(type, bits) \
  static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                        const uint8_t *_src, uint16_t *dst, \
                                        int w, int h, ptrdiff_t _src_stride, \
                                        ptrdiff_t _dst_stride) \
  { \
      const type *src = (const type *) _src; \
      const ptrdiff_t src_stride = _src_stride / (ptrdiff_t) sizeof(*src); \
      const ptrdiff_t dst_stride = _dst_stride / (ptrdiff_t) sizeof(*dst); \
      const int radius = filt_w / 2; \
      int borders_top = radius; \
      int borders_bottom = h - (filt_w - radius); \
      int i, j, k; \
      \
      if (borders_top > h) { \
          borders_top = h; \
      } \
      if (borders_bottom < borders_top) { \
          borders_bottom = borders_top; \
      } \
      \
      for (i = 0; i < borders_top; i++) { \
          for (j = 0; j < w; j++) { \
              int sum = 0; \
              for (k = 0; k < filt_w; k++) { \
                  const int i_tap = conv_y_mirror(i - radius + k, h); \
                  sum += filter[k] * src[i_tap * src_stride + j]; \
              } \
              dst[i * dst_stride + j] = sum >> bits; \
          } \
      } \
      for (i = borders_top; i < borders_bottom; i++) { \
          for (j = 0; j < w; j++) { \
              int sum = 0; \
              for (k = 0; k < filt_w; k++) { \
                  sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
              } \
              dst[i * dst_stride + j] = sum >> bits; \
          } \
      } \
      for (i = borders_bottom; i < h; i++) { \
          for (j = 0; j < w; j++) { \
              int sum = 0; \
              for (k = 0; k < filt_w; k++) { \
                  const int i_tap = conv_y_mirror(i - radius + k, h); \
                  sum += filter[k] * src[i_tap * src_stride + j]; \
              } \
              dst[i * dst_stride + j] = sum >> bits; \
          } \
      } \
  }

  /* 8-bit input stored in uint8_t, 10-bit input stored in uint16_t */
  conv_y_fn(uint8_t, 8)
  conv_y_fn(uint16_t, 10)
  165. static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
  166. dsp->convolution_x = convolution_x;
  167. dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
  168. dsp->sad = image_sad;
  169. }
  170. double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
  171. {
  172. double score;
  173. s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
  174. s->width, s->height, ref->linesize[0], s->stride);
  175. s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
  176. s->width, s->height, s->stride, s->stride);
  177. if (!s->nb_frames) {
  178. score = 0.0;
  179. } else {
  180. uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
  181. s->width, s->height, s->stride, s->stride);
  182. // the output score is always normalized to 8 bits
  183. score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
  184. }
  185. FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
  186. s->nb_frames++;
  187. s->motion_sum += score;
  188. return score;
  189. }
  190. static void set_meta(AVDictionary **metadata, const char *key, float d)
  191. {
  192. char value[128];
  193. snprintf(value, sizeof(value), "%0.2f", d);
  194. av_dict_set(metadata, key, value, 0);
  195. }
  196. static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
  197. {
  198. VMAFMotionContext *s = ctx->priv;
  199. double score;
  200. score = ff_vmafmotion_process(&s->data, ref);
  201. set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
  202. if (s->stats_file) {
  203. fprintf(s->stats_file,
  204. "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
  205. }
  206. }
  207. int ff_vmafmotion_init(VMAFMotionData *s,
  208. int w, int h, enum AVPixelFormat fmt)
  209. {
  210. size_t data_sz;
  211. int i;
  212. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
  213. s->width = w;
  214. s->height = h;
  215. s->stride = FFALIGN(w * sizeof(uint16_t), 32);
  216. data_sz = (size_t) s->stride * h;
  217. if (!(s->blur_data[0] = av_malloc(data_sz)) ||
  218. !(s->blur_data[1] = av_malloc(data_sz)) ||
  219. !(s->temp_data = av_malloc(data_sz))) {
  220. return AVERROR(ENOMEM);
  221. }
  222. for (i = 0; i < 5; i++) {
  223. s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
  224. }
  225. vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
  226. return 0;
  227. }
  228. static int query_formats(AVFilterContext *ctx)
  229. {
  230. AVFilterFormats *fmts_list = NULL;
  231. int format, ret;
  232. for (format = 0; av_pix_fmt_desc_get(format); format++) {
  233. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
  234. if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL)) &&
  235. (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
  236. (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
  237. (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
  238. (ret = ff_add_format(&fmts_list, format)) < 0)
  239. return ret;
  240. }
  241. return ff_set_common_formats(ctx, fmts_list);
  242. }
  243. static int config_input_ref(AVFilterLink *inlink)
  244. {
  245. AVFilterContext *ctx = inlink->dst;
  246. VMAFMotionContext *s = ctx->priv;
  247. return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
  248. ctx->inputs[0]->h, ctx->inputs[0]->format);
  249. }
  250. double ff_vmafmotion_uninit(VMAFMotionData *s)
  251. {
  252. av_free(s->blur_data[0]);
  253. av_free(s->blur_data[1]);
  254. av_free(s->temp_data);
  255. return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
  256. }
  257. static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
  258. {
  259. AVFilterContext *ctx = inlink->dst;
  260. do_vmafmotion(ctx, ref);
  261. return ff_filter_frame(ctx->outputs[0], ref);
  262. }
  263. static av_cold int init(AVFilterContext *ctx)
  264. {
  265. VMAFMotionContext *s = ctx->priv;
  266. if (s->stats_file_str) {
  267. if (!strcmp(s->stats_file_str, "-")) {
  268. s->stats_file = stdout;
  269. } else {
  270. s->stats_file = fopen(s->stats_file_str, "w");
  271. if (!s->stats_file) {
  272. int err = AVERROR(errno);
  273. char buf[128];
  274. av_strerror(err, buf, sizeof(buf));
  275. av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
  276. s->stats_file_str, buf);
  277. return err;
  278. }
  279. }
  280. }
  281. return 0;
  282. }
  283. static av_cold void uninit(AVFilterContext *ctx)
  284. {
  285. VMAFMotionContext *s = ctx->priv;
  286. double avg_motion = ff_vmafmotion_uninit(&s->data);
  287. if (s->data.nb_frames > 0) {
  288. av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
  289. }
  290. if (s->stats_file && s->stats_file != stdout)
  291. fclose(s->stats_file);
  292. }
  293. static const AVFilterPad vmafmotion_inputs[] = {
  294. {
  295. .name = "reference",
  296. .type = AVMEDIA_TYPE_VIDEO,
  297. .filter_frame = filter_frame,
  298. .config_props = config_input_ref,
  299. },
  300. { NULL }
  301. };
  302. static const AVFilterPad vmafmotion_outputs[] = {
  303. {
  304. .name = "default",
  305. .type = AVMEDIA_TYPE_VIDEO,
  306. },
  307. { NULL }
  308. };
  309. AVFilter ff_vf_vmafmotion = {
  310. .name = "vmafmotion",
  311. .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
  312. .init = init,
  313. .uninit = uninit,
  314. .query_formats = query_formats,
  315. .priv_size = sizeof(VMAFMotionContext),
  316. .priv_class = &vmafmotion_class,
  317. .inputs = vmafmotion_inputs,
  318. .outputs = vmafmotion_outputs,
  319. };