/*
 * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
 * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
 * Copyright (c) 2021 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Calculate VIF between two input videos.
 */
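
/*
 * Illustrative invocation (file names are placeholders): feed the distorted
 * stream as the first ("main") input and the pristine stream as the second
 * ("reference") input, e.g.
 *
 *     ffmpeg -i main.mpg -i ref.mpg -lavfi vif -f null -
 *
 * Per-scale scores are attached to each frame as lavfi.vif.scale.N metadata
 * and a per-scale summary is logged at uninit().
 */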

#include <float.h>

#include "libavutil/avstring.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "framesync.h"
#include "drawutils.h"
#include "formats.h"
#include "internal.h"
#include "vif.h"
#include "video.h"
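
/* Filter state: frame sync for the two inputs, float work buffers for the
 * multi-scale computation, and running per-scale statistics. */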
typedef struct VIFContext {
    const AVClass *class;
    FFFrameSync fs;
    const AVPixFmtDescriptor *desc;
    int width;
    int height;
    int nb_threads;
    float *data_buf[13];
    float **temp;
    float *ref_data;
    float *main_data;
    double vif_sum[4];
    double vif_min[4];
    double vif_max[4];
    uint64_t nb_frames;
} VIFContext;

#define OFFSET(x) offsetof(VIFContext, x)

static const AVOption vif_options[] = {
    { NULL }
};

AVFILTER_DEFINE_CLASS(vif);
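
/* Separable 1-D smoothing kernels, one per scale (widths 17, 9, 5 and 3).
 * Each row sums to approximately 1 and approximates a Gaussian window. */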
static const uint8_t vif_filter1d_width1[4] = { 17, 9, 5, 3 };

static const float vif_filter1d_table[4][17] =
{
    {
        0.00745626912, 0.0142655009, 0.0250313189, 0.0402820669, 0.0594526194,
        0.0804751068, 0.0999041125, 0.113746084, 0.118773937, 0.113746084,
        0.0999041125, 0.0804751068, 0.0594526194, 0.0402820669, 0.0250313189,
        0.0142655009, 0.00745626912
    },
    {
        0.0189780835, 0.0558981746, 0.120920904, 0.192116052, 0.224173605,
        0.192116052, 0.120920904, 0.0558981746, 0.0189780835
    },
    {
        0.054488685, 0.244201347, 0.402619958, 0.244201347, 0.054488685
    },
    {
        0.166378498, 0.667243004, 0.166378498
    }
};
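
/* Per-job arguments for the threaded separable filtering pass. */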
typedef struct ThreadData {
    const float *filter;
    const float *src;
    float *dst;
    int w, h;
    int src_stride;
    int dst_stride;
    int filter_width;
    float **temp;
} ThreadData;
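
/* Decimate a plane by a factor of two in each direction by keeping every
 * other sample; used to step from one scale down to the next. */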
static void vif_dec2(const float *src, float *dst, int w, int h,
                     int src_stride, int dst_stride)
{
    const int dst_px_stride = dst_stride / 2;

    for (int i = 0; i < h / 2; i++) {
        for (int j = 0; j < w / 2; j++)
            dst[i * dst_px_stride + j] = src[(i * 2) * src_stride + (j * 2)];
    }
}
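
/* Accumulate the VIF numerator and denominator for one scale.  For each
 * pixel, the local variances and covariance are derived from the filtered
 * moments, then
 *
 *     num += log2(1 + g^2 * sigma1_sq / (sv_sq + sigma_nsq))
 *     den += log2(1 + sigma1_sq / sigma_nsq)
 *
 * with g = sigma12 / (sigma1_sq + eps) and sv_sq = sigma2_sq - g * sigma12,
 * following the usual VIF formulation. */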
static void vif_statistic(const float *mu1_sq, const float *mu2_sq,
                          const float *mu1_mu2, const float *xx_filt,
                          const float *yy_filt, const float *xy_filt,
                          float *num, float *den, int w, int h)
{
    static const float sigma_nsq = 2;
    static const float sigma_max_inv = 4.0/(255.0*255.0);
    float mu1_sq_val, mu2_sq_val, mu1_mu2_val, xx_filt_val, yy_filt_val, xy_filt_val;
    float sigma1_sq, sigma2_sq, sigma12, g, sv_sq, eps = 1.0e-10f;
    float gain_limit = 100.f;
    float num_val, den_val;
    float accum_num = 0.0f;
    float accum_den = 0.0f;

    for (int i = 0; i < h; i++) {
        float accum_inner_num = 0.f;
        float accum_inner_den = 0.f;

        for (int j = 0; j < w; j++) {
            mu1_sq_val  = mu1_sq[i * w + j];
            mu2_sq_val  = mu2_sq[i * w + j];
            mu1_mu2_val = mu1_mu2[i * w + j];
            xx_filt_val = xx_filt[i * w + j];
            yy_filt_val = yy_filt[i * w + j];
            xy_filt_val = xy_filt[i * w + j];

            sigma1_sq = xx_filt_val - mu1_sq_val;
            sigma2_sq = yy_filt_val - mu2_sq_val;
            sigma12   = xy_filt_val - mu1_mu2_val;

            sigma1_sq = FFMAX(sigma1_sq, 0.0f);
            sigma2_sq = FFMAX(sigma2_sq, 0.0f);
            sigma12   = FFMAX(sigma12, 0.0f);

            g = sigma12 / (sigma1_sq + eps);
            sv_sq = sigma2_sq - g * sigma12;

            if (sigma1_sq < eps) {
                g = 0.0f;
                sv_sq = sigma2_sq;
                sigma1_sq = 0.0f;
            }

            if (sigma2_sq < eps) {
                g = 0.0f;
                sv_sq = 0.0f;
            }

            if (g < 0.0f) {
                sv_sq = sigma2_sq;
                g = 0.0f;
            }
            sv_sq = FFMAX(sv_sq, eps);

            g = FFMIN(g, gain_limit);

            num_val = log2f(1.0f + g * g * sigma1_sq / (sv_sq + sigma_nsq));
            den_val = log2f(1.0f + sigma1_sq / sigma_nsq);

            if (sigma12 < 0.0f)
                num_val = 0.0f;

            if (sigma1_sq < sigma_nsq) {
                num_val = 1.0f - sigma2_sq * sigma_max_inv;
                den_val = 1.0f;
            }

            accum_inner_num += num_val;
            accum_inner_den += den_val;
        }

        accum_num += accum_inner_num;
        accum_den += accum_inner_den;
    }

    num[0] = accum_num;
    den[0] = accum_den;
}
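
/* Compute the elementwise products x*x, y*y and x*y; once filtered they
 * become the second-order local moments consumed by vif_statistic(). */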
static void vif_xx_yy_xy(const float *x, const float *y, float *xx, float *yy,
                         float *xy, int w, int h)
{
    for (int i = 0; i < h; i++) {
        for (int j = 0; j < w; j++) {
            float xval = x[j];
            float yval = y[j];
            float xxval = xval * xval;
            float yyval = yval * yval;
            float xyval = xval * yval;

            xx[j] = xxval;
            yy[j] = yyval;
            xy[j] = xyval;
        }

        xx += w;
        yy += w;
        xy += w;
        x  += w;
        y  += w;
    }
}
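
/* Threaded separable convolution: each job filters a band of rows with the
 * 1-D kernel, first vertically into a per-thread temp row and then
 * horizontally into dst; samples outside the plane are mirrored. */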
static int vif_filter1d(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ThreadData *td = arg;
    const float *filter = td->filter;
    const float *src = td->src;
    float *dst = td->dst;
    int w = td->w;
    int h = td->h;
    int src_stride = td->src_stride;
    int dst_stride = td->dst_stride;
    int filt_w = td->filter_width;
    float *temp = td->temp[jobnr];
    const int slice_start = (h * jobnr) / nb_jobs;
    const int slice_end = (h * (jobnr+1)) / nb_jobs;

    for (int i = slice_start; i < slice_end; i++) {
        /** Vertical pass. */
        for (int j = 0; j < w; j++) {
            float sum = 0.f;

            if (i >= filt_w / 2 && i < h - filt_w / 2 - 1) {
                for (int filt_i = 0; filt_i < filt_w; filt_i++) {
                    const float filt_coeff = filter[filt_i];
                    float img_coeff;
                    int ii = i - filt_w / 2 + filt_i;

                    img_coeff = src[ii * src_stride + j];
                    sum += filt_coeff * img_coeff;
                }
            } else {
                for (int filt_i = 0; filt_i < filt_w; filt_i++) {
                    const float filt_coeff = filter[filt_i];
                    int ii = i - filt_w / 2 + filt_i;
                    float img_coeff;

                    ii = ii < 0 ? -ii : (ii >= h ? 2 * h - ii - 1 : ii);

                    img_coeff = src[ii * src_stride + j];
                    sum += filt_coeff * img_coeff;
                }
            }

            temp[j] = sum;
        }

        /** Horizontal pass. */
        for (int j = 0; j < w; j++) {
            float sum = 0.f;

            if (j >= filt_w / 2 && j < w - filt_w / 2 - 1) {
                for (int filt_j = 0; filt_j < filt_w; filt_j++) {
                    const float filt_coeff = filter[filt_j];
                    int jj = j - filt_w / 2 + filt_j;
                    float img_coeff;

                    img_coeff = temp[jj];
                    sum += filt_coeff * img_coeff;
                }
            } else {
                for (int filt_j = 0; filt_j < filt_w; filt_j++) {
                    const float filt_coeff = filter[filt_j];
                    int jj = j - filt_w / 2 + filt_j;
                    float img_coeff;

                    jj = jj < 0 ? -jj : (jj >= w ? 2 * w - jj - 1 : jj);

                    img_coeff = temp[jj];
                    sum += filt_coeff * img_coeff;
                }
            }

            dst[i * dst_stride + j] = sum;
        }
    }

    return 0;
}
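
/* Compute the VIF score at four scales.  From the second scale on, the
 * previous scale's planes are first smoothed and decimated by two; the
 * filtered first- and second-order moments then feed vif_statistic(), and
 * the score for each scale is num / den (1.0 when the denominator is ~0). */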
int ff_compute_vif2(AVFilterContext *ctx,
                    const float *ref, const float *main, int w, int h,
                    int ref_stride, int main_stride, float *score,
                    float *data_buf[14], float **temp,
                    int gnb_threads)
{
    ThreadData td;
    float *ref_scale = data_buf[0];
    float *main_scale = data_buf[1];
    float *ref_sq = data_buf[2];
    float *main_sq = data_buf[3];
    float *ref_main = data_buf[4];
    float *mu1 = data_buf[5];
    float *mu2 = data_buf[6];
    float *mu1_sq = data_buf[7];
    float *mu2_sq = data_buf[8];
    float *mu1_mu2 = data_buf[9];
    float *ref_sq_filt = data_buf[10];
    float *main_sq_filt = data_buf[11];
    float *ref_main_filt = data_buf[12];
    float *curr_ref_scale = (float *)ref;
    float *curr_main_scale = (float *)main;
    int curr_ref_stride = ref_stride;
    int curr_main_stride = main_stride;
    float num = 0.f;
    float den = 0.f;

    for (int scale = 0; scale < 4; scale++) {
        const float *filter = vif_filter1d_table[scale];
        int filter_width = vif_filter1d_width1[scale];
        const int nb_threads = FFMIN(h, gnb_threads);
        int buf_valid_w = w;
        int buf_valid_h = h;

        td.filter = filter;
        td.filter_width = filter_width;

        if (scale > 0) {
            td.src = curr_ref_scale;
            td.dst = mu1;
            td.w = w;
            td.h = h;
            td.src_stride = curr_ref_stride;
            td.dst_stride = w;
            td.temp = temp;
            ctx->internal->execute(ctx, vif_filter1d, &td, NULL, nb_threads);

            td.src = curr_main_scale;
            td.dst = mu2;
            td.src_stride = curr_main_stride;
            ctx->internal->execute(ctx, vif_filter1d, &td, NULL, nb_threads);

            vif_dec2(mu1, ref_scale, buf_valid_w, buf_valid_h, w, w);
            vif_dec2(mu2, main_scale, buf_valid_w, buf_valid_h, w, w);

            w = buf_valid_w / 2;
            h = buf_valid_h / 2;

            buf_valid_w = w;
            buf_valid_h = h;

            curr_ref_scale = ref_scale;
            curr_main_scale = main_scale;

            curr_ref_stride = w;
            curr_main_stride = w;
        }

        td.src = curr_ref_scale;
        td.dst = mu1;
        td.w = w;
        td.h = h;
        td.src_stride = curr_ref_stride;
        td.dst_stride = w;
        td.temp = temp;
        ctx->internal->execute(ctx, vif_filter1d, &td, NULL, nb_threads);

        td.src = curr_main_scale;
        td.dst = mu2;
        td.src_stride = curr_main_stride;
        ctx->internal->execute(ctx, vif_filter1d, &td, NULL, nb_threads);

        vif_xx_yy_xy(mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, w, h);
        vif_xx_yy_xy(curr_ref_scale, curr_main_scale, ref_sq, main_sq, ref_main, w, h);

        td.src = ref_sq;
        td.dst = ref_sq_filt;
        td.src_stride = w;
        ctx->internal->execute(ctx, vif_filter1d, &td, NULL, nb_threads);

        td.src = main_sq;
        td.dst = main_sq_filt;
        td.src_stride = w;
        ctx->internal->execute(ctx, vif_filter1d, &td, NULL, nb_threads);

        td.src = ref_main;
        td.dst = ref_main_filt;
        ctx->internal->execute(ctx, vif_filter1d, &td, NULL, nb_threads);

        vif_statistic(mu1_sq, mu2_sq, mu1_mu2, ref_sq_filt, main_sq_filt,
                      ref_main_filt, &num, &den, w, h);

        score[scale] = den <= FLT_EPSILON ? 1.f : num / den;
    }

    return 0;
}
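
/* Convert a plane of 8- or 16-bit samples to float and subtract a fixed
 * offset of 128, writing into the tightly packed float work buffers. */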
#define offset_fn(type, bits) \
static void offset_##bits##bit(VIFContext *s, \
                               const AVFrame *ref, \
                               AVFrame *main, int stride) \
{ \
    int w = s->width; \
    int h = s->height; \
    \
    int ref_stride = ref->linesize[0]; \
    int main_stride = main->linesize[0]; \
    \
    const type *ref_ptr = (const type *) ref->data[0]; \
    const type *main_ptr = (const type *) main->data[0]; \
    \
    float *ref_ptr_data = s->ref_data; \
    float *main_ptr_data = s->main_data; \
    \
    for (int i = 0; i < h; i++) { \
        for (int j = 0; j < w; j++) { \
            ref_ptr_data[j] = ref_ptr[j] - 128.f; \
            main_ptr_data[j] = main_ptr[j] - 128.f; \
        } \
        ref_ptr += ref_stride / sizeof(type); \
        ref_ptr_data += w; \
        main_ptr += main_stride / sizeof(type); \
        main_ptr_data += w; \
    } \
}

offset_fn(uint8_t, 8)
offset_fn(uint16_t, 10)

static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char value[257];
    snprintf(value, sizeof(value), "%f", d);
    av_dict_set(metadata, key, value, 0);
}
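
/* Per-frame driver: convert both frames to offset floats, run the
 * multi-scale VIF computation, export one lavfi.vif.scale.N metadata entry
 * per scale and update the running min/max/sum used for the final summary. */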
static AVFrame *do_vif(AVFilterContext *ctx, AVFrame *main, const AVFrame *ref)
{
    VIFContext *s = ctx->priv;
    AVDictionary **metadata = &main->metadata;
    float score[4];

    if (s->desc->comp[0].depth <= 8) {
        offset_8bit(s, ref, main, s->width);
    } else {
        offset_10bit(s, ref, main, s->width);
    }

    ff_compute_vif2(ctx,
                    s->ref_data, s->main_data, s->width,
                    s->height, s->width, s->width,
                    score, s->data_buf, s->temp,
                    s->nb_threads);

    set_meta(metadata, "lavfi.vif.scale.0", score[0]);
    set_meta(metadata, "lavfi.vif.scale.1", score[1]);
    set_meta(metadata, "lavfi.vif.scale.2", score[2]);
    set_meta(metadata, "lavfi.vif.scale.3", score[3]);

    for (int i = 0; i < 4; i++) {
        s->vif_min[i]  = FFMIN(s->vif_min[i], score[i]);
        s->vif_max[i]  = FFMAX(s->vif_max[i], score[i]);
        s->vif_sum[i] += score[i];
    }

    s->nb_frames++;

    return main;
}
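
/* Only planar 8- and 10-bit YUV is accepted; the score is computed on the
 * luma plane alone. */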
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
        AV_PIX_FMT_NONE
    };

    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
    if (!fmts_list)
        return AVERROR(ENOMEM);
    return ff_set_common_formats(ctx, fmts_list);
}
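
/* Validate that both inputs match in size and pixel format, then allocate
 * the float work buffers and the per-thread temp rows. */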
static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    VIFContext *s = ctx->priv;

    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
        ctx->inputs[0]->h != ctx->inputs[1]->h) {
        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be the same.\n");
        return AVERROR(EINVAL);
    }

    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
        av_log(ctx, AV_LOG_ERROR, "Inputs must have the same pixel format.\n");
        return AVERROR(EINVAL);
    }

    s->desc = av_pix_fmt_desc_get(inlink->format);
    s->width = ctx->inputs[0]->w;
    s->height = ctx->inputs[0]->h;
    s->nb_threads = ff_filter_get_nb_threads(ctx);

    for (int i = 0; i < 4; i++) {
        s->vif_min[i] =  DBL_MAX;
        s->vif_max[i] = -DBL_MAX;
    }

    for (int i = 0; i < 13; i++) {
        if (!(s->data_buf[i] = av_calloc(s->width, s->height * sizeof(float))))
            return AVERROR(ENOMEM);
    }

    if (!(s->ref_data = av_calloc(s->width, s->height * sizeof(float))))
        return AVERROR(ENOMEM);

    if (!(s->main_data = av_calloc(s->width, s->height * sizeof(float))))
        return AVERROR(ENOMEM);

    if (!(s->temp = av_calloc(s->nb_threads, sizeof(s->temp[0]))))
        return AVERROR(ENOMEM);

    for (int i = 0; i < s->nb_threads; i++) {
        if (!(s->temp[i] = av_calloc(s->width, sizeof(float))))
            return AVERROR(ENOMEM);
    }

    return 0;
}
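
/* Framesync callback: fetch the synchronized main/reference pair, score it
 * unless the filter is disabled, and forward the main frame downstream. */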
static int process_frame(FFFrameSync *fs)
{
    AVFilterContext *ctx = fs->parent;
    VIFContext *s = fs->opaque;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out, *main = NULL, *ref = NULL;
    int ret;

    ret = ff_framesync_dualinput_get(fs, &main, &ref);
    if (ret < 0)
        return ret;

    if (ctx->is_disabled || !ref) {
        out = main;
    } else {
        out = do_vif(ctx, main, ref);
    }

    out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, outlink->time_base);

    return ff_filter_frame(outlink, out);
}
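
/* Propagate the main input's properties to the output and configure the
 * two-input frame sync. */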
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    VIFContext *s = ctx->priv;
    AVFilterLink *mainlink = ctx->inputs[0];
    FFFrameSyncIn *in;
    int ret;

    outlink->w = mainlink->w;
    outlink->h = mainlink->h;
    outlink->time_base = mainlink->time_base;
    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
    outlink->frame_rate = mainlink->frame_rate;

    if ((ret = ff_framesync_init(&s->fs, ctx, 2)) < 0)
        return ret;

    in = s->fs.in;
    in[0].time_base = mainlink->time_base;
    in[1].time_base = ctx->inputs[1]->time_base;
    in[0].sync   = 2;
    in[0].before = EXT_STOP;
    in[0].after  = EXT_STOP;
    in[1].sync   = 1;
    in[1].before = EXT_STOP;
    in[1].after  = EXT_STOP;

    s->fs.opaque   = s;
    s->fs.on_event = process_frame;

    return ff_framesync_configure(&s->fs);
}

static int activate(AVFilterContext *ctx)
{
    VIFContext *s = ctx->priv;
    return ff_framesync_activate(&s->fs);
}
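
/* Log the per-scale average/min/max over all processed frames, then free
 * every work buffer and tear down the frame sync. */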
static av_cold void uninit(AVFilterContext *ctx)
{
    VIFContext *s = ctx->priv;

    if (s->nb_frames > 0) {
        for (int i = 0; i < 4; i++)
            av_log(ctx, AV_LOG_INFO, "VIF scale=%d average:%f min:%f max:%f\n",
                   i, s->vif_sum[i] / s->nb_frames, s->vif_min[i], s->vif_max[i]);
    }

    for (int i = 0; i < 13; i++)
        av_freep(&s->data_buf[i]);

    av_freep(&s->ref_data);
    av_freep(&s->main_data);

    for (int i = 0; i < s->nb_threads && s->temp; i++)
        av_freep(&s->temp[i]);

    av_freep(&s->temp);

    ff_framesync_uninit(&s->fs);
}

static const AVFilterPad vif_inputs[] = {
    {
        .name         = "main",
        .type         = AVMEDIA_TYPE_VIDEO,
    },{
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input_ref,
    },
    { NULL }
};

static const AVFilterPad vif_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
    },
    { NULL }
};

AVFilter ff_vf_vif = {
    .name          = "vif",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VIF between two video streams."),
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(VIFContext),
    .priv_class    = &vif_class,
    .activate      = activate,
    .inputs        = vif_inputs,
    .outputs       = vif_outputs,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
};