You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1216 lines
39KB

  1. /*
  2. * Copyright (C) 2010-2011 Kevin Stone
  3. * Copyright (C) 2016 Paul B Mahol
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  20. */
  21. #include <float.h>
  22. #include "libavutil/common.h"
  23. #include "libavutil/float_dsp.h"
  24. #include "libavutil/imgutils.h"
  25. #include "libavutil/opt.h"
  26. #include "libavutil/pixdesc.h"
  27. #include "avfilter.h"
  28. #include "formats.h"
  29. #include "internal.h"
  30. #include "video.h"
  31. typedef struct FrameData {
  32. uint8_t *paddedp[3];
  33. int padded_stride[3];
  34. int padded_width[3];
  35. int padded_height[3];
  36. uint8_t *dstp[3];
  37. int dst_stride[3];
  38. int field[3];
  39. int32_t *lcount[3];
  40. float *input;
  41. float *temp;
  42. } FrameData;
  43. typedef struct NNEDIContext {
  44. const AVClass *class;
  45. char *weights_file;
  46. AVFrame *src;
  47. AVFrame *second;
  48. AVFrame *dst;
  49. int eof;
  50. int64_t cur_pts;
  51. AVFloatDSPContext *fdsp;
  52. int nb_planes;
  53. int linesize[4];
  54. int planeheight[4];
  55. float *weights0;
  56. float *weights1[2];
  57. int asize;
  58. int nns;
  59. int xdia;
  60. int ydia;
  61. // Parameters
  62. int deint;
  63. int field;
  64. int process_plane;
  65. int nsize;
  66. int nnsparam;
  67. int qual;
  68. int etype;
  69. int pscrn;
  70. int fapprox;
  71. int max_value;
  72. void (*copy_pad)(const AVFrame *, FrameData *, struct NNEDIContext *, int);
  73. void (*evalfunc_0)(struct NNEDIContext *, FrameData *);
  74. void (*evalfunc_1)(struct NNEDIContext *, FrameData *);
  75. // Functions used in evalfunc_0
  76. void (*readpixels)(const uint8_t *, const int, float *);
  77. void (*compute_network0)(struct NNEDIContext *s, const float *, const float *, uint8_t *);
  78. int32_t (*process_line0)(const uint8_t *, int, uint8_t *, const uint8_t *, const int, const int, const int);
  79. // Functions used in evalfunc_1
  80. void (*extract)(const uint8_t *, const int, const int, const int, float *, float *);
  81. void (*dot_prod)(struct NNEDIContext *, const float *, const float *, float *, const int, const int, const float *);
  82. void (*expfunc)(float *, const int);
  83. void (*wae5)(const float *, const int, float *);
  84. FrameData frame_data;
  85. } NNEDIContext;
  86. #define OFFSET(x) offsetof(NNEDIContext, x)
  87. #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
  88. static const AVOption nnedi_options[] = {
  89. {"weights", "set weights file", OFFSET(weights_file), AV_OPT_TYPE_STRING, {.str="nnedi3_weights.bin"}, 0, 0, FLAGS },
  90. {"deint", "set which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "deint" },
  91. {"all", "deinterlace all frames", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "deint" },
  92. {"interlaced", "only deinterlace frames marked as interlaced", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "deint" },
  93. {"field", "set mode of operation", OFFSET(field), AV_OPT_TYPE_INT, {.i64=-1}, -2, 3, FLAGS, "field" },
  94. {"af", "use frame flags, both fields", 0, AV_OPT_TYPE_CONST, {.i64=-2}, 0, 0, FLAGS, "field" },
  95. {"a", "use frame flags, single field", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, FLAGS, "field" },
  96. {"t", "use top field only", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "field" },
  97. {"b", "use bottom field only", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "field" },
  98. {"tf", "use both fields, top first", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "field" },
  99. {"bf", "use both fields, bottom first", 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "field" },
  100. {"planes", "set which planes to process", OFFSET(process_plane), AV_OPT_TYPE_INT, {.i64=7}, 0, 7, FLAGS },
  101. {"nsize", "set size of local neighborhood around each pixel, used by the predictor neural network", OFFSET(nsize), AV_OPT_TYPE_INT, {.i64=6}, 0, 6, FLAGS, "nsize" },
  102. {"s8x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "nsize" },
  103. {"s16x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "nsize" },
  104. {"s32x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "nsize" },
  105. {"s48x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "nsize" },
  106. {"s8x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "nsize" },
  107. {"s16x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, FLAGS, "nsize" },
  108. {"s32x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, FLAGS, "nsize" },
  109. {"nns", "set number of neurons in predictor neural network", OFFSET(nnsparam), AV_OPT_TYPE_INT, {.i64=1}, 0, 4, FLAGS, "nns" },
  110. {"n16", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "nns" },
  111. {"n32", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "nns" },
  112. {"n64", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "nns" },
  113. {"n128", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "nns" },
  114. {"n256", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "nns" },
  115. {"qual", "set quality", OFFSET(qual), AV_OPT_TYPE_INT, {.i64=1}, 1, 2, FLAGS, "qual" },
  116. {"fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "qual" },
  117. {"slow", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "qual" },
  118. {"etype", "set which set of weights to use in the predictor", OFFSET(etype), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "etype" },
  119. {"a", "weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "etype" },
  120. {"s", "weights trained to minimize squared error", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "etype" },
  121. {"pscrn", "set prescreening", OFFSET(pscrn), AV_OPT_TYPE_INT, {.i64=2}, 0, 2, FLAGS, "pscrn" },
  122. {"none", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "pscrn" },
  123. {"original", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "pscrn" },
  124. {"new", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "pscrn" },
  125. {"fapprox", NULL, OFFSET(fapprox), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS },
  126. { NULL }
  127. };
  128. AVFILTER_DEFINE_CLASS(nnedi);
  129. static int config_input(AVFilterLink *inlink)
  130. {
  131. AVFilterContext *ctx = inlink->dst;
  132. NNEDIContext *s = ctx->priv;
  133. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
  134. int ret;
  135. s->nb_planes = av_pix_fmt_count_planes(inlink->format);
  136. if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
  137. return ret;
  138. s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
  139. s->planeheight[0] = s->planeheight[3] = inlink->h;
  140. return 0;
  141. }
  142. static int config_output(AVFilterLink *outlink)
  143. {
  144. AVFilterContext *ctx = outlink->src;
  145. NNEDIContext *s = ctx->priv;
  146. outlink->time_base.num = ctx->inputs[0]->time_base.num;
  147. outlink->time_base.den = ctx->inputs[0]->time_base.den * 2;
  148. outlink->w = ctx->inputs[0]->w;
  149. outlink->h = ctx->inputs[0]->h;
  150. if (s->field > 1 || s->field == -2)
  151. outlink->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
  152. (AVRational){2, 1});
  153. return 0;
  154. }
  155. static int query_formats(AVFilterContext *ctx)
  156. {
  157. static const enum AVPixelFormat pix_fmts[] = {
  158. AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
  159. AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
  160. AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
  161. AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
  162. AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
  163. AV_PIX_FMT_YUVJ411P,
  164. AV_PIX_FMT_GBRP,
  165. AV_PIX_FMT_GRAY8,
  166. AV_PIX_FMT_NONE
  167. };
  168. AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
  169. if (!fmts_list)
  170. return AVERROR(ENOMEM);
  171. return ff_set_common_formats(ctx, fmts_list);
  172. }
  173. static void copy_pad(const AVFrame *src, FrameData *frame_data, NNEDIContext *s, int fn)
  174. {
  175. const int off = 1 - fn;
  176. int plane, y, x;
  177. for (plane = 0; plane < s->nb_planes; plane++) {
  178. const uint8_t *srcp = (const uint8_t *)src->data[plane];
  179. uint8_t *dstp = (uint8_t *)frame_data->paddedp[plane];
  180. const int src_stride = src->linesize[plane];
  181. const int dst_stride = frame_data->padded_stride[plane];
  182. const int src_height = s->planeheight[plane];
  183. const int dst_height = frame_data->padded_height[plane];
  184. const int src_width = s->linesize[plane];
  185. const int dst_width = frame_data->padded_width[plane];
  186. int c = 4;
  187. if (!(s->process_plane & (1 << plane)))
  188. continue;
  189. // Copy.
  190. for (y = off; y < src_height; y += 2)
  191. memcpy(dstp + 32 + (6 + y) * dst_stride,
  192. srcp + y * src_stride,
  193. src_width * sizeof(uint8_t));
  194. // And pad.
  195. dstp += (6 + off) * dst_stride;
  196. for (y = 6 + off; y < dst_height - 6; y += 2) {
  197. int c = 2;
  198. for (x = 0; x < 32; x++)
  199. dstp[x] = dstp[64 - x];
  200. for (x = dst_width - 32; x < dst_width; x++, c += 2)
  201. dstp[x] = dstp[x - c];
  202. dstp += dst_stride * 2;
  203. }
  204. dstp = (uint8_t *)frame_data->paddedp[plane];
  205. for (y = off; y < 6; y += 2)
  206. memcpy(dstp + y * dst_stride,
  207. dstp + (12 + 2 * off - y) * dst_stride,
  208. dst_width * sizeof(uint8_t));
  209. for (y = dst_height - 6 + off; y < dst_height; y += 2, c += 4)
  210. memcpy(dstp + y * dst_stride,
  211. dstp + (y - c) * dst_stride,
  212. dst_width * sizeof(uint8_t));
  213. }
  214. }
  215. static void elliott(float *data, const int n)
  216. {
  217. int i;
  218. for (i = 0; i < n; i++)
  219. data[i] = data[i] / (1.0f + FFABS(data[i]));
  220. }
  221. static void dot_prod(NNEDIContext *s, const float *data, const float *weights, float *vals, const int n, const int len, const float *scale)
  222. {
  223. int i;
  224. for (i = 0; i < n; i++) {
  225. float sum;
  226. sum = s->fdsp->scalarproduct_float(data, &weights[i * len], len);
  227. vals[i] = sum * scale[0] + weights[n * len + i];
  228. }
  229. }
  230. static void dot_prods(NNEDIContext *s, const float *dataf, const float *weightsf, float *vals, const int n, const int len, const float *scale)
  231. {
  232. const int16_t *data = (int16_t *)dataf;
  233. const int16_t *weights = (int16_t *)weightsf;
  234. const float *wf = (float *)&weights[n * len];
  235. int i, j;
  236. for (i = 0; i < n; i++) {
  237. int sum = 0, off = ((i >> 2) << 3) + (i & 3);
  238. for (j = 0; j < len; j++)
  239. sum += data[j] * weights[i * len + j];
  240. vals[i] = sum * wf[off] * scale[0] + wf[off + 4];
  241. }
  242. }
  243. static void compute_network0(NNEDIContext *s, const float *input, const float *weights, uint8_t *d)
  244. {
  245. float t, temp[12], scale = 1.0f;
  246. dot_prod(s, input, weights, temp, 4, 48, &scale);
  247. t = temp[0];
  248. elliott(temp, 4);
  249. temp[0] = t;
  250. dot_prod(s, temp, weights + 4 * 49, temp + 4, 4, 4, &scale);
  251. elliott(temp + 4, 4);
  252. dot_prod(s, temp, weights + 4 * 49 + 4 * 5, temp + 8, 4, 8, &scale);
  253. if (FFMAX(temp[10], temp[11]) <= FFMAX(temp[8], temp[9]))
  254. d[0] = 1;
  255. else
  256. d[0] = 0;
  257. }
  258. static void compute_network0_i16(NNEDIContext *s, const float *inputf, const float *weightsf, uint8_t *d)
  259. {
  260. const float *wf = weightsf + 2 * 48;
  261. float t, temp[12], scale = 1.0f;
  262. dot_prods(s, inputf, weightsf, temp, 4, 48, &scale);
  263. t = temp[0];
  264. elliott(temp, 4);
  265. temp[0] = t;
  266. dot_prod(s, temp, wf + 8, temp + 4, 4, 4, &scale);
  267. elliott(temp + 4, 4);
  268. dot_prod(s, temp, wf + 8 + 4 * 5, temp + 8, 4, 8, &scale);
  269. if (FFMAX(temp[10], temp[11]) <= FFMAX(temp[8], temp[9]))
  270. d[0] = 1;
  271. else
  272. d[0] = 0;
  273. }
  274. static void pixel2float48(const uint8_t *t8, const int pitch, float *p)
  275. {
  276. const uint8_t *t = (const uint8_t *)t8;
  277. int y, x;
  278. for (y = 0; y < 4; y++)
  279. for (x = 0; x < 12; x++)
  280. p[y * 12 + x] = t[y * pitch * 2 + x];
  281. }
  282. static void byte2word48(const uint8_t *t, const int pitch, float *pf)
  283. {
  284. int16_t *p = (int16_t *)pf;
  285. int y, x;
  286. for (y = 0; y < 4; y++)
  287. for (x = 0; x < 12; x++)
  288. p[y * 12 + x] = t[y * pitch * 2 + x];
  289. }
  290. static int32_t process_line0(const uint8_t *tempu, int width, uint8_t *dstp8, const uint8_t *src3p8, const int src_pitch, const int max_value, const int chroma)
  291. {
  292. uint8_t *dstp = (uint8_t *)dstp8;
  293. const uint8_t *src3p = (const uint8_t *)src3p8;
  294. int minimum = 0;
  295. int maximum = max_value - 1; // Technically the -1 is only needed for 8 and 16 bit input.
  296. int count = 0, x;
  297. for (x = 0; x < width; x++) {
  298. if (tempu[x]) {
  299. int tmp = 19 * (src3p[x + src_pitch * 2] + src3p[x + src_pitch * 4]) - 3 * (src3p[x] + src3p[x + src_pitch * 6]);
  300. tmp /= 32;
  301. dstp[x] = FFMAX(FFMIN(tmp, maximum), minimum);
  302. } else {
  303. memset(dstp + x, 255, sizeof(uint8_t));
  304. count++;
  305. }
  306. }
  307. return count;
  308. }
  309. // new prescreener functions
  310. static void byte2word64(const uint8_t *t, const int pitch, float *p)
  311. {
  312. int16_t *ps = (int16_t *)p;
  313. int y, x;
  314. for (y = 0; y < 4; y++)
  315. for (x = 0; x < 16; x++)
  316. ps[y * 16 + x] = t[y * pitch * 2 + x];
  317. }
  318. static void compute_network0new(NNEDIContext *s, const float *datai, const float *weights, uint8_t *d)
  319. {
  320. int16_t *data = (int16_t *)datai;
  321. int16_t *ws = (int16_t *)weights;
  322. float *wf = (float *)&ws[4 * 64];
  323. float vals[8];
  324. int mask, i, j;
  325. for (i = 0; i < 4; i++) {
  326. int sum = 0;
  327. float t;
  328. for (j = 0; j < 64; j++)
  329. sum += data[j] * ws[(i << 3) + ((j >> 3) << 5) + (j & 7)];
  330. t = sum * wf[i] + wf[4 + i];
  331. vals[i] = t / (1.0f + FFABS(t));
  332. }
  333. for (i = 0; i < 4; i++) {
  334. float sum = 0.0f;
  335. for (j = 0; j < 4; j++)
  336. sum += vals[j] * wf[8 + i + (j << 2)];
  337. vals[4 + i] = sum + wf[8 + 16 + i];
  338. }
  339. mask = 0;
  340. for (i = 0; i < 4; i++) {
  341. if (vals[4 + i] > 0.0f)
  342. mask |= (0x1 << (i << 3));
  343. }
  344. ((int *)d)[0] = mask;
  345. }
  346. static void evalfunc_0(NNEDIContext *s, FrameData *frame_data)
  347. {
  348. float *input = frame_data->input;
  349. const float *weights0 = s->weights0;
  350. float *temp = frame_data->temp;
  351. uint8_t *tempu = (uint8_t *)temp;
  352. int plane, x, y;
  353. // And now the actual work.
  354. for (plane = 0; plane < s->nb_planes; plane++) {
  355. const uint8_t *srcp = (const uint8_t *)frame_data->paddedp[plane];
  356. const int src_stride = frame_data->padded_stride[plane] / sizeof(uint8_t);
  357. const int width = frame_data->padded_width[plane];
  358. const int height = frame_data->padded_height[plane];
  359. uint8_t *dstp = (uint8_t *)frame_data->dstp[plane];
  360. const int dst_stride = frame_data->dst_stride[plane] / sizeof(uint8_t);
  361. const uint8_t *src3p;
  362. int ystart, ystop;
  363. int32_t *lcount;
  364. if (!(s->process_plane & (1 << plane)))
  365. continue;
  366. for (y = 1 - frame_data->field[plane]; y < height - 12; y += 2) {
  367. memcpy(dstp + y * dst_stride,
  368. srcp + 32 + (6 + y) * src_stride,
  369. (width - 64) * sizeof(uint8_t));
  370. }
  371. ystart = 6 + frame_data->field[plane];
  372. ystop = height - 6;
  373. srcp += ystart * src_stride;
  374. dstp += (ystart - 6) * dst_stride - 32;
  375. src3p = srcp - src_stride * 3;
  376. lcount = frame_data->lcount[plane] - 6;
  377. if (s->pscrn == 1) { // original
  378. for (y = ystart; y < ystop; y += 2) {
  379. for (x = 32; x < width - 32; x++) {
  380. s->readpixels((const uint8_t *)(src3p + x - 5), src_stride, input);
  381. s->compute_network0(s, input, weights0, tempu+x);
  382. }
  383. lcount[y] += s->process_line0(tempu + 32, width - 64, (uint8_t *)(dstp + 32), (const uint8_t *)(src3p + 32), src_stride, s->max_value, plane);
  384. src3p += src_stride * 2;
  385. dstp += dst_stride * 2;
  386. }
  387. } else if (s->pscrn > 1) { // new
  388. for (y = ystart; y < ystop; y += 2) {
  389. for (x = 32; x < width - 32; x += 4) {
  390. s->readpixels((const uint8_t *)(src3p + x - 6), src_stride, input);
  391. s->compute_network0(s, input, weights0, tempu + x);
  392. }
  393. lcount[y] += s->process_line0(tempu + 32, width - 64, (uint8_t *)(dstp + 32), (const uint8_t *)(src3p + 32), src_stride, s->max_value, plane);
  394. src3p += src_stride * 2;
  395. dstp += dst_stride * 2;
  396. }
  397. } else { // no prescreening
  398. for (y = ystart; y < ystop; y += 2) {
  399. memset(dstp + 32, 255, (width - 64) * sizeof(uint8_t));
  400. lcount[y] += width - 64;
  401. dstp += dst_stride * 2;
  402. }
  403. }
  404. }
  405. }
  406. static void extract_m8(const uint8_t *srcp8, const int stride, const int xdia, const int ydia, float *mstd, float *input)
  407. {
  408. // uint8_t or uint16_t or float
  409. const uint8_t *srcp = (const uint8_t *)srcp8;
  410. float scale;
  411. double tmp;
  412. // int32_t or int64_t or double
  413. int64_t sum = 0, sumsq = 0;
  414. int y, x;
  415. for (y = 0; y < ydia; y++) {
  416. const uint8_t *srcpT = srcp + y * stride * 2;
  417. for (x = 0; x < xdia; x++) {
  418. sum += srcpT[x];
  419. sumsq += (uint32_t)srcpT[x] * (uint32_t)srcpT[x];
  420. input[x] = srcpT[x];
  421. }
  422. input += xdia;
  423. }
  424. scale = 1.0f / (xdia * ydia);
  425. mstd[0] = sum * scale;
  426. tmp = (double)sumsq * scale - (double)mstd[0] * mstd[0];
  427. mstd[3] = 0.0f;
  428. if (tmp <= FLT_EPSILON)
  429. mstd[1] = mstd[2] = 0.0f;
  430. else {
  431. mstd[1] = sqrt(tmp);
  432. mstd[2] = 1.0f / mstd[1];
  433. }
  434. }
  435. static void extract_m8_i16(const uint8_t *srcp, const int stride, const int xdia, const int ydia, float *mstd, float *inputf)
  436. {
  437. int16_t *input = (int16_t *)inputf;
  438. float scale;
  439. int sum = 0, sumsq = 0;
  440. int y, x;
  441. for (y = 0; y < ydia; y++) {
  442. const uint8_t *srcpT = srcp + y * stride * 2;
  443. for (x = 0; x < xdia; x++) {
  444. sum += srcpT[x];
  445. sumsq += srcpT[x] * srcpT[x];
  446. input[x] = srcpT[x];
  447. }
  448. input += xdia;
  449. }
  450. scale = 1.0f / (float)(xdia * ydia);
  451. mstd[0] = sum * scale;
  452. mstd[1] = sumsq * scale - mstd[0] * mstd[0];
  453. mstd[3] = 0.0f;
  454. if (mstd[1] <= FLT_EPSILON)
  455. mstd[1] = mstd[2] = 0.0f;
  456. else {
  457. mstd[1] = sqrt(mstd[1]);
  458. mstd[2] = 1.0f / mstd[1];
  459. }
  460. }
  461. static const float exp_lo = -80.0f;
  462. static const float exp_hi = +80.0f;
  463. static void e2_m16(float *s, const int n)
  464. {
  465. int i;
  466. for (i = 0; i < n; i++)
  467. s[i] = exp(av_clipf(s[i], exp_lo, exp_hi));
  468. }
  469. const float min_weight_sum = 1e-10f;
  470. static void weighted_avg_elliott_mul5_m16(const float *w, const int n, float *mstd)
  471. {
  472. float vsum = 0.0f, wsum = 0.0f;
  473. int i;
  474. for (i = 0; i < n; i++) {
  475. vsum += w[i] * (w[n + i] / (1.0f + FFABS(w[n + i])));
  476. wsum += w[i];
  477. }
  478. if (wsum > min_weight_sum)
  479. mstd[3] += ((5.0f * vsum) / wsum) * mstd[1] + mstd[0];
  480. else
  481. mstd[3] += mstd[0];
  482. }
  483. static void evalfunc_1(NNEDIContext *s, FrameData *frame_data)
  484. {
  485. float *input = frame_data->input;
  486. float *temp = frame_data->temp;
  487. float **weights1 = s->weights1;
  488. const int qual = s->qual;
  489. const int asize = s->asize;
  490. const int nns = s->nns;
  491. const int xdia = s->xdia;
  492. const int xdiad2m1 = (xdia / 2) - 1;
  493. const int ydia = s->ydia;
  494. const float scale = 1.0f / (float)qual;
  495. int plane, y, x, i;
  496. for (plane = 0; plane < s->nb_planes; plane++) {
  497. const uint8_t *srcp = (const uint8_t *)frame_data->paddedp[plane];
  498. const int src_stride = frame_data->padded_stride[plane] / sizeof(uint8_t);
  499. const int width = frame_data->padded_width[plane];
  500. const int height = frame_data->padded_height[plane];
  501. uint8_t *dstp = (uint8_t *)frame_data->dstp[plane];
  502. const int dst_stride = frame_data->dst_stride[plane] / sizeof(uint8_t);
  503. const int ystart = frame_data->field[plane];
  504. const int ystop = height - 12;
  505. uint8_t *srcpp;
  506. if (!(s->process_plane & (1 << plane)))
  507. continue;
  508. srcp += (ystart + 6) * src_stride;
  509. dstp += ystart * dst_stride - 32;
  510. srcpp = srcp - (ydia - 1) * src_stride - xdiad2m1;
  511. for (y = ystart; y < ystop; y += 2) {
  512. for (x = 32; x < width - 32; x++) {
  513. uint32_t pixel = 0;
  514. uint32_t all_ones = 0;
  515. float mstd[4];
  516. memcpy(&pixel, dstp + x, sizeof(uint8_t));
  517. memset(&all_ones, 255, sizeof(uint8_t));
  518. if (pixel != all_ones)
  519. continue;
  520. s->extract((const uint8_t *)(srcpp + x), src_stride, xdia, ydia, mstd, input);
  521. for (i = 0; i < qual; i++) {
  522. s->dot_prod(s, input, weights1[i], temp, nns * 2, asize, mstd + 2);
  523. s->expfunc(temp, nns);
  524. s->wae5(temp, nns, mstd);
  525. }
  526. dstp[x] = FFMIN(FFMAX((int)(mstd[3] * scale + 0.5f), 0), s->max_value);
  527. }
  528. srcpp += src_stride * 2;
  529. dstp += dst_stride * 2;
  530. }
  531. }
  532. }
  533. #define NUM_NSIZE 7
  534. #define NUM_NNS 5
  535. static int roundds(const double f)
  536. {
  537. if (f - floor(f) >= 0.5)
  538. return FFMIN((int)ceil(f), 32767);
  539. return FFMAX((int)floor(f), -32768);
  540. }
  541. static void select_functions(NNEDIContext *s)
  542. {
  543. s->copy_pad = copy_pad;
  544. s->evalfunc_0 = evalfunc_0;
  545. s->evalfunc_1 = evalfunc_1;
  546. // evalfunc_0
  547. s->process_line0 = process_line0;
  548. if (s->pscrn < 2) { // original prescreener
  549. if (s->fapprox & 1) { // int16 dot products
  550. s->readpixels = byte2word48;
  551. s->compute_network0 = compute_network0_i16;
  552. } else {
  553. s->readpixels = pixel2float48;
  554. s->compute_network0 = compute_network0;
  555. }
  556. } else { // new prescreener
  557. // only int16 dot products
  558. s->readpixels = byte2word64;
  559. s->compute_network0 = compute_network0new;
  560. }
  561. // evalfunc_1
  562. s->wae5 = weighted_avg_elliott_mul5_m16;
  563. if (s->fapprox & 2) { // use int16 dot products
  564. s->extract = extract_m8_i16;
  565. s->dot_prod = dot_prods;
  566. } else { // use float dot products
  567. s->extract = extract_m8;
  568. s->dot_prod = dot_prod;
  569. }
  570. s->expfunc = e2_m16;
  571. }
  572. static int modnpf(const int m, const int n)
  573. {
  574. if ((m % n) == 0)
  575. return m;
  576. return m + n - (m % n);
  577. }
  578. static int get_frame(AVFilterContext *ctx, int is_second)
  579. {
  580. NNEDIContext *s = ctx->priv;
  581. AVFilterLink *outlink = ctx->outputs[0];
  582. AVFrame *src = s->src;
  583. FrameData *frame_data;
  584. int effective_field = s->field;
  585. size_t temp_size;
  586. int field_n;
  587. int plane;
  588. if (effective_field > 1)
  589. effective_field -= 2;
  590. else if (effective_field < 0)
  591. effective_field += 2;
  592. if (s->field < 0 && src->interlaced_frame && src->top_field_first == 0)
  593. effective_field = 0;
  594. else if (s->field < 0 && src->interlaced_frame && src->top_field_first == 1)
  595. effective_field = 1;
  596. else
  597. effective_field = !effective_field;
  598. if (s->field > 1 || s->field == -2) {
  599. if (is_second) {
  600. field_n = (effective_field == 0);
  601. } else {
  602. field_n = (effective_field == 1);
  603. }
  604. } else {
  605. field_n = effective_field;
  606. }
  607. s->dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  608. if (!s->dst)
  609. return AVERROR(ENOMEM);
  610. av_frame_copy_props(s->dst, src);
  611. s->dst->interlaced_frame = 0;
  612. frame_data = &s->frame_data;
  613. for (plane = 0; plane < s->nb_planes; plane++) {
  614. int dst_height = s->planeheight[plane];
  615. int dst_width = s->linesize[plane];
  616. const int min_alignment = 16;
  617. const int min_pad = 10;
  618. if (!(s->process_plane & (1 << plane))) {
  619. av_image_copy_plane(s->dst->data[plane], s->dst->linesize[plane],
  620. src->data[plane], src->linesize[plane],
  621. s->linesize[plane],
  622. s->planeheight[plane]);
  623. continue;
  624. }
  625. frame_data->padded_width[plane] = dst_width + 64;
  626. frame_data->padded_height[plane] = dst_height + 12;
  627. frame_data->padded_stride[plane] = modnpf(frame_data->padded_width[plane] + min_pad, min_alignment); // TODO: maybe min_pad is in pixels too?
  628. if (!frame_data->paddedp[plane]) {
  629. frame_data->paddedp[plane] = av_malloc_array(frame_data->padded_stride[plane], frame_data->padded_height[plane]);
  630. if (!frame_data->paddedp[plane])
  631. return AVERROR(ENOMEM);
  632. }
  633. frame_data->dstp[plane] = s->dst->data[plane];
  634. frame_data->dst_stride[plane] = s->dst->linesize[plane];
  635. if (!frame_data->lcount[plane]) {
  636. frame_data->lcount[plane] = av_calloc(dst_height, sizeof(int32_t) * 16);
  637. if (!frame_data->lcount[plane])
  638. return AVERROR(ENOMEM);
  639. } else {
  640. memset(frame_data->lcount[plane], 0, dst_height * sizeof(int32_t) * 16);
  641. }
  642. frame_data->field[plane] = field_n;
  643. }
  644. if (!frame_data->input) {
  645. frame_data->input = av_malloc(512 * sizeof(float));
  646. if (!frame_data->input)
  647. return AVERROR(ENOMEM);
  648. }
  649. // evalfunc_0 requires at least padded_width[0] bytes.
  650. // evalfunc_1 requires at least 512 floats.
  651. if (!frame_data->temp) {
  652. temp_size = FFMAX(frame_data->padded_width[0], 512 * sizeof(float));
  653. frame_data->temp = av_malloc(temp_size);
  654. if (!frame_data->temp)
  655. return AVERROR(ENOMEM);
  656. }
  657. // Copy src to a padded "frame" in frame_data and mirror the edges.
  658. s->copy_pad(src, frame_data, s, field_n);
  659. // Handles prescreening and the cubic interpolation.
  660. s->evalfunc_0(s, frame_data);
  661. // The rest.
  662. s->evalfunc_1(s, frame_data);
  663. return 0;
  664. }
  665. static int filter_frame(AVFilterLink *inlink, AVFrame *src)
  666. {
  667. AVFilterContext *ctx = inlink->dst;
  668. AVFilterLink *outlink = ctx->outputs[0];
  669. NNEDIContext *s = ctx->priv;
  670. int ret;
  671. if ((s->field > 1 ||
  672. s->field == -2) && !s->second) {
  673. goto second;
  674. } else if (s->field > 1 ||
  675. s->field == -2) {
  676. AVFrame *dst;
  677. s->src = s->second;
  678. ret = get_frame(ctx, 1);
  679. if (ret < 0) {
  680. av_frame_free(&s->dst);
  681. av_frame_free(&s->src);
  682. av_frame_free(&s->second);
  683. return ret;
  684. }
  685. dst = s->dst;
  686. if (src->pts != AV_NOPTS_VALUE &&
  687. dst->pts != AV_NOPTS_VALUE)
  688. dst->pts += src->pts;
  689. else
  690. dst->pts = AV_NOPTS_VALUE;
  691. ret = ff_filter_frame(outlink, dst);
  692. if (ret < 0)
  693. return ret;
  694. if (s->eof)
  695. return 0;
  696. s->cur_pts = s->second->pts;
  697. av_frame_free(&s->second);
  698. second:
  699. if ((s->deint && src->interlaced_frame &&
  700. !ctx->is_disabled) ||
  701. (!s->deint && !ctx->is_disabled)) {
  702. s->second = src;
  703. }
  704. }
  705. if ((s->deint && !src->interlaced_frame) || ctx->is_disabled) {
  706. AVFrame *dst = av_frame_clone(src);
  707. if (!dst) {
  708. av_frame_free(&src);
  709. av_frame_free(&s->second);
  710. return AVERROR(ENOMEM);
  711. }
  712. if (s->field > 1 || s->field == -2) {
  713. av_frame_free(&s->second);
  714. if ((s->deint && src->interlaced_frame) ||
  715. (!s->deint))
  716. s->second = src;
  717. } else {
  718. av_frame_free(&src);
  719. }
  720. if (dst->pts != AV_NOPTS_VALUE)
  721. dst->pts *= 2;
  722. return ff_filter_frame(outlink, dst);
  723. }
  724. s->src = src;
  725. ret = get_frame(ctx, 0);
  726. if (ret < 0) {
  727. av_frame_free(&s->dst);
  728. av_frame_free(&s->src);
  729. av_frame_free(&s->second);
  730. return ret;
  731. }
  732. if (src->pts != AV_NOPTS_VALUE)
  733. s->dst->pts = src->pts * 2;
  734. if (s->field <= 1 && s->field > -2) {
  735. av_frame_free(&src);
  736. s->src = NULL;
  737. }
  738. return ff_filter_frame(outlink, s->dst);
  739. }
  740. static int request_frame(AVFilterLink *link)
  741. {
  742. AVFilterContext *ctx = link->src;
  743. NNEDIContext *s = ctx->priv;
  744. int ret;
  745. if (s->eof)
  746. return AVERROR_EOF;
  747. ret = ff_request_frame(ctx->inputs[0]);
  748. if (ret == AVERROR_EOF && s->second) {
  749. AVFrame *next = av_frame_clone(s->second);
  750. if (!next)
  751. return AVERROR(ENOMEM);
  752. next->pts = s->second->pts * 2 - s->cur_pts;
  753. s->eof = 1;
  754. filter_frame(ctx->inputs[0], next);
  755. } else if (ret < 0) {
  756. return ret;
  757. }
  758. return 0;
  759. }
  760. static av_cold int init(AVFilterContext *ctx)
  761. {
  762. NNEDIContext *s = ctx->priv;
  763. FILE *weights_file = NULL;
  764. int64_t expected_size = 13574928;
  765. int64_t weights_size;
  766. float *bdata;
  767. size_t bytes_read;
  768. const int xdia_table[NUM_NSIZE] = { 8, 16, 32, 48, 8, 16, 32 };
  769. const int ydia_table[NUM_NSIZE] = { 6, 6, 6, 6, 4, 4, 4 };
  770. const int nns_table[NUM_NNS] = { 16, 32, 64, 128, 256 };
  771. const int dims0 = 49 * 4 + 5 * 4 + 9 * 4;
  772. const int dims0new = 4 * 65 + 4 * 5;
  773. const int dims1 = nns_table[s->nnsparam] * 2 * (xdia_table[s->nsize] * ydia_table[s->nsize] + 1);
  774. int dims1tsize = 0;
  775. int dims1offset = 0;
  776. int ret = 0, i, j, k;
  777. weights_file = fopen(s->weights_file, "rb");
  778. if (!weights_file) {
  779. av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n");
  780. return AVERROR(EINVAL);
  781. }
  782. if (fseek(weights_file, 0, SEEK_END)) {
  783. av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n");
  784. fclose(weights_file);
  785. return AVERROR(EINVAL);
  786. }
  787. weights_size = ftell(weights_file);
  788. if (weights_size == -1) {
  789. fclose(weights_file);
  790. av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n");
  791. return AVERROR(EINVAL);
  792. } else if (weights_size != expected_size) {
  793. fclose(weights_file);
  794. av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n");
  795. return AVERROR(EINVAL);
  796. }
  797. if (fseek(weights_file, 0, SEEK_SET)) {
  798. fclose(weights_file);
  799. av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n");
  800. return AVERROR(EINVAL);
  801. }
  802. bdata = (float *)av_malloc(expected_size);
  803. if (!bdata) {
  804. fclose(weights_file);
  805. return AVERROR(ENOMEM);
  806. }
  807. bytes_read = fread(bdata, 1, expected_size, weights_file);
  808. if (bytes_read != (size_t)expected_size) {
  809. fclose(weights_file);
  810. ret = AVERROR_INVALIDDATA;
  811. av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n");
  812. goto fail;
  813. }
  814. fclose(weights_file);
  815. for (j = 0; j < NUM_NNS; j++) {
  816. for (i = 0; i < NUM_NSIZE; i++) {
  817. if (i == s->nsize && j == s->nnsparam)
  818. dims1offset = dims1tsize;
  819. dims1tsize += nns_table[j] * 2 * (xdia_table[i] * ydia_table[i] + 1) * 2;
  820. }
  821. }
  822. s->weights0 = av_malloc_array(FFMAX(dims0, dims0new), sizeof(float));
  823. if (!s->weights0) {
  824. ret = AVERROR(ENOMEM);
  825. goto fail;
  826. }
  827. for (i = 0; i < 2; i++) {
  828. s->weights1[i] = av_malloc_array(dims1, sizeof(float));
  829. if (!s->weights1[i]) {
  830. ret = AVERROR(ENOMEM);
  831. goto fail;
  832. }
  833. }
  834. // Adjust prescreener weights
  835. if (s->pscrn >= 2) {// using new prescreener
  836. const float *bdw;
  837. int16_t *ws;
  838. float *wf;
  839. double mean[4] = { 0.0, 0.0, 0.0, 0.0 };
  840. int *offt = av_calloc(4 * 64, sizeof(int));
  841. if (!offt) {
  842. ret = AVERROR(ENOMEM);
  843. goto fail;
  844. }
  845. for (j = 0; j < 4; j++)
  846. for (k = 0; k < 64; k++)
  847. offt[j * 64 + k] = ((k >> 3) << 5) + ((j & 3) << 3) + (k & 7);
  848. bdw = bdata + dims0 + dims0new * (s->pscrn - 2);
  849. ws = (int16_t *)s->weights0;
  850. wf = (float *)&ws[4 * 64];
  851. // Calculate mean weight of each first layer neuron
  852. for (j = 0; j < 4; j++) {
  853. double cmean = 0.0;
  854. for (k = 0; k < 64; k++)
  855. cmean += bdw[offt[j * 64 + k]];
  856. mean[j] = cmean / 64.0;
  857. }
  858. // Factor mean removal and 1.0/127.5 scaling
  859. // into first layer weights. scale to int16 range
  860. for (j = 0; j < 4; j++) {
  861. double scale, mval = 0.0;
  862. for (k = 0; k < 64; k++)
  863. mval = FFMAX(mval, FFABS((bdw[offt[j * 64 + k]] - mean[j]) / 127.5));
  864. scale = 32767.0 / mval;
  865. for (k = 0; k < 64; k++)
  866. ws[offt[j * 64 + k]] = roundds(((bdw[offt[j * 64 + k]] - mean[j]) / 127.5) * scale);
  867. wf[j] = (float)(mval / 32767.0);
  868. }
  869. memcpy(wf + 4, bdw + 4 * 64, (dims0new - 4 * 64) * sizeof(float));
  870. av_free(offt);
  871. } else { // using old prescreener
  872. double mean[4] = { 0.0, 0.0, 0.0, 0.0 };
  873. // Calculate mean weight of each first layer neuron
  874. for (j = 0; j < 4; j++) {
  875. double cmean = 0.0;
  876. for (k = 0; k < 48; k++)
  877. cmean += bdata[j * 48 + k];
  878. mean[j] = cmean / 48.0;
  879. }
  880. if (s->fapprox & 1) {// use int16 dot products in first layer
  881. int16_t *ws = (int16_t *)s->weights0;
  882. float *wf = (float *)&ws[4 * 48];
  883. // Factor mean removal and 1.0/127.5 scaling
  884. // into first layer weights. scale to int16 range
  885. for (j = 0; j < 4; j++) {
  886. double scale, mval = 0.0;
  887. for (k = 0; k < 48; k++)
  888. mval = FFMAX(mval, FFABS((bdata[j * 48 + k] - mean[j]) / 127.5));
  889. scale = 32767.0 / mval;
  890. for (k = 0; k < 48; k++)
  891. ws[j * 48 + k] = roundds(((bdata[j * 48 + k] - mean[j]) / 127.5) * scale);
  892. wf[j] = (float)(mval / 32767.0);
  893. }
  894. memcpy(wf + 4, bdata + 4 * 48, (dims0 - 4 * 48) * sizeof(float));
  895. } else {// use float dot products in first layer
  896. double half = (1 << 8) - 1;
  897. half /= 2;
  898. // Factor mean removal and 1.0/half scaling
  899. // into first layer weights.
  900. for (j = 0; j < 4; j++)
  901. for (k = 0; k < 48; k++)
  902. s->weights0[j * 48 + k] = (float)((bdata[j * 48 + k] - mean[j]) / half);
  903. memcpy(s->weights0 + 4 * 48, bdata + 4 * 48, (dims0 - 4 * 48) * sizeof(float));
  904. }
  905. }
  906. // Adjust prediction weights
  907. for (i = 0; i < 2; i++) {
  908. const float *bdataT = bdata + dims0 + dims0new * 3 + dims1tsize * s->etype + dims1offset + i * dims1;
  909. const int nnst = nns_table[s->nnsparam];
  910. const int asize = xdia_table[s->nsize] * ydia_table[s->nsize];
  911. const int boff = nnst * 2 * asize;
  912. double *mean = (double *)av_calloc(asize + 1 + nnst * 2, sizeof(double));
  913. if (!mean) {
  914. ret = AVERROR(ENOMEM);
  915. goto fail;
  916. }
  917. // Calculate mean weight of each neuron (ignore bias)
  918. for (j = 0; j < nnst * 2; j++) {
  919. double cmean = 0.0;
  920. for (k = 0; k < asize; k++)
  921. cmean += bdataT[j * asize + k];
  922. mean[asize + 1 + j] = cmean / (double)asize;
  923. }
  924. // Calculate mean softmax neuron
  925. for (j = 0; j < nnst; j++) {
  926. for (k = 0; k < asize; k++)
  927. mean[k] += bdataT[j * asize + k] - mean[asize + 1 + j];
  928. mean[asize] += bdataT[boff + j];
  929. }
  930. for (j = 0; j < asize + 1; j++)
  931. mean[j] /= (double)(nnst);
  932. if (s->fapprox & 2) { // use int16 dot products
  933. int16_t *ws = (int16_t *)s->weights1[i];
  934. float *wf = (float *)&ws[nnst * 2 * asize];
  935. // Factor mean removal into weights, remove global offset from
  936. // softmax neurons, and scale weights to int16 range.
  937. for (j = 0; j < nnst; j++) { // softmax neurons
  938. double scale, mval = 0.0;
  939. for (k = 0; k < asize; k++)
  940. mval = FFMAX(mval, FFABS(bdataT[j * asize + k] - mean[asize + 1 + j] - mean[k]));
  941. scale = 32767.0 / mval;
  942. for (k = 0; k < asize; k++)
  943. ws[j * asize + k] = roundds((bdataT[j * asize + k] - mean[asize + 1 + j] - mean[k]) * scale);
  944. wf[(j >> 2) * 8 + (j & 3)] = (float)(mval / 32767.0);
  945. wf[(j >> 2) * 8 + (j & 3) + 4] = (float)(bdataT[boff + j] - mean[asize]);
  946. }
  947. for (j = nnst; j < nnst * 2; j++) { // elliott neurons
  948. double scale, mval = 0.0;
  949. for (k = 0; k < asize; k++)
  950. mval = FFMAX(mval, FFABS(bdataT[j * asize + k] - mean[asize + 1 + j]));
  951. scale = 32767.0 / mval;
  952. for (k = 0; k < asize; k++)
  953. ws[j * asize + k] = roundds((bdataT[j * asize + k] - mean[asize + 1 + j]) * scale);
  954. wf[(j >> 2) * 8 + (j & 3)] = (float)(mval / 32767.0);
  955. wf[(j >> 2) * 8 + (j & 3) + 4] = bdataT[boff + j];
  956. }
  957. } else { // use float dot products
  958. // Factor mean removal into weights, and remove global
  959. // offset from softmax neurons.
  960. for (j = 0; j < nnst * 2; j++) {
  961. for (k = 0; k < asize; k++) {
  962. const double q = j < nnst ? mean[k] : 0.0;
  963. s->weights1[i][j * asize + k] = (float)(bdataT[j * asize + k] - mean[asize + 1 + j] - q);
  964. }
  965. s->weights1[i][boff + j] = (float)(bdataT[boff + j] - (j < nnst ? mean[asize] : 0.0));
  966. }
  967. }
  968. av_free(mean);
  969. }
  970. s->nns = nns_table[s->nnsparam];
  971. s->xdia = xdia_table[s->nsize];
  972. s->ydia = ydia_table[s->nsize];
  973. s->asize = xdia_table[s->nsize] * ydia_table[s->nsize];
  974. s->max_value = 65535 >> 8;
  975. select_functions(s);
  976. s->fdsp = avpriv_float_dsp_alloc(0);
  977. if (!s->fdsp)
  978. return AVERROR(ENOMEM);
  979. fail:
  980. av_free(bdata);
  981. return ret;
  982. }
  983. static av_cold void uninit(AVFilterContext *ctx)
  984. {
  985. NNEDIContext *s = ctx->priv;
  986. int i;
  987. av_freep(&s->weights0);
  988. for (i = 0; i < 2; i++)
  989. av_freep(&s->weights1[i]);
  990. for (i = 0; i < s->nb_planes; i++) {
  991. av_freep(&s->frame_data.paddedp[i]);
  992. av_freep(&s->frame_data.lcount[i]);
  993. }
  994. av_freep(&s->frame_data.input);
  995. av_freep(&s->frame_data.temp);
  996. av_frame_free(&s->second);
  997. }
  998. static const AVFilterPad inputs[] = {
  999. {
  1000. .name = "default",
  1001. .type = AVMEDIA_TYPE_VIDEO,
  1002. .filter_frame = filter_frame,
  1003. .config_props = config_input,
  1004. },
  1005. { NULL }
  1006. };
  1007. static const AVFilterPad outputs[] = {
  1008. {
  1009. .name = "default",
  1010. .type = AVMEDIA_TYPE_VIDEO,
  1011. .config_props = config_output,
  1012. .request_frame = request_frame,
  1013. },
  1014. { NULL }
  1015. };
  1016. AVFilter ff_vf_nnedi = {
  1017. .name = "nnedi",
  1018. .description = NULL_IF_CONFIG_SMALL("Apply neural network edge directed interpolation intra-only deinterlacer."),
  1019. .priv_size = sizeof(NNEDIContext),
  1020. .priv_class = &nnedi_class,
  1021. .init = init,
  1022. .uninit = uninit,
  1023. .query_formats = query_formats,
  1024. .inputs = inputs,
  1025. .outputs = outputs,
  1026. .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
  1027. };