You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1888 lines
81KB

  1. /*
  2. * Copyright (c) 2013 Clément Bœsch
  3. * Copyright (c) 2018 Paul B Mahol
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * 3D Lookup table filter
  24. */
  25. #include "float.h"
  26. #include "libavutil/opt.h"
  27. #include "libavutil/file.h"
  28. #include "libavutil/intreadwrite.h"
  29. #include "libavutil/intfloat.h"
  30. #include "libavutil/avassert.h"
  31. #include "libavutil/pixdesc.h"
  32. #include "libavutil/avstring.h"
  33. #include "avfilter.h"
  34. #include "drawutils.h"
  35. #include "formats.h"
  36. #include "framesync.h"
  37. #include "internal.h"
  38. #include "video.h"
  /* Component indices used to address the 4-entry rgba_map table
   * (e.g. rgba_map[R] is read by the packed interpolation workers). */
  #define R 0
  #define G 1
  #define B 2
  #define A 3
  /** Interpolation strategies selectable through the "interp" option. */
  enum interp_mode {
      INTERPOLATE_NEAREST     = 0, /* take the nearest defined LUT point */
      INTERPOLATE_TRILINEAR   = 1, /* blend the 8 corners of the enclosing cube */
      INTERPOLATE_TETRAHEDRAL = 2, /* blend the 4 corners of a tetrahedron */
      NB_INTERP_MODE               /* number of modes, not a mode itself */
  };
  /** One RGB triplet stored as three floats, in r, g, b order. */
  struct rgbvec {
      float r;
      float g;
      float b;
  };
  /* 3D LUTs do not often go up to level 32, but it is common to have a
   * Hald CLUT of 512x512 (64x64x64).
   * MAX_LEVEL is the largest per-axis LUT size accepted by allocate_3dlut(). */
  #define MAX_LEVEL 256
  /** Private state of the 3D LUT filter (reached through AVFilterContext.priv). */
  typedef struct LUT3DContext {
      const AVClass *class;
      int interpolation;          ///< enum interp_mode, set by the "interp" option
      char *file;                 ///< LUT file name, set by the "file" option
      uint8_t rgba_map[4];        ///< filled by ff_fill_rgba_map() in config_input(); indexed with R/G/B/A
      int step;                   ///< per-pixel increment of the packed workers' component index
      avfilter_action_func *interp; ///< slice worker selected in config_input()
      struct rgbvec scale;        ///< per-channel factor applied to input samples before the lookup
      struct rgbvec *lut;         ///< lutsize^3 entries, allocated by allocate_3dlut()
      int lutsize;                ///< number of LUT points per axis
      int lutsize2;               ///< lutsize * lutsize, stride of the first LUT axis
  #if CONFIG_HALDCLUT_FILTER
      /* Hald CLUT specific state; not used in the part of the file shown here
       * (NOTE(review): meanings presumed from the names -- confirm). */
      uint8_t clut_rgba_map[4];
      int clut_step;
      int clut_bits;
      int clut_planar;
      int clut_float;
      int clut_width;
      FFFrameSync fs;
  #endif
  } LUT3DContext;
  76. typedef struct ThreadData {
  77. AVFrame *in, *out;
  78. } ThreadData;
  /* Byte offset of a LUT3DContext member, for the AVOption tables. */
  #define OFFSET(x) offsetof(LUT3DContext, x)
  /* Flags shared by every option declared with these tables. */
  #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  /* Option entries shared between option tables: the "interp" option, its three
   * named constants, and the terminating { NULL } entry. A table using this
   * macro must therefore place it last. */
  #define COMMON_OPTIONS \
      { "interp", "select interpolation mode", OFFSET(interpolation), AV_OPT_TYPE_INT, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, NB_INTERP_MODE-1, FLAGS, "interp_mode" }, \
      { "nearest", "use values from the nearest defined points", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_NEAREST}, INT_MIN, INT_MAX, FLAGS, "interp_mode" }, \
      { "trilinear", "interpolate values using the 8 points defining a cube", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TRILINEAR}, INT_MIN, INT_MAX, FLAGS, "interp_mode" }, \
      { "tetrahedral", "interpolate values using a tetrahedron", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TETRAHEDRAL}, INT_MIN, INT_MAX, FLAGS, "interp_mode" }, \
      { NULL }
  /* Bit fields of a 32-bit float as tested by sanitizef(). */
  #define EXPONENT_MASK 0x7F800000
  #define MANTISSA_MASK 0x007FFFFF
  #define SIGN_MASK     0x80000000

  /**
   * Replace non-finite floats by finite stand-ins so they can be scaled and
   * clipped safely.
   *
   * @param f input sample
   * @return 0 for NaN, FLT_MAX for +infinity, -FLT_MAX for -infinity,
   *         and f itself for every finite value
   */
  static inline float sanitizef(float f)
  {
      /* Local union used to inspect the raw bits of the float. */
      union {
          float    f;
          uint32_t i;
      } t;

      t.f = f;

      /* An all-ones exponent marks NaN (mantissa != 0) or infinity (mantissa == 0). */
      if ((t.i & EXPONENT_MASK) == EXPONENT_MASK) {
          if ((t.i & MANTISSA_MASK) != 0) {
              // NAN
              return 0.0f;
          } else if (t.i & SIGN_MASK) {
              // -INF: most negative finite value (FLT_MIN is a tiny positive number)
              return -FLT_MAX;
          } else {
              // +INF
              return FLT_MAX;
          }
      }
      return f;
  }
  /** Move from v0 towards v1 by the fraction f (scalar linear interpolation). */
  static inline float lerpf(float v0, float v1, float f)
  {
      const float span = v1 - v0;

      return v0 + span * f;
  }
  112. static inline struct rgbvec lerp(const struct rgbvec *v0, const struct rgbvec *v1, float f)
  113. {
  114. struct rgbvec v = {
  115. lerpf(v0->r, v1->r, f), lerpf(v0->g, v1->g, f), lerpf(v0->b, v1->b, f)
  116. };
  117. return v;
  118. }
  /* Helpers turning a float LUT coordinate into an integer grid index.
   * NEAR adds .5 then truncates; PREV truncates; NEXT is PREV + 1 limited to
   * the last index. NEXT expects a variable named lut3d in the caller's scope
   * and evaluates its argument once, NEAR and PREV as well. */
  #define NEAR(x) ((int)((x) + .5))
  #define PREV(x) ((int)(x))
  #define NEXT(x) (FFMIN((int)(x) + 1, lut3d->lutsize - 1))
  122. /**
  123. * Get the nearest defined point
  124. */
  125. static inline struct rgbvec interp_nearest(const LUT3DContext *lut3d,
  126. const struct rgbvec *s)
  127. {
  128. return lut3d->lut[NEAR(s->r) * lut3d->lutsize2 + NEAR(s->g) * lut3d->lutsize + NEAR(s->b)];
  129. }
  130. /**
  131. * Interpolate using the 8 vertices of a cube
  132. * @see https://en.wikipedia.org/wiki/Trilinear_interpolation
  133. */
  134. static inline struct rgbvec interp_trilinear(const LUT3DContext *lut3d,
  135. const struct rgbvec *s)
  136. {
  137. const int lutsize2 = lut3d->lutsize2;
  138. const int lutsize = lut3d->lutsize;
  139. const int prev[] = {PREV(s->r), PREV(s->g), PREV(s->b)};
  140. const int next[] = {NEXT(s->r), NEXT(s->g), NEXT(s->b)};
  141. const struct rgbvec d = {s->r - prev[0], s->g - prev[1], s->b - prev[2]};
  142. const struct rgbvec c000 = lut3d->lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]];
  143. const struct rgbvec c001 = lut3d->lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
  144. const struct rgbvec c010 = lut3d->lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
  145. const struct rgbvec c011 = lut3d->lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
  146. const struct rgbvec c100 = lut3d->lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
  147. const struct rgbvec c101 = lut3d->lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
  148. const struct rgbvec c110 = lut3d->lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
  149. const struct rgbvec c111 = lut3d->lut[next[0] * lutsize2 + next[1] * lutsize + next[2]];
  150. const struct rgbvec c00 = lerp(&c000, &c100, d.r);
  151. const struct rgbvec c10 = lerp(&c010, &c110, d.r);
  152. const struct rgbvec c01 = lerp(&c001, &c101, d.r);
  153. const struct rgbvec c11 = lerp(&c011, &c111, d.r);
  154. const struct rgbvec c0 = lerp(&c00, &c10, d.g);
  155. const struct rgbvec c1 = lerp(&c01, &c11, d.g);
  156. const struct rgbvec c = lerp(&c0, &c1, d.b);
  157. return c;
  158. }
  /**
   * Tetrahedral interpolation. Based on code found in Truelight Software Library paper.
   * @see http://www.filmlight.ltd.uk/pdf/whitepapers/FL-TL-TN-0057-SoftwareLib.pdf
   *
   * The cell containing s always contributes its corners c000 (all PREV) and
   * c111 (all NEXT). The ordering of the fractional offsets d.r, d.g, d.b picks
   * the two remaining corners; in every branch the four weights add up to 1.
   */
  static inline struct rgbvec interp_tetrahedral(const LUT3DContext *lut3d,
                                                 const struct rgbvec *s)
  {
      const int lutsize2 = lut3d->lutsize2;
      const int lutsize = lut3d->lutsize;
      const int prev[] = {PREV(s->r), PREV(s->g), PREV(s->b)};
      const int next[] = {NEXT(s->r), NEXT(s->g), NEXT(s->b)};
      /* fractional position of s inside the cell */
      const struct rgbvec d = {s->r - prev[0], s->g - prev[1], s->b - prev[2]};
      const struct rgbvec c000 = lut3d->lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]];
      const struct rgbvec c111 = lut3d->lut[next[0] * lutsize2 + next[1] * lutsize + next[2]];
      struct rgbvec c;
      if (d.r > d.g) {
          if (d.g > d.b) {
              /* d.r > d.g > d.b */
              const struct rgbvec c100 = lut3d->lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
              const struct rgbvec c110 = lut3d->lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
              c.r = (1-d.r) * c000.r + (d.r-d.g) * c100.r + (d.g-d.b) * c110.r + (d.b) * c111.r;
              c.g = (1-d.r) * c000.g + (d.r-d.g) * c100.g + (d.g-d.b) * c110.g + (d.b) * c111.g;
              c.b = (1-d.r) * c000.b + (d.r-d.g) * c100.b + (d.g-d.b) * c110.b + (d.b) * c111.b;
          } else if (d.r > d.b) {
              /* d.r > d.b >= d.g */
              const struct rgbvec c100 = lut3d->lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
              const struct rgbvec c101 = lut3d->lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
              c.r = (1-d.r) * c000.r + (d.r-d.b) * c100.r + (d.b-d.g) * c101.r + (d.g) * c111.r;
              c.g = (1-d.r) * c000.g + (d.r-d.b) * c100.g + (d.b-d.g) * c101.g + (d.g) * c111.g;
              c.b = (1-d.r) * c000.b + (d.r-d.b) * c100.b + (d.b-d.g) * c101.b + (d.g) * c111.b;
          } else {
              /* d.b >= d.r > d.g */
              const struct rgbvec c001 = lut3d->lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
              const struct rgbvec c101 = lut3d->lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
              c.r = (1-d.b) * c000.r + (d.b-d.r) * c001.r + (d.r-d.g) * c101.r + (d.g) * c111.r;
              c.g = (1-d.b) * c000.g + (d.b-d.r) * c001.g + (d.r-d.g) * c101.g + (d.g) * c111.g;
              c.b = (1-d.b) * c000.b + (d.b-d.r) * c001.b + (d.r-d.g) * c101.b + (d.g) * c111.b;
          }
      } else {
          if (d.b > d.g) {
              /* d.b > d.g >= d.r */
              const struct rgbvec c001 = lut3d->lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
              const struct rgbvec c011 = lut3d->lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
              c.r = (1-d.b) * c000.r + (d.b-d.g) * c001.r + (d.g-d.r) * c011.r + (d.r) * c111.r;
              c.g = (1-d.b) * c000.g + (d.b-d.g) * c001.g + (d.g-d.r) * c011.g + (d.r) * c111.g;
              c.b = (1-d.b) * c000.b + (d.b-d.g) * c001.b + (d.g-d.r) * c011.b + (d.r) * c111.b;
          } else if (d.b > d.r) {
              /* d.g >= d.b > d.r */
              const struct rgbvec c010 = lut3d->lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
              const struct rgbvec c011 = lut3d->lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
              c.r = (1-d.g) * c000.r + (d.g-d.b) * c010.r + (d.b-d.r) * c011.r + (d.r) * c111.r;
              c.g = (1-d.g) * c000.g + (d.g-d.b) * c010.g + (d.b-d.r) * c011.g + (d.r) * c111.g;
              c.b = (1-d.g) * c000.b + (d.g-d.b) * c010.b + (d.b-d.r) * c011.b + (d.r) * c111.b;
          } else {
              /* d.g >= d.r >= d.b */
              const struct rgbvec c010 = lut3d->lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
              const struct rgbvec c110 = lut3d->lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
              c.r = (1-d.g) * c000.r + (d.g-d.r) * c010.r + (d.r-d.b) * c110.r + (d.b) * c111.r;
              c.g = (1-d.g) * c000.g + (d.g-d.r) * c010.g + (d.r-d.b) * c110.g + (d.b) * c111.g;
              c.b = (1-d.g) * c000.b + (d.g-d.r) * c010.b + (d.r-d.b) * c110.b + (d.b) * c111.b;
          }
      }
      return c;
  }
  /*
   * Generate interp_<nbits>_<name>_p<depth>(), a slice worker for planar
   * integer frames.
   *
   *   name   interpolation routine to call (interp_<name>)
   *   nbits  width of the integer type one sample is stored in (uint<nbits>_t)
   *   depth  number of significant bits per sample
   *
   * Planes are read as data[0] = G, data[1] = B, data[2] = R, data[3] = A
   * (see the variable names). Each job handles the rows
   * [slice_start, slice_end) of the frame. Samples are multiplied by
   * scale / (2^depth - 1) * (lutsize - 1) to obtain LUT coordinates, and the
   * LUT output is multiplied by 2^depth - 1 and clipped to depth bits.
   * Alpha is copied only when the output is a different frame and the input
   * has a non-zero linesize[3].
   */
  #define DEFINE_INTERP_FUNC_PLANAR(name, nbits, depth) \
  static int interp_##nbits##_##name##_p##depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
  { \
      int x, y; \
      const LUT3DContext *lut3d = ctx->priv; \
      const ThreadData *td = arg; \
      const AVFrame *in = td->in; \
      const AVFrame *out = td->out; \
      const int direct = out == in; \
      const int slice_start = (in->height * jobnr ) / nb_jobs; \
      const int slice_end = (in->height * (jobnr+1)) / nb_jobs; \
      uint8_t *grow = out->data[0] + slice_start * out->linesize[0]; \
      uint8_t *brow = out->data[1] + slice_start * out->linesize[1]; \
      uint8_t *rrow = out->data[2] + slice_start * out->linesize[2]; \
      uint8_t *arow = out->data[3] + slice_start * out->linesize[3]; \
      const uint8_t *srcgrow = in->data[0] + slice_start * in->linesize[0]; \
      const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1]; \
      const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2]; \
      const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3]; \
      const float scale_r = (lut3d->scale.r / ((1<<depth) - 1)) * (lut3d->lutsize - 1); \
      const float scale_g = (lut3d->scale.g / ((1<<depth) - 1)) * (lut3d->lutsize - 1); \
      const float scale_b = (lut3d->scale.b / ((1<<depth) - 1)) * (lut3d->lutsize - 1); \
      \
      for (y = slice_start; y < slice_end; y++) { \
          uint##nbits##_t *dstg = (uint##nbits##_t *)grow; \
          uint##nbits##_t *dstb = (uint##nbits##_t *)brow; \
          uint##nbits##_t *dstr = (uint##nbits##_t *)rrow; \
          uint##nbits##_t *dsta = (uint##nbits##_t *)arow; \
          const uint##nbits##_t *srcg = (const uint##nbits##_t *)srcgrow; \
          const uint##nbits##_t *srcb = (const uint##nbits##_t *)srcbrow; \
          const uint##nbits##_t *srcr = (const uint##nbits##_t *)srcrrow; \
          const uint##nbits##_t *srca = (const uint##nbits##_t *)srcarow; \
          for (x = 0; x < in->width; x++) { \
              const struct rgbvec scaled_rgb = {srcr[x] * scale_r, \
                                                srcg[x] * scale_g, \
                                                srcb[x] * scale_b}; \
              struct rgbvec vec = interp_##name(lut3d, &scaled_rgb); \
              dstr[x] = av_clip_uintp2(vec.r * (float)((1<<depth) - 1), depth); \
              dstg[x] = av_clip_uintp2(vec.g * (float)((1<<depth) - 1), depth); \
              dstb[x] = av_clip_uintp2(vec.b * (float)((1<<depth) - 1), depth); \
              if (!direct && in->linesize[3]) \
                  dsta[x] = srca[x]; \
          } \
          grow += out->linesize[0]; \
          brow += out->linesize[1]; \
          rrow += out->linesize[2]; \
          arow += out->linesize[3]; \
          srcgrow += in->linesize[0]; \
          srcbrow += in->linesize[1]; \
          srcrrow += in->linesize[2]; \
          srcarow += in->linesize[3]; \
      } \
      return 0; \
  }
  /* One worker per (interpolation mode, sample depth) pair. */
  DEFINE_INTERP_FUNC_PLANAR(nearest, 8, 8)
  DEFINE_INTERP_FUNC_PLANAR(trilinear, 8, 8)
  DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 8, 8)
  DEFINE_INTERP_FUNC_PLANAR(nearest, 16, 9)
  DEFINE_INTERP_FUNC_PLANAR(trilinear, 16, 9)
  DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 9)
  DEFINE_INTERP_FUNC_PLANAR(nearest, 16, 10)
  DEFINE_INTERP_FUNC_PLANAR(trilinear, 16, 10)
  DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 10)
  DEFINE_INTERP_FUNC_PLANAR(nearest, 16, 12)
  DEFINE_INTERP_FUNC_PLANAR(trilinear, 16, 12)
  DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 12)
  DEFINE_INTERP_FUNC_PLANAR(nearest, 16, 14)
  DEFINE_INTERP_FUNC_PLANAR(trilinear, 16, 14)
  DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 14)
  DEFINE_INTERP_FUNC_PLANAR(nearest, 16, 16)
  DEFINE_INTERP_FUNC_PLANAR(trilinear, 16, 16)
  DEFINE_INTERP_FUNC_PLANAR(tetrahedral, 16, 16)
  /*
   * Generate interp_<name>_pf<depth>(), a slice worker for planar float
   * frames (planes read as data[0] = G, data[1] = B, data[2] = R, data[3] = A).
   *
   * Each input sample goes through sanitizef(), is multiplied by
   * scale * (lutsize - 1) and clipped to [0, lutsize - 1] before the lookup.
   * The LUT output is stored without clipping. Alpha is copied only when the
   * output is a different frame and the input has a non-zero linesize[3].
   */
  #define DEFINE_INTERP_FUNC_PLANAR_FLOAT(name, depth) \
  static int interp_##name##_pf##depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
  { \
      int x, y; \
      const LUT3DContext *lut3d = ctx->priv; \
      const ThreadData *td = arg; \
      const AVFrame *in = td->in; \
      const AVFrame *out = td->out; \
      const int direct = out == in; \
      const int slice_start = (in->height * jobnr ) / nb_jobs; \
      const int slice_end = (in->height * (jobnr+1)) / nb_jobs; \
      uint8_t *grow = out->data[0] + slice_start * out->linesize[0]; \
      uint8_t *brow = out->data[1] + slice_start * out->linesize[1]; \
      uint8_t *rrow = out->data[2] + slice_start * out->linesize[2]; \
      uint8_t *arow = out->data[3] + slice_start * out->linesize[3]; \
      const uint8_t *srcgrow = in->data[0] + slice_start * in->linesize[0]; \
      const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1]; \
      const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2]; \
      const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3]; \
      const float lutsize = lut3d->lutsize - 1; \
      const float scale_r = lut3d->scale.r * lutsize; \
      const float scale_g = lut3d->scale.g * lutsize; \
      const float scale_b = lut3d->scale.b * lutsize; \
      \
      for (y = slice_start; y < slice_end; y++) { \
          float *dstg = (float *)grow; \
          float *dstb = (float *)brow; \
          float *dstr = (float *)rrow; \
          float *dsta = (float *)arow; \
          const float *srcg = (const float *)srcgrow; \
          const float *srcb = (const float *)srcbrow; \
          const float *srcr = (const float *)srcrrow; \
          const float *srca = (const float *)srcarow; \
          for (x = 0; x < in->width; x++) { \
              const struct rgbvec scaled_rgb = {av_clipf(sanitizef(srcr[x]) * scale_r, 0, lutsize), \
                                                av_clipf(sanitizef(srcg[x]) * scale_g, 0, lutsize), \
                                                av_clipf(sanitizef(srcb[x]) * scale_b, 0, lutsize)}; \
              struct rgbvec vec = interp_##name(lut3d, &scaled_rgb); \
              dstr[x] = vec.r; \
              dstg[x] = vec.g; \
              dstb[x] = vec.b; \
              if (!direct && in->linesize[3]) \
                  dsta[x] = srca[x]; \
          } \
          grow += out->linesize[0]; \
          brow += out->linesize[1]; \
          rrow += out->linesize[2]; \
          arow += out->linesize[3]; \
          srcgrow += in->linesize[0]; \
          srcbrow += in->linesize[1]; \
          srcrrow += in->linesize[2]; \
          srcarow += in->linesize[3]; \
      } \
      return 0; \
  }
  /* One float worker per interpolation mode. */
  DEFINE_INTERP_FUNC_PLANAR_FLOAT(nearest, 32)
  DEFINE_INTERP_FUNC_PLANAR_FLOAT(trilinear, 32)
  DEFINE_INTERP_FUNC_PLANAR_FLOAT(tetrahedral, 32)
  /*
   * Generate interp_<nbits>_<name>(), a slice worker for packed frames whose
   * components are stored as uint<nbits>_t in data[0].
   *
   * The component index x advances by lut3d->step per pixel, and
   * lut3d->rgba_map gives the offset of each component within a pixel.
   * Samples are multiplied by scale / (2^nbits - 1) * (lutsize - 1) to obtain
   * LUT coordinates; the LUT output is multiplied by 2^nbits - 1 and clipped.
   * The alpha component is copied only when the output is a different frame
   * and step is 4.
   */
  #define DEFINE_INTERP_FUNC(name, nbits) \
  static int interp_##nbits##_##name(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
  { \
      int x, y; \
      const LUT3DContext *lut3d = ctx->priv; \
      const ThreadData *td = arg; \
      const AVFrame *in = td->in; \
      const AVFrame *out = td->out; \
      const int direct = out == in; \
      const int step = lut3d->step; \
      const uint8_t r = lut3d->rgba_map[R]; \
      const uint8_t g = lut3d->rgba_map[G]; \
      const uint8_t b = lut3d->rgba_map[B]; \
      const uint8_t a = lut3d->rgba_map[A]; \
      const int slice_start = (in->height * jobnr ) / nb_jobs; \
      const int slice_end = (in->height * (jobnr+1)) / nb_jobs; \
      uint8_t *dstrow = out->data[0] + slice_start * out->linesize[0]; \
      const uint8_t *srcrow = in ->data[0] + slice_start * in ->linesize[0]; \
      const float scale_r = (lut3d->scale.r / ((1<<nbits) - 1)) * (lut3d->lutsize - 1); \
      const float scale_g = (lut3d->scale.g / ((1<<nbits) - 1)) * (lut3d->lutsize - 1); \
      const float scale_b = (lut3d->scale.b / ((1<<nbits) - 1)) * (lut3d->lutsize - 1); \
      \
      for (y = slice_start; y < slice_end; y++) { \
          uint##nbits##_t *dst = (uint##nbits##_t *)dstrow; \
          const uint##nbits##_t *src = (const uint##nbits##_t *)srcrow; \
          for (x = 0; x < in->width * step; x += step) { \
              const struct rgbvec scaled_rgb = {src[x + r] * scale_r, \
                                                src[x + g] * scale_g, \
                                                src[x + b] * scale_b}; \
              struct rgbvec vec = interp_##name(lut3d, &scaled_rgb); \
              dst[x + r] = av_clip_uint##nbits(vec.r * (float)((1<<nbits) - 1)); \
              dst[x + g] = av_clip_uint##nbits(vec.g * (float)((1<<nbits) - 1)); \
              dst[x + b] = av_clip_uint##nbits(vec.b * (float)((1<<nbits) - 1)); \
              if (!direct && step == 4) \
                  dst[x + a] = src[x + a]; \
          } \
          dstrow += out->linesize[0]; \
          srcrow += in ->linesize[0]; \
      } \
      return 0; \
  }
  /* One packed worker per (interpolation mode, component width) pair. */
  DEFINE_INTERP_FUNC(nearest, 8)
  DEFINE_INTERP_FUNC(trilinear, 8)
  DEFINE_INTERP_FUNC(tetrahedral, 8)
  DEFINE_INTERP_FUNC(nearest, 16)
  DEFINE_INTERP_FUNC(trilinear, 16)
  DEFINE_INTERP_FUNC(tetrahedral, 16)
  /* Size in bytes of the line buffer the LUT file parsers read into with fgets(). */
  #define MAX_LINE_SIZE 512
  /**
   * Tell whether a line carries no data.
   *
   * @param p NUL-terminated line
   * @return 1 if, after leading whitespace, the line is empty or starts
   *         with '#'; 0 otherwise
   */
  static int skip_line(const char *p)
  {
      for (; *p && av_isspace(*p); p++)
          ;
      if (*p == '\0')
          return 1;
      return *p == '#';
  }
  /*
   * Read lines from f into line for as long as loop_cond holds after a read
   * (at least one line is always read). On end of file or read error, log
   * "Unexpected EOF" and return AVERROR_INVALIDDATA from the *calling*
   * function. The caller must have ctx, f and a char array named line in
   * scope.
   */
  #define NEXT_LINE(loop_cond) do { \
      if (!fgets(line, sizeof(line), f)) { \
          av_log(ctx, AV_LOG_ERROR, "Unexpected EOF\n"); \
          return AVERROR_INVALIDDATA; \
      } \
  } while (loop_cond)
  407. static int allocate_3dlut(AVFilterContext *ctx, int lutsize)
  408. {
  409. LUT3DContext *lut3d = ctx->priv;
  410. if (lutsize < 2 || lutsize > MAX_LEVEL) {
  411. av_log(ctx, AV_LOG_ERROR, "Too large or invalid 3D LUT size\n");
  412. return AVERROR(EINVAL);
  413. }
  414. av_freep(&lut3d->lut);
  415. lut3d->lut = av_malloc_array(lutsize * lutsize * lutsize, sizeof(*lut3d->lut));
  416. if (!lut3d->lut)
  417. return AVERROR(ENOMEM);
  418. lut3d->lutsize = lutsize;
  419. lut3d->lutsize2 = lutsize * lutsize;
  420. return 0;
  421. }
  422. /* Basically r g and b float values on each line, with a facultative 3DLUTSIZE
  423. * directive; seems to be generated by Davinci */
  424. static int parse_dat(AVFilterContext *ctx, FILE *f)
  425. {
  426. LUT3DContext *lut3d = ctx->priv;
  427. char line[MAX_LINE_SIZE];
  428. int ret, i, j, k, size, size2;
  429. lut3d->lutsize = size = 33;
  430. size2 = size * size;
  431. NEXT_LINE(skip_line(line));
  432. if (!strncmp(line, "3DLUTSIZE ", 10)) {
  433. size = strtol(line + 10, NULL, 0);
  434. NEXT_LINE(skip_line(line));
  435. }
  436. ret = allocate_3dlut(ctx, size);
  437. if (ret < 0)
  438. return ret;
  439. for (k = 0; k < size; k++) {
  440. for (j = 0; j < size; j++) {
  441. for (i = 0; i < size; i++) {
  442. struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
  443. if (k != 0 || j != 0 || i != 0)
  444. NEXT_LINE(skip_line(line));
  445. if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
  446. return AVERROR_INVALIDDATA;
  447. }
  448. }
  449. }
  450. return 0;
  451. }
  452. /* Iridas format */
  453. static int parse_cube(AVFilterContext *ctx, FILE *f)
  454. {
  455. LUT3DContext *lut3d = ctx->priv;
  456. char line[MAX_LINE_SIZE];
  457. float min[3] = {0.0, 0.0, 0.0};
  458. float max[3] = {1.0, 1.0, 1.0};
  459. while (fgets(line, sizeof(line), f)) {
  460. if (!strncmp(line, "LUT_3D_SIZE", 11)) {
  461. int ret, i, j, k;
  462. const int size = strtol(line + 12, NULL, 0);
  463. const int size2 = size * size;
  464. ret = allocate_3dlut(ctx, size);
  465. if (ret < 0)
  466. return ret;
  467. for (k = 0; k < size; k++) {
  468. for (j = 0; j < size; j++) {
  469. for (i = 0; i < size; i++) {
  470. struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
  471. do {
  472. try_again:
  473. NEXT_LINE(0);
  474. if (!strncmp(line, "DOMAIN_", 7)) {
  475. float *vals = NULL;
  476. if (!strncmp(line + 7, "MIN ", 4)) vals = min;
  477. else if (!strncmp(line + 7, "MAX ", 4)) vals = max;
  478. if (!vals)
  479. return AVERROR_INVALIDDATA;
  480. av_sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
  481. av_log(ctx, AV_LOG_DEBUG, "min: %f %f %f | max: %f %f %f\n",
  482. min[0], min[1], min[2], max[0], max[1], max[2]);
  483. goto try_again;
  484. } else if (!strncmp(line, "TITLE", 5)) {
  485. goto try_again;
  486. }
  487. } while (skip_line(line));
  488. if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
  489. return AVERROR_INVALIDDATA;
  490. }
  491. }
  492. }
  493. break;
  494. }
  495. }
  496. lut3d->scale.r = av_clipf(1. / (max[0] - min[0]), 0.f, 1.f);
  497. lut3d->scale.g = av_clipf(1. / (max[1] - min[1]), 0.f, 1.f);
  498. lut3d->scale.b = av_clipf(1. / (max[2] - min[2]), 0.f, 1.f);
  499. return 0;
  500. }
  501. /* Assume 17x17x17 LUT with a 16-bit depth
  502. * FIXME: it seems there are various 3dl formats */
  503. static int parse_3dl(AVFilterContext *ctx, FILE *f)
  504. {
  505. char line[MAX_LINE_SIZE];
  506. LUT3DContext *lut3d = ctx->priv;
  507. int ret, i, j, k;
  508. const int size = 17;
  509. const int size2 = 17 * 17;
  510. const float scale = 16*16*16;
  511. lut3d->lutsize = size;
  512. ret = allocate_3dlut(ctx, size);
  513. if (ret < 0)
  514. return ret;
  515. NEXT_LINE(skip_line(line));
  516. for (k = 0; k < size; k++) {
  517. for (j = 0; j < size; j++) {
  518. for (i = 0; i < size; i++) {
  519. int r, g, b;
  520. struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
  521. NEXT_LINE(skip_line(line));
  522. if (av_sscanf(line, "%d %d %d", &r, &g, &b) != 3)
  523. return AVERROR_INVALIDDATA;
  524. vec->r = r / scale;
  525. vec->g = g / scale;
  526. vec->b = b / scale;
  527. }
  528. }
  529. }
  530. return 0;
  531. }
  532. /* Pandora format */
  533. static int parse_m3d(AVFilterContext *ctx, FILE *f)
  534. {
  535. LUT3DContext *lut3d = ctx->priv;
  536. float scale;
  537. int ret, i, j, k, size, size2, in = -1, out = -1;
  538. char line[MAX_LINE_SIZE];
  539. uint8_t rgb_map[3] = {0, 1, 2};
  540. while (fgets(line, sizeof(line), f)) {
  541. if (!strncmp(line, "in", 2)) in = strtol(line + 2, NULL, 0);
  542. else if (!strncmp(line, "out", 3)) out = strtol(line + 3, NULL, 0);
  543. else if (!strncmp(line, "values", 6)) {
  544. const char *p = line + 6;
  545. #define SET_COLOR(id) do { \
  546. while (av_isspace(*p)) \
  547. p++; \
  548. switch (*p) { \
  549. case 'r': rgb_map[id] = 0; break; \
  550. case 'g': rgb_map[id] = 1; break; \
  551. case 'b': rgb_map[id] = 2; break; \
  552. } \
  553. while (*p && !av_isspace(*p)) \
  554. p++; \
  555. } while (0)
  556. SET_COLOR(0);
  557. SET_COLOR(1);
  558. SET_COLOR(2);
  559. break;
  560. }
  561. }
  562. if (in == -1 || out == -1) {
  563. av_log(ctx, AV_LOG_ERROR, "in and out must be defined\n");
  564. return AVERROR_INVALIDDATA;
  565. }
  566. if (in < 2 || out < 2 ||
  567. in > MAX_LEVEL*MAX_LEVEL*MAX_LEVEL ||
  568. out > MAX_LEVEL*MAX_LEVEL*MAX_LEVEL) {
  569. av_log(ctx, AV_LOG_ERROR, "invalid in (%d) or out (%d)\n", in, out);
  570. return AVERROR_INVALIDDATA;
  571. }
  572. for (size = 1; size*size*size < in; size++);
  573. lut3d->lutsize = size;
  574. size2 = size * size;
  575. ret = allocate_3dlut(ctx, size);
  576. if (ret < 0)
  577. return ret;
  578. scale = 1. / (out - 1);
  579. for (k = 0; k < size; k++) {
  580. for (j = 0; j < size; j++) {
  581. for (i = 0; i < size; i++) {
  582. struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
  583. float val[3];
  584. NEXT_LINE(0);
  585. if (av_sscanf(line, "%f %f %f", val, val + 1, val + 2) != 3)
  586. return AVERROR_INVALIDDATA;
  587. vec->r = val[rgb_map[0]] * scale;
  588. vec->g = val[rgb_map[1]] * scale;
  589. vec->b = val[rgb_map[2]] * scale;
  590. }
  591. }
  592. }
  593. return 0;
  594. }
  595. static int parse_cinespace(AVFilterContext *ctx, FILE *f)
  596. {
  597. LUT3DContext *lut3d = ctx->priv;
  598. char line[MAX_LINE_SIZE];
  599. float in_min[3] = {0.0, 0.0, 0.0};
  600. float in_max[3] = {1.0, 1.0, 1.0};
  601. float out_min[3] = {0.0, 0.0, 0.0};
  602. float out_max[3] = {1.0, 1.0, 1.0};
  603. int ret, inside_metadata = 0, size, size2;
  604. NEXT_LINE(skip_line(line));
  605. if (strncmp(line, "CSPLUTV100", 10)) {
  606. av_log(ctx, AV_LOG_ERROR, "Not cineSpace LUT format\n");
  607. return AVERROR(EINVAL);
  608. }
  609. NEXT_LINE(skip_line(line));
  610. if (strncmp(line, "3D", 2)) {
  611. av_log(ctx, AV_LOG_ERROR, "Not 3D LUT format\n");
  612. return AVERROR(EINVAL);
  613. }
  614. while (1) {
  615. NEXT_LINE(skip_line(line));
  616. if (!strncmp(line, "BEGIN METADATA", 14)) {
  617. inside_metadata = 1;
  618. continue;
  619. }
  620. if (!strncmp(line, "END METADATA", 12)) {
  621. inside_metadata = 0;
  622. continue;
  623. }
  624. if (inside_metadata == 0) {
  625. int size_r, size_g, size_b;
  626. for (int i = 0; i < 3; i++) {
  627. int npoints = strtol(line, NULL, 0);
  628. if (npoints != 2) {
  629. av_log(ctx, AV_LOG_ERROR, "Unsupported number of pre-lut points.\n");
  630. return AVERROR_PATCHWELCOME;
  631. }
  632. NEXT_LINE(skip_line(line));
  633. if (av_sscanf(line, "%f %f", &in_min[i], &in_max[i]) != 2)
  634. return AVERROR_INVALIDDATA;
  635. NEXT_LINE(skip_line(line));
  636. if (av_sscanf(line, "%f %f", &out_min[i], &out_max[i]) != 2)
  637. return AVERROR_INVALIDDATA;
  638. NEXT_LINE(skip_line(line));
  639. }
  640. if (av_sscanf(line, "%d %d %d", &size_r, &size_g, &size_b) != 3)
  641. return AVERROR(EINVAL);
  642. if (size_r != size_g || size_r != size_b) {
  643. av_log(ctx, AV_LOG_ERROR, "Unsupported size combination: %dx%dx%d.\n", size_r, size_g, size_b);
  644. return AVERROR_PATCHWELCOME;
  645. }
  646. size = size_r;
  647. size2 = size * size;
  648. ret = allocate_3dlut(ctx, size);
  649. if (ret < 0)
  650. return ret;
  651. for (int k = 0; k < size; k++) {
  652. for (int j = 0; j < size; j++) {
  653. for (int i = 0; i < size; i++) {
  654. struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
  655. if (k != 0 || j != 0 || i != 0)
  656. NEXT_LINE(skip_line(line));
  657. if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
  658. return AVERROR_INVALIDDATA;
  659. vec->r *= out_max[0] - out_min[0];
  660. vec->g *= out_max[1] - out_min[1];
  661. vec->b *= out_max[2] - out_min[2];
  662. }
  663. }
  664. }
  665. break;
  666. }
  667. }
  668. lut3d->scale.r = av_clipf(1. / (in_max[0] - in_min[0]), 0.f, 1.f);
  669. lut3d->scale.g = av_clipf(1. / (in_max[1] - in_min[1]), 0.f, 1.f);
  670. lut3d->scale.b = av_clipf(1. / (in_max[2] - in_min[2]), 0.f, 1.f);
  671. return 0;
  672. }
  673. static int set_identity_matrix(AVFilterContext *ctx, int size)
  674. {
  675. LUT3DContext *lut3d = ctx->priv;
  676. int ret, i, j, k;
  677. const int size2 = size * size;
  678. const float c = 1. / (size - 1);
  679. ret = allocate_3dlut(ctx, size);
  680. if (ret < 0)
  681. return ret;
  682. for (k = 0; k < size; k++) {
  683. for (j = 0; j < size; j++) {
  684. for (i = 0; i < size; i++) {
  685. struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
  686. vec->r = k * c;
  687. vec->g = j * c;
  688. vec->b = i * c;
  689. }
  690. }
  691. }
  692. return 0;
  693. }
  694. static int query_formats(AVFilterContext *ctx)
  695. {
  696. static const enum AVPixelFormat pix_fmts[] = {
  697. AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
  698. AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA,
  699. AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR,
  700. AV_PIX_FMT_0RGB, AV_PIX_FMT_0BGR,
  701. AV_PIX_FMT_RGB0, AV_PIX_FMT_BGR0,
  702. AV_PIX_FMT_RGB48, AV_PIX_FMT_BGR48,
  703. AV_PIX_FMT_RGBA64, AV_PIX_FMT_BGRA64,
  704. AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
  705. AV_PIX_FMT_GBRP9,
  706. AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10,
  707. AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12,
  708. AV_PIX_FMT_GBRP14,
  709. AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16,
  710. AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32,
  711. AV_PIX_FMT_NONE
  712. };
  713. AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
  714. if (!fmts_list)
  715. return AVERROR(ENOMEM);
  716. return ff_set_common_formats(ctx, fmts_list);
  717. }
  /**
   * Input link configuration: derive the pixel layout from the negotiated
   * format and select the slice worker matching it and the chosen
   * interpolation mode.
   *
   * @return 0 (an unknown interpolation value aborts through av_assert0)
   */
  static int config_input(AVFilterLink *inlink)
  {
      int depth, is16bit, isfloat, planar;
      LUT3DContext *lut3d = inlink->dst->priv;
      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
      depth = desc->comp[0].depth;
      is16bit = desc->comp[0].depth > 8; /* any depth above 8 uses the 16-bit workers */
      planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR;
      isfloat = desc->flags & AV_PIX_FMT_FLAG_FLOAT;
      ff_fill_rgba_map(lut3d->rgba_map, inlink->format);
      /* padded bits per pixel divided by 8, and by 2 more when is16bit */
      lut3d->step = av_get_padded_bits_per_pixel(desc) >> (3 + is16bit);
  /* Pick the worker generated for interpolation `name`:
   * planar integer -> by depth, float -> _pf32, packed -> by component width.
   * A planar integer depth without a case leaves lut3d->interp untouched. */
  #define SET_FUNC(name) do { \
      if (planar && !isfloat) { \
          switch (depth) { \
          case 8: lut3d->interp = interp_8_##name##_p8; break; \
          case 9: lut3d->interp = interp_16_##name##_p9; break; \
          case 10: lut3d->interp = interp_16_##name##_p10; break; \
          case 12: lut3d->interp = interp_16_##name##_p12; break; \
          case 14: lut3d->interp = interp_16_##name##_p14; break; \
          case 16: lut3d->interp = interp_16_##name##_p16; break; \
          } \
      } else if (isfloat) { lut3d->interp = interp_##name##_pf32; \
      } else if (is16bit) { lut3d->interp = interp_16_##name; \
      } else { lut3d->interp = interp_8_##name; } \
  } while (0)
      switch (lut3d->interpolation) {
      case INTERPOLATE_NEAREST: SET_FUNC(nearest); break;
      case INTERPOLATE_TRILINEAR: SET_FUNC(trilinear); break;
      case INTERPOLATE_TETRAHEDRAL: SET_FUNC(tetrahedral); break;
      default:
          av_assert0(0);
      }
      return 0;
  }
  752. static AVFrame *apply_lut(AVFilterLink *inlink, AVFrame *in)
  753. {
  754. AVFilterContext *ctx = inlink->dst;
  755. LUT3DContext *lut3d = ctx->priv;
  756. AVFilterLink *outlink = inlink->dst->outputs[0];
  757. AVFrame *out;
  758. ThreadData td;
  759. if (av_frame_is_writable(in)) {
  760. out = in;
  761. } else {
  762. out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  763. if (!out) {
  764. av_frame_free(&in);
  765. return NULL;
  766. }
  767. av_frame_copy_props(out, in);
  768. }
  769. td.in = in;
  770. td.out = out;
  771. ctx->internal->execute(ctx, lut3d->interp, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
  772. if (out != in)
  773. av_frame_free(&in);
  774. return out;
  775. }
  776. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  777. {
  778. AVFilterLink *outlink = inlink->dst->outputs[0];
  779. AVFrame *out = apply_lut(inlink, in);
  780. if (!out)
  781. return AVERROR(ENOMEM);
  782. return ff_filter_frame(outlink, out);
  783. }
  784. #if CONFIG_LUT3D_FILTER
/* Options of the lut3d filter: the LUT file name, followed by whatever
 * entries COMMON_OPTIONS expands to (defined outside this section;
 * presumably it also supplies the table terminator -- confirm). */
static const AVOption lut3d_options[] = {
    { "file", "set 3D LUT file name", OFFSET(file), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
    COMMON_OPTIONS
};

AVFILTER_DEFINE_CLASS(lut3d);
  790. static av_cold int lut3d_init(AVFilterContext *ctx)
  791. {
  792. int ret;
  793. FILE *f;
  794. const char *ext;
  795. LUT3DContext *lut3d = ctx->priv;
  796. lut3d->scale.r = lut3d->scale.g = lut3d->scale.b = 1.f;
  797. if (!lut3d->file) {
  798. return set_identity_matrix(ctx, 32);
  799. }
  800. f = av_fopen_utf8(lut3d->file, "r");
  801. if (!f) {
  802. ret = AVERROR(errno);
  803. av_log(ctx, AV_LOG_ERROR, "%s: %s\n", lut3d->file, av_err2str(ret));
  804. return ret;
  805. }
  806. ext = strrchr(lut3d->file, '.');
  807. if (!ext) {
  808. av_log(ctx, AV_LOG_ERROR, "Unable to guess the format from the extension\n");
  809. ret = AVERROR_INVALIDDATA;
  810. goto end;
  811. }
  812. ext++;
  813. if (!av_strcasecmp(ext, "dat")) {
  814. ret = parse_dat(ctx, f);
  815. } else if (!av_strcasecmp(ext, "3dl")) {
  816. ret = parse_3dl(ctx, f);
  817. } else if (!av_strcasecmp(ext, "cube")) {
  818. ret = parse_cube(ctx, f);
  819. } else if (!av_strcasecmp(ext, "m3d")) {
  820. ret = parse_m3d(ctx, f);
  821. } else if (!av_strcasecmp(ext, "csp")) {
  822. ret = parse_cinespace(ctx, f);
  823. } else {
  824. av_log(ctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext);
  825. ret = AVERROR(EINVAL);
  826. }
  827. if (!ret && !lut3d->lutsize) {
  828. av_log(ctx, AV_LOG_ERROR, "3D LUT is empty\n");
  829. ret = AVERROR_INVALIDDATA;
  830. }
  831. end:
  832. fclose(f);
  833. return ret;
  834. }
  835. static av_cold void lut3d_uninit(AVFilterContext *ctx)
  836. {
  837. LUT3DContext *lut3d = ctx->priv;
  838. av_freep(&lut3d->lut);
  839. }
/* Single video input of lut3d: the link is set up by config_input() and
 * every incoming frame is handled by filter_frame(). */
static const AVFilterPad lut3d_inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input,
    },
    { NULL }
};
/* Single video output of lut3d; no pad callbacks are set. */
static const AVFilterPad lut3d_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};
/* Filter definition for "lut3d". It declares generic timeline support and
 * slice threading; the per-frame work is dispatched from apply_lut(). */
AVFilter ff_vf_lut3d = {
    .name = "lut3d",
    .description = NULL_IF_CONFIG_SMALL("Adjust colors using a 3D LUT."),
    .priv_size = sizeof(LUT3DContext),
    .init = lut3d_init,
    .uninit = lut3d_uninit,
    .query_formats = query_formats,
    .inputs = lut3d_inputs,
    .outputs = lut3d_outputs,
    .priv_class = &lut3d_class,
    .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
};
  868. #endif
  869. #if CONFIG_HALDCLUT_FILTER
  870. static void update_clut_packed(LUT3DContext *lut3d, const AVFrame *frame)
  871. {
  872. const uint8_t *data = frame->data[0];
  873. const int linesize = frame->linesize[0];
  874. const int w = lut3d->clut_width;
  875. const int step = lut3d->clut_step;
  876. const uint8_t *rgba_map = lut3d->clut_rgba_map;
  877. const int level = lut3d->lutsize;
  878. const int level2 = lut3d->lutsize2;
  879. #define LOAD_CLUT(nbits) do { \
  880. int i, j, k, x = 0, y = 0; \
  881. \
  882. for (k = 0; k < level; k++) { \
  883. for (j = 0; j < level; j++) { \
  884. for (i = 0; i < level; i++) { \
  885. const uint##nbits##_t *src = (const uint##nbits##_t *) \
  886. (data + y*linesize + x*step); \
  887. struct rgbvec *vec = &lut3d->lut[i * level2 + j * level + k]; \
  888. vec->r = src[rgba_map[0]] / (float)((1<<(nbits)) - 1); \
  889. vec->g = src[rgba_map[1]] / (float)((1<<(nbits)) - 1); \
  890. vec->b = src[rgba_map[2]] / (float)((1<<(nbits)) - 1); \
  891. if (++x == w) { \
  892. x = 0; \
  893. y++; \
  894. } \
  895. } \
  896. } \
  897. } \
  898. } while (0)
  899. switch (lut3d->clut_bits) {
  900. case 8: LOAD_CLUT(8); break;
  901. case 16: LOAD_CLUT(16); break;
  902. }
  903. }
  904. static void update_clut_planar(LUT3DContext *lut3d, const AVFrame *frame)
  905. {
  906. const uint8_t *datag = frame->data[0];
  907. const uint8_t *datab = frame->data[1];
  908. const uint8_t *datar = frame->data[2];
  909. const int glinesize = frame->linesize[0];
  910. const int blinesize = frame->linesize[1];
  911. const int rlinesize = frame->linesize[2];
  912. const int w = lut3d->clut_width;
  913. const int level = lut3d->lutsize;
  914. const int level2 = lut3d->lutsize2;
  915. #define LOAD_CLUT_PLANAR(nbits, depth) do { \
  916. int i, j, k, x = 0, y = 0; \
  917. \
  918. for (k = 0; k < level; k++) { \
  919. for (j = 0; j < level; j++) { \
  920. for (i = 0; i < level; i++) { \
  921. const uint##nbits##_t *gsrc = (const uint##nbits##_t *) \
  922. (datag + y*glinesize); \
  923. const uint##nbits##_t *bsrc = (const uint##nbits##_t *) \
  924. (datab + y*blinesize); \
  925. const uint##nbits##_t *rsrc = (const uint##nbits##_t *) \
  926. (datar + y*rlinesize); \
  927. struct rgbvec *vec = &lut3d->lut[i * level2 + j * level + k]; \
  928. vec->r = gsrc[x] / (float)((1<<(depth)) - 1); \
  929. vec->g = bsrc[x] / (float)((1<<(depth)) - 1); \
  930. vec->b = rsrc[x] / (float)((1<<(depth)) - 1); \
  931. if (++x == w) { \
  932. x = 0; \
  933. y++; \
  934. } \
  935. } \
  936. } \
  937. } \
  938. } while (0)
  939. switch (lut3d->clut_bits) {
  940. case 8: LOAD_CLUT_PLANAR(8, 8); break;
  941. case 9: LOAD_CLUT_PLANAR(16, 9); break;
  942. case 10: LOAD_CLUT_PLANAR(16, 10); break;
  943. case 12: LOAD_CLUT_PLANAR(16, 12); break;
  944. case 14: LOAD_CLUT_PLANAR(16, 14); break;
  945. case 16: LOAD_CLUT_PLANAR(16, 16); break;
  946. }
  947. }
  948. static void update_clut_float(LUT3DContext *lut3d, const AVFrame *frame)
  949. {
  950. const uint8_t *datag = frame->data[0];
  951. const uint8_t *datab = frame->data[1];
  952. const uint8_t *datar = frame->data[2];
  953. const int glinesize = frame->linesize[0];
  954. const int blinesize = frame->linesize[1];
  955. const int rlinesize = frame->linesize[2];
  956. const int w = lut3d->clut_width;
  957. const int level = lut3d->lutsize;
  958. const int level2 = lut3d->lutsize2;
  959. int i, j, k, x = 0, y = 0;
  960. for (k = 0; k < level; k++) {
  961. for (j = 0; j < level; j++) {
  962. for (i = 0; i < level; i++) {
  963. const float *gsrc = (const float *)(datag + y*glinesize);
  964. const float *bsrc = (const float *)(datab + y*blinesize);
  965. const float *rsrc = (const float *)(datar + y*rlinesize);
  966. struct rgbvec *vec = &lut3d->lut[i * level2 + j * level + k];
  967. vec->r = rsrc[x];
  968. vec->g = gsrc[x];
  969. vec->b = bsrc[x];
  970. if (++x == w) {
  971. x = 0;
  972. y++;
  973. }
  974. }
  975. }
  976. }
  977. }
  978. static int config_output(AVFilterLink *outlink)
  979. {
  980. AVFilterContext *ctx = outlink->src;
  981. LUT3DContext *lut3d = ctx->priv;
  982. int ret;
  983. ret = ff_framesync_init_dualinput(&lut3d->fs, ctx);
  984. if (ret < 0)
  985. return ret;
  986. outlink->w = ctx->inputs[0]->w;
  987. outlink->h = ctx->inputs[0]->h;
  988. outlink->time_base = ctx->inputs[0]->time_base;
  989. if ((ret = ff_framesync_configure(&lut3d->fs)) < 0)
  990. return ret;
  991. return 0;
  992. }
  993. static int activate(AVFilterContext *ctx)
  994. {
  995. LUT3DContext *s = ctx->priv;
  996. return ff_framesync_activate(&s->fs);
  997. }
  998. static int config_clut(AVFilterLink *inlink)
  999. {
  1000. int size, level, w, h;
  1001. AVFilterContext *ctx = inlink->dst;
  1002. LUT3DContext *lut3d = ctx->priv;
  1003. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
  1004. av_assert0(desc);
  1005. lut3d->clut_bits = desc->comp[0].depth;
  1006. lut3d->clut_planar = av_pix_fmt_count_planes(inlink->format) > 1;
  1007. lut3d->clut_float = desc->flags & AV_PIX_FMT_FLAG_FLOAT;
  1008. lut3d->clut_step = av_get_padded_bits_per_pixel(desc) >> 3;
  1009. ff_fill_rgba_map(lut3d->clut_rgba_map, inlink->format);
  1010. if (inlink->w > inlink->h)
  1011. av_log(ctx, AV_LOG_INFO, "Padding on the right (%dpx) of the "
  1012. "Hald CLUT will be ignored\n", inlink->w - inlink->h);
  1013. else if (inlink->w < inlink->h)
  1014. av_log(ctx, AV_LOG_INFO, "Padding at the bottom (%dpx) of the "
  1015. "Hald CLUT will be ignored\n", inlink->h - inlink->w);
  1016. lut3d->clut_width = w = h = FFMIN(inlink->w, inlink->h);
  1017. for (level = 1; level*level*level < w; level++);
  1018. size = level*level*level;
  1019. if (size != w) {
  1020. av_log(ctx, AV_LOG_WARNING, "The Hald CLUT width does not match the level\n");
  1021. return AVERROR_INVALIDDATA;
  1022. }
  1023. av_assert0(w == h && w == size);
  1024. level *= level;
  1025. if (level > MAX_LEVEL) {
  1026. const int max_clut_level = sqrt(MAX_LEVEL);
  1027. const int max_clut_size = max_clut_level*max_clut_level*max_clut_level;
  1028. av_log(ctx, AV_LOG_ERROR, "Too large Hald CLUT "
  1029. "(maximum level is %d, or %dx%d CLUT)\n",
  1030. max_clut_level, max_clut_size, max_clut_size);
  1031. return AVERROR(EINVAL);
  1032. }
  1033. return allocate_3dlut(ctx, level);
  1034. }
  1035. static int update_apply_clut(FFFrameSync *fs)
  1036. {
  1037. AVFilterContext *ctx = fs->parent;
  1038. LUT3DContext *lut3d = ctx->priv;
  1039. AVFilterLink *inlink = ctx->inputs[0];
  1040. AVFrame *master, *second, *out;
  1041. int ret;
  1042. ret = ff_framesync_dualinput_get(fs, &master, &second);
  1043. if (ret < 0)
  1044. return ret;
  1045. if (!second)
  1046. return ff_filter_frame(ctx->outputs[0], master);
  1047. if (lut3d->clut_float)
  1048. update_clut_float(ctx->priv, second);
  1049. else if (lut3d->clut_planar)
  1050. update_clut_planar(ctx->priv, second);
  1051. else
  1052. update_clut_packed(ctx->priv, second);
  1053. out = apply_lut(inlink, master);
  1054. return ff_filter_frame(ctx->outputs[0], out);
  1055. }
  1056. static av_cold int haldclut_init(AVFilterContext *ctx)
  1057. {
  1058. LUT3DContext *lut3d = ctx->priv;
  1059. lut3d->scale.r = lut3d->scale.g = lut3d->scale.b = 1.f;
  1060. lut3d->fs.on_event = update_apply_clut;
  1061. return 0;
  1062. }
  1063. static av_cold void haldclut_uninit(AVFilterContext *ctx)
  1064. {
  1065. LUT3DContext *lut3d = ctx->priv;
  1066. ff_framesync_uninit(&lut3d->fs);
  1067. av_freep(&lut3d->lut);
  1068. }
/* Options of the haldclut filter: only the entries COMMON_OPTIONS expands
 * to (defined outside this section). The class is declared through the
 * framesync helper, with the FFFrameSync state living in LUT3DContext.fs. */
static const AVOption haldclut_options[] = {
    COMMON_OPTIONS
};

FRAMESYNC_DEFINE_CLASS(haldclut, LUT3DContext, fs);
/* Inputs of haldclut: "main" carries the video to be graded and is set up
 * by config_input(); "clut" carries the Hald CLUT picture and is set up by
 * config_clut(). Neither pad sets a filter_frame callback. */
static const AVFilterPad haldclut_inputs[] = {
    {
        .name = "main",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
    },{
        .name = "clut",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_clut,
    },
    { NULL }
};
/* Single video output of haldclut, configured by config_output(). */
static const AVFilterPad haldclut_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
    },
    { NULL }
};
/* Filter definition for "haldclut". It is driven through activate() and
 * declares internal timeline support and slice threading.
 * NOTE(review): haldclut_framesync_preinit is not defined in this section;
 * presumably it is generated by FRAMESYNC_DEFINE_CLASS -- confirm. */
AVFilter ff_vf_haldclut = {
    .name = "haldclut",
    .description = NULL_IF_CONFIG_SMALL("Adjust colors using a Hald CLUT."),
    .priv_size = sizeof(LUT3DContext),
    .preinit = haldclut_framesync_preinit,
    .init = haldclut_init,
    .uninit = haldclut_uninit,
    .query_formats = query_formats,
    .activate = activate,
    .inputs = haldclut_inputs,
    .outputs = haldclut_outputs,
    .priv_class = &haldclut_class,
    .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
};
  1107. #endif
  1108. #if CONFIG_LUT1D_FILTER
/* Interpolation modes of the lut1d filter. The numeric values are what the
 * "interp" option stores, so the order of the enumerators must not change. */
enum interp_1d_mode {
    INTERPOLATE_1D_NEAREST, /* handled by interp_1d_nearest() */
    INTERPOLATE_1D_LINEAR,  /* handled by interp_1d_linear() */
    INTERPOLATE_1D_CUBIC,   /* handled by interp_1d_cubic() */
    INTERPOLATE_1D_COSINE,  /* handled by interp_1d_cosine() */
    INTERPOLATE_1D_SPLINE,  /* handled by interp_1d_spline() */
    NB_INTERP_1D_MODE       /* number of modes, not a mode itself */
};
/* Upper bound on the number of entries per channel of a 1D LUT. */
#define MAX_1D_LEVEL 65536

/* Private context of the lut1d filter. */
typedef struct LUT1DContext {
    const AVClass *class;
    char *file;                  /* "file" option: path of the LUT file, NULL for identity */
    int interpolation;           ///<interp_1d_mode
    struct rgbvec scale;         /* per-channel factor applied to the input before lookup */
    uint8_t rgba_map[4];         /* filled by ff_fill_rgba_map() in config_input_1d() */
    int step;                    /* samples per pixel for packed formats */
    float lut[3][MAX_1D_LEVEL];  /* one table per channel; rows 0, 1, 2 serve r, g, b */
    int lutsize;                 /* number of valid entries in each table */
    avfilter_action_func *interp; /* kernel selected in config_input_1d() */
} LUT1DContext;

/* From here on OFFSET() addresses fields of LUT1DContext. */
#undef OFFSET
#define OFFSET(x) offsetof(LUT1DContext, x)
  1131. static void set_identity_matrix_1d(LUT1DContext *lut1d, int size)
  1132. {
  1133. const float c = 1. / (size - 1);
  1134. int i;
  1135. lut1d->lutsize = size;
  1136. for (i = 0; i < size; i++) {
  1137. lut1d->lut[0][i] = i * c;
  1138. lut1d->lut[1][i] = i * c;
  1139. lut1d->lut[2][i] = i * c;
  1140. }
  1141. }
/**
 * Parse a cineSpace ("CSPLUTV100") 1D LUT file into the lut1d context.
 *
 * Only pre-LUT sections with exactly two points per channel are supported.
 *
 * NOTE(review): NEXT_LINE() and skip_line() are defined outside this
 * section. They appear to read the next relevant line into `line` and to
 * return from this function on a read failure -- confirm.
 *
 * @param ctx filter context (priv is a LUT1DContext)
 * @param f   open LUT file
 * @return 0 on success, a negative AVERROR code on malformed or
 *         unsupported input
 */
static int parse_cinespace_1d(AVFilterContext *ctx, FILE *f)
{
    LUT1DContext *lut1d = ctx->priv;
    char line[MAX_LINE_SIZE];
    float in_min[3] = {0.0, 0.0, 0.0};
    float in_max[3] = {1.0, 1.0, 1.0};
    float out_min[3] = {0.0, 0.0, 0.0};
    float out_max[3] = {1.0, 1.0, 1.0};
    int inside_metadata = 0, size;

    /* header: format signature, then the LUT kind */
    NEXT_LINE(skip_line(line));
    if (strncmp(line, "CSPLUTV100", 10)) {
        av_log(ctx, AV_LOG_ERROR, "Not cineSpace LUT format\n");
        return AVERROR(EINVAL);
    }

    NEXT_LINE(skip_line(line));
    if (strncmp(line, "1D", 2)) {
        av_log(ctx, AV_LOG_ERROR, "Not 1D LUT format\n");
        return AVERROR(EINVAL);
    }

    while (1) {
        NEXT_LINE(skip_line(line));

        /* lines between the metadata markers are ignored */
        if (!strncmp(line, "BEGIN METADATA", 14)) {
            inside_metadata = 1;
            continue;
        }
        if (!strncmp(line, "END METADATA", 12)) {
            inside_metadata = 0;
            continue;
        }
        if (inside_metadata == 0) {
            /* three pre-LUT sections, one per channel: a point count,
             * an input range line and an output range line */
            for (int i = 0; i < 3; i++) {
                int npoints = strtol(line, NULL, 0);

                if (npoints != 2) {
                    av_log(ctx, AV_LOG_ERROR, "Unsupported number of pre-lut points.\n");
                    return AVERROR_PATCHWELCOME;
                }

                NEXT_LINE(skip_line(line));
                if (av_sscanf(line, "%f %f", &in_min[i], &in_max[i]) != 2)
                    return AVERROR_INVALIDDATA;
                NEXT_LINE(skip_line(line));
                if (av_sscanf(line, "%f %f", &out_min[i], &out_max[i]) != 2)
                    return AVERROR_INVALIDDATA;
                NEXT_LINE(skip_line(line));
            }

            /* the line left over after the third section holds the LUT size */
            size = strtol(line, NULL, 0);

            if (size < 2 || size > MAX_1D_LEVEL) {
                av_log(ctx, AV_LOG_ERROR, "Too large or invalid 1D LUT size\n");
                return AVERROR(EINVAL);
            }

            lut1d->lutsize = size;

            /* one "r g b" triplet per entry, scaled by the output range width */
            for (int i = 0; i < size; i++) {
                NEXT_LINE(skip_line(line));
                if (av_sscanf(line, "%f %f %f", &lut1d->lut[0][i], &lut1d->lut[1][i], &lut1d->lut[2][i]) != 3)
                    return AVERROR_INVALIDDATA;
                lut1d->lut[0][i] *= out_max[0] - out_min[0];
                lut1d->lut[1][i] *= out_max[1] - out_min[1];
                lut1d->lut[2][i] *= out_max[2] - out_min[2];
            }

            break;
        }
    }

    /* input scale is the reciprocal of the input range width, limited to [0, 1] */
    lut1d->scale.r = av_clipf(1. / (in_max[0] - in_min[0]), 0.f, 1.f);
    lut1d->scale.g = av_clipf(1. / (in_max[1] - in_min[1]), 0.f, 1.f);
    lut1d->scale.b = av_clipf(1. / (in_max[2] - in_min[2]), 0.f, 1.f);

    return 0;
}
  1208. static int parse_cube_1d(AVFilterContext *ctx, FILE *f)
  1209. {
  1210. LUT1DContext *lut1d = ctx->priv;
  1211. char line[MAX_LINE_SIZE];
  1212. float min[3] = {0.0, 0.0, 0.0};
  1213. float max[3] = {1.0, 1.0, 1.0};
  1214. while (fgets(line, sizeof(line), f)) {
  1215. if (!strncmp(line, "LUT_1D_SIZE", 11)) {
  1216. const int size = strtol(line + 12, NULL, 0);
  1217. int i;
  1218. if (size < 2 || size > MAX_1D_LEVEL) {
  1219. av_log(ctx, AV_LOG_ERROR, "Too large or invalid 1D LUT size\n");
  1220. return AVERROR(EINVAL);
  1221. }
  1222. lut1d->lutsize = size;
  1223. for (i = 0; i < size; i++) {
  1224. do {
  1225. try_again:
  1226. NEXT_LINE(0);
  1227. if (!strncmp(line, "DOMAIN_", 7)) {
  1228. float *vals = NULL;
  1229. if (!strncmp(line + 7, "MIN ", 4)) vals = min;
  1230. else if (!strncmp(line + 7, "MAX ", 4)) vals = max;
  1231. if (!vals)
  1232. return AVERROR_INVALIDDATA;
  1233. av_sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
  1234. av_log(ctx, AV_LOG_DEBUG, "min: %f %f %f | max: %f %f %f\n",
  1235. min[0], min[1], min[2], max[0], max[1], max[2]);
  1236. goto try_again;
  1237. } else if (!strncmp(line, "LUT_1D_INPUT_RANGE ", 19)) {
  1238. av_sscanf(line + 19, "%f %f", min, max);
  1239. min[1] = min[2] = min[0];
  1240. max[1] = max[2] = max[0];
  1241. goto try_again;
  1242. } else if (!strncmp(line, "TITLE", 5)) {
  1243. goto try_again;
  1244. }
  1245. } while (skip_line(line));
  1246. if (av_sscanf(line, "%f %f %f", &lut1d->lut[0][i], &lut1d->lut[1][i], &lut1d->lut[2][i]) != 3)
  1247. return AVERROR_INVALIDDATA;
  1248. }
  1249. break;
  1250. }
  1251. }
  1252. lut1d->scale.r = av_clipf(1. / (max[0] - min[0]), 0.f, 1.f);
  1253. lut1d->scale.g = av_clipf(1. / (max[1] - min[1]), 0.f, 1.f);
  1254. lut1d->scale.b = av_clipf(1. / (max[2] - min[2]), 0.f, 1.f);
  1255. return 0;
  1256. }
/* Options of the lut1d filter: the LUT file name and the interpolation
 * mode. The named constants map onto enum interp_1d_mode by value, so
 * their order in this table is independent of the enum order. */
static const AVOption lut1d_options[] = {
    { "file", "set 1D LUT file name", OFFSET(file), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
    { "interp", "select interpolation mode", OFFSET(interpolation), AV_OPT_TYPE_INT, {.i64=INTERPOLATE_1D_LINEAR}, 0, NB_INTERP_1D_MODE-1, FLAGS, "interp_mode" },
    { "nearest", "use values from the nearest defined points", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_NEAREST}, INT_MIN, INT_MAX, FLAGS, "interp_mode" },
    { "linear", "use values from the linear interpolation", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_LINEAR}, INT_MIN, INT_MAX, FLAGS, "interp_mode" },
    { "cosine", "use values from the cosine interpolation", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_COSINE}, INT_MIN, INT_MAX, FLAGS, "interp_mode" },
    { "cubic", "use values from the cubic interpolation", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_CUBIC}, INT_MIN, INT_MAX, FLAGS, "interp_mode" },
    { "spline", "use values from the spline interpolation", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_SPLINE}, INT_MIN, INT_MAX, FLAGS, "interp_mode" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(lut1d);
  1268. static inline float interp_1d_nearest(const LUT1DContext *lut1d,
  1269. int idx, const float s)
  1270. {
  1271. return lut1d->lut[idx][NEAR(s)];
  1272. }
/* Index of the entry following position x, clamped to the last LUT entry.
 * Expands to a use of a variable named lut1d, which must be in scope. */
#define NEXT1D(x) (FFMIN((int)(x) + 1, lut1d->lutsize - 1))
  1274. static inline float interp_1d_linear(const LUT1DContext *lut1d,
  1275. int idx, const float s)
  1276. {
  1277. const int prev = PREV(s);
  1278. const int next = NEXT1D(s);
  1279. const float d = s - prev;
  1280. const float p = lut1d->lut[idx][prev];
  1281. const float n = lut1d->lut[idx][next];
  1282. return lerpf(p, n, d);
  1283. }
  1284. static inline float interp_1d_cosine(const LUT1DContext *lut1d,
  1285. int idx, const float s)
  1286. {
  1287. const int prev = PREV(s);
  1288. const int next = NEXT1D(s);
  1289. const float d = s - prev;
  1290. const float p = lut1d->lut[idx][prev];
  1291. const float n = lut1d->lut[idx][next];
  1292. const float m = (1.f - cosf(d * M_PI)) * .5f;
  1293. return lerpf(p, n, m);
  1294. }
  1295. static inline float interp_1d_cubic(const LUT1DContext *lut1d,
  1296. int idx, const float s)
  1297. {
  1298. const int prev = PREV(s);
  1299. const int next = NEXT1D(s);
  1300. const float mu = s - prev;
  1301. float a0, a1, a2, a3, mu2;
  1302. float y0 = lut1d->lut[idx][FFMAX(prev - 1, 0)];
  1303. float y1 = lut1d->lut[idx][prev];
  1304. float y2 = lut1d->lut[idx][next];
  1305. float y3 = lut1d->lut[idx][FFMIN(next + 1, lut1d->lutsize - 1)];
  1306. mu2 = mu * mu;
  1307. a0 = y3 - y2 - y0 + y1;
  1308. a1 = y0 - y1 - a0;
  1309. a2 = y2 - y0;
  1310. a3 = y1;
  1311. return a0 * mu * mu2 + a1 * mu2 + a2 * mu + a3;
  1312. }
  1313. static inline float interp_1d_spline(const LUT1DContext *lut1d,
  1314. int idx, const float s)
  1315. {
  1316. const int prev = PREV(s);
  1317. const int next = NEXT1D(s);
  1318. const float x = s - prev;
  1319. float c0, c1, c2, c3;
  1320. float y0 = lut1d->lut[idx][FFMAX(prev - 1, 0)];
  1321. float y1 = lut1d->lut[idx][prev];
  1322. float y2 = lut1d->lut[idx][next];
  1323. float y3 = lut1d->lut[idx][FFMIN(next + 1, lut1d->lutsize - 1)];
  1324. c0 = y1;
  1325. c1 = .5f * (y2 - y0);
  1326. c2 = y0 - 2.5f * y1 + 2.f * y2 - .5f * y3;
  1327. c3 = .5f * (y3 - y0) + 1.5f * (y1 - y2);
  1328. return ((c3 * x + c2) * x + c1) * x + c0;
  1329. }
/*
 * Generates interp_1d_<nbits>_<name>_p<depth>(): a slice job that applies
 * the 1D LUT to a planar integer frame.
 *
 *   name  - interpolation helper to use (interp_1d_<name>)
 *   nbits - storage width of one sample (uint<nbits>_t)
 *   depth - significant bits per sample; used to normalize the input and
 *           to clip the output
 *
 * Planes 0, 1, 2, 3 are treated as G, B, R, A. Each job handles the rows
 * [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs). Alpha is
 * copied only when the output is a different frame than the input and the
 * input has a fourth plane (linesize[3] != 0).
 *
 * NOTE(review): the lookup position is not clamped here, unlike in the
 * float variant; this assumes samples never exceed (1 << depth) - 1 --
 * confirm.
 */
#define DEFINE_INTERP_FUNC_PLANAR_1D(name, nbits, depth)                     \
static int interp_1d_##nbits##_##name##_p##depth(AVFilterContext *ctx,       \
                                                 void *arg, int jobnr,       \
                                                 int nb_jobs)                \
{                                                                            \
    int x, y;                                                                \
    const LUT1DContext *lut1d = ctx->priv;                                   \
    const ThreadData *td = arg;                                              \
    const AVFrame *in = td->in;                                              \
    const AVFrame *out = td->out;                                            \
    const int direct = out == in;                                            \
    const int slice_start = (in->height * jobnr ) / nb_jobs;                 \
    const int slice_end = (in->height * (jobnr+1)) / nb_jobs;                \
    uint8_t *grow = out->data[0] + slice_start * out->linesize[0];           \
    uint8_t *brow = out->data[1] + slice_start * out->linesize[1];           \
    uint8_t *rrow = out->data[2] + slice_start * out->linesize[2];           \
    uint8_t *arow = out->data[3] + slice_start * out->linesize[3];           \
    const uint8_t *srcgrow = in->data[0] + slice_start * in->linesize[0];    \
    const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1];    \
    const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2];    \
    const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3];    \
    const float factor = (1 << depth) - 1;                                   \
    const float scale_r = (lut1d->scale.r / factor) * (lut1d->lutsize - 1);  \
    const float scale_g = (lut1d->scale.g / factor) * (lut1d->lutsize - 1);  \
    const float scale_b = (lut1d->scale.b / factor) * (lut1d->lutsize - 1);  \
                                                                             \
    for (y = slice_start; y < slice_end; y++) {                              \
        uint##nbits##_t *dstg = (uint##nbits##_t *)grow;                     \
        uint##nbits##_t *dstb = (uint##nbits##_t *)brow;                     \
        uint##nbits##_t *dstr = (uint##nbits##_t *)rrow;                     \
        uint##nbits##_t *dsta = (uint##nbits##_t *)arow;                     \
        const uint##nbits##_t *srcg = (const uint##nbits##_t *)srcgrow;      \
        const uint##nbits##_t *srcb = (const uint##nbits##_t *)srcbrow;      \
        const uint##nbits##_t *srcr = (const uint##nbits##_t *)srcrrow;      \
        const uint##nbits##_t *srca = (const uint##nbits##_t *)srcarow;      \
        for (x = 0; x < in->width; x++) {                                    \
            float r = srcr[x] * scale_r;                                     \
            float g = srcg[x] * scale_g;                                     \
            float b = srcb[x] * scale_b;                                     \
            r = interp_1d_##name(lut1d, 0, r);                               \
            g = interp_1d_##name(lut1d, 1, g);                               \
            b = interp_1d_##name(lut1d, 2, b);                               \
            dstr[x] = av_clip_uintp2(r * factor, depth);                     \
            dstg[x] = av_clip_uintp2(g * factor, depth);                     \
            dstb[x] = av_clip_uintp2(b * factor, depth);                     \
            if (!direct && in->linesize[3])                                  \
                dsta[x] = srca[x];                                           \
        }                                                                    \
        grow += out->linesize[0];                                            \
        brow += out->linesize[1];                                            \
        rrow += out->linesize[2];                                            \
        arow += out->linesize[3];                                            \
        srcgrow += in->linesize[0];                                          \
        srcbrow += in->linesize[1];                                          \
        srcrrow += in->linesize[2];                                          \
        srcarow += in->linesize[3];                                          \
    }                                                                        \
    return 0;                                                                \
}
/* Planar integer kernels: every interpolation mode for each supported bit
 * depth (8-bit samples in uint8_t, 9..16-bit samples in uint16_t). */
DEFINE_INTERP_FUNC_PLANAR_1D(nearest, 8, 8)
DEFINE_INTERP_FUNC_PLANAR_1D(linear, 8, 8)
DEFINE_INTERP_FUNC_PLANAR_1D(cosine, 8, 8)
DEFINE_INTERP_FUNC_PLANAR_1D(cubic, 8, 8)
DEFINE_INTERP_FUNC_PLANAR_1D(spline, 8, 8)

DEFINE_INTERP_FUNC_PLANAR_1D(nearest, 16, 9)
DEFINE_INTERP_FUNC_PLANAR_1D(linear, 16, 9)
DEFINE_INTERP_FUNC_PLANAR_1D(cosine, 16, 9)
DEFINE_INTERP_FUNC_PLANAR_1D(cubic, 16, 9)
DEFINE_INTERP_FUNC_PLANAR_1D(spline, 16, 9)

DEFINE_INTERP_FUNC_PLANAR_1D(nearest, 16, 10)
DEFINE_INTERP_FUNC_PLANAR_1D(linear, 16, 10)
DEFINE_INTERP_FUNC_PLANAR_1D(cosine, 16, 10)
DEFINE_INTERP_FUNC_PLANAR_1D(cubic, 16, 10)
DEFINE_INTERP_FUNC_PLANAR_1D(spline, 16, 10)

DEFINE_INTERP_FUNC_PLANAR_1D(nearest, 16, 12)
DEFINE_INTERP_FUNC_PLANAR_1D(linear, 16, 12)
DEFINE_INTERP_FUNC_PLANAR_1D(cosine, 16, 12)
DEFINE_INTERP_FUNC_PLANAR_1D(cubic, 16, 12)
DEFINE_INTERP_FUNC_PLANAR_1D(spline, 16, 12)

DEFINE_INTERP_FUNC_PLANAR_1D(nearest, 16, 14)
DEFINE_INTERP_FUNC_PLANAR_1D(linear, 16, 14)
DEFINE_INTERP_FUNC_PLANAR_1D(cosine, 16, 14)
DEFINE_INTERP_FUNC_PLANAR_1D(cubic, 16, 14)
DEFINE_INTERP_FUNC_PLANAR_1D(spline, 16, 14)

DEFINE_INTERP_FUNC_PLANAR_1D(nearest, 16, 16)
DEFINE_INTERP_FUNC_PLANAR_1D(linear, 16, 16)
DEFINE_INTERP_FUNC_PLANAR_1D(cosine, 16, 16)
DEFINE_INTERP_FUNC_PLANAR_1D(cubic, 16, 16)
DEFINE_INTERP_FUNC_PLANAR_1D(spline, 16, 16)
/*
 * Generates interp_1d_<name>_pf<depth>(): a slice job that applies the 1D
 * LUT to a planar float frame.
 *
 *   name  - interpolation helper to use (interp_1d_<name>)
 *   depth - only used to build the function name
 *
 * Planes 0, 1, 2, 3 are treated as G, B, R, A. Each input sample is passed
 * through sanitizef(), scaled to LUT index units and clamped to
 * [0, lutsize - 1] before the lookup; the looked-up value is stored without
 * further clipping. Alpha is copied only when the output is a different
 * frame than the input and the input has a fourth plane.
 */
#define DEFINE_INTERP_FUNC_PLANAR_1D_FLOAT(name, depth)                      \
static int interp_1d_##name##_pf##depth(AVFilterContext *ctx,                \
                                        void *arg, int jobnr,                \
                                        int nb_jobs)                         \
{                                                                            \
    int x, y;                                                                \
    const LUT1DContext *lut1d = ctx->priv;                                   \
    const ThreadData *td = arg;                                              \
    const AVFrame *in = td->in;                                              \
    const AVFrame *out = td->out;                                            \
    const int direct = out == in;                                            \
    const int slice_start = (in->height * jobnr ) / nb_jobs;                 \
    const int slice_end = (in->height * (jobnr+1)) / nb_jobs;                \
    uint8_t *grow = out->data[0] + slice_start * out->linesize[0];           \
    uint8_t *brow = out->data[1] + slice_start * out->linesize[1];           \
    uint8_t *rrow = out->data[2] + slice_start * out->linesize[2];           \
    uint8_t *arow = out->data[3] + slice_start * out->linesize[3];           \
    const uint8_t *srcgrow = in->data[0] + slice_start * in->linesize[0];    \
    const uint8_t *srcbrow = in->data[1] + slice_start * in->linesize[1];    \
    const uint8_t *srcrrow = in->data[2] + slice_start * in->linesize[2];    \
    const uint8_t *srcarow = in->data[3] + slice_start * in->linesize[3];    \
    const float lutsize = lut1d->lutsize - 1;                                \
    const float scale_r = lut1d->scale.r * lutsize;                          \
    const float scale_g = lut1d->scale.g * lutsize;                          \
    const float scale_b = lut1d->scale.b * lutsize;                          \
                                                                             \
    for (y = slice_start; y < slice_end; y++) {                              \
        float *dstg = (float *)grow;                                         \
        float *dstb = (float *)brow;                                         \
        float *dstr = (float *)rrow;                                         \
        float *dsta = (float *)arow;                                         \
        const float *srcg = (const float *)srcgrow;                          \
        const float *srcb = (const float *)srcbrow;                          \
        const float *srcr = (const float *)srcrrow;                          \
        const float *srca = (const float *)srcarow;                          \
        for (x = 0; x < in->width; x++) {                                    \
            float r = av_clipf(sanitizef(srcr[x]) * scale_r, 0.0f, lutsize); \
            float g = av_clipf(sanitizef(srcg[x]) * scale_g, 0.0f, lutsize); \
            float b = av_clipf(sanitizef(srcb[x]) * scale_b, 0.0f, lutsize); \
            r = interp_1d_##name(lut1d, 0, r);                               \
            g = interp_1d_##name(lut1d, 1, g);                               \
            b = interp_1d_##name(lut1d, 2, b);                               \
            dstr[x] = r;                                                     \
            dstg[x] = g;                                                     \
            dstb[x] = b;                                                     \
            if (!direct && in->linesize[3])                                  \
                dsta[x] = srca[x];                                           \
        }                                                                    \
        grow += out->linesize[0];                                            \
        brow += out->linesize[1];                                            \
        rrow += out->linesize[2];                                            \
        arow += out->linesize[3];                                            \
        srcgrow += in->linesize[0];                                          \
        srcbrow += in->linesize[1];                                          \
        srcrrow += in->linesize[2];                                          \
        srcarow += in->linesize[3];                                          \
    }                                                                        \
    return 0;                                                                \
}
/* Planar float kernels: one per interpolation mode. */
DEFINE_INTERP_FUNC_PLANAR_1D_FLOAT(nearest, 32)
DEFINE_INTERP_FUNC_PLANAR_1D_FLOAT(linear, 32)
DEFINE_INTERP_FUNC_PLANAR_1D_FLOAT(cosine, 32)
DEFINE_INTERP_FUNC_PLANAR_1D_FLOAT(cubic, 32)
DEFINE_INTERP_FUNC_PLANAR_1D_FLOAT(spline, 32)
/*
 * Generates interp_1d_<nbits>_<name>(): a slice job that applies the 1D LUT
 * to a packed integer frame stored in plane 0.
 *
 *   name  - interpolation helper to use (interp_1d_<name>)
 *   nbits - storage width of one sample (uint<nbits>_t); also the range
 *           used for normalization and output clipping
 *
 * Component positions inside a pixel come from lut1d->rgba_map and the
 * distance between pixels, in samples, from lut1d->step. Each job handles
 * the rows [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs).
 * Alpha is copied only when the output is a different frame than the input
 * and a pixel has four samples.
 */
#define DEFINE_INTERP_FUNC_1D(name, nbits)                                   \
static int interp_1d_##nbits##_##name(AVFilterContext *ctx, void *arg,       \
                                      int jobnr, int nb_jobs)                \
{                                                                            \
    int x, y;                                                                \
    const LUT1DContext *lut1d = ctx->priv;                                   \
    const ThreadData *td = arg;                                              \
    const AVFrame *in = td->in;                                              \
    const AVFrame *out = td->out;                                            \
    const int direct = out == in;                                            \
    const int step = lut1d->step;                                            \
    const uint8_t r = lut1d->rgba_map[R];                                    \
    const uint8_t g = lut1d->rgba_map[G];                                    \
    const uint8_t b = lut1d->rgba_map[B];                                    \
    const uint8_t a = lut1d->rgba_map[A];                                    \
    const int slice_start = (in->height * jobnr ) / nb_jobs;                 \
    const int slice_end = (in->height * (jobnr+1)) / nb_jobs;                \
    uint8_t *dstrow = out->data[0] + slice_start * out->linesize[0];         \
    const uint8_t *srcrow = in ->data[0] + slice_start * in ->linesize[0];   \
    const float factor = (1 << nbits) - 1;                                   \
    const float scale_r = (lut1d->scale.r / factor) * (lut1d->lutsize - 1);  \
    const float scale_g = (lut1d->scale.g / factor) * (lut1d->lutsize - 1);  \
    const float scale_b = (lut1d->scale.b / factor) * (lut1d->lutsize - 1);  \
                                                                             \
    for (y = slice_start; y < slice_end; y++) {                              \
        uint##nbits##_t *dst = (uint##nbits##_t *)dstrow;                    \
        const uint##nbits##_t *src = (const uint##nbits##_t *)srcrow;        \
        for (x = 0; x < in->width * step; x += step) {                       \
            float rr = src[x + r] * scale_r;                                 \
            float gg = src[x + g] * scale_g;                                 \
            float bb = src[x + b] * scale_b;                                 \
            rr = interp_1d_##name(lut1d, 0, rr);                             \
            gg = interp_1d_##name(lut1d, 1, gg);                             \
            bb = interp_1d_##name(lut1d, 2, bb);                             \
            dst[x + r] = av_clip_uint##nbits(rr * factor);                   \
            dst[x + g] = av_clip_uint##nbits(gg * factor);                   \
            dst[x + b] = av_clip_uint##nbits(bb * factor);                   \
            if (!direct && step == 4)                                        \
                dst[x + a] = src[x + a];                                     \
        }                                                                    \
        dstrow += out->linesize[0];                                          \
        srcrow += in ->linesize[0];                                          \
    }                                                                        \
    return 0;                                                                \
}
/* Packed integer kernels: every interpolation mode for 8- and 16-bit samples. */
DEFINE_INTERP_FUNC_1D(nearest, 8)
DEFINE_INTERP_FUNC_1D(linear, 8)
DEFINE_INTERP_FUNC_1D(cosine, 8)
DEFINE_INTERP_FUNC_1D(cubic, 8)
DEFINE_INTERP_FUNC_1D(spline, 8)

DEFINE_INTERP_FUNC_1D(nearest, 16)
DEFINE_INTERP_FUNC_1D(linear, 16)
DEFINE_INTERP_FUNC_1D(cosine, 16)
DEFINE_INTERP_FUNC_1D(cubic, 16)
DEFINE_INTERP_FUNC_1D(spline, 16)
  1538. static int config_input_1d(AVFilterLink *inlink)
  1539. {
  1540. int depth, is16bit, isfloat, planar;
  1541. LUT1DContext *lut1d = inlink->dst->priv;
  1542. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
  1543. depth = desc->comp[0].depth;
  1544. is16bit = desc->comp[0].depth > 8;
  1545. planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR;
  1546. isfloat = desc->flags & AV_PIX_FMT_FLAG_FLOAT;
  1547. ff_fill_rgba_map(lut1d->rgba_map, inlink->format);
  1548. lut1d->step = av_get_padded_bits_per_pixel(desc) >> (3 + is16bit);
  1549. #define SET_FUNC_1D(name) do { \
  1550. if (planar && !isfloat) { \
  1551. switch (depth) { \
  1552. case 8: lut1d->interp = interp_1d_8_##name##_p8; break; \
  1553. case 9: lut1d->interp = interp_1d_16_##name##_p9; break; \
  1554. case 10: lut1d->interp = interp_1d_16_##name##_p10; break; \
  1555. case 12: lut1d->interp = interp_1d_16_##name##_p12; break; \
  1556. case 14: lut1d->interp = interp_1d_16_##name##_p14; break; \
  1557. case 16: lut1d->interp = interp_1d_16_##name##_p16; break; \
  1558. } \
  1559. } else if (isfloat) { lut1d->interp = interp_1d_##name##_pf32; \
  1560. } else if (is16bit) { lut1d->interp = interp_1d_16_##name; \
  1561. } else { lut1d->interp = interp_1d_8_##name; } \
  1562. } while (0)
  1563. switch (lut1d->interpolation) {
  1564. case INTERPOLATE_1D_NEAREST: SET_FUNC_1D(nearest); break;
  1565. case INTERPOLATE_1D_LINEAR: SET_FUNC_1D(linear); break;
  1566. case INTERPOLATE_1D_COSINE: SET_FUNC_1D(cosine); break;
  1567. case INTERPOLATE_1D_CUBIC: SET_FUNC_1D(cubic); break;
  1568. case INTERPOLATE_1D_SPLINE: SET_FUNC_1D(spline); break;
  1569. default:
  1570. av_assert0(0);
  1571. }
  1572. return 0;
  1573. }
  1574. static av_cold int lut1d_init(AVFilterContext *ctx)
  1575. {
  1576. int ret;
  1577. FILE *f;
  1578. const char *ext;
  1579. LUT1DContext *lut1d = ctx->priv;
  1580. lut1d->scale.r = lut1d->scale.g = lut1d->scale.b = 1.f;
  1581. if (!lut1d->file) {
  1582. set_identity_matrix_1d(lut1d, 32);
  1583. return 0;
  1584. }
  1585. f = av_fopen_utf8(lut1d->file, "r");
  1586. if (!f) {
  1587. ret = AVERROR(errno);
  1588. av_log(ctx, AV_LOG_ERROR, "%s: %s\n", lut1d->file, av_err2str(ret));
  1589. return ret;
  1590. }
  1591. ext = strrchr(lut1d->file, '.');
  1592. if (!ext) {
  1593. av_log(ctx, AV_LOG_ERROR, "Unable to guess the format from the extension\n");
  1594. ret = AVERROR_INVALIDDATA;
  1595. goto end;
  1596. }
  1597. ext++;
  1598. if (!av_strcasecmp(ext, "cube") || !av_strcasecmp(ext, "1dlut")) {
  1599. ret = parse_cube_1d(ctx, f);
  1600. } else if (!av_strcasecmp(ext, "csp")) {
  1601. ret = parse_cinespace_1d(ctx, f);
  1602. } else {
  1603. av_log(ctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext);
  1604. ret = AVERROR(EINVAL);
  1605. }
  1606. if (!ret && !lut1d->lutsize) {
  1607. av_log(ctx, AV_LOG_ERROR, "1D LUT is empty\n");
  1608. ret = AVERROR_INVALIDDATA;
  1609. }
  1610. end:
  1611. fclose(f);
  1612. return ret;
  1613. }
  1614. static AVFrame *apply_1d_lut(AVFilterLink *inlink, AVFrame *in)
  1615. {
  1616. AVFilterContext *ctx = inlink->dst;
  1617. LUT1DContext *lut1d = ctx->priv;
  1618. AVFilterLink *outlink = inlink->dst->outputs[0];
  1619. AVFrame *out;
  1620. ThreadData td;
  1621. if (av_frame_is_writable(in)) {
  1622. out = in;
  1623. } else {
  1624. out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  1625. if (!out) {
  1626. av_frame_free(&in);
  1627. return NULL;
  1628. }
  1629. av_frame_copy_props(out, in);
  1630. }
  1631. td.in = in;
  1632. td.out = out;
  1633. ctx->internal->execute(ctx, lut1d->interp, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
  1634. if (out != in)
  1635. av_frame_free(&in);
  1636. return out;
  1637. }
  1638. static int filter_frame_1d(AVFilterLink *inlink, AVFrame *in)
  1639. {
  1640. AVFilterLink *outlink = inlink->dst->outputs[0];
  1641. AVFrame *out = apply_1d_lut(inlink, in);
  1642. if (!out)
  1643. return AVERROR(ENOMEM);
  1644. return ff_filter_frame(outlink, out);
  1645. }
  1646. static const AVFilterPad lut1d_inputs[] = {
  1647. {
  1648. .name = "default",
  1649. .type = AVMEDIA_TYPE_VIDEO,
  1650. .filter_frame = filter_frame_1d,
  1651. .config_props = config_input_1d,
  1652. },
  1653. { NULL }
  1654. };
  1655. static const AVFilterPad lut1d_outputs[] = {
  1656. {
  1657. .name = "default",
  1658. .type = AVMEDIA_TYPE_VIDEO,
  1659. },
  1660. { NULL }
  1661. };
  1662. AVFilter ff_vf_lut1d = {
  1663. .name = "lut1d",
  1664. .description = NULL_IF_CONFIG_SMALL("Adjust colors using a 1D LUT."),
  1665. .priv_size = sizeof(LUT1DContext),
  1666. .init = lut1d_init,
  1667. .query_formats = query_formats,
  1668. .inputs = lut1d_inputs,
  1669. .outputs = lut1d_outputs,
  1670. .priv_class = &lut1d_class,
  1671. .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
  1672. };
  1673. #endif