You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

904 lines
30KB

  1. /*
  2. * Apple ProRes encoder
  3. *
  4. * Copyright (c) 2012 Konstantin Shishkov
  5. *
  6. * This file is part of Libav.
  7. *
  8. * Libav is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * Libav is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with Libav; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "libavutil/opt.h"
  23. #include "avcodec.h"
  24. #include "put_bits.h"
  25. #include "bytestream.h"
  26. #include "internal.h"
  27. #include "proresdsp.h"
  28. #include "proresdata.h"
  /* Chroma format codes kept in ProresContext.chroma_factor; encode_frame()
   * writes chroma_factor << 6 into the frame flags byte. */
  29. #define CFACTOR_Y422 2
  30. #define CFACTOR_Y444 3
  /* Upper bound of the "mbs_per_slice" option; also sizes ProresContext.blocks. */
  31. #define MAX_MBS_PER_SLICE 8
  /* Number of per-plane coefficient buffers and per-plane arrays. */
  32. #define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
  /* Profile identifiers: values accepted by the "profile" option and indices
   * into prores_profile_info[]. */
  33. enum {
  34. PRORES_PROFILE_PROXY = 0,
  35. PRORES_PROFILE_LT,
  36. PRORES_PROFILE_STANDARD,
  37. PRORES_PROFILE_HQ,
  38. };
  /*
   * Frame-size classes expressed as macroblock counts. encode_init() selects
   * the first entry that is >= mb_width * mb_height (or the last entry when
   * none is) and uses its index to pick a value from the profile's br_tab[].
   */
  39. #define NUM_MB_LIMITS 4
  40. static const int prores_mb_limits[NUM_MB_LIMITS] = {
  41. 1620, // up to 720x576
  42. 2700, // up to 960x720
  43. 6075, // up to 1440x1080
  44. 9216, // up to 2048x1152
  45. };
  /*
   * Per-profile encoding parameters, indexed by the PRORES_PROFILE_* values.
   *
   *   full_name - human-readable profile name
   *   tag       - FourCC that encode_init() stores in avctx->codec_tag
   *   min_quant,
   *   max_quant - regular range of quantiser scales evaluated per slice by
   *               find_slice_quant()
   *   br_tab    - bits-per-macroblock budget, one entry per prores_mb_limits[]
   *               size class
   *   quant     - 8x8 base quantisation matrix; multiplied by the quantiser
   *               scale for coding and written to the frame header as both
   *               the luma and the chroma matrix
   */
  46. static const struct prores_profile {
  47. const char *full_name;
  48. uint32_t tag;
  49. int min_quant;
  50. int max_quant;
  51. int br_tab[NUM_MB_LIMITS];
  52. uint8_t quant[64];
  53. } prores_profile_info[4] = {
  54. {
  55. .full_name = "proxy",
  56. .tag = MKTAG('a', 'p', 'c', 'o'),
  57. .min_quant = 4,
  58. .max_quant = 8,
  59. .br_tab = { 300, 242, 220, 194 },
  60. .quant = {
  61. 4, 7, 9, 11, 13, 14, 15, 63,
  62. 7, 7, 11, 12, 14, 15, 63, 63,
  63. 9, 11, 13, 14, 15, 63, 63, 63,
  64. 11, 11, 13, 14, 63, 63, 63, 63,
  65. 11, 13, 14, 63, 63, 63, 63, 63,
  66. 13, 14, 63, 63, 63, 63, 63, 63,
  67. 13, 63, 63, 63, 63, 63, 63, 63,
  68. 63, 63, 63, 63, 63, 63, 63, 63,
  69. },
  70. },
  71. {
  72. .full_name = "LT",
  73. .tag = MKTAG('a', 'p', 'c', 's'),
  74. .min_quant = 1,
  75. .max_quant = 9,
  76. .br_tab = { 720, 560, 490, 440 },
  77. .quant = {
  78. 4, 5, 6, 7, 9, 11, 13, 15,
  79. 5, 5, 7, 8, 11, 13, 15, 17,
  80. 6, 7, 9, 11, 13, 15, 15, 17,
  81. 7, 7, 9, 11, 13, 15, 17, 19,
  82. 7, 9, 11, 13, 14, 16, 19, 23,
  83. 9, 11, 13, 14, 16, 19, 23, 29,
  84. 9, 11, 13, 15, 17, 21, 28, 35,
  85. 11, 13, 16, 17, 21, 28, 35, 41,
  86. },
  87. },
  88. {
  89. .full_name = "standard",
  90. .tag = MKTAG('a', 'p', 'c', 'n'),
  91. .min_quant = 1,
  92. .max_quant = 6,
  93. .br_tab = { 1050, 808, 710, 632 },
  94. .quant = {
  95. 4, 4, 5, 5, 6, 7, 7, 9,
  96. 4, 4, 5, 6, 7, 7, 9, 9,
  97. 5, 5, 6, 7, 7, 9, 9, 10,
  98. 5, 5, 6, 7, 7, 9, 9, 10,
  99. 5, 6, 7, 7, 8, 9, 10, 12,
  100. 6, 7, 7, 8, 9, 10, 12, 15,
  101. 6, 7, 7, 9, 10, 11, 14, 17,
  102. 7, 7, 9, 10, 11, 14, 17, 21,
  103. },
  104. },
  105. {
  106. .full_name = "high quality",
  107. .tag = MKTAG('a', 'p', 'c', 'h'),
  108. .min_quant = 1,
  109. .max_quant = 6,
  110. .br_tab = { 1566, 1216, 1070, 950 },
  111. .quant = {
  112. 4, 4, 4, 4, 4, 4, 4, 4,
  113. 4, 4, 4, 4, 4, 4, 4, 4,
  114. 4, 4, 4, 4, 4, 4, 4, 4,
  115. 4, 4, 4, 4, 4, 4, 4, 5,
  116. 4, 4, 4, 4, 4, 4, 5, 5,
  117. 4, 4, 4, 4, 4, 5, 5, 6,
  118. 4, 4, 4, 4, 5, 5, 6, 7,
  119. 4, 4, 4, 4, 5, 6, 7, 7,
  120. },
  121. }
  122. // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
  123. };
  /* Number of node slots reserved per trellis column; nodes are addressed as
   * column * TRELLIS_WIDTH + quantiser index. */
  #define TRELLIS_WIDTH 16
  /* Score that marks a rejected path. Parenthesised so that the macro keeps
   * its value when it is expanded inside a larger expression. */
  #define SCORE_LIMIT (INT_MAX / 2)

  /* One state of the per-row rate control trellis. */
  struct TrellisNode {
      int prev_node; /* index of the best predecessor in the node array, -1 if none */
      int quant;     /* quantiser scale this node stands for */
      int bits;      /* bits accumulated along the best path ending here */
      int score;     /* error accumulated along the best path ending here */
  };
  /* Number of rows in ProresContext.quants[]; quantiser scales at or above
   * this value use the custom_q scratch matrix instead. */
  132. #define MAX_STORED_Q 16
  /* Private encoder state (the codec's priv_data). */
  133. typedef struct ProresContext {
  134. AVClass *class; // class describing the private options of this encoder
  135. DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; // DCT coefficients of one slice, per plane
  136. DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16]; // scratch macroblock used by get_slice_data() for edge padding
  137. int16_t quants[MAX_STORED_Q][64]; // profile matrix times quantiser scale, filled in encode_init()
  138. int16_t custom_q[64]; // matrix rebuilt on the fly for scales >= MAX_STORED_Q
  139. ProresDSPContext dsp; // provides fdct and the DCT permutation
  140. ScanTable scantable; // coefficient scan order used by the AC coders
  141. int mb_width, mb_height; // frame size in 16x16 macroblocks, rounded up
  142. int mbs_per_slice; // "mbs_per_slice" option value
  143. int num_chroma_blocks, chroma_factor; // chroma_factor is CFACTOR_Y422 or CFACTOR_Y444; num_chroma_blocks is not referenced in this file
  144. int slices_width; // number of slices per macroblock row
  145. int num_slices; // slices per frame (mb_height * slices_width)
  146. int num_planes; // planes coded per slice (set to 3 in encode_init())
  147. int bits_per_mb; // bit budget per macroblock taken from the profile's br_tab[]
  148. int frame_size; // estimated coded frame size, used to size the output packet
  149. int profile; // "profile" option value (PRORES_PROFILE_*)
  150. const struct prores_profile *profile_info; // entry of prores_profile_info[] selected by profile
  151. struct TrellisNode *nodes; // (slices_width + 1) * TRELLIS_WIDTH trellis nodes
  152. int *slice_q; // quantiser chosen for each slice of the current row
  153. } ProresContext;
  154. static void get_slice_data(ProresContext *ctx, const uint16_t *src,
  155. int linesize, int x, int y, int w, int h,
  156. DCTELEM *blocks,
  157. int mbs_per_slice, int blocks_per_mb, int is_chroma)
  158. {
  159. const uint16_t *esrc;
  160. const int mb_width = 4 * blocks_per_mb;
  161. int elinesize;
  162. int i, j, k;
  163. for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
  164. if (x >= w) {
  165. memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
  166. * sizeof(*blocks));
  167. return;
  168. }
  169. if (x + mb_width <= w && y + 16 <= h) {
  170. esrc = src;
  171. elinesize = linesize;
  172. } else {
  173. int bw, bh, pix;
  174. esrc = ctx->emu_buf;
  175. elinesize = 16 * sizeof(*ctx->emu_buf);
  176. bw = FFMIN(w - x, mb_width);
  177. bh = FFMIN(h - y, 16);
  178. for (j = 0; j < bh; j++) {
  179. memcpy(ctx->emu_buf + j * 16,
  180. (const uint8_t*)src + j * linesize,
  181. bw * sizeof(*src));
  182. pix = ctx->emu_buf[j * 16 + bw - 1];
  183. for (k = bw; k < mb_width; k++)
  184. ctx->emu_buf[j * 16 + k] = pix;
  185. }
  186. for (; j < 16; j++)
  187. memcpy(ctx->emu_buf + j * 16,
  188. ctx->emu_buf + (bh - 1) * 16,
  189. mb_width * sizeof(*ctx->emu_buf));
  190. }
  191. if (!is_chroma) {
  192. ctx->dsp.fdct(esrc, elinesize, blocks);
  193. blocks += 64;
  194. if (blocks_per_mb > 2) {
  195. ctx->dsp.fdct(src + 8, linesize, blocks);
  196. blocks += 64;
  197. }
  198. ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
  199. blocks += 64;
  200. if (blocks_per_mb > 2) {
  201. ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
  202. blocks += 64;
  203. }
  204. } else {
  205. ctx->dsp.fdct(esrc, elinesize, blocks);
  206. blocks += 64;
  207. ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
  208. blocks += 64;
  209. if (blocks_per_mb > 2) {
  210. ctx->dsp.fdct(src + 8, linesize, blocks);
  211. blocks += 64;
  212. ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
  213. blocks += 64;
  214. }
  215. }
  216. x += mb_width;
  217. }
  218. }
  219. /**
  220. * Write an unsigned rice/exp golomb codeword.
  221. */
  222. static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
  223. {
  224. unsigned int rice_order, exp_order, switch_bits, switch_val;
  225. int exponent;
  226. /* number of prefix bits to switch between Rice and expGolomb */
  227. switch_bits = (codebook & 3) + 1;
  228. rice_order = codebook >> 5; /* rice code order */
  229. exp_order = (codebook >> 2) & 7; /* exp golomb code order */
  230. switch_val = switch_bits << rice_order;
  231. if (val >= switch_val) {
  232. val -= switch_val - (1 << exp_order);
  233. exponent = av_log2(val);
  234. put_bits(pb, exponent - exp_order + switch_bits, 0);
  235. put_bits(pb, 1, 1);
  236. put_bits(pb, exponent, val);
  237. } else {
  238. exponent = val >> rice_order;
  239. if (exponent)
  240. put_bits(pb, exponent, 0);
  241. put_bits(pb, 1, 1);
  242. if (rice_order)
  243. put_sbits(pb, rice_order, val);
  244. }
  245. }
  /* -1 for negative x, 0 otherwise (expects a 32-bit int and an arithmetic
   * right shift, which is implementation-defined in C). */
  #define GET_SIGN(x)  ((x) >> 31)
  /* Fold a signed value into a non-negative code: 0, -1, 1, -2, 2, ... map to
   * 0, 1, 2, 3, 4, ... Multiplication is used because left-shifting a negative
   * value is undefined. */
  #define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
  248. static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
  249. int blocks_per_slice, int scale)
  250. {
  251. int i;
  252. int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
  253. prev_dc = (blocks[0] - 0x4000) / scale;
  254. encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
  255. sign = 0;
  256. codebook = 3;
  257. blocks += 64;
  258. for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
  259. dc = (blocks[0] - 0x4000) / scale;
  260. delta = dc - prev_dc;
  261. new_sign = GET_SIGN(delta);
  262. delta = (delta ^ sign) - sign;
  263. code = MAKE_CODE(delta);
  264. encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
  265. codebook = (code + (code & 1)) >> 1;
  266. codebook = FFMIN(codebook, 3);
  267. sign = new_sign;
  268. prev_dc = dc;
  269. }
  270. }
  271. static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
  272. int blocks_per_slice,
  273. int plane_size_factor,
  274. const uint8_t *scan, const int16_t *qmat)
  275. {
  276. int idx, i;
  277. int run, level, run_cb, lev_cb;
  278. int max_coeffs, abs_level;
  279. max_coeffs = blocks_per_slice << 6;
  280. run_cb = ff_prores_run_to_cb_index[4];
  281. lev_cb = ff_prores_lev_to_cb_index[2];
  282. run = 0;
  283. for (i = 1; i < 64; i++) {
  284. for (idx = scan[i]; idx < max_coeffs; idx += 64) {
  285. level = blocks[idx] / qmat[scan[i]];
  286. if (level) {
  287. abs_level = FFABS(level);
  288. encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
  289. encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
  290. abs_level - 1);
  291. put_sbits(pb, 1, GET_SIGN(level));
  292. run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
  293. lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
  294. run = 0;
  295. } else {
  296. run++;
  297. }
  298. }
  299. }
  300. }
  301. static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
  302. const uint16_t *src, int linesize,
  303. int mbs_per_slice, DCTELEM *blocks,
  304. int blocks_per_mb, int plane_size_factor,
  305. const int16_t *qmat)
  306. {
  307. int blocks_per_slice, saved_pos;
  308. saved_pos = put_bits_count(pb);
  309. blocks_per_slice = mbs_per_slice * blocks_per_mb;
  310. encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
  311. encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
  312. ctx->scantable.permutated, qmat);
  313. flush_put_bits(pb);
  314. return (put_bits_count(pb) - saved_pos) >> 3;
  315. }
  316. static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
  317. PutBitContext *pb,
  318. int sizes[4], int x, int y, int quant,
  319. int mbs_per_slice)
  320. {
  321. ProresContext *ctx = avctx->priv_data;
  322. int i, xp, yp;
  323. int total_size = 0;
  324. const uint16_t *src;
  325. int slice_width_factor = av_log2(mbs_per_slice);
  326. int num_cblocks, pwidth;
  327. int plane_factor, is_chroma;
  328. uint16_t *qmat;
  329. if (quant < MAX_STORED_Q) {
  330. qmat = ctx->quants[quant];
  331. } else {
  332. qmat = ctx->custom_q;
  333. for (i = 0; i < 64; i++)
  334. qmat[i] = ctx->profile_info->quant[i] * quant;
  335. }
  336. for (i = 0; i < ctx->num_planes; i++) {
  337. is_chroma = (i == 1 || i == 2);
  338. plane_factor = slice_width_factor + 2;
  339. if (is_chroma)
  340. plane_factor += ctx->chroma_factor - 3;
  341. if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
  342. xp = x << 4;
  343. yp = y << 4;
  344. num_cblocks = 4;
  345. pwidth = avctx->width;
  346. } else {
  347. xp = x << 3;
  348. yp = y << 4;
  349. num_cblocks = 2;
  350. pwidth = avctx->width >> 1;
  351. }
  352. src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
  353. get_slice_data(ctx, src, pic->linesize[i], xp, yp,
  354. pwidth, avctx->height, ctx->blocks[0],
  355. mbs_per_slice, num_cblocks, is_chroma);
  356. sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
  357. mbs_per_slice, ctx->blocks[0],
  358. num_cblocks, plane_factor,
  359. qmat);
  360. total_size += sizes[i];
  361. }
  362. return total_size;
  363. }
  /**
   * Compute the length in bits of the codeword that encode_vlc_codeword()
   * would write for a value, without writing anything.
   *
   * @param codebook  packed codebook descriptor (switch bits, exp-Golomb
   *                  order and Rice order)
   * @param val       non-negative value
   * @return codeword length in bits
   */
  static inline int estimate_vlc(unsigned codebook, int val)
  {
      /* number of prefix bits to switch between Rice and expGolomb */
      const unsigned int switch_bits = (codebook & 3) + 1;
      const unsigned int rice_order  =  codebook >> 5;
      const unsigned int exp_order   = (codebook >> 2) & 7;
      const unsigned int switch_val  = switch_bits << rice_order;
      int exponent;

      /* Rice part: unary quotient, stop bit and rice_order remainder bits */
      if (val < switch_val)
          return (val >> rice_order) + rice_order + 1;

      /* exp-Golomb part */
      val     -= switch_val - (1 << exp_order);
      exponent = av_log2(val);

      return exponent * 2 - exp_order + switch_bits + 1;
  }
  381. static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
  382. int scale)
  383. {
  384. int i;
  385. int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
  386. int bits;
  387. prev_dc = (blocks[0] - 0x4000) / scale;
  388. bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
  389. sign = 0;
  390. codebook = 3;
  391. blocks += 64;
  392. *error += FFABS(blocks[0] - 0x4000) % scale;
  393. for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
  394. dc = (blocks[0] - 0x4000) / scale;
  395. *error += FFABS(blocks[0] - 0x4000) % scale;
  396. delta = dc - prev_dc;
  397. new_sign = GET_SIGN(delta);
  398. delta = (delta ^ sign) - sign;
  399. code = MAKE_CODE(delta);
  400. bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
  401. codebook = (code + (code & 1)) >> 1;
  402. codebook = FFMIN(codebook, 3);
  403. sign = new_sign;
  404. prev_dc = dc;
  405. }
  406. return bits;
  407. }
  408. static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
  409. int plane_size_factor,
  410. const uint8_t *scan, const int16_t *qmat)
  411. {
  412. int idx, i;
  413. int run, level, run_cb, lev_cb;
  414. int max_coeffs, abs_level;
  415. int bits = 0;
  416. max_coeffs = blocks_per_slice << 6;
  417. run_cb = ff_prores_run_to_cb_index[4];
  418. lev_cb = ff_prores_lev_to_cb_index[2];
  419. run = 0;
  420. for (i = 1; i < 64; i++) {
  421. for (idx = scan[i]; idx < max_coeffs; idx += 64) {
  422. level = blocks[idx] / qmat[scan[i]];
  423. *error += FFABS(blocks[idx]) % qmat[scan[i]];
  424. if (level) {
  425. abs_level = FFABS(level);
  426. bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
  427. bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
  428. abs_level - 1) + 1;
  429. run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
  430. lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
  431. run = 0;
  432. } else {
  433. run++;
  434. }
  435. }
  436. }
  437. return bits;
  438. }
  439. static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
  440. const uint16_t *src, int linesize,
  441. int mbs_per_slice,
  442. int blocks_per_mb, int plane_size_factor,
  443. const int16_t *qmat)
  444. {
  445. int blocks_per_slice;
  446. int bits;
  447. blocks_per_slice = mbs_per_slice * blocks_per_mb;
  448. bits = estimate_dcs(error, ctx->blocks[plane], blocks_per_slice, qmat[0]);
  449. bits += estimate_acs(error, ctx->blocks[plane], blocks_per_slice,
  450. plane_size_factor, ctx->scantable.permutated, qmat);
  451. return FFALIGN(bits, 8);
  452. }
  453. static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
  454. int trellis_node, int x, int y, int mbs_per_slice)
  455. {
  456. ProresContext *ctx = avctx->priv_data;
  457. int i, q, pq, xp, yp;
  458. const uint16_t *src;
  459. int slice_width_factor = av_log2(mbs_per_slice);
  460. int num_cblocks[MAX_PLANES], pwidth;
  461. int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
  462. const int min_quant = ctx->profile_info->min_quant;
  463. const int max_quant = ctx->profile_info->max_quant;
  464. int error, bits, bits_limit;
  465. int mbs, prev, cur, new_score;
  466. int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
  467. int overquant;
  468. uint16_t *qmat;
  469. mbs = x + mbs_per_slice;
  470. for (i = 0; i < ctx->num_planes; i++) {
  471. is_chroma[i] = (i == 1 || i == 2);
  472. plane_factor[i] = slice_width_factor + 2;
  473. if (is_chroma[i])
  474. plane_factor[i] += ctx->chroma_factor - 3;
  475. if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
  476. xp = x << 4;
  477. yp = y << 4;
  478. num_cblocks[i] = 4;
  479. pwidth = avctx->width;
  480. } else {
  481. xp = x << 3;
  482. yp = y << 4;
  483. num_cblocks[i] = 2;
  484. pwidth = avctx->width >> 1;
  485. }
  486. src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
  487. get_slice_data(ctx, src, pic->linesize[i], xp, yp,
  488. pwidth, avctx->height, ctx->blocks[i],
  489. mbs_per_slice, num_cblocks[i], is_chroma[i]);
  490. }
  491. for (q = min_quant; q < max_quant + 2; q++) {
  492. ctx->nodes[trellis_node + q].prev_node = -1;
  493. ctx->nodes[trellis_node + q].quant = q;
  494. }
  495. // todo: maybe perform coarser quantising to fit into frame size when needed
  496. for (q = min_quant; q <= max_quant; q++) {
  497. bits = 0;
  498. error = 0;
  499. for (i = 0; i < ctx->num_planes; i++) {
  500. bits += estimate_slice_plane(ctx, &error, i,
  501. src, pic->linesize[i],
  502. mbs_per_slice,
  503. num_cblocks[i], plane_factor[i],
  504. ctx->quants[q]);
  505. }
  506. if (bits > 65000 * 8) {
  507. error = SCORE_LIMIT;
  508. break;
  509. }
  510. slice_bits[q] = bits;
  511. slice_score[q] = error;
  512. }
  513. if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
  514. slice_bits[max_quant + 1] = slice_bits[max_quant];
  515. slice_score[max_quant + 1] = slice_score[max_quant] + 1;
  516. overquant = max_quant;
  517. } else {
  518. for (q = max_quant + 1; q < 128; q++) {
  519. bits = 0;
  520. error = 0;
  521. if (q < MAX_STORED_Q) {
  522. qmat = ctx->quants[q];
  523. } else {
  524. qmat = ctx->custom_q;
  525. for (i = 0; i < 64; i++)
  526. qmat[i] = ctx->profile_info->quant[i] * q;
  527. }
  528. for (i = 0; i < ctx->num_planes; i++) {
  529. bits += estimate_slice_plane(ctx, &error, i,
  530. src, pic->linesize[i],
  531. mbs_per_slice,
  532. num_cblocks[i], plane_factor[i],
  533. qmat);
  534. }
  535. if (bits <= ctx->bits_per_mb * mbs_per_slice)
  536. break;
  537. }
  538. slice_bits[max_quant + 1] = bits;
  539. slice_score[max_quant + 1] = error;
  540. overquant = q;
  541. }
  542. ctx->nodes[trellis_node + max_quant + 1].quant = overquant;
  543. bits_limit = mbs * ctx->bits_per_mb;
  544. for (pq = min_quant; pq < max_quant + 2; pq++) {
  545. prev = trellis_node - TRELLIS_WIDTH + pq;
  546. for (q = min_quant; q < max_quant + 2; q++) {
  547. cur = trellis_node + q;
  548. bits = ctx->nodes[prev].bits + slice_bits[q];
  549. error = slice_score[q];
  550. if (bits > bits_limit)
  551. error = SCORE_LIMIT;
  552. if (ctx->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
  553. new_score = ctx->nodes[prev].score + error;
  554. else
  555. new_score = SCORE_LIMIT;
  556. if (ctx->nodes[cur].prev_node == -1 ||
  557. ctx->nodes[cur].score >= new_score) {
  558. ctx->nodes[cur].bits = bits;
  559. ctx->nodes[cur].score = new_score;
  560. ctx->nodes[cur].prev_node = prev;
  561. }
  562. }
  563. }
  564. error = ctx->nodes[trellis_node + min_quant].score;
  565. pq = trellis_node + min_quant;
  566. for (q = min_quant + 1; q < max_quant + 2; q++) {
  567. if (ctx->nodes[trellis_node + q].score <= error) {
  568. error = ctx->nodes[trellis_node + q].score;
  569. pq = trellis_node + q;
  570. }
  571. }
  572. return pq;
  573. }
  /**
   * Encode one picture into a packet.
   *
   * Writes the frame atom header, the frame header (including the profile's
   * quantisation matrix twice, as luma and chroma matrix), the picture header
   * and the slice size table, followed by the slices. For every macroblock
   * row the quantisers are first chosen with find_slice_quant() and then the
   * slices are coded with encode_slice().
   *
   * @param avctx       codec context
   * @param pkt         output packet, allocated here through ff_alloc_packet()
   * @param pic         picture to encode
   * @param got_packet  set to 1 once the packet has been filled
   * @return 0 on success, the error from ff_alloc_packet() otherwise
   */
  574. static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
  575. const AVFrame *pic, int *got_packet)
  576. {
  577. ProresContext *ctx = avctx->priv_data;
  578. uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
  579. uint8_t *picture_size_pos;
  580. PutBitContext pb;
  581. int x, y, i, mb, q = 0;
  582. int sizes[4] = { 0 };
  /* one byte of header size, one byte of quantiser and a 16-bit size for
   * every plane but the last */
  583. int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
  584. int frame_size, picture_size, slice_size;
  585. int mbs_per_slice = ctx->mbs_per_slice;
  586. int pkt_size, ret;
  /* shallow copy: coded_frame shares the data pointers of the input picture */
  587. *avctx->coded_frame = *pic;
  588. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
  589. avctx->coded_frame->key_frame = 1;
  /* the packet is sized from the estimate computed in encode_init() */
  590. pkt_size = ctx->frame_size + FF_MIN_BUFFER_SIZE;
  591. if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
  592. av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
  593. return ret;
  594. }
  595. orig_buf = pkt->data;
  596. // frame atom
  597. orig_buf += 4; // frame size
  598. bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
  599. buf = orig_buf;
  600. // frame header
  601. tmp = buf;
  602. buf += 2; // frame header size will be stored here
  603. bytestream_put_be16 (&buf, 0); // version 1
  604. bytestream_put_buffer(&buf, "Lavc", 4); // creator
  605. bytestream_put_be16 (&buf, avctx->width);
  606. bytestream_put_be16 (&buf, avctx->height);
  607. bytestream_put_byte (&buf, ctx->chroma_factor << 6); // frame flags
  608. bytestream_put_byte (&buf, 0); // reserved
  609. bytestream_put_byte (&buf, avctx->color_primaries);
  610. bytestream_put_byte (&buf, avctx->color_trc);
  611. bytestream_put_byte (&buf, avctx->colorspace);
  612. bytestream_put_byte (&buf, 0x40); // source format and alpha information
  613. bytestream_put_byte (&buf, 0); // reserved
  614. bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
  615. // luma quantisation matrix
  616. for (i = 0; i < 64; i++)
  617. bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
  618. // chroma quantisation matrix
  619. for (i = 0; i < 64; i++)
  620. bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
  621. bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
  622. // picture header
  /* the 32-bit picture size field follows the header size byte */
  623. picture_size_pos = buf + 1;
  624. bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
  625. buf += 4; // picture data size will be stored here
  626. bytestream_put_be16 (&buf, ctx->num_slices); // total number of slices
  627. bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
  628. // seek table - will be filled during slice encoding
  629. slice_sizes = buf;
  630. buf += ctx->num_slices * 2;
  631. // slices
  632. for (y = 0; y < ctx->mb_height; y++) {
  /* first pass over the row: extend the trellis by one column per slice;
   * the last slices of a row shrink by halving until they fit */
  633. mbs_per_slice = ctx->mbs_per_slice;
  634. for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
  635. while (ctx->mb_width - x < mbs_per_slice)
  636. mbs_per_slice >>= 1;
  637. q = find_slice_quant(avctx, pic, (mb + 1) * TRELLIS_WIDTH, x, y,
  638. mbs_per_slice);
  639. }
  /* walk back from the best final node to get one quantiser per slice */
  640. for (x = ctx->slices_width - 1; x >= 0; x--) {
  641. ctx->slice_q[x] = ctx->nodes[q].quant;
  642. q = ctx->nodes[q].prev_node;
  643. }
  /* second pass over the row: write the slices with the chosen quantisers */
  644. mbs_per_slice = ctx->mbs_per_slice;
  645. for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
  646. q = ctx->slice_q[mb];
  647. while (ctx->mb_width - x < mbs_per_slice)
  648. mbs_per_slice >>= 1;
  649. bytestream_put_byte(&buf, slice_hdr_size << 3);
  /* the rest of the slice header is filled in once the plane sizes are known */
  650. slice_hdr = buf;
  651. buf += slice_hdr_size - 1;
  652. init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
  653. encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
  654. bytestream_put_byte(&slice_hdr, q);
  /* the last plane's size is counted in slice_size but not stored in the header */
  655. slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
  656. for (i = 0; i < ctx->num_planes - 1; i++) {
  657. bytestream_put_be16(&slice_hdr, sizes[i]);
  658. slice_size += sizes[i];
  659. }
  660. bytestream_put_be16(&slice_sizes, slice_size);
  661. buf += slice_size - slice_hdr_size;
  662. }
  663. }
  /* step back over the 8-byte frame atom header to the start of the packet */
  664. orig_buf -= 8;
  665. frame_size = buf - orig_buf;
  /* NOTE(review): the value stored is the distance from picture_size_pos to
   * the end of the data minus 6; the reason for this offset is not visible
   * in this file - confirm against the bitstream description */
  666. picture_size = buf - picture_size_pos - 6;
  667. bytestream_put_be32(&orig_buf, frame_size);
  668. bytestream_put_be32(&picture_size_pos, picture_size);
  669. pkt->size = frame_size;
  670. pkt->flags |= AV_PKT_FLAG_KEY;
  671. *got_packet = 1;
  672. return 0;
  673. }
  674. static av_cold int encode_close(AVCodecContext *avctx)
  675. {
  676. ProresContext *ctx = avctx->priv_data;
  677. if (avctx->coded_frame->data[0])
  678. avctx->release_buffer(avctx, avctx->coded_frame);
  679. av_freep(&avctx->coded_frame);
  680. av_freep(&ctx->nodes);
  681. av_freep(&ctx->slice_q);
  682. return 0;
  683. }
  684. static av_cold int encode_init(AVCodecContext *avctx)
  685. {
  686. ProresContext *ctx = avctx->priv_data;
  687. int mps;
  688. int i, j;
  689. int min_quant, max_quant;
  690. avctx->bits_per_raw_sample = 10;
  691. avctx->coded_frame = avcodec_alloc_frame();
  692. if (!avctx->coded_frame)
  693. return AVERROR(ENOMEM);
  694. ff_proresdsp_init(&ctx->dsp);
  695. ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
  696. ff_prores_progressive_scan);
  697. mps = ctx->mbs_per_slice;
  698. if (mps & (mps - 1)) {
  699. av_log(avctx, AV_LOG_ERROR,
  700. "there should be an integer power of two MBs per slice\n");
  701. return AVERROR(EINVAL);
  702. }
  703. ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
  704. ? CFACTOR_Y422
  705. : CFACTOR_Y444;
  706. ctx->profile_info = prores_profile_info + ctx->profile;
  707. ctx->num_planes = 3;
  708. ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
  709. ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
  710. ctx->slices_width = ctx->mb_width / mps;
  711. ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
  712. ctx->num_slices = ctx->mb_height * ctx->slices_width;
  713. for (i = 0; i < NUM_MB_LIMITS - 1; i++)
  714. if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
  715. break;
  716. ctx->bits_per_mb = ctx->profile_info->br_tab[i];
  717. ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes
  718. + (2 * mps * ctx->bits_per_mb) / 8)
  719. + 200;
  720. min_quant = ctx->profile_info->min_quant;
  721. max_quant = ctx->profile_info->max_quant;
  722. for (i = min_quant; i < MAX_STORED_Q; i++) {
  723. for (j = 0; j < 64; j++)
  724. ctx->quants[i][j] = ctx->profile_info->quant[j] * i;
  725. }
  726. avctx->codec_tag = ctx->profile_info->tag;
  727. av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
  728. ctx->profile, ctx->num_slices, ctx->bits_per_mb);
  729. av_log(avctx, AV_LOG_DEBUG, "estimated frame size %d\n",
  730. ctx->frame_size);
  731. ctx->nodes = av_malloc((ctx->slices_width + 1) * TRELLIS_WIDTH
  732. * sizeof(*ctx->nodes));
  733. if (!ctx->nodes) {
  734. encode_close(avctx);
  735. return AVERROR(ENOMEM);
  736. }
  737. for (i = min_quant; i < max_quant + 2; i++) {
  738. ctx->nodes[i].prev_node = -1;
  739. ctx->nodes[i].bits = 0;
  740. ctx->nodes[i].score = 0;
  741. }
  742. ctx->slice_q = av_malloc(ctx->slices_width * sizeof(*ctx->slice_q));
  743. if (!ctx->slice_q) {
  744. encode_close(avctx);
  745. return AVERROR(ENOMEM);
  746. }
  747. return 0;
  748. }
  /* Byte offset of a ProresContext field, as used by the option table. */
  749. #define OFFSET(x) offsetof(ProresContext, x)
  /* Flags shared by all entries below: video encoding parameters. */
  750. #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
  /*
   * Private options of the encoder:
   *   mbs_per_slice - 1..MAX_MBS_PER_SLICE, default 8; encode_init()
   *                   additionally requires a power of two
   *   profile       - one of the PRORES_PROFILE_* values, default standard;
   *                   "proxy", "lt", "standard" and "hq" are named constants
   *                   for it
   */
  751. static const AVOption options[] = {
  752. { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
  753. AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
  754. { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
  755. { PRORES_PROFILE_STANDARD },
  756. PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
  757. { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
  758. 0, 0, VE, "profile" },
  759. { "lt", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
  760. 0, 0, VE, "profile" },
  761. { "standard", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
  762. 0, 0, VE, "profile" },
  763. { "hq", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
  764. 0, 0, VE, "profile" },
  765. { NULL }
  766. };
  /* AVClass that attaches the option table above to the encoder's private
   * context; referenced by ff_prores_encoder.priv_class. */
  767. static const AVClass proresenc_class = {
  768. .class_name = "ProRes encoder",
  769. .item_name = av_default_item_name,
  770. .option = options,
  771. .version = LIBAVUTIL_VERSION_INT,
  772. };
  /* Codec registration entry: a video encoder named "prores" that accepts
   * the two pixel formats listed in pix_fmts and is driven through
   * encode_init(), encode_frame() and encode_close(). */
  773. AVCodec ff_prores_encoder = {
  774. .name = "prores",
  775. .type = AVMEDIA_TYPE_VIDEO,
  776. .id = CODEC_ID_PRORES,
  777. .priv_data_size = sizeof(ProresContext),
  778. .init = encode_init,
  779. .close = encode_close,
  780. .encode2 = encode_frame,
  781. .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
  782. .pix_fmts = (const enum PixelFormat[]) {
  783. PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
  784. },
  785. .priv_class = &proresenc_class,
  786. };