You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

837 lines
27KB

  1. /*
  2. * Apple ProRes encoder
  3. *
  4. * Copyright (c) 2012 Konstantin Shishkov
  5. *
  6. * This file is part of Libav.
  7. *
  8. * Libav is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * Libav is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with Libav; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "libavutil/opt.h"
  23. #include "avcodec.h"
  24. #include "put_bits.h"
  25. #include "bytestream.h"
  26. #include "internal.h"
  27. #include "proresdsp.h"
  28. #include "proresdata.h"
  29. #define CFACTOR_Y422 2
  30. #define CFACTOR_Y444 3
  31. #define MAX_MBS_PER_SLICE 8
  32. #define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
  33. enum {
  34. PRORES_PROFILE_PROXY = 0,
  35. PRORES_PROFILE_LT,
  36. PRORES_PROFILE_STANDARD,
  37. PRORES_PROFILE_HQ,
  38. };
  39. #define NUM_MB_LIMITS 4
  40. static const int prores_mb_limits[NUM_MB_LIMITS] = {
  41. 1620, // up to 720x576
  42. 2700, // up to 960x720
  43. 6075, // up to 1440x1080
  44. 9216, // up to 2048x1152
  45. };
  46. static const struct prores_profile {
  47. const char *full_name;
  48. uint32_t tag;
  49. int min_quant;
  50. int max_quant;
  51. int br_tab[NUM_MB_LIMITS];
  52. uint8_t quant[64];
  53. } prores_profile_info[4] = {
  54. {
  55. .full_name = "proxy",
  56. .tag = MKTAG('a', 'p', 'c', 'o'),
  57. .min_quant = 4,
  58. .max_quant = 8,
  59. .br_tab = { 300, 242, 220, 194 },
  60. .quant = {
  61. 4, 7, 9, 11, 13, 14, 15, 63,
  62. 7, 7, 11, 12, 14, 15, 63, 63,
  63. 9, 11, 13, 14, 15, 63, 63, 63,
  64. 11, 11, 13, 14, 63, 63, 63, 63,
  65. 11, 13, 14, 63, 63, 63, 63, 63,
  66. 13, 14, 63, 63, 63, 63, 63, 63,
  67. 13, 63, 63, 63, 63, 63, 63, 63,
  68. 63, 63, 63, 63, 63, 63, 63, 63,
  69. },
  70. },
  71. {
  72. .full_name = "LT",
  73. .tag = MKTAG('a', 'p', 'c', 's'),
  74. .min_quant = 1,
  75. .max_quant = 9,
  76. .br_tab = { 720, 560, 490, 440 },
  77. .quant = {
  78. 4, 5, 6, 7, 9, 11, 13, 15,
  79. 5, 5, 7, 8, 11, 13, 15, 17,
  80. 6, 7, 9, 11, 13, 15, 15, 17,
  81. 7, 7, 9, 11, 13, 15, 17, 19,
  82. 7, 9, 11, 13, 14, 16, 19, 23,
  83. 9, 11, 13, 14, 16, 19, 23, 29,
  84. 9, 11, 13, 15, 17, 21, 28, 35,
  85. 11, 13, 16, 17, 21, 28, 35, 41,
  86. },
  87. },
  88. {
  89. .full_name = "standard",
  90. .tag = MKTAG('a', 'p', 'c', 'n'),
  91. .min_quant = 1,
  92. .max_quant = 6,
  93. .br_tab = { 1050, 808, 710, 632 },
  94. .quant = {
  95. 4, 4, 5, 5, 6, 7, 7, 9,
  96. 4, 4, 5, 6, 7, 7, 9, 9,
  97. 5, 5, 6, 7, 7, 9, 9, 10,
  98. 5, 5, 6, 7, 7, 9, 9, 10,
  99. 5, 6, 7, 7, 8, 9, 10, 12,
  100. 6, 7, 7, 8, 9, 10, 12, 15,
  101. 6, 7, 7, 9, 10, 11, 14, 17,
  102. 7, 7, 9, 10, 11, 14, 17, 21,
  103. },
  104. },
  105. {
  106. .full_name = "high quality",
  107. .tag = MKTAG('a', 'p', 'c', 'h'),
  108. .min_quant = 1,
  109. .max_quant = 6,
  110. .br_tab = { 1566, 1216, 1070, 950 },
  111. .quant = {
  112. 4, 4, 4, 4, 4, 4, 4, 4,
  113. 4, 4, 4, 4, 4, 4, 4, 4,
  114. 4, 4, 4, 4, 4, 4, 4, 4,
  115. 4, 4, 4, 4, 4, 4, 4, 5,
  116. 4, 4, 4, 4, 4, 4, 5, 5,
  117. 4, 4, 4, 4, 4, 5, 5, 6,
  118. 4, 4, 4, 4, 5, 5, 6, 7,
  119. 4, 4, 4, 4, 5, 6, 7, 7,
  120. },
  121. }
  122. // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
  123. };
  124. #define TRELLIS_WIDTH 16
  125. #define SCORE_LIMIT INT_MAX / 2
  126. struct TrellisNode {
  127. int prev_node;
  128. int quant;
  129. int bits;
  130. int score;
  131. };
  132. typedef struct ProresContext {
  133. AVClass *class;
  134. DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
  135. DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
  136. int16_t quants[16][64];
  137. ProresDSPContext dsp;
  138. ScanTable scantable;
  139. int mb_width, mb_height;
  140. int mbs_per_slice;
  141. int num_chroma_blocks, chroma_factor;
  142. int slices_width;
  143. int num_slices;
  144. int num_planes;
  145. int bits_per_mb;
  146. int profile;
  147. const struct prores_profile *profile_info;
  148. struct TrellisNode *nodes;
  149. int *slice_q;
  150. } ProresContext;
  151. static void get_slice_data(ProresContext *ctx, const uint16_t *src,
  152. int linesize, int x, int y, int w, int h,
  153. DCTELEM *blocks,
  154. int mbs_per_slice, int blocks_per_mb)
  155. {
  156. const uint16_t *esrc;
  157. const int mb_width = 4 * blocks_per_mb;
  158. int elinesize;
  159. int i, j, k;
  160. for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
  161. if (x >= w) {
  162. memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
  163. * sizeof(*blocks));
  164. return;
  165. }
  166. if (x + mb_width <= w && y + 16 <= h) {
  167. esrc = src;
  168. elinesize = linesize;
  169. } else {
  170. int bw, bh, pix;
  171. const int estride = 16 / sizeof(*ctx->emu_buf);
  172. esrc = ctx->emu_buf;
  173. elinesize = 16;
  174. bw = FFMIN(w - x, mb_width);
  175. bh = FFMIN(h - y, 16);
  176. for (j = 0; j < bh; j++) {
  177. memcpy(ctx->emu_buf + j * estride, src + j * linesize,
  178. bw * sizeof(*src));
  179. pix = ctx->emu_buf[j * estride + bw - 1];
  180. for (k = bw; k < mb_width; k++)
  181. ctx->emu_buf[j * estride + k] = pix;
  182. }
  183. for (; j < 16; j++)
  184. memcpy(ctx->emu_buf + j * estride,
  185. ctx->emu_buf + (bh - 1) * estride,
  186. mb_width * sizeof(*ctx->emu_buf));
  187. }
  188. ctx->dsp.fdct(esrc, elinesize, blocks);
  189. blocks += 64;
  190. if (blocks_per_mb > 2) {
  191. ctx->dsp.fdct(src + 8, linesize, blocks);
  192. blocks += 64;
  193. }
  194. ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
  195. blocks += 64;
  196. if (blocks_per_mb > 2) {
  197. ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
  198. blocks += 64;
  199. }
  200. x += mb_width;
  201. }
  202. }
  203. /**
  204. * Write an unsigned rice/exp golomb codeword.
  205. */
  206. static inline void encode_vlc_codeword(PutBitContext *pb, uint8_t codebook, int val)
  207. {
  208. unsigned int rice_order, exp_order, switch_bits, switch_val;
  209. int exponent;
  210. /* number of prefix bits to switch between Rice and expGolomb */
  211. switch_bits = (codebook & 3) + 1;
  212. rice_order = codebook >> 5; /* rice code order */
  213. exp_order = (codebook >> 2) & 7; /* exp golomb code order */
  214. switch_val = switch_bits << rice_order;
  215. if (val >= switch_val) {
  216. val -= switch_val - (1 << exp_order);
  217. exponent = av_log2(val);
  218. put_bits(pb, exponent - exp_order + switch_bits, 0);
  219. put_bits(pb, 1, 1);
  220. put_bits(pb, exponent, val);
  221. } else {
  222. exponent = val >> rice_order;
  223. if (exponent)
  224. put_bits(pb, exponent, 0);
  225. put_bits(pb, 1, 1);
  226. if (rice_order)
  227. put_sbits(pb, rice_order, val);
  228. }
  229. }
  230. #define GET_SIGN(x) ((x) >> 31)
  231. #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
  232. static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
  233. int blocks_per_slice, int scale)
  234. {
  235. int i;
  236. int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
  237. prev_dc = (blocks[0] - 0x4000) / scale;
  238. encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
  239. codebook = 3;
  240. blocks += 64;
  241. for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
  242. dc = (blocks[0] - 0x4000) / scale;
  243. delta = dc - prev_dc;
  244. new_sign = GET_SIGN(delta);
  245. delta = (delta ^ sign) - sign;
  246. code = MAKE_CODE(delta);
  247. encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
  248. codebook = (code + (code & 1)) >> 1;
  249. codebook = FFMIN(codebook, 3);
  250. sign = new_sign;
  251. prev_dc = dc;
  252. }
  253. }
  254. static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
  255. int blocks_per_slice,
  256. int plane_size_factor,
  257. const uint8_t *scan, const int16_t *qmat)
  258. {
  259. int idx, i;
  260. int run, level, run_cb, lev_cb;
  261. int max_coeffs, abs_level;
  262. max_coeffs = blocks_per_slice << 6;
  263. run_cb = ff_prores_run_to_cb_index[4];
  264. lev_cb = ff_prores_lev_to_cb_index[2];
  265. run = 0;
  266. for (i = 1; i < 64; i++) {
  267. for (idx = scan[i]; idx < max_coeffs; idx += 64) {
  268. level = blocks[idx] / qmat[scan[i]];
  269. if (level) {
  270. abs_level = FFABS(level);
  271. encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
  272. encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
  273. abs_level - 1);
  274. put_sbits(pb, 1, GET_SIGN(level));
  275. run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
  276. lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
  277. run = 0;
  278. } else {
  279. run++;
  280. }
  281. }
  282. }
  283. }
  284. static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
  285. const uint16_t *src, int linesize,
  286. int mbs_per_slice, DCTELEM *blocks,
  287. int blocks_per_mb, int plane_size_factor,
  288. const int16_t *qmat)
  289. {
  290. int blocks_per_slice, saved_pos;
  291. saved_pos = put_bits_count(pb);
  292. blocks_per_slice = mbs_per_slice * blocks_per_mb;
  293. encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
  294. encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
  295. ctx->scantable.permutated, qmat);
  296. flush_put_bits(pb);
  297. return (put_bits_count(pb) - saved_pos) >> 3;
  298. }
  299. static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
  300. PutBitContext *pb,
  301. int sizes[4], int x, int y, int quant,
  302. int mbs_per_slice)
  303. {
  304. ProresContext *ctx = avctx->priv_data;
  305. int i, xp, yp;
  306. int total_size = 0;
  307. const uint16_t *src;
  308. int slice_width_factor = av_log2(mbs_per_slice);
  309. int num_cblocks, pwidth;
  310. int plane_factor, is_chroma;
  311. for (i = 0; i < ctx->num_planes; i++) {
  312. is_chroma = (i == 1 || i == 2);
  313. plane_factor = slice_width_factor + 2;
  314. if (is_chroma)
  315. plane_factor += ctx->chroma_factor - 3;
  316. if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
  317. xp = x << 4;
  318. yp = y << 4;
  319. num_cblocks = 4;
  320. pwidth = avctx->width;
  321. } else {
  322. xp = x << 3;
  323. yp = y << 4;
  324. num_cblocks = 2;
  325. pwidth = avctx->width >> 1;
  326. }
  327. src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
  328. get_slice_data(ctx, src, pic->linesize[i], xp, yp,
  329. pwidth, avctx->height, ctx->blocks[0],
  330. mbs_per_slice, num_cblocks);
  331. sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
  332. mbs_per_slice, ctx->blocks[0],
  333. num_cblocks, plane_factor,
  334. ctx->quants[quant]);
  335. total_size += sizes[i];
  336. }
  337. return total_size;
  338. }
  339. static inline int estimate_vlc(uint8_t codebook, int val)
  340. {
  341. unsigned int rice_order, exp_order, switch_bits, switch_val;
  342. int exponent;
  343. /* number of prefix bits to switch between Rice and expGolomb */
  344. switch_bits = (codebook & 3) + 1;
  345. rice_order = codebook >> 5; /* rice code order */
  346. exp_order = (codebook >> 2) & 7; /* exp golomb code order */
  347. switch_val = switch_bits << rice_order;
  348. if (val >= switch_val) {
  349. val -= switch_val - (1 << exp_order);
  350. exponent = av_log2(val);
  351. return exponent * 2 - exp_order + switch_bits + 1;
  352. } else {
  353. return (val >> rice_order) + rice_order + 1;
  354. }
  355. }
  356. static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
  357. int scale)
  358. {
  359. int i;
  360. int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
  361. int bits;
  362. prev_dc = (blocks[0] - 0x4000) / scale;
  363. bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
  364. codebook = 3;
  365. blocks += 64;
  366. *error += FFABS(blocks[0] - 0x4000) % scale;
  367. for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
  368. dc = (blocks[0] - 0x4000) / scale;
  369. *error += FFABS(blocks[0] - 0x4000) % scale;
  370. delta = dc - prev_dc;
  371. new_sign = GET_SIGN(delta);
  372. delta = (delta ^ sign) - sign;
  373. code = MAKE_CODE(delta);
  374. bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
  375. codebook = (code + (code & 1)) >> 1;
  376. codebook = FFMIN(codebook, 3);
  377. sign = new_sign;
  378. prev_dc = dc;
  379. }
  380. return bits;
  381. }
  382. static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
  383. int plane_size_factor,
  384. const uint8_t *scan, const int16_t *qmat)
  385. {
  386. int idx, i;
  387. int run, level, run_cb, lev_cb;
  388. int max_coeffs, abs_level;
  389. int bits = 0;
  390. max_coeffs = blocks_per_slice << 6;
  391. run_cb = ff_prores_run_to_cb_index[4];
  392. lev_cb = ff_prores_lev_to_cb_index[2];
  393. run = 0;
  394. for (i = 1; i < 64; i++) {
  395. for (idx = scan[i]; idx < max_coeffs; idx += 64) {
  396. level = blocks[idx] / qmat[scan[i]];
  397. *error += FFABS(blocks[idx]) % qmat[scan[i]];
  398. if (level) {
  399. abs_level = FFABS(level);
  400. bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
  401. bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
  402. abs_level - 1) + 1;
  403. run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
  404. lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
  405. run = 0;
  406. } else {
  407. run++;
  408. }
  409. }
  410. }
  411. return bits;
  412. }
  413. static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
  414. const uint16_t *src, int linesize,
  415. int mbs_per_slice,
  416. int blocks_per_mb, int plane_size_factor,
  417. const int16_t *qmat)
  418. {
  419. int blocks_per_slice;
  420. int bits;
  421. blocks_per_slice = mbs_per_slice * blocks_per_mb;
  422. bits = estimate_dcs(error, ctx->blocks[plane], blocks_per_slice, qmat[0]);
  423. bits += estimate_acs(error, ctx->blocks[plane], blocks_per_slice,
  424. plane_size_factor, ctx->scantable.permutated, qmat);
  425. return FFALIGN(bits, 8);
  426. }
  427. static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
  428. int trellis_node, int x, int y, int mbs_per_slice)
  429. {
  430. ProresContext *ctx = avctx->priv_data;
  431. int i, q, pq, xp, yp;
  432. const uint16_t *src;
  433. int slice_width_factor = av_log2(mbs_per_slice);
  434. int num_cblocks[MAX_PLANES], pwidth;
  435. int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
  436. const int min_quant = ctx->profile_info->min_quant;
  437. const int max_quant = ctx->profile_info->max_quant;
  438. int error, bits, bits_limit;
  439. int mbs, prev, cur, new_score;
  440. int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
  441. mbs = x + mbs_per_slice;
  442. for (i = 0; i < ctx->num_planes; i++) {
  443. is_chroma[i] = (i == 1 || i == 2);
  444. plane_factor[i] = slice_width_factor + 2;
  445. if (is_chroma[i])
  446. plane_factor[i] += ctx->chroma_factor - 3;
  447. if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
  448. xp = x << 4;
  449. yp = y << 4;
  450. num_cblocks[i] = 4;
  451. pwidth = avctx->width;
  452. } else {
  453. xp = x << 3;
  454. yp = y << 4;
  455. num_cblocks[i] = 2;
  456. pwidth = avctx->width >> 1;
  457. }
  458. src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
  459. get_slice_data(ctx, src, pic->linesize[i], xp, yp,
  460. pwidth, avctx->height, ctx->blocks[i],
  461. mbs_per_slice, num_cblocks[i]);
  462. }
  463. for (q = min_quant; q <= max_quant; q++) {
  464. ctx->nodes[trellis_node + q].prev_node = -1;
  465. ctx->nodes[trellis_node + q].quant = q;
  466. }
  467. // todo: maybe perform coarser quantising to fit into frame size when needed
  468. for (q = min_quant; q <= max_quant; q++) {
  469. bits = 0;
  470. error = 0;
  471. for (i = 0; i < ctx->num_planes; i++) {
  472. bits += estimate_slice_plane(ctx, &error, i,
  473. src, pic->linesize[i],
  474. mbs_per_slice,
  475. num_cblocks[i], plane_factor[i],
  476. ctx->quants[q]);
  477. }
  478. if (bits > 65000 * 8) {
  479. error = SCORE_LIMIT;
  480. break;
  481. }
  482. slice_bits[q] = bits;
  483. slice_score[q] = error;
  484. }
  485. bits_limit = mbs * ctx->bits_per_mb;
  486. for (pq = min_quant; pq <= max_quant; pq++) {
  487. prev = trellis_node - TRELLIS_WIDTH + pq;
  488. for (q = min_quant; q <= max_quant; q++) {
  489. cur = trellis_node + q;
  490. bits = ctx->nodes[prev].bits + slice_bits[q];
  491. error = slice_score[q];
  492. if (bits > bits_limit)
  493. error = SCORE_LIMIT;
  494. if (ctx->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
  495. new_score = ctx->nodes[prev].score + error;
  496. else
  497. new_score = SCORE_LIMIT;
  498. if (ctx->nodes[cur].prev_node == -1 ||
  499. ctx->nodes[cur].score >= new_score) {
  500. ctx->nodes[cur].bits = bits;
  501. ctx->nodes[cur].score = new_score;
  502. ctx->nodes[cur].prev_node = prev;
  503. }
  504. }
  505. }
  506. error = ctx->nodes[trellis_node + min_quant].score;
  507. pq = trellis_node + min_quant;
  508. for (q = min_quant + 1; q <= max_quant; q++) {
  509. if (ctx->nodes[trellis_node + q].score <= error) {
  510. error = ctx->nodes[trellis_node + q].score;
  511. pq = trellis_node + q;
  512. }
  513. }
  514. return pq;
  515. }
  516. static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
  517. const AVFrame *pic, int *got_packet)
  518. {
  519. ProresContext *ctx = avctx->priv_data;
  520. uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
  521. uint8_t *picture_size_pos;
  522. PutBitContext pb;
  523. int x, y, i, mb, q = 0;
  524. int sizes[4] = { 0 };
  525. int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
  526. int frame_size, picture_size, slice_size;
  527. int mbs_per_slice = ctx->mbs_per_slice;
  528. int pkt_size, ret;
  529. *avctx->coded_frame = *pic;
  530. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
  531. avctx->coded_frame->key_frame = 1;
  532. pkt_size = ctx->mb_width * ctx->mb_height * 64 * 3 * 12
  533. + ctx->num_slices * 2 + 200 + FF_MIN_BUFFER_SIZE;
  534. if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
  535. av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
  536. return ret;
  537. }
  538. orig_buf = pkt->data;
  539. // frame atom
  540. orig_buf += 4; // frame size
  541. bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
  542. buf = orig_buf;
  543. // frame header
  544. tmp = buf;
  545. buf += 2; // frame header size will be stored here
  546. bytestream_put_be16 (&buf, 0); // version 1
  547. bytestream_put_buffer(&buf, "Lavc", 4); // creator
  548. bytestream_put_be16 (&buf, avctx->width);
  549. bytestream_put_be16 (&buf, avctx->height);
  550. bytestream_put_byte (&buf, ctx->chroma_factor << 6); // frame flags
  551. bytestream_put_byte (&buf, 0); // reserved
  552. bytestream_put_byte (&buf, 0); // primaries
  553. bytestream_put_byte (&buf, 0); // transfer function
  554. bytestream_put_byte (&buf, 6); // colour matrix - ITU-R BT.601-4
  555. bytestream_put_byte (&buf, 0x40); // source format and alpha information
  556. bytestream_put_byte (&buf, 0); // reserved
  557. bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
  558. // luma quantisation matrix
  559. for (i = 0; i < 64; i++)
  560. bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
  561. // chroma quantisation matrix
  562. for (i = 0; i < 64; i++)
  563. bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
  564. bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
  565. // picture header
  566. picture_size_pos = buf + 1;
  567. bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
  568. buf += 4; // picture data size will be stored here
  569. bytestream_put_be16 (&buf, ctx->num_slices); // total number of slices
  570. bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
  571. // seek table - will be filled during slice encoding
  572. slice_sizes = buf;
  573. buf += ctx->num_slices * 2;
  574. // slices
  575. for (y = 0; y < ctx->mb_height; y++) {
  576. mbs_per_slice = ctx->mbs_per_slice;
  577. for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
  578. while (ctx->mb_width - x < mbs_per_slice)
  579. mbs_per_slice >>= 1;
  580. q = find_slice_quant(avctx, pic, (mb + 1) * TRELLIS_WIDTH, x, y,
  581. mbs_per_slice);
  582. }
  583. for (x = ctx->slices_width - 1; x >= 0; x--) {
  584. ctx->slice_q[x] = ctx->nodes[q].quant;
  585. q = ctx->nodes[q].prev_node;
  586. }
  587. mbs_per_slice = ctx->mbs_per_slice;
  588. for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
  589. q = ctx->slice_q[mb];
  590. while (ctx->mb_width - x < mbs_per_slice)
  591. mbs_per_slice >>= 1;
  592. bytestream_put_byte(&buf, slice_hdr_size << 3);
  593. slice_hdr = buf;
  594. buf += slice_hdr_size - 1;
  595. init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
  596. encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
  597. bytestream_put_byte(&slice_hdr, q);
  598. slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
  599. for (i = 0; i < ctx->num_planes - 1; i++) {
  600. bytestream_put_be16(&slice_hdr, sizes[i]);
  601. slice_size += sizes[i];
  602. }
  603. bytestream_put_be16(&slice_sizes, slice_size);
  604. buf += slice_size - slice_hdr_size;
  605. }
  606. }
  607. orig_buf -= 8;
  608. frame_size = buf - orig_buf;
  609. picture_size = buf - picture_size_pos - 6;
  610. bytestream_put_be32(&orig_buf, frame_size);
  611. bytestream_put_be32(&picture_size_pos, picture_size);
  612. pkt->size = frame_size;
  613. pkt->flags |= AV_PKT_FLAG_KEY;
  614. *got_packet = 1;
  615. return 0;
  616. }
  617. static av_cold int encode_close(AVCodecContext *avctx)
  618. {
  619. ProresContext *ctx = avctx->priv_data;
  620. if (avctx->coded_frame->data[0])
  621. avctx->release_buffer(avctx, avctx->coded_frame);
  622. av_freep(&avctx->coded_frame);
  623. av_freep(&ctx->nodes);
  624. av_freep(&ctx->slice_q);
  625. return 0;
  626. }
  627. static av_cold int encode_init(AVCodecContext *avctx)
  628. {
  629. ProresContext *ctx = avctx->priv_data;
  630. int mps;
  631. int i, j;
  632. int min_quant, max_quant;
  633. avctx->bits_per_raw_sample = 10;
  634. avctx->coded_frame = avcodec_alloc_frame();
  635. if (!avctx->coded_frame)
  636. return AVERROR(ENOMEM);
  637. ff_proresdsp_init(&ctx->dsp);
  638. ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
  639. ff_prores_progressive_scan);
  640. mps = ctx->mbs_per_slice;
  641. if (mps & (mps - 1)) {
  642. av_log(avctx, AV_LOG_ERROR,
  643. "there should be an integer power of two MBs per slice\n");
  644. return AVERROR(EINVAL);
  645. }
  646. ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
  647. ? CFACTOR_Y422
  648. : CFACTOR_Y444;
  649. ctx->profile_info = prores_profile_info + ctx->profile;
  650. ctx->num_planes = 3;
  651. ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
  652. ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
  653. ctx->slices_width = ctx->mb_width / mps;
  654. ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
  655. ctx->num_slices = ctx->mb_height * ctx->slices_width;
  656. for (i = 0; i < NUM_MB_LIMITS - 1; i++)
  657. if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
  658. break;
  659. ctx->bits_per_mb = ctx->profile_info->br_tab[i];
  660. min_quant = ctx->profile_info->min_quant;
  661. max_quant = ctx->profile_info->max_quant;
  662. for (i = min_quant; i <= max_quant; i++) {
  663. for (j = 0; j < 64; j++)
  664. ctx->quants[i][j] = ctx->profile_info->quant[j] * i;
  665. }
  666. avctx->codec_tag = ctx->profile_info->tag;
  667. av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
  668. ctx->profile, ctx->num_slices, ctx->bits_per_mb);
  669. ctx->nodes = av_malloc((ctx->slices_width + 1) * TRELLIS_WIDTH
  670. * sizeof(*ctx->nodes));
  671. if (!ctx->nodes) {
  672. encode_close(avctx);
  673. return AVERROR(ENOMEM);
  674. }
  675. for (i = min_quant; i <= max_quant; i++) {
  676. ctx->nodes[i].prev_node = -1;
  677. ctx->nodes[i].bits = 0;
  678. ctx->nodes[i].score = 0;
  679. }
  680. ctx->slice_q = av_malloc(ctx->slices_width * sizeof(*ctx->slice_q));
  681. if (!ctx->slice_q) {
  682. encode_close(avctx);
  683. return AVERROR(ENOMEM);
  684. }
  685. return 0;
  686. }
  687. #define OFFSET(x) offsetof(ProresContext, x)
  688. #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
  689. static const AVOption options[] = {
  690. { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
  691. AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
  692. { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
  693. { PRORES_PROFILE_STANDARD },
  694. PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
  695. { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
  696. 0, 0, VE, "profile" },
  697. { "lt", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
  698. 0, 0, VE, "profile" },
  699. { "standard", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
  700. 0, 0, VE, "profile" },
  701. { "hq", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
  702. 0, 0, VE, "profile" },
  703. { NULL }
  704. };
  705. static const AVClass proresenc_class = {
  706. .class_name = "ProRes encoder",
  707. .item_name = av_default_item_name,
  708. .option = options,
  709. .version = LIBAVUTIL_VERSION_INT,
  710. };
  711. AVCodec ff_prores_encoder = {
  712. .name = "prores",
  713. .type = AVMEDIA_TYPE_VIDEO,
  714. .id = CODEC_ID_PRORES,
  715. .priv_data_size = sizeof(ProresContext),
  716. .init = encode_init,
  717. .close = encode_close,
  718. .encode2 = encode_frame,
  719. .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
  720. .pix_fmts = (const enum PixelFormat[]) {
  721. PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
  722. },
  723. .priv_class = &proresenc_class,
  724. };