You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

595 lines
18KB

  1. /*
  2. * librav1e encoder
  3. *
  4. * Copyright (c) 2019 Derek Buitenhuis
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include <rav1e.h>
  23. #include "libavutil/internal.h"
  24. #include "libavutil/avassert.h"
  25. #include "libavutil/base64.h"
  26. #include "libavutil/common.h"
  27. #include "libavutil/mathematics.h"
  28. #include "libavutil/opt.h"
  29. #include "libavutil/pixdesc.h"
  30. #include "avcodec.h"
  31. #include "internal.h"
/**
 * Private encoder state for the librav1e wrapper.
 */
typedef struct librav1eContext {
    const AVClass *class;   /* AVClass pointer used by the AVOption system */

    RaContext *ctx;         /* rav1e encoder context, created in init */
    AVBSFContext *bsf;      /* extract_extradata filter, only set up when
                             * AV_CODEC_FLAG_GLOBAL_HEADER is requested */

    uint8_t *pass_data;     /* two-pass statistics buffer (collected in pass 1,
                             * decoded from stats_in in pass 2) */
    size_t pass_pos;        /* bytes written to / consumed from pass_data */
    int pass_size;          /* size of pass_data in bytes */

    char *rav1e_opts;       /* "rav1e-params" option string (key=value:...) */
    int quantizer;          /* "qp" option; negative means not set */
    int speed;              /* "speed" option; negative means not set */
    int tiles;              /* "tiles" option; only applied when > 0 */
    int tile_rows;          /* "tile-rows" option; only applied when > 0 */
    int tile_cols;          /* "tile-columns" option; only applied when > 0 */
} librav1eContext;
  46. static inline RaPixelRange range_map(enum AVPixelFormat pix_fmt, enum AVColorRange range)
  47. {
  48. switch (pix_fmt) {
  49. case AV_PIX_FMT_YUVJ420P:
  50. case AV_PIX_FMT_YUVJ422P:
  51. case AV_PIX_FMT_YUVJ444P:
  52. return RA_PIXEL_RANGE_FULL;
  53. }
  54. switch (range) {
  55. case AVCOL_RANGE_JPEG:
  56. return RA_PIXEL_RANGE_FULL;
  57. case AVCOL_RANGE_MPEG:
  58. default:
  59. return RA_PIXEL_RANGE_LIMITED;
  60. }
  61. }
  62. static inline RaChromaSampling pix_fmt_map(enum AVPixelFormat pix_fmt)
  63. {
  64. switch (pix_fmt) {
  65. case AV_PIX_FMT_YUV420P:
  66. case AV_PIX_FMT_YUVJ420P:
  67. case AV_PIX_FMT_YUV420P10:
  68. case AV_PIX_FMT_YUV420P12:
  69. return RA_CHROMA_SAMPLING_CS420;
  70. case AV_PIX_FMT_YUV422P:
  71. case AV_PIX_FMT_YUVJ422P:
  72. case AV_PIX_FMT_YUV422P10:
  73. case AV_PIX_FMT_YUV422P12:
  74. return RA_CHROMA_SAMPLING_CS422;
  75. case AV_PIX_FMT_YUV444P:
  76. case AV_PIX_FMT_YUVJ444P:
  77. case AV_PIX_FMT_YUV444P10:
  78. case AV_PIX_FMT_YUV444P12:
  79. return RA_CHROMA_SAMPLING_CS444;
  80. default:
  81. av_assert0(0);
  82. }
  83. }
  84. static inline RaChromaSamplePosition chroma_loc_map(enum AVChromaLocation chroma_loc)
  85. {
  86. switch (chroma_loc) {
  87. case AVCHROMA_LOC_LEFT:
  88. return RA_CHROMA_SAMPLE_POSITION_VERTICAL;
  89. case AVCHROMA_LOC_TOPLEFT:
  90. return RA_CHROMA_SAMPLE_POSITION_COLOCATED;
  91. default:
  92. return RA_CHROMA_SAMPLE_POSITION_UNKNOWN;
  93. }
  94. }
  95. static int get_stats(AVCodecContext *avctx, int eos)
  96. {
  97. librav1eContext *ctx = avctx->priv_data;
  98. RaData* buf = rav1e_twopass_out(ctx->ctx);
  99. if (!buf)
  100. return 0;
  101. if (!eos) {
  102. uint8_t *tmp = av_fast_realloc(ctx->pass_data, &ctx->pass_size,
  103. ctx->pass_pos + buf->len);
  104. if (!tmp) {
  105. rav1e_data_unref(buf);
  106. return AVERROR(ENOMEM);
  107. }
  108. ctx->pass_data = tmp;
  109. memcpy(ctx->pass_data + ctx->pass_pos, buf->data, buf->len);
  110. ctx->pass_pos += buf->len;
  111. } else {
  112. size_t b64_size = AV_BASE64_SIZE(ctx->pass_pos);
  113. memcpy(ctx->pass_data, buf->data, buf->len);
  114. avctx->stats_out = av_malloc(b64_size);
  115. if (!avctx->stats_out) {
  116. rav1e_data_unref(buf);
  117. return AVERROR(ENOMEM);
  118. }
  119. av_base64_encode(avctx->stats_out, b64_size, ctx->pass_data, ctx->pass_pos);
  120. av_freep(&ctx->pass_data);
  121. }
  122. rav1e_data_unref(buf);
  123. return 0;
  124. }
  125. static int set_stats(AVCodecContext *avctx)
  126. {
  127. librav1eContext *ctx = avctx->priv_data;
  128. int ret = 1;
  129. while (ret > 0 && ctx->pass_size - ctx->pass_pos > 0) {
  130. ret = rav1e_twopass_in(ctx->ctx, ctx->pass_data + ctx->pass_pos, ctx->pass_size);
  131. if (ret < 0)
  132. return AVERROR_EXTERNAL;
  133. ctx->pass_pos += ret;
  134. }
  135. return 0;
  136. }
  137. static av_cold int librav1e_encode_close(AVCodecContext *avctx)
  138. {
  139. librav1eContext *ctx = avctx->priv_data;
  140. if (ctx->ctx) {
  141. rav1e_context_unref(ctx->ctx);
  142. ctx->ctx = NULL;
  143. }
  144. av_bsf_free(&ctx->bsf);
  145. av_freep(&ctx->pass_data);
  146. return 0;
  147. }
  148. static av_cold int librav1e_encode_init(AVCodecContext *avctx)
  149. {
  150. librav1eContext *ctx = avctx->priv_data;
  151. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
  152. RaConfig *cfg = NULL;
  153. int rret;
  154. int ret = 0;
  155. cfg = rav1e_config_default();
  156. if (!cfg) {
  157. av_log(avctx, AV_LOG_ERROR, "Could not allocate rav1e config.\n");
  158. return AVERROR_EXTERNAL;
  159. }
  160. rav1e_config_set_time_base(cfg, (RaRational) {
  161. avctx->time_base.num * avctx->ticks_per_frame,
  162. avctx->time_base.den
  163. });
  164. if (avctx->flags & AV_CODEC_FLAG_PASS2) {
  165. if (!avctx->stats_in) {
  166. av_log(avctx, AV_LOG_ERROR, "No stats file provided for second pass.\n");
  167. ret = AVERROR(EINVAL);
  168. goto end;
  169. }
  170. ctx->pass_size = (strlen(avctx->stats_in) * 3) / 4;
  171. ctx->pass_data = av_malloc(ctx->pass_size);
  172. if (!ctx->pass_data) {
  173. av_log(avctx, AV_LOG_ERROR, "Could not allocate stats buffer.\n");
  174. ret = AVERROR(ENOMEM);
  175. goto end;
  176. }
  177. ctx->pass_size = av_base64_decode(ctx->pass_data, avctx->stats_in, ctx->pass_size);
  178. if (ctx->pass_size < 0) {
  179. av_log(avctx, AV_LOG_ERROR, "Invalid pass file.\n");
  180. ret = AVERROR(EINVAL);
  181. goto end;
  182. }
  183. }
  184. if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
  185. const AVBitStreamFilter *filter = av_bsf_get_by_name("extract_extradata");
  186. int bret;
  187. if (!filter) {
  188. av_log(avctx, AV_LOG_ERROR, "extract_extradata bitstream filter "
  189. "not found. This is a bug, please report it.\n");
  190. ret = AVERROR_BUG;
  191. goto end;
  192. }
  193. bret = av_bsf_alloc(filter, &ctx->bsf);
  194. if (bret < 0) {
  195. ret = bret;
  196. goto end;
  197. }
  198. bret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx);
  199. if (bret < 0) {
  200. ret = bret;
  201. goto end;
  202. }
  203. bret = av_bsf_init(ctx->bsf);
  204. if (bret < 0) {
  205. ret = bret;
  206. goto end;
  207. }
  208. }
  209. if (ctx->rav1e_opts) {
  210. AVDictionary *dict = NULL;
  211. AVDictionaryEntry *en = NULL;
  212. if (!av_dict_parse_string(&dict, ctx->rav1e_opts, "=", ":", 0)) {
  213. while (en = av_dict_get(dict, "", en, AV_DICT_IGNORE_SUFFIX)) {
  214. int parse_ret = rav1e_config_parse(cfg, en->key, en->value);
  215. if (parse_ret < 0)
  216. av_log(avctx, AV_LOG_WARNING, "Invalid value for %s: %s.\n", en->key, en->value);
  217. }
  218. av_dict_free(&dict);
  219. }
  220. }
  221. rret = rav1e_config_parse_int(cfg, "width", avctx->width);
  222. if (rret < 0) {
  223. av_log(avctx, AV_LOG_ERROR, "Invalid width passed to rav1e.\n");
  224. ret = AVERROR_INVALIDDATA;
  225. goto end;
  226. }
  227. rret = rav1e_config_parse_int(cfg, "height", avctx->height);
  228. if (rret < 0) {
  229. av_log(avctx, AV_LOG_ERROR, "Invalid height passed to rav1e.\n");
  230. ret = AVERROR_INVALIDDATA;
  231. goto end;
  232. }
  233. rret = rav1e_config_parse_int(cfg, "threads", avctx->thread_count);
  234. if (rret < 0)
  235. av_log(avctx, AV_LOG_WARNING, "Invalid number of threads, defaulting to auto.\n");
  236. if (ctx->speed >= 0) {
  237. rret = rav1e_config_parse_int(cfg, "speed", ctx->speed);
  238. if (rret < 0) {
  239. av_log(avctx, AV_LOG_ERROR, "Could not set speed preset.\n");
  240. ret = AVERROR_EXTERNAL;
  241. goto end;
  242. }
  243. }
  244. /* rav1e handles precedence between 'tiles' and cols/rows for us. */
  245. if (ctx->tiles > 0) {
  246. rret = rav1e_config_parse_int(cfg, "tiles", ctx->tiles);
  247. if (rret < 0) {
  248. av_log(avctx, AV_LOG_ERROR, "Could not set number of tiles to encode with.\n");
  249. ret = AVERROR_EXTERNAL;
  250. goto end;
  251. }
  252. }
  253. if (ctx->tile_rows > 0) {
  254. rret = rav1e_config_parse_int(cfg, "tile_rows", ctx->tile_rows);
  255. if (rret < 0) {
  256. av_log(avctx, AV_LOG_ERROR, "Could not set number of tile rows to encode with.\n");
  257. ret = AVERROR_EXTERNAL;
  258. goto end;
  259. }
  260. }
  261. if (ctx->tile_cols > 0) {
  262. rret = rav1e_config_parse_int(cfg, "tile_cols", ctx->tile_cols);
  263. if (rret < 0) {
  264. av_log(avctx, AV_LOG_ERROR, "Could not set number of tile cols to encode with.\n");
  265. ret = AVERROR_EXTERNAL;
  266. goto end;
  267. }
  268. }
  269. if (avctx->gop_size > 0) {
  270. rret = rav1e_config_parse_int(cfg, "key_frame_interval", avctx->gop_size);
  271. if (rret < 0) {
  272. av_log(avctx, AV_LOG_ERROR, "Could not set max keyint.\n");
  273. ret = AVERROR_EXTERNAL;
  274. goto end;
  275. }
  276. }
  277. if (avctx->keyint_min > 0) {
  278. rret = rav1e_config_parse_int(cfg, "min_key_frame_interval", avctx->keyint_min);
  279. if (rret < 0) {
  280. av_log(avctx, AV_LOG_ERROR, "Could not set min keyint.\n");
  281. ret = AVERROR_EXTERNAL;
  282. goto end;
  283. }
  284. }
  285. if (avctx->bit_rate && ctx->quantizer < 0) {
  286. int max_quantizer = avctx->qmax >= 0 ? avctx->qmax : 255;
  287. rret = rav1e_config_parse_int(cfg, "quantizer", max_quantizer);
  288. if (rret < 0) {
  289. av_log(avctx, AV_LOG_ERROR, "Could not set max quantizer.\n");
  290. ret = AVERROR_EXTERNAL;
  291. goto end;
  292. }
  293. if (avctx->qmin >= 0) {
  294. rret = rav1e_config_parse_int(cfg, "min_quantizer", avctx->qmin);
  295. if (rret < 0) {
  296. av_log(avctx, AV_LOG_ERROR, "Could not set min quantizer.\n");
  297. ret = AVERROR_EXTERNAL;
  298. goto end;
  299. }
  300. }
  301. rret = rav1e_config_parse_int(cfg, "bitrate", avctx->bit_rate);
  302. if (rret < 0) {
  303. av_log(avctx, AV_LOG_ERROR, "Could not set bitrate.\n");
  304. ret = AVERROR_INVALIDDATA;
  305. goto end;
  306. }
  307. } else if (ctx->quantizer >= 0) {
  308. if (avctx->bit_rate)
  309. av_log(avctx, AV_LOG_WARNING, "Both bitrate and quantizer specified. Using quantizer mode.");
  310. rret = rav1e_config_parse_int(cfg, "quantizer", ctx->quantizer);
  311. if (rret < 0) {
  312. av_log(avctx, AV_LOG_ERROR, "Could not set quantizer.\n");
  313. ret = AVERROR_EXTERNAL;
  314. goto end;
  315. }
  316. }
  317. rret = rav1e_config_set_pixel_format(cfg, desc->comp[0].depth,
  318. pix_fmt_map(avctx->pix_fmt),
  319. chroma_loc_map(avctx->chroma_sample_location),
  320. range_map(avctx->pix_fmt, avctx->color_range));
  321. if (rret < 0) {
  322. av_log(avctx, AV_LOG_ERROR, "Failed to set pixel format properties.\n");
  323. ret = AVERROR_INVALIDDATA;
  324. goto end;
  325. }
  326. /* rav1e's colorspace enums match standard values. */
  327. rret = rav1e_config_set_color_description(cfg, (RaMatrixCoefficients) avctx->colorspace,
  328. (RaColorPrimaries) avctx->color_primaries,
  329. (RaTransferCharacteristics) avctx->color_trc);
  330. if (rret < 0) {
  331. av_log(avctx, AV_LOG_WARNING, "Failed to set color properties.\n");
  332. if (avctx->err_recognition & AV_EF_EXPLODE) {
  333. ret = AVERROR_INVALIDDATA;
  334. goto end;
  335. }
  336. }
  337. ctx->ctx = rav1e_context_new(cfg);
  338. if (!ctx->ctx) {
  339. av_log(avctx, AV_LOG_ERROR, "Failed to create rav1e encode context.\n");
  340. ret = AVERROR_EXTERNAL;
  341. goto end;
  342. }
  343. ret = 0;
  344. end:
  345. rav1e_config_unref(cfg);
  346. return ret;
  347. }
  348. static int librav1e_send_frame(AVCodecContext *avctx, const AVFrame *frame)
  349. {
  350. librav1eContext *ctx = avctx->priv_data;
  351. RaFrame *rframe = NULL;
  352. int ret;
  353. if (frame) {
  354. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
  355. rframe = rav1e_frame_new(ctx->ctx);
  356. if (!rframe) {
  357. av_log(avctx, AV_LOG_ERROR, "Could not allocate new rav1e frame.\n");
  358. return AVERROR(ENOMEM);
  359. }
  360. for (int i = 0; i < desc->nb_components; i++) {
  361. int shift = i ? desc->log2_chroma_h : 0;
  362. int bytes = desc->comp[0].depth == 8 ? 1 : 2;
  363. rav1e_frame_fill_plane(rframe, i, frame->data[i],
  364. (frame->height >> shift) * frame->linesize[i],
  365. frame->linesize[i], bytes);
  366. }
  367. }
  368. ret = rav1e_send_frame(ctx->ctx, rframe);
  369. if (rframe)
  370. rav1e_frame_unref(rframe); /* No need to unref if flushing. */
  371. switch (ret) {
  372. case RA_ENCODER_STATUS_SUCCESS:
  373. break;
  374. case RA_ENCODER_STATUS_ENOUGH_DATA:
  375. return AVERROR(EAGAIN);
  376. case RA_ENCODER_STATUS_FAILURE:
  377. av_log(avctx, AV_LOG_ERROR, "Could not send frame: %s\n", rav1e_status_to_str(ret));
  378. return AVERROR_EXTERNAL;
  379. default:
  380. av_log(avctx, AV_LOG_ERROR, "Unknown return code %d from rav1e_send_frame: %s\n", ret, rav1e_status_to_str(ret));
  381. return AVERROR_UNKNOWN;
  382. }
  383. return 0;
  384. }
  385. static int librav1e_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
  386. {
  387. librav1eContext *ctx = avctx->priv_data;
  388. RaPacket *rpkt = NULL;
  389. int ret;
  390. retry:
  391. if (avctx->flags & AV_CODEC_FLAG_PASS1) {
  392. int sret = get_stats(avctx, 0);
  393. if (sret < 0)
  394. return sret;
  395. } else if (avctx->flags & AV_CODEC_FLAG_PASS2) {
  396. int sret = set_stats(avctx);
  397. if (sret < 0)
  398. return sret;
  399. }
  400. ret = rav1e_receive_packet(ctx->ctx, &rpkt);
  401. switch (ret) {
  402. case RA_ENCODER_STATUS_SUCCESS:
  403. break;
  404. case RA_ENCODER_STATUS_LIMIT_REACHED:
  405. if (avctx->flags & AV_CODEC_FLAG_PASS1) {
  406. int sret = get_stats(avctx, 1);
  407. if (sret < 0)
  408. return sret;
  409. }
  410. return AVERROR_EOF;
  411. case RA_ENCODER_STATUS_ENCODED:
  412. if (avctx->internal->draining)
  413. goto retry;
  414. return AVERROR(EAGAIN);
  415. case RA_ENCODER_STATUS_NEED_MORE_DATA:
  416. if (avctx->internal->draining) {
  417. av_log(avctx, AV_LOG_ERROR, "Unexpected error when receiving packet after EOF.\n");
  418. return AVERROR_EXTERNAL;
  419. }
  420. return AVERROR(EAGAIN);
  421. case RA_ENCODER_STATUS_FAILURE:
  422. av_log(avctx, AV_LOG_ERROR, "Could not encode frame: %s\n", rav1e_status_to_str(ret));
  423. return AVERROR_EXTERNAL;
  424. default:
  425. av_log(avctx, AV_LOG_ERROR, "Unknown return code %d from rav1e_receive_packet: %s\n", ret, rav1e_status_to_str(ret));
  426. return AVERROR_UNKNOWN;
  427. }
  428. ret = av_new_packet(pkt, rpkt->len);
  429. if (ret < 0) {
  430. av_log(avctx, AV_LOG_ERROR, "Could not allocate packet.\n");
  431. rav1e_packet_unref(rpkt);
  432. return ret;
  433. }
  434. memcpy(pkt->data, rpkt->data, rpkt->len);
  435. if (rpkt->frame_type == RA_FRAME_TYPE_KEY)
  436. pkt->flags |= AV_PKT_FLAG_KEY;
  437. pkt->pts = pkt->dts = rpkt->input_frameno * avctx->ticks_per_frame;
  438. if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
  439. int ret = av_bsf_send_packet(ctx->bsf, pkt);
  440. if (ret < 0) {
  441. av_log(avctx, AV_LOG_ERROR, "extradata extraction send failed.\n");
  442. rav1e_packet_unref(rpkt);
  443. av_packet_unref(pkt);
  444. return ret;
  445. }
  446. ret = av_bsf_receive_packet(ctx->bsf, pkt);
  447. if (ret < 0) {
  448. av_log(avctx, AV_LOG_ERROR, "extradata extraction receive failed.\n");
  449. rav1e_packet_unref(rpkt);
  450. av_packet_unref(pkt);
  451. return ret;
  452. }
  453. }
  454. return 0;
  455. }
  456. #define OFFSET(x) offsetof(librav1eContext, x)
  457. #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
  458. static const AVOption options[] = {
  459. { "qp", "use constant quantizer mode", OFFSET(quantizer), AV_OPT_TYPE_INT, { .i64 = 100 }, -1, 255, VE },
  460. { "speed", "what speed preset to use", OFFSET(speed), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 10, VE },
  461. { "tiles", "number of tiles encode with", OFFSET(tiles), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT64_MAX, VE },
  462. { "tile-rows", "number of tiles rows to encode with", OFFSET(tile_rows), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT64_MAX, VE },
  463. { "tile-columns", "number of tiles columns to encode with", OFFSET(tile_cols), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT64_MAX, VE },
  464. { "rav1e-params", "set the rav1e configuration using a :-separated list of key=value parameters", OFFSET(rav1e_opts), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
  465. { NULL }
  466. };
/*
 * Default values for generic codec options, attached to the codec through
 * its .defaults field. The init function only forwards the corresponding
 * settings to rav1e when they are positive / non-negative, so these values
 * presumably act as "not set by the user" markers.
 */
static const AVCodecDefault librav1e_defaults[] = {
    { "b", "0" },
    { "g", "0" },
    { "keyint_min", "0" },
    { "qmax", "-1" },
    { "qmin", "-1" },
    { NULL }
};
  475. const enum AVPixelFormat librav1e_pix_fmts[] = {
  476. AV_PIX_FMT_YUV420P,
  477. AV_PIX_FMT_YUVJ420P,
  478. AV_PIX_FMT_YUV420P10,
  479. AV_PIX_FMT_YUV420P12,
  480. AV_PIX_FMT_YUV422P,
  481. AV_PIX_FMT_YUVJ422P,
  482. AV_PIX_FMT_YUV422P10,
  483. AV_PIX_FMT_YUV422P12,
  484. AV_PIX_FMT_YUV444P,
  485. AV_PIX_FMT_YUVJ444P,
  486. AV_PIX_FMT_YUV444P10,
  487. AV_PIX_FMT_YUV444P12,
  488. AV_PIX_FMT_NONE
  489. };
/* AVClass describing the private context; exposes the option table above. */
static const AVClass class = {
    .class_name = "librav1e",
    .item_name = av_default_item_name,
    .option = options,
    .version = LIBAVUTIL_VERSION_INT,
};
/*
 * Codec definition for the librav1e AV1 encoder. It uses the
 * send_frame / receive_packet callbacks and requests init cleanup, so the
 * close callback is expected to run when init fails.
 */
AVCodec ff_librav1e_encoder = {
    .name = "librav1e",
    .long_name = NULL_IF_CONFIG_SMALL("librav1e AV1"),
    .type = AVMEDIA_TYPE_VIDEO,
    .id = AV_CODEC_ID_AV1,
    .init = librav1e_encode_init,
    .send_frame = librav1e_send_frame,
    .receive_packet = librav1e_receive_packet,
    .close = librav1e_encode_close,
    .priv_data_size = sizeof(librav1eContext),
    .priv_class = &class,
    .defaults = librav1e_defaults,
    .pix_fmts = librav1e_pix_fmts,
    .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
    .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
    .wrapper_name = "librav1e",
};