/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/avstring.h"
#include "libavutil/common.h"
#include "libavutil/opt.h"

#include "bsf.h"
#include "cbs.h"
#include "cbs_h264.h"
#include "h264.h"
#include "h264_sei.h"

enum {
    PASS,
    INSERT,
    REMOVE,
};
typedef struct H264MetadataContext {
    const AVClass *class;

    CodedBitstreamContext *cbc;
    CodedBitstreamFragment access_unit;

    H264RawAUD aud_nal;
    H264RawSEI sei_nal;

    int aud;

    AVRational sample_aspect_ratio;

    int video_format;
    int video_full_range_flag;
    int colour_primaries;
    int transfer_characteristics;
    int matrix_coefficients;

    int chroma_sample_loc_type;

    AVRational tick_rate;
    int fixed_frame_rate_flag;

    int crop_left;
    int crop_right;
    int crop_top;
    int crop_bottom;

    const char *sei_user_data;
    int sei_first_au;
} H264MetadataContext;
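
/* Apply the requested metadata changes (sample aspect ratio, video signal
 * type, chroma sample location, timing info and cropping) to a single SPS,
 * enabling the VUI if any VUI field was written. */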
static int h264_metadata_update_sps(AVBSFContext *bsf,
                                    H264RawSPS *sps)
{
    H264MetadataContext *ctx = bsf->priv_data;
    int need_vui = 0;
    int crop_unit_x, crop_unit_y;

    if (ctx->sample_aspect_ratio.num && ctx->sample_aspect_ratio.den) {
        // Table E-1.
        static const AVRational sar_idc[] = {
            {   0,  0 }, // Unspecified (never written here).
            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
            { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
        };
        int num, den, i;

        av_reduce(&num, &den, ctx->sample_aspect_ratio.num,
                  ctx->sample_aspect_ratio.den, 65535);

        for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
            if (num == sar_idc[i].num &&
                den == sar_idc[i].den)
                break;
        }
        if (i == FF_ARRAY_ELEMS(sar_idc)) {
            sps->vui.aspect_ratio_idc = 255;
            sps->vui.sar_width  = num;
            sps->vui.sar_height = den;
        } else {
            sps->vui.aspect_ratio_idc = i;
        }
        sps->vui.aspect_ratio_info_present_flag = 1;
        need_vui = 1;
    }

#define SET_OR_INFER(field, value, present_flag, infer) do { \
        if (value >= 0) { \
            field = value; \
            need_vui = 1; \
        } else if (!present_flag) \
            field = infer; \
    } while (0)

    if (ctx->video_format             >= 0 ||
        ctx->video_full_range_flag    >= 0 ||
        ctx->colour_primaries         >= 0 ||
        ctx->transfer_characteristics >= 0 ||
        ctx->matrix_coefficients      >= 0) {

        SET_OR_INFER(sps->vui.video_format, ctx->video_format,
                     sps->vui.video_signal_type_present_flag, 5);

        SET_OR_INFER(sps->vui.video_full_range_flag,
                     ctx->video_full_range_flag,
                     sps->vui.video_signal_type_present_flag, 0);

        if (ctx->colour_primaries         >= 0 ||
            ctx->transfer_characteristics >= 0 ||
            ctx->matrix_coefficients      >= 0) {

            SET_OR_INFER(sps->vui.colour_primaries,
                         ctx->colour_primaries,
                         sps->vui.colour_description_present_flag, 2);

            SET_OR_INFER(sps->vui.transfer_characteristics,
                         ctx->transfer_characteristics,
                         sps->vui.colour_description_present_flag, 2);

            SET_OR_INFER(sps->vui.matrix_coefficients,
                         ctx->matrix_coefficients,
                         sps->vui.colour_description_present_flag, 2);

            sps->vui.colour_description_present_flag = 1;
        }
        sps->vui.video_signal_type_present_flag = 1;
        need_vui = 1;
    }

    if (ctx->chroma_sample_loc_type >= 0) {
        sps->vui.chroma_sample_loc_type_top_field =
            ctx->chroma_sample_loc_type;
        sps->vui.chroma_sample_loc_type_bottom_field =
            ctx->chroma_sample_loc_type;
        sps->vui.chroma_loc_info_present_flag = 1;
        need_vui = 1;
    }

    if (ctx->tick_rate.num && ctx->tick_rate.den) {
        int num, den;

        av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
                  UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);

        sps->vui.time_scale        = num;
        sps->vui.num_units_in_tick = den;

        sps->vui.timing_info_present_flag = 1;
        need_vui = 1;
    }
    SET_OR_INFER(sps->vui.fixed_frame_rate_flag,
                 ctx->fixed_frame_rate_flag,
                 sps->vui.timing_info_present_flag, 0);

    if (sps->separate_colour_plane_flag || sps->chroma_format_idc == 0) {
        crop_unit_x = 1;
        crop_unit_y = 2 - sps->frame_mbs_only_flag;
    } else {
        crop_unit_x = 1 + (sps->chroma_format_idc < 3);
        crop_unit_y = (1 + (sps->chroma_format_idc < 2)) *
                      (2 - sps->frame_mbs_only_flag);
    }
#define CROP(border, unit) do { \
        if (ctx->crop_ ## border >= 0) { \
            if (ctx->crop_ ## border % unit != 0) { \
                av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
                       "must be a multiple of %d.\n", #border, unit); \
                return AVERROR(EINVAL); \
            } \
            sps->frame_crop_ ## border ## _offset = \
                ctx->crop_ ## border / unit; \
            sps->frame_cropping_flag = 1; \
        } \
    } while (0)
    CROP(left,   crop_unit_x);
    CROP(right,  crop_unit_x);
    CROP(top,    crop_unit_y);
    CROP(bottom, crop_unit_y);
#undef CROP

    if (need_vui)
        sps->vui_parameters_present_flag = 1;

    return 0;
}
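
/* Filter one packet: parse it into an access unit, insert or remove the AUD,
 * rewrite any SPS, optionally attach a user-data-unregistered SEI message,
 * then write the access unit back out. */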
static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
{
    H264MetadataContext *ctx = bsf->priv_data;
    AVPacket *in = NULL;
    CodedBitstreamFragment *au = &ctx->access_unit;
    int err, i, j, has_sps;
    char *sei_udu_string = NULL;

    err = ff_bsf_get_packet(bsf, &in);
    if (err < 0)
        goto fail;

    err = ff_cbs_read_packet(ctx->cbc, au, in);
    if (err < 0) {
        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
        goto fail;
    }

    if (au->nb_units == 0) {
        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
        err = AVERROR_INVALIDDATA;
        goto fail;
    }

    // If an AUD is present, it must be the first NAL unit.
    if (au->units[0].type == H264_NAL_AUD) {
        if (ctx->aud == REMOVE)
            ff_cbs_delete_unit(ctx->cbc, au, 0);
    } else {
        if (ctx->aud == INSERT) {
            static const int primary_pic_type_table[] = {
                0x084, // 2, 7
                0x0a5, // 0, 2, 5, 7
                0x0e7, // 0, 1, 2, 5, 6, 7
                0x210, // 4, 9
                0x318, // 3, 4, 8, 9
                0x294, // 2, 4, 7, 9
                0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
                0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
            };
            int primary_pic_type_mask = 0xff;
            H264RawAUD *aud = &ctx->aud_nal;

            for (i = 0; i < au->nb_units; i++) {
                if (au->units[i].type == H264_NAL_SLICE ||
                    au->units[i].type == H264_NAL_IDR_SLICE) {
                    H264RawSlice *slice = au->units[i].content;
                    for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
                        if (!(primary_pic_type_table[j] &
                              (1 << slice->header.slice_type)))
                            primary_pic_type_mask &= ~(1 << j);
                    }
                }
            }
            for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
                if (primary_pic_type_mask & (1 << j))
                    break;
            if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
                av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
                       "invalid slice types?\n");
                err = AVERROR_INVALIDDATA;
                goto fail;
            }

            aud->nal_unit_header.nal_unit_type = H264_NAL_AUD;
            aud->primary_pic_type = j;

            err = ff_cbs_insert_unit_content(ctx->cbc, au,
                                             0, H264_NAL_AUD, aud);
            if (err < 0) {
                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
                goto fail;
            }
        }
    }
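
    // Rewrite every SPS found in this access unit.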
    has_sps = 0;
    for (i = 0; i < au->nb_units; i++) {
        if (au->units[i].type == H264_NAL_SPS) {
            err = h264_metadata_update_sps(bsf, au->units[i].content);
            if (err < 0)
                goto fail;
            has_sps = 1;
        }
    }

    // Insert the SEI in access units containing SPSs, and also
    // unconditionally in the first access unit we ever see.
    if (ctx->sei_user_data && (has_sps || !ctx->sei_first_au)) {
        H264RawSEI *sei;
        H264RawSEIPayload *payload;
        H264RawSEIUserDataUnregistered *udu;
        int sei_pos, sei_new;

        ctx->sei_first_au = 1;

        for (i = 0; i < au->nb_units; i++) {
            if (au->units[i].type == H264_NAL_SEI ||
                au->units[i].type == H264_NAL_SLICE ||
                au->units[i].type == H264_NAL_IDR_SLICE)
                break;
        }
        sei_pos = i;

        if (sei_pos < au->nb_units &&
            au->units[sei_pos].type == H264_NAL_SEI) {
            sei_new = 0;
            sei = au->units[sei_pos].content;
        } else {
            sei_new = 1;
            sei = &ctx->sei_nal;
            memset(sei, 0, sizeof(*sei));
            sei->nal_unit_header.nal_unit_type = H264_NAL_SEI;

            err = ff_cbs_insert_unit_content(ctx->cbc, au,
                                             sei_pos, H264_NAL_SEI, sei);
            if (err < 0) {
                av_log(bsf, AV_LOG_ERROR, "Failed to insert SEI.\n");
                goto fail;
            }
        }

        payload = &sei->payload[sei->payload_count];

        payload->payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED;
        udu = &payload->payload.user_data_unregistered;
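
        // The option string has the form "UUID+string": the UUID is 32 hex
        // digits (optional '-' separators are skipped), packed two digits
        // per byte into uuid_iso_iec_11578.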
        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
            int c, v;
            c = ctx->sei_user_data[i];
            if (c == '-') {
                continue;
            } else if (av_isxdigit(c)) {
                c = av_tolower(c);
                v = (c <= '9' ? c - '0' : c - 'a' + 10);
            } else {
                goto invalid_user_data;
            }
            // Use the digit count j (not the character index i, which also
            // counts '-' separators) to decide which nibble this digit fills.
            if (j & 1)
                udu->uuid_iso_iec_11578[j / 2] |= v;
            else
                udu->uuid_iso_iec_11578[j / 2] = v << 4;
            ++j;
        }
        if (j == 32 && ctx->sei_user_data[i] == '+') {
            sei_udu_string = av_strdup(ctx->sei_user_data + i + 1);
            if (!sei_udu_string) {
                err = AVERROR(ENOMEM);
                goto sei_fail;
            }

            udu->data = sei_udu_string;
            udu->data_length = strlen(sei_udu_string);

            payload->payload_size = 16 + udu->data_length;

            if (!sei_new) {
                // This will be freed by the existing internal
                // reference in fragment_uninit().
                sei_udu_string = NULL;
            }
        } else {
        invalid_user_data:
            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
                   "must be \"UUID+string\".\n");
            err = AVERROR(EINVAL);
        sei_fail:
            memset(payload, 0, sizeof(*payload));
            goto fail;
        }

        ++sei->payload_count;
    }

    err = ff_cbs_write_packet(ctx->cbc, out, au);
    if (err < 0) {
        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
        goto fail;
    }

    err = av_packet_copy_props(out, in);
    if (err < 0)
        goto fail;

    err = 0;
fail:
    ff_cbs_fragment_uninit(ctx->cbc, au);
    av_freep(&sei_udu_string);
    av_packet_free(&in);

    return err;
}
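
/* On init, open the CBS context and, if extradata is present, apply the same
 * SPS changes to the parameter sets stored there. */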
static int h264_metadata_init(AVBSFContext *bsf)
{
    H264MetadataContext *ctx = bsf->priv_data;
    CodedBitstreamFragment *au = &ctx->access_unit;
    int err, i;

    err = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_H264, bsf);
    if (err < 0)
        return err;

    if (bsf->par_in->extradata) {
        err = ff_cbs_read_extradata(ctx->cbc, au, bsf->par_in);
        if (err < 0) {
            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
            goto fail;
        }

        for (i = 0; i < au->nb_units; i++) {
            if (au->units[i].type == H264_NAL_SPS) {
                err = h264_metadata_update_sps(bsf, au->units[i].content);
                if (err < 0)
                    goto fail;
            }
        }

        err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, au);
        if (err < 0) {
            av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
            goto fail;
        }
    }

    err = 0;
fail:
    ff_cbs_fragment_uninit(ctx->cbc, au);
    return err;
}

static void h264_metadata_close(AVBSFContext *bsf)
{
    H264MetadataContext *ctx = bsf->priv_data;
    ff_cbs_close(&ctx->cbc);
}
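
// Option defaults of -1 (or 0/0 for the rationals) mean "leave the value
// from the input stream unchanged".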
#define OFFSET(x) offsetof(H264MetadataContext, x)
static const AVOption h264_metadata_options[] = {
    { "aud", "Access Unit Delimiter NAL units",
        OFFSET(aud), AV_OPT_TYPE_INT,
        { .i64 = PASS }, PASS, REMOVE, 0, "aud" },
    { "pass",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS   }, .unit = "aud" },
    { "insert", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT }, .unit = "aud" },
    { "remove", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE }, .unit = "aud" },

    { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
        OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL,
        { .dbl = 0.0 }, 0, 65535 },

    { "video_format", "Set video format (table E-2)",
        OFFSET(video_format), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 7 },
    { "video_full_range_flag", "Set video full range flag",
        OFFSET(video_full_range_flag), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 1 },
    { "colour_primaries", "Set colour primaries (table E-3)",
        OFFSET(colour_primaries), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },
    { "transfer_characteristics", "Set transfer characteristics (table E-4)",
        OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },
    { "matrix_coefficients", "Set matrix coefficients (table E-5)",
        OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },

    { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
        OFFSET(chroma_sample_loc_type), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 6 },

    { "tick_rate", "Set VUI tick rate (time_scale / num_units_in_tick)",
        OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
        { .dbl = 0.0 }, 0, UINT_MAX },
    { "fixed_frame_rate_flag", "Set VUI fixed frame rate flag",
        OFFSET(fixed_frame_rate_flag), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 1 },

    { "crop_left", "Set left border crop offset",
        OFFSET(crop_left), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_WIDTH },
    { "crop_right", "Set right border crop offset",
        OFFSET(crop_right), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_WIDTH },
    { "crop_top", "Set top border crop offset",
        OFFSET(crop_top), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_HEIGHT },
    { "crop_bottom", "Set bottom border crop offset",
        OFFSET(crop_bottom), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_HEIGHT },

    { "sei_user_data", "Insert SEI user data (UUID+string)",
        OFFSET(sei_user_data), AV_OPT_TYPE_STRING, { .str = NULL } },

    { NULL }
};

static const AVClass h264_metadata_class = {
    .class_name = "h264_metadata_bsf",
    .item_name  = av_default_item_name,
    .option     = h264_metadata_options,
    .version    = LIBAVCODEC_VERSION_MAJOR,
};

static const enum AVCodecID h264_metadata_codec_ids[] = {
    AV_CODEC_ID_H264, AV_CODEC_ID_NONE,
};

const AVBitStreamFilter ff_h264_metadata_bsf = {
    .name           = "h264_metadata",
    .priv_data_size = sizeof(H264MetadataContext),
    .priv_class     = &h264_metadata_class,
    .init           = &h264_metadata_init,
    .close          = &h264_metadata_close,
    .filter         = &h264_metadata_filter,
    .codec_ids      = h264_metadata_codec_ids,
};