/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
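
/*
 * h264_metadata bitstream filter: edits metadata carried in an H.264
 * elementary stream (VUI fields in the SPS, AUD insertion/removal,
 * user data unregistered SEI messages) without re-encoding the coded
 * picture data.
 */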

#include "libavutil/avstring.h"
#include "libavutil/common.h"
#include "libavutil/opt.h"

#include "bsf.h"
#include "cbs.h"
#include "cbs_h264.h"
#include "h264.h"
#include "h264_sei.h"
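
// Actions for the "aud" option: leave AUD NAL units as they are, insert
// one at the start of every access unit, or remove any that are present.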
enum {
    PASS,
    INSERT,
    REMOVE,
};

typedef struct H264MetadataContext {
    const AVClass *class;

    CodedBitstreamContext cbc;
    CodedBitstreamFragment access_unit;

    H264RawAUD aud_nal;
    H264RawSEI sei_nal;

    int aud;

    AVRational sample_aspect_ratio;

    int video_format;
    int video_full_range_flag;
    int colour_primaries;
    int transfer_characteristics;
    int matrix_coefficients;

    int chroma_sample_loc_type;

    AVRational tick_rate;
    int fixed_frame_rate_flag;

    int crop_left;
    int crop_right;
    int crop_top;
    int crop_bottom;

    const char *sei_user_data;
} H264MetadataContext;

static int h264_metadata_update_sps(AVBSFContext *bsf,
                                    H264RawSPS *sps)
{
    H264MetadataContext *ctx = bsf->priv_data;
    int need_vui = 0;
    int crop_unit_x, crop_unit_y;

    if (ctx->sample_aspect_ratio.num && ctx->sample_aspect_ratio.den) {
        // Table E-1.
        static const AVRational sar_idc[] = {
            {   0,  0 }, // Unspecified (never written here).
            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
            { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
        };
        int num, den, i;

        av_reduce(&num, &den, ctx->sample_aspect_ratio.num,
                  ctx->sample_aspect_ratio.den, 65535);

        for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
            if (num == sar_idc[i].num &&
                den == sar_idc[i].den)
                break;
        }
        if (i == FF_ARRAY_ELEMS(sar_idc)) {
            sps->vui.aspect_ratio_idc = 255;
            sps->vui.sar_width  = num;
            sps->vui.sar_height = den;
        } else {
            sps->vui.aspect_ratio_idc = i;
        }
        sps->vui.aspect_ratio_info_present_flag = 1;
        need_vui = 1;
    }
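
// Helper for the VUI options below: a negative option value means "unset",
// in which case the syntax element is left alone if it is already present
// in the stream and otherwise set to the value the spec infers for an
// absent element.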
#define SET_OR_INFER(field, value, present_flag, infer) do { \
        if (value >= 0) { \
            field = value; \
            need_vui = 1; \
        } else if (!present_flag) \
            field = infer; \
    } while (0)

    if (ctx->video_format             >= 0 ||
        ctx->video_full_range_flag    >= 0 ||
        ctx->colour_primaries         >= 0 ||
        ctx->transfer_characteristics >= 0 ||
        ctx->matrix_coefficients      >= 0) {

        SET_OR_INFER(sps->vui.video_format, ctx->video_format,
                     sps->vui.video_signal_type_present_flag, 5);

        SET_OR_INFER(sps->vui.video_full_range_flag,
                     ctx->video_full_range_flag,
                     sps->vui.video_signal_type_present_flag, 0);

        if (ctx->colour_primaries         >= 0 ||
            ctx->transfer_characteristics >= 0 ||
            ctx->matrix_coefficients      >= 0) {

            SET_OR_INFER(sps->vui.colour_primaries,
                         ctx->colour_primaries,
                         sps->vui.colour_description_present_flag, 2);

            SET_OR_INFER(sps->vui.transfer_characteristics,
                         ctx->transfer_characteristics,
                         sps->vui.colour_description_present_flag, 2);

            SET_OR_INFER(sps->vui.matrix_coefficients,
                         ctx->matrix_coefficients,
                         sps->vui.colour_description_present_flag, 2);

            sps->vui.colour_description_present_flag = 1;
        }
        sps->vui.video_signal_type_present_flag = 1;
        need_vui = 1;
    }

    if (ctx->chroma_sample_loc_type >= 0) {
        sps->vui.chroma_sample_loc_type_top_field =
            ctx->chroma_sample_loc_type;
        sps->vui.chroma_sample_loc_type_bottom_field =
            ctx->chroma_sample_loc_type;
        sps->vui.chroma_loc_info_present_flag = 1;
        need_vui = 1;
    }
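
    // The reduced numerator of tick_rate becomes time_scale and the
    // denominator becomes num_units_in_tick; both are 32-bit syntax
    // elements, but av_reduce() writes into ints, so the reduction is
    // bounded by the smaller of INT_MAX and UINT32_MAX.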
    if (ctx->tick_rate.num && ctx->tick_rate.den) {
        int num, den;

        av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
                  UINT32_MAX > INT_MAX ? INT_MAX : UINT32_MAX);

        sps->vui.time_scale        = num;
        sps->vui.num_units_in_tick = den;

        sps->vui.timing_info_present_flag = 1;
        need_vui = 1;
    }
    SET_OR_INFER(sps->vui.fixed_frame_rate_flag,
                 ctx->fixed_frame_rate_flag,
                 sps->vui.timing_info_present_flag, 0);

    if (sps->separate_colour_plane_flag || sps->chroma_format_idc == 0) {
        crop_unit_x = 1;
        crop_unit_y = 2 - sps->frame_mbs_only_flag;
    } else {
        crop_unit_x = 1 + (sps->chroma_format_idc < 3);
        crop_unit_y = (1 + (sps->chroma_format_idc < 2)) *
                       (2 - sps->frame_mbs_only_flag);
    }
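
    // The crop_* options are given in luma samples, but the SPS stores the
    // offsets in units of crop_unit_x / crop_unit_y (CropUnitX / CropUnitY
    // in the H.264 spec), so each value must be an exact multiple of the
    // relevant unit.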
#define CROP(border, unit) do { \
        if (ctx->crop_ ## border >= 0) { \
            if (ctx->crop_ ## border % unit != 0) { \
                av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
                       "must be a multiple of %d.\n", #border, unit); \
                return AVERROR(EINVAL); \
            } \
            sps->frame_crop_ ## border ## _offset = \
                  ctx->crop_ ## border / unit; \
            sps->frame_cropping_flag = 1; \
        } \
    } while (0)
    CROP(left,   crop_unit_x);
    CROP(right,  crop_unit_x);
    CROP(top,    crop_unit_y);
    CROP(bottom, crop_unit_y);
#undef CROP

    if (need_vui)
        sps->vui_parameters_present_flag = 1;

    return 0;
}

static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
{
    H264MetadataContext *ctx = bsf->priv_data;
    AVPacket *in = NULL;
    CodedBitstreamFragment *au = &ctx->access_unit;
    int err, i, j, has_sps;
    char *sei_udu_string = NULL;

    err = ff_bsf_get_packet(bsf, &in);
    if (err < 0)
        goto fail;

    err = ff_cbs_read_packet(&ctx->cbc, au, in);
    if (err < 0) {
        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
        goto fail;
    }

    if (au->nb_units == 0) {
        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
        err = AVERROR_INVALIDDATA;
        goto fail;
    }

    // If an AUD is present, it must be the first NAL unit.
    if (au->units[0].type == H264_NAL_AUD) {
        if (ctx->aud == REMOVE)
            ff_cbs_delete_unit(&ctx->cbc, au, 0);
    } else {
        if (ctx->aud == INSERT) {
            static const int primary_pic_type_table[] = {
                0x084, // 2, 7
                0x0a5, // 0, 2, 5, 7
                0x0e7, // 0, 1, 2, 5, 6, 7
                0x210, // 4, 9
                0x318, // 3, 4, 8, 9
                0x294, // 2, 4, 7, 9
                0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
                0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
            };
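            // Each entry is a bitmask over the slice_type values that an
            // access unit with that primary_pic_type may contain; intersect
            // the masks over all slices in this access unit and pick the
            // smallest primary_pic_type that remains valid.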
            int primary_pic_type_mask = 0xff;
            H264RawAUD *aud = &ctx->aud_nal;

            for (i = 0; i < au->nb_units; i++) {
                if (au->units[i].type == H264_NAL_SLICE ||
                    au->units[i].type == H264_NAL_IDR_SLICE) {
                    H264RawSlice *slice = au->units[i].content;
                    for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
                        if (!(primary_pic_type_table[j] &
                              (1 << slice->header.slice_type)))
                            primary_pic_type_mask &= ~(1 << j);
                    }
                }
            }
            for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
                if (primary_pic_type_mask & (1 << j))
                    break;
            if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
                av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
                       "invalid slice types?\n");
                err = AVERROR_INVALIDDATA;
                goto fail;
            }

            aud->nal_unit_header.nal_unit_type = H264_NAL_AUD;
            aud->primary_pic_type = j;

            err = ff_cbs_insert_unit_content(&ctx->cbc, au,
                                             0, H264_NAL_AUD, aud);
            if (err < 0) {
                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
                goto fail;
            }
        }
    }

    has_sps = 0;
    for (i = 0; i < au->nb_units; i++) {
        if (au->units[i].type == H264_NAL_SPS) {
            err = h264_metadata_update_sps(bsf, au->units[i].content);
            if (err < 0)
                goto fail;
            has_sps = 1;
        }
    }

    // Only insert the SEI in access units containing SPSs.
    if (has_sps && ctx->sei_user_data) {
        H264RawSEI *sei;
        H264RawSEIPayload *payload;
        H264RawSEIUserDataUnregistered *udu;
        int sei_pos, sei_new;

        for (i = 0; i < au->nb_units; i++) {
            if (au->units[i].type == H264_NAL_SEI ||
                au->units[i].type == H264_NAL_SLICE ||
                au->units[i].type == H264_NAL_IDR_SLICE)
                break;
        }
        sei_pos = i;

        if (sei_pos < au->nb_units &&
            au->units[sei_pos].type == H264_NAL_SEI) {
            sei_new = 0;
            sei = au->units[sei_pos].content;
        } else {
            sei_new = 1;
            sei = &ctx->sei_nal;
            memset(sei, 0, sizeof(*sei));

            sei->nal_unit_header.nal_unit_type = H264_NAL_SEI;

            err = ff_cbs_insert_unit_content(&ctx->cbc, au,
                                             sei_pos, H264_NAL_SEI, sei);
            if (err < 0) {
                av_log(bsf, AV_LOG_ERROR, "Failed to insert SEI.\n");
                goto fail;
            }
        }

        payload = &sei->payload[sei->payload_count];

        payload->payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED;
        udu = &payload->payload.user_data_unregistered;

        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
            int c, v;
            c = ctx->sei_user_data[i];
            if (c == '-') {
                continue;
            } else if (av_isxdigit(c)) {
                c = av_tolower(c);
                v = (c <= '9' ? c - '0' : c - 'a' + 10);
            } else {
                goto invalid_user_data;
            }
            // Track nibble position by the number of hex digits consumed
            // (j), not the raw character index, so dashes in the UUID do
            // not shift the parity.
            if (j & 1)
                udu->uuid_iso_iec_11578[j / 2] |= v;
            else
                udu->uuid_iso_iec_11578[j / 2] = v << 4;
            ++j;
        }
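        // A valid sei_user_data value is a 128-bit UUID written as 32 hex
        // digits (dashes between digits are ignored), followed by '+' and
        // the string to carry in the user_data_unregistered payload.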
        if (j == 32 && ctx->sei_user_data[i] == '+') {
            sei_udu_string = av_strdup(ctx->sei_user_data + i + 1);
            if (!sei_udu_string) {
                err = AVERROR(ENOMEM);
                goto sei_fail;
            }

            udu->data = sei_udu_string;
            udu->data_length = strlen(sei_udu_string);

            payload->payload_size = 16 + udu->data_length;

            if (!sei_new) {
                // This will be freed by the existing internal
                // reference in fragment_uninit().
                sei_udu_string = NULL;
            }
        } else {
        invalid_user_data:
            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
                   "must be \"UUID+string\".\n");
            err = AVERROR(EINVAL);
        sei_fail:
            memset(payload, 0, sizeof(*payload));
            goto fail;
        }

        ++sei->payload_count;
    }

    err = ff_cbs_write_packet(&ctx->cbc, out, au);
    if (err < 0) {
        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
        goto fail;
    }

    err = av_packet_copy_props(out, in);
    if (err < 0)
        goto fail;

    err = 0;
fail:
    ff_cbs_fragment_uninit(&ctx->cbc, au);
    av_freep(&sei_udu_string);
    av_packet_free(&in);

    return err;
}

static int h264_metadata_init(AVBSFContext *bsf)
{
    H264MetadataContext *ctx = bsf->priv_data;
    CodedBitstreamFragment *au = &ctx->access_unit;
    int err, i;

    err = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_H264, bsf);
    if (err < 0)
        return err;

    if (bsf->par_in->extradata) {
        err = ff_cbs_read_extradata(&ctx->cbc, au, bsf->par_in);
        if (err < 0) {
            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
            goto fail;
        }

        for (i = 0; i < au->nb_units; i++) {
            if (au->units[i].type == H264_NAL_SPS) {
                err = h264_metadata_update_sps(bsf, au->units[i].content);
                if (err < 0)
                    goto fail;
            }
        }

        err = ff_cbs_write_extradata(&ctx->cbc, bsf->par_out, au);
        if (err < 0) {
            av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
            goto fail;
        }
    }

    err = 0;
fail:
    ff_cbs_fragment_uninit(&ctx->cbc, au);
    return err;
}

static void h264_metadata_close(AVBSFContext *bsf)
{
    H264MetadataContext *ctx = bsf->priv_data;
    ff_cbs_close(&ctx->cbc);
}
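
// Most options default to "unset" (-1 for the integer options, 0/0 for the
// rationals, NULL for sei_user_data); unset options leave the corresponding
// syntax elements in the stream untouched.  A typical option string, for
// example as parsed by av_bsf_list_parse_str(), might look like:
//     h264_metadata=aud=insert:sample_aspect_ratio=16/9:crop_bottom=8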
#define OFFSET(x) offsetof(H264MetadataContext, x)
static const AVOption h264_metadata_options[] = {
    { "aud", "Access Unit Delimiter NAL units",
        OFFSET(aud), AV_OPT_TYPE_INT,
        { .i64 = PASS }, PASS, REMOVE, 0, "aud" },
    { "pass",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS   }, .unit = "aud" },
    { "insert", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT }, .unit = "aud" },
    { "remove", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE }, .unit = "aud" },

    { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
        OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL,
        { .i64 = 0 }, 0, 65535 },

    { "video_format", "Set video format (table E-2)",
        OFFSET(video_format), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 7 },
    { "video_full_range_flag", "Set video full range flag",
        OFFSET(video_full_range_flag), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 1 },
    { "colour_primaries", "Set colour primaries (table E-3)",
        OFFSET(colour_primaries), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },
    { "transfer_characteristics", "Set transfer characteristics (table E-4)",
        OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },
    { "matrix_coefficients", "Set matrix coefficients (table E-5)",
        OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },

    { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
        OFFSET(chroma_sample_loc_type), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 6 },

    { "tick_rate", "Set VUI tick rate (num_units_in_tick / time_scale)",
        OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
        { .i64 = 0 }, 0, UINT_MAX },
    { "fixed_frame_rate_flag", "Set VUI fixed frame rate flag",
        OFFSET(fixed_frame_rate_flag), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 1 },

    { "crop_left", "Set left border crop offset",
        OFFSET(crop_left), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_WIDTH },
    { "crop_right", "Set right border crop offset",
        OFFSET(crop_right), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_WIDTH },
    { "crop_top", "Set top border crop offset",
        OFFSET(crop_top), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_HEIGHT },
    { "crop_bottom", "Set bottom border crop offset",
        OFFSET(crop_bottom), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_HEIGHT },

    { "sei_user_data", "Insert SEI user data (UUID+string)",
        OFFSET(sei_user_data), AV_OPT_TYPE_STRING, { .str = NULL } },

    { NULL }
};

static const AVClass h264_metadata_class = {
    .class_name = "h264_metadata_bsf",
    .item_name  = av_default_item_name,
    .option     = h264_metadata_options,
    .version    = LIBAVCODEC_VERSION_MAJOR,
};

static const enum AVCodecID h264_metadata_codec_ids[] = {
    AV_CODEC_ID_H264, AV_CODEC_ID_NONE,
};

const AVBitStreamFilter ff_h264_metadata_bsf = {
    .name           = "h264_metadata",
    .priv_data_size = sizeof(H264MetadataContext),
    .priv_class     = &h264_metadata_class,
    .init           = &h264_metadata_init,
    .close          = &h264_metadata_close,
    .filter         = &h264_metadata_filter,
    .codec_ids      = h264_metadata_codec_ids,
};