/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avstring.h"
#include "libavutil/common.h"
#include "libavutil/opt.h"

#include "bsf.h"
#include "cbs.h"
#include "cbs_h264.h"
#include "h264.h"
#include "h264_sei.h"

enum {
    PASS,
    INSERT,
    REMOVE,
};

typedef struct H264MetadataContext {
    const AVClass *class;

    CodedBitstreamContext *cbc;
    CodedBitstreamFragment access_unit;

    H264RawAUD aud_nal;
    H264RawSEI sei_nal;

    int aud;

    AVRational sample_aspect_ratio;

    int video_format;
    int video_full_range_flag;
    int colour_primaries;
    int transfer_characteristics;
    int matrix_coefficients;

    int chroma_sample_loc_type;

    AVRational tick_rate;
    int fixed_frame_rate_flag;

    int crop_left;
    int crop_right;
    int crop_top;
    int crop_bottom;

    const char *sei_user_data;
    int sei_first_au;

    int delete_filler;
} H264MetadataContext;

static int h264_metadata_update_sps(AVBSFContext *bsf,
                                    H264RawSPS *sps)
{
    H264MetadataContext *ctx = bsf->priv_data;
    int need_vui = 0;
    int crop_unit_x, crop_unit_y;

    if (ctx->sample_aspect_ratio.num && ctx->sample_aspect_ratio.den) {
        // Table E-1.
        static const AVRational sar_idc[] = {
            {   0,  0 }, // Unspecified (never written here).
            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
            { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
        };
        int num, den, i;

        av_reduce(&num, &den, ctx->sample_aspect_ratio.num,
                  ctx->sample_aspect_ratio.den, 65535);

        for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
            if (num == sar_idc[i].num &&
                den == sar_idc[i].den)
                break;
        }
        if (i == FF_ARRAY_ELEMS(sar_idc)) {
            sps->vui.aspect_ratio_idc = 255;
            sps->vui.sar_width  = num;
            sps->vui.sar_height = den;
        } else {
            sps->vui.aspect_ratio_idc = i;
        }
        sps->vui.aspect_ratio_info_present_flag = 1;
        need_vui = 1;
    }

#define SET_OR_INFER(field, value, present_flag, infer) do { \
        if (value >= 0) { \
            field = value; \
            need_vui = 1; \
        } else if (!present_flag) \
            field = infer; \
    } while (0)

    if (ctx->video_format             >= 0 ||
        ctx->video_full_range_flag    >= 0 ||
        ctx->colour_primaries         >= 0 ||
        ctx->transfer_characteristics >= 0 ||
        ctx->matrix_coefficients      >= 0) {

        SET_OR_INFER(sps->vui.video_format, ctx->video_format,
                     sps->vui.video_signal_type_present_flag, 5);

        SET_OR_INFER(sps->vui.video_full_range_flag,
                     ctx->video_full_range_flag,
                     sps->vui.video_signal_type_present_flag, 0);

        if (ctx->colour_primaries         >= 0 ||
            ctx->transfer_characteristics >= 0 ||
            ctx->matrix_coefficients      >= 0) {

            SET_OR_INFER(sps->vui.colour_primaries,
                         ctx->colour_primaries,
                         sps->vui.colour_description_present_flag, 2);

            SET_OR_INFER(sps->vui.transfer_characteristics,
                         ctx->transfer_characteristics,
                         sps->vui.colour_description_present_flag, 2);

            SET_OR_INFER(sps->vui.matrix_coefficients,
                         ctx->matrix_coefficients,
                         sps->vui.colour_description_present_flag, 2);

            sps->vui.colour_description_present_flag = 1;
        }
        sps->vui.video_signal_type_present_flag = 1;
        need_vui = 1;
    }

    if (ctx->chroma_sample_loc_type >= 0) {
        sps->vui.chroma_sample_loc_type_top_field =
            ctx->chroma_sample_loc_type;
        sps->vui.chroma_sample_loc_type_bottom_field =
            ctx->chroma_sample_loc_type;
        sps->vui.chroma_loc_info_present_flag = 1;
        need_vui = 1;
    }

    if (ctx->tick_rate.num && ctx->tick_rate.den) {
        int num, den;

        av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
                  UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);

        sps->vui.time_scale        = num;
        sps->vui.num_units_in_tick = den;

        sps->vui.timing_info_present_flag = 1;
        need_vui = 1;
    }
    SET_OR_INFER(sps->vui.fixed_frame_rate_flag,
                 ctx->fixed_frame_rate_flag,
                 sps->vui.timing_info_present_flag, 0);

    if (sps->separate_colour_plane_flag || sps->chroma_format_idc == 0) {
        crop_unit_x = 1;
        crop_unit_y = 2 - sps->frame_mbs_only_flag;
    } else {
        crop_unit_x = 1 + (sps->chroma_format_idc < 3);
        crop_unit_y = (1 + (sps->chroma_format_idc < 2)) *
                      (2 - sps->frame_mbs_only_flag);
    }
#define CROP(border, unit) do { \
        if (ctx->crop_ ## border >= 0) { \
            if (ctx->crop_ ## border % unit != 0) { \
                av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
                       "must be a multiple of %d.\n", #border, unit); \
                return AVERROR(EINVAL); \
            } \
            sps->frame_crop_ ## border ## _offset = \
                ctx->crop_ ## border / unit; \
            sps->frame_cropping_flag = 1; \
        } \
    } while (0)
    CROP(left,   crop_unit_x);
    CROP(right,  crop_unit_x);
    CROP(top,    crop_unit_y);
    CROP(bottom, crop_unit_y);
#undef CROP

    if (need_vui)
        sps->vui_parameters_present_flag = 1;

    return 0;
}

static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
{
    H264MetadataContext *ctx = bsf->priv_data;
    AVPacket *in = NULL;
    CodedBitstreamFragment *au = &ctx->access_unit;
    int err, i, j, has_sps;

    err = ff_bsf_get_packet(bsf, &in);
    if (err < 0)
        goto fail;

    err = ff_cbs_read_packet(ctx->cbc, au, in);
    if (err < 0) {
        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
        goto fail;
    }

    if (au->nb_units == 0) {
        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
        err = AVERROR_INVALIDDATA;
        goto fail;
    }

    // If an AUD is present, it must be the first NAL unit.
    if (au->units[0].type == H264_NAL_AUD) {
        if (ctx->aud == REMOVE)
            ff_cbs_delete_unit(ctx->cbc, au, 0);
    } else {
        if (ctx->aud == INSERT) {
            static const int primary_pic_type_table[] = {
                0x084, // 2, 7
                0x0a5, // 0, 2, 5, 7
                0x0e7, // 0, 1, 2, 5, 6, 7
                0x210, // 4, 9
                0x318, // 3, 4, 8, 9
                0x294, // 2, 4, 7, 9
                0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
                0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
            };
            int primary_pic_type_mask = 0xff;
            H264RawAUD *aud = &ctx->aud_nal;

            for (i = 0; i < au->nb_units; i++) {
                if (au->units[i].type == H264_NAL_SLICE ||
                    au->units[i].type == H264_NAL_IDR_SLICE) {
                    H264RawSlice *slice = au->units[i].content;
                    for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
                        if (!(primary_pic_type_table[j] &
                              (1 << slice->header.slice_type)))
                            primary_pic_type_mask &= ~(1 << j);
                    }
                }
            }
            for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
                if (primary_pic_type_mask & (1 << j))
                    break;
            if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
                av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
                       "invalid slice types?\n");
                err = AVERROR_INVALIDDATA;
                goto fail;
            }

            aud->nal_unit_header.nal_unit_type = H264_NAL_AUD;
            aud->primary_pic_type = j;

            err = ff_cbs_insert_unit_content(ctx->cbc, au,
                                             0, H264_NAL_AUD, aud, NULL);
            if (err < 0) {
                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
                goto fail;
            }
        }
    }

    has_sps = 0;
    for (i = 0; i < au->nb_units; i++) {
        if (au->units[i].type == H264_NAL_SPS) {
            err = h264_metadata_update_sps(bsf, au->units[i].content);
            if (err < 0)
                goto fail;
            has_sps = 1;
        }
    }

    // Only insert the SEI in access units containing SPSs, and also
    // unconditionally in the first access unit we ever see.
    if (ctx->sei_user_data && (has_sps || !ctx->sei_first_au)) {
        H264RawSEIPayload payload = {
            .payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED,
        };
        H264RawSEIUserDataUnregistered *udu =
            &payload.payload.user_data_unregistered;

        ctx->sei_first_au = 1;

        // Parse the leading UUID: 32 hex digits, with '-' separators allowed.
        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
            int c, v;
            c = ctx->sei_user_data[i];
            if (c == '-') {
                continue;
            } else if (av_isxdigit(c)) {
                c = av_tolower(c);
                v = (c <= '9' ? c - '0' : c - 'a' + 10);
            } else {
                goto invalid_user_data;
            }
            if (i & 1)
                udu->uuid_iso_iec_11578[j / 2] |= v;
            else
                udu->uuid_iso_iec_11578[j / 2] = v << 4;
            ++j;
        }
        if (j == 32 && ctx->sei_user_data[i] == '+') {
            size_t len = strlen(ctx->sei_user_data + i + 1);

            udu->data_ref = av_buffer_alloc(len + 1);
            if (!udu->data_ref) {
                err = AVERROR(ENOMEM);
                goto fail;
            }

            udu->data = udu->data_ref->data;
            udu->data_length = len + 1;
            memcpy(udu->data, ctx->sei_user_data + i + 1, len + 1);

            payload.payload_size = 16 + udu->data_length;

            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
            if (err < 0) {
                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
                       "message to access unit.\n");
                goto fail;
            }

        } else {
        invalid_user_data:
            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
                   "must be \"UUID+string\".\n");
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (ctx->delete_filler) {
        for (i = 0; i < au->nb_units; i++) {
            if (au->units[i].type == H264_NAL_FILLER_DATA) {
                // Filler NAL units.
                err = ff_cbs_delete_unit(ctx->cbc, au, i);
                if (err < 0) {
                    av_log(bsf, AV_LOG_ERROR, "Failed to delete "
                           "filler NAL.\n");
                    goto fail;
                }
                --i;
                continue;
            }

            if (au->units[i].type == H264_NAL_SEI) {
                // Filler SEI messages.
                H264RawSEI *sei = au->units[i].content;

                for (j = 0; j < sei->payload_count; j++) {
                    if (sei->payload[j].payload_type ==
                        H264_SEI_TYPE_FILLER_PAYLOAD) {
                        err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
                                                             &au->units[i], j);
                        if (err < 0) {
                            av_log(bsf, AV_LOG_ERROR, "Failed to delete "
                                   "filler SEI message.\n");
                            goto fail;
                        }
                        // Renumbering might have happened, start again at
                        // the same NAL unit position.
                        --i;
                        break;
                    }
                }
            }
        }
    }

    err = ff_cbs_write_packet(ctx->cbc, out, au);
    if (err < 0) {
        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
        goto fail;
    }

    err = av_packet_copy_props(out, in);
    if (err < 0)
        goto fail;

    err = 0;
fail:
    ff_cbs_fragment_uninit(ctx->cbc, au);
    av_packet_free(&in);

    return err;
}

static int h264_metadata_init(AVBSFContext *bsf)
{
    H264MetadataContext *ctx = bsf->priv_data;
    CodedBitstreamFragment *au = &ctx->access_unit;
    int err, i;

    err = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_H264, bsf);
    if (err < 0)
        return err;

    if (bsf->par_in->extradata) {
        err = ff_cbs_read_extradata(ctx->cbc, au, bsf->par_in);
        if (err < 0) {
            av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
            goto fail;
        }

        for (i = 0; i < au->nb_units; i++) {
            if (au->units[i].type == H264_NAL_SPS) {
                err = h264_metadata_update_sps(bsf, au->units[i].content);
                if (err < 0)
                    goto fail;
            }
        }

        err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, au);
        if (err < 0) {
            av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
            goto fail;
        }
    }

    err = 0;
fail:
    ff_cbs_fragment_uninit(ctx->cbc, au);
    return err;
}

static void h264_metadata_close(AVBSFContext *bsf)
{
    H264MetadataContext *ctx = bsf->priv_data;
    ff_cbs_close(&ctx->cbc);
}

#define OFFSET(x) offsetof(H264MetadataContext, x)
static const AVOption h264_metadata_options[] = {
    { "aud", "Access Unit Delimiter NAL units",
        OFFSET(aud), AV_OPT_TYPE_INT,
        { .i64 = PASS }, PASS, REMOVE, 0, "aud" },
    { "pass",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS   }, .unit = "aud" },
    { "insert", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT }, .unit = "aud" },
    { "remove", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE }, .unit = "aud" },

    { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
        OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL,
        { .i64 = 0 }, 0, 65535 },

    { "video_format", "Set video format (table E-2)",
        OFFSET(video_format), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 7 },
    { "video_full_range_flag", "Set video full range flag",
        OFFSET(video_full_range_flag), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 1 },
    { "colour_primaries", "Set colour primaries (table E-3)",
        OFFSET(colour_primaries), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },
    { "transfer_characteristics", "Set transfer characteristics (table E-4)",
        OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },
    { "matrix_coefficients", "Set matrix coefficients (table E-5)",
        OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 255 },

    { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
        OFFSET(chroma_sample_loc_type), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 6 },

    { "tick_rate", "Set VUI tick rate (num_units_in_tick / time_scale)",
        OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
        { .i64 = 0 }, 0, UINT_MAX },
    { "fixed_frame_rate_flag", "Set VUI fixed frame rate flag",
        OFFSET(fixed_frame_rate_flag), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, 1 },

    { "crop_left", "Set left border crop offset",
        OFFSET(crop_left), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_WIDTH },
    { "crop_right", "Set right border crop offset",
        OFFSET(crop_right), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_WIDTH },
    { "crop_top", "Set top border crop offset",
        OFFSET(crop_top), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_HEIGHT },
    { "crop_bottom", "Set bottom border crop offset",
        OFFSET(crop_bottom), AV_OPT_TYPE_INT,
        { .i64 = -1 }, -1, H264_MAX_HEIGHT },

    { "sei_user_data", "Insert SEI user data (UUID+string)",
        OFFSET(sei_user_data), AV_OPT_TYPE_STRING, { .str = NULL } },

    { "delete_filler", "Delete all filler (both NAL and SEI)",
        OFFSET(delete_filler), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1 },

    { NULL }
};

static const AVClass h264_metadata_class = {
    .class_name = "h264_metadata_bsf",
    .item_name  = av_default_item_name,
    .option     = h264_metadata_options,
    .version    = LIBAVCODEC_VERSION_MAJOR,
};

static const enum AVCodecID h264_metadata_codec_ids[] = {
    AV_CODEC_ID_H264, AV_CODEC_ID_NONE,
};

const AVBitStreamFilter ff_h264_metadata_bsf = {
    .name           = "h264_metadata",
    .priv_data_size = sizeof(H264MetadataContext),
    .priv_class     = &h264_metadata_class,
    .init           = &h264_metadata_init,
    .close          = &h264_metadata_close,
    .filter         = &h264_metadata_filter,
    .codec_ids      = h264_metadata_codec_ids,
};
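
/*
 * A minimal usage sketch, assuming only the public av_bsf_*() and
 * av_opt_set() entry points: it shows how the options declared in
 * h264_metadata_options above would be driven from an application.
 * The function and variable names (apply_h264_metadata, in_par, pkt, out)
 * are hypothetical, and error handling is abbreviated.
 *
 *     #include <libavcodec/avcodec.h>
 *     #include <libavutil/opt.h>
 *
 *     static int apply_h264_metadata(const AVCodecParameters *in_par,
 *                                    AVPacket *pkt, AVPacket *out)
 *     {
 *         const AVBitStreamFilter *f = av_bsf_get_by_name("h264_metadata");
 *         AVBSFContext *bsf = NULL;
 *         int err = av_bsf_alloc(f, &bsf);
 *         if (err < 0)
 *             return err;
 *
 *         // Stream parameters must be filled in before av_bsf_init().
 *         avcodec_parameters_copy(bsf->par_in, in_par);
 *
 *         // The options above are plain AVOptions on the private context.
 *         av_opt_set(bsf->priv_data, "aud", "insert", 0);
 *         av_opt_set(bsf->priv_data, "sample_aspect_ratio", "4/3", 0);
 *
 *         err = av_bsf_init(bsf);
 *         if (err >= 0)
 *             err = av_bsf_send_packet(bsf, pkt);
 *         if (err >= 0)
 *             err = av_bsf_receive_packet(bsf, out);
 *
 *         av_bsf_free(&bsf);
 *         return err;
 *     }
 */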