You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

589 lines
18KB

  1. /*
  2. * Copyright (c) 2015 Martin Storsjo
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #include "libavutil/intreadwrite.h"
  22. #include "libavutil/mathematics.h"
  23. #include "libavutil/md5.h"
  24. #include "avformat.h"
  25. #if HAVE_UNISTD_H
  26. #include <unistd.h>
  27. #endif
  28. #if !HAVE_GETOPT
  29. #include "compat/getopt.c"
  30. #endif
  31. #define HASH_SIZE 16
  32. static const uint8_t h264_extradata[] = {
  33. 0x01, 0x4d, 0x40, 0x1e, 0xff, 0xe1, 0x00, 0x02, 0x67, 0x4d, 0x01, 0x00, 0x02, 0x68, 0xef
  34. };
  35. static const uint8_t aac_extradata[] = {
  36. 0x12, 0x10
  37. };
  38. const char *format = "mp4";
  39. AVFormatContext *ctx;
  40. uint8_t iobuf[32768];
  41. AVDictionary *opts;
  42. int write_file;
  43. const char *cur_name;
  44. FILE* out;
  45. int out_size;
  46. struct AVMD5* md5;
  47. uint8_t hash[HASH_SIZE];
  48. AVStream *video_st, *audio_st;
  49. int64_t audio_dts, video_dts;
  50. int bframes;
  51. int duration;
  52. int audio_duration;
  53. int frames;
  54. int gop_size;
  55. int64_t next_p_pts;
  56. enum AVPictureType last_picture;
  57. int skip_write;
  58. int skip_write_audio;
  59. int clear_duration;
  60. int num_warnings;
  61. int check_faults;
  62. static void count_warnings(void *avcl, int level, const char *fmt, va_list vl)
  63. {
  64. if (level == AV_LOG_WARNING)
  65. num_warnings++;
  66. }
  67. static void init_count_warnings(void)
  68. {
  69. av_log_set_callback(count_warnings);
  70. num_warnings = 0;
  71. }
  72. static void reset_count_warnings(void)
  73. {
  74. av_log_set_callback(av_log_default_callback);
  75. }
  76. static int io_write(void *opaque, uint8_t *buf, int size)
  77. {
  78. out_size += size;
  79. av_md5_update(md5, buf, size);
  80. if (out)
  81. fwrite(buf, 1, size, out);
  82. return size;
  83. }
  84. static void init_out(const char *name)
  85. {
  86. char buf[100];
  87. cur_name = name;
  88. snprintf(buf, sizeof(buf), "%s.%s", cur_name, format);
  89. av_md5_init(md5);
  90. if (write_file) {
  91. out = fopen(buf, "wb");
  92. if (!out)
  93. perror(buf);
  94. }
  95. out_size = 0;
  96. }
  97. static void close_out(void)
  98. {
  99. int i;
  100. av_md5_final(md5, hash);
  101. for (i = 0; i < HASH_SIZE; i++)
  102. printf("%02x", hash[i]);
  103. printf(" %d %s\n", out_size, cur_name);
  104. if (out)
  105. fclose(out);
  106. out = NULL;
  107. }
  108. static void check_func(int value, int line, const char *msg, ...)
  109. {
  110. if (!value) {
  111. va_list ap;
  112. va_start(ap, msg);
  113. printf("%d: ", line);
  114. vprintf(msg, ap);
  115. printf("\n");
  116. check_faults++;
  117. }
  118. }
  119. #define check(value, ...) check_func(value, __LINE__, __VA_ARGS__)
  120. static void init_fps(int bf, int audio_preroll, int fps)
  121. {
  122. AVStream *st;
  123. ctx = avformat_alloc_context();
  124. if (!ctx)
  125. exit(1);
  126. ctx->oformat = av_guess_format(format, NULL, NULL);
  127. if (!ctx->oformat)
  128. exit(1);
  129. ctx->pb = avio_alloc_context(iobuf, sizeof(iobuf), AVIO_FLAG_WRITE, NULL, NULL, io_write, NULL);
  130. if (!ctx->pb)
  131. exit(1);
  132. ctx->flags |= AVFMT_FLAG_BITEXACT;
  133. st = avformat_new_stream(ctx, NULL);
  134. if (!st)
  135. exit(1);
  136. st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
  137. st->codec->codec_id = AV_CODEC_ID_H264;
  138. st->codec->width = 640;
  139. st->codec->height = 480;
  140. st->time_base.num = 1;
  141. st->time_base.den = 30;
  142. st->codec->extradata_size = sizeof(h264_extradata);
  143. st->codec->extradata = av_mallocz(st->codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
  144. if (!st->codec->extradata)
  145. exit(1);
  146. memcpy(st->codec->extradata, h264_extradata, sizeof(h264_extradata));
  147. st->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
  148. video_st = st;
  149. st = avformat_new_stream(ctx, NULL);
  150. if (!st)
  151. exit(1);
  152. st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
  153. st->codec->codec_id = AV_CODEC_ID_AAC;
  154. st->codec->sample_rate = 44100;
  155. st->codec->channels = 2;
  156. st->time_base.num = 1;
  157. st->time_base.den = 44100;
  158. st->codec->extradata_size = sizeof(aac_extradata);
  159. st->codec->extradata = av_mallocz(st->codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
  160. if (!st->codec->extradata)
  161. exit(1);
  162. memcpy(st->codec->extradata, aac_extradata, sizeof(aac_extradata));
  163. st->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
  164. audio_st = st;
  165. if (avformat_write_header(ctx, &opts) < 0)
  166. exit(1);
  167. av_dict_free(&opts);
  168. frames = 0;
  169. gop_size = 30;
  170. duration = video_st->time_base.den / fps;
  171. audio_duration = 1024 * audio_st->time_base.den / audio_st->codec->sample_rate;
  172. if (audio_preroll)
  173. audio_preroll = 2048 * audio_st->time_base.den / audio_st->codec->sample_rate;
  174. bframes = bf;
  175. video_dts = bframes ? -duration : 0;
  176. audio_dts = -audio_preroll;
  177. }
  /* Same as init_fps(), using the default video frame rate of 30 fps. */
  static void init(int bf, int audio_preroll)
  {
      const int default_fps = 30;

      init_fps(bf, audio_preroll, default_fps);
  }
  182. static void mux_frames(int n)
  183. {
  184. int end_frames = frames + n;
  185. while (1) {
  186. AVPacket pkt;
  187. uint8_t pktdata[4];
  188. av_init_packet(&pkt);
  189. if (av_compare_ts(audio_dts, audio_st->time_base, video_dts, video_st->time_base) < 0) {
  190. pkt.dts = pkt.pts = audio_dts;
  191. pkt.stream_index = 1;
  192. pkt.duration = audio_duration;
  193. audio_dts += audio_duration;
  194. } else {
  195. if (frames == end_frames)
  196. break;
  197. pkt.dts = video_dts;
  198. pkt.stream_index = 0;
  199. pkt.duration = duration;
  200. if ((frames % gop_size) == 0) {
  201. pkt.flags |= AV_PKT_FLAG_KEY;
  202. last_picture = AV_PICTURE_TYPE_I;
  203. pkt.pts = pkt.dts + duration;
  204. video_dts = pkt.pts;
  205. } else {
  206. if (last_picture == AV_PICTURE_TYPE_P) {
  207. last_picture = AV_PICTURE_TYPE_B;
  208. pkt.pts = pkt.dts;
  209. video_dts = next_p_pts;
  210. } else {
  211. last_picture = AV_PICTURE_TYPE_P;
  212. if (((frames + 1) % gop_size) == 0) {
  213. pkt.pts = pkt.dts + duration;
  214. video_dts = pkt.pts;
  215. } else {
  216. next_p_pts = pkt.pts = pkt.dts + 2 * duration;
  217. video_dts += duration;
  218. }
  219. }
  220. }
  221. if (!bframes)
  222. pkt.pts = pkt.dts;
  223. frames++;
  224. }
  225. if (clear_duration)
  226. pkt.duration = 0;
  227. AV_WB32(pktdata, pkt.pts);
  228. pkt.data = pktdata;
  229. pkt.size = 4;
  230. if (skip_write)
  231. continue;
  232. if (skip_write_audio && pkt.stream_index == 1)
  233. continue;
  234. av_write_frame(ctx, &pkt);
  235. }
  236. }
  237. static void mux_gops(int n)
  238. {
  239. mux_frames(gop_size * n);
  240. }
  241. static void skip_gops(int n)
  242. {
  243. skip_write = 1;
  244. mux_gops(n);
  245. skip_write = 0;
  246. }
  247. static void finish(void)
  248. {
  249. av_write_trailer(ctx);
  250. av_free(ctx->pb);
  251. avformat_free_context(ctx);
  252. ctx = NULL;
  253. }
  /* Print the command line usage to stdout. */
  static void help(void)
  {
      fputs("movenc-test [-w]\n", stdout);
      fputs("-w write output into files\n", stdout);
  }
  259. int main(int argc, char **argv)
  260. {
  261. int c;
  262. uint8_t header[HASH_SIZE];
  263. uint8_t content[HASH_SIZE];
  264. int empty_moov_pos;
  265. int prev_pos;
  266. for (;;) {
  267. c = getopt(argc, argv, "wh");
  268. if (c == -1)
  269. break;
  270. switch (c) {
  271. case 'w':
  272. write_file = 1;
  273. break;
  274. default:
  275. case 'h':
  276. help();
  277. return 0;
  278. }
  279. }
  280. av_register_all();
  281. md5 = av_md5_alloc();
  282. if (!md5)
  283. return 1;
  284. // Write a fragmented file with an initial moov that actually contains some
  285. // samples. One moov+mdat with 1 second of data and one moof+mdat with 1
  286. // second of data.
  287. init_out("non-empty-moov");
  288. av_dict_set(&opts, "movflags", "frag_keyframe", 0);
  289. init(0, 0);
  290. mux_gops(2);
  291. finish();
  292. close_out();
  293. // Write a similar file, but with b-frames and audio preroll, handled
  294. // via an edit list.
  295. init_out("non-empty-moov-elst");
  296. av_dict_set(&opts, "movflags", "frag_keyframe", 0);
  297. av_dict_set(&opts, "use_editlist", "1", 0);
  298. init(1, 1);
  299. mux_gops(2);
  300. finish();
  301. close_out();
  302. // Use b-frames but no audio-preroll, but without an edit list.
  303. // Due to avoid_negative_ts == AVFMT_AVOID_NEG_TS_MAKE_ZERO, the dts
  304. // of the first audio packet is > 0, but it is set to zero since edit
  305. // lists aren't used, increasing the duration of the first packet instead.
  306. init_out("non-empty-moov-no-elst");
  307. av_dict_set(&opts, "movflags", "frag_keyframe", 0);
  308. av_dict_set(&opts, "use_editlist", "0", 0);
  309. init(1, 0);
  310. mux_gops(2);
  311. finish();
  312. close_out();
  313. format = "ismv";
  314. // Write an ISMV, with b-frames and audio preroll.
  315. init_out("ismv");
  316. av_dict_set(&opts, "movflags", "frag_keyframe", 0);
  317. init(1, 1);
  318. mux_gops(2);
  319. finish();
  320. close_out();
  321. format = "mp4";
  322. // An initial moov that doesn't contain any samples, followed by two
  323. // moof+mdat pairs.
  324. init_out("empty-moov");
  325. av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
  326. init(0, 0);
  327. mux_gops(2);
  328. finish();
  329. close_out();
  330. memcpy(content, hash, HASH_SIZE);
  331. // Similar to the previous one, but with input that doesn't start at
  332. // pts/dts 0. avoid_negative_ts behaves in the same way as
  333. // in non-empty-moov-no-elst above.
  334. init_out("empty-moov-no-elst");
  335. av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
  336. init(1, 0);
  337. mux_gops(2);
  338. finish();
  339. close_out();
  340. // Same as the previous one, but disable avoid_negative_ts (which
  341. // would require using an edit list, but with empty_moov, one can't
  342. // write a sensible edit list, when the start timestamps aren't known).
  343. // This should trigger a warning - we check that the warning is produced.
  344. init_count_warnings();
  345. init_out("empty-moov-no-elst-no-adjust");
  346. av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
  347. av_dict_set(&opts, "avoid_negative_ts", "0", 0);
  348. init(1, 0);
  349. mux_gops(2);
  350. finish();
  351. close_out();
  352. reset_count_warnings();
  353. check(num_warnings > 0, "No warnings printed for unhandled start offset");
  354. // Verify that delay_moov produces the same as empty_moov for
  355. // simple input
  356. init_out("delay-moov");
  357. av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov", 0);
  358. init(0, 0);
  359. mux_gops(2);
  360. finish();
  361. close_out();
  362. check(!memcmp(hash, content, HASH_SIZE), "delay_moov differs from empty_moov");
  363. // Test writing content that requires an edit list using delay_moov
  364. init_out("delay-moov-elst");
  365. av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov", 0);
  366. init(1, 1);
  367. mux_gops(2);
  368. finish();
  369. close_out();
  370. // Test writing a file with one track lacking packets, with delay_moov.
  371. skip_write_audio = 1;
  372. init_out("delay-moov-empty-track");
  373. av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov", 0);
  374. init(0, 0);
  375. mux_gops(2);
  376. // The automatic flushing shouldn't output anything, since we're still
  377. // waiting for data for some tracks
  378. check(out_size == 0, "delay_moov flushed prematurely");
  379. // When closed (or manually flushed), all the written data should still
  380. // be output.
  381. finish();
  382. close_out();
  383. check(out_size > 0, "delay_moov didn't output anything");
  384. // Check that manually flushing still outputs things as expected. This
  385. // produces two fragments, while the one above produces only one.
  386. init_out("delay-moov-empty-track-flush");
  387. av_dict_set(&opts, "movflags", "frag_custom+delay_moov", 0);
  388. init(0, 0);
  389. mux_gops(1);
  390. av_write_frame(ctx, NULL); // Force writing the moov
  391. check(out_size > 0, "No moov written");
  392. av_write_frame(ctx, NULL);
  393. mux_gops(1);
  394. av_write_frame(ctx, NULL);
  395. finish();
  396. close_out();
  397. skip_write_audio = 0;
  398. // Verify that the header written by delay_moov when manually flushed
  399. // is identical to the one by empty_moov.
  400. init_out("empty-moov-header");
  401. av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
  402. init(0, 0);
  403. close_out();
  404. memcpy(header, hash, HASH_SIZE);
  405. init_out("empty-moov-content");
  406. mux_gops(2);
  407. // Written 2 seconds of content, with an automatic flush after 1 second.
  408. check(out_size > 0, "No automatic flush?");
  409. empty_moov_pos = prev_pos = out_size;
  410. // Manually flush the second fragment
  411. av_write_frame(ctx, NULL);
  412. check(out_size > prev_pos, "No second fragment flushed?");
  413. prev_pos = out_size;
  414. // Check that an extra flush doesn't output any more data
  415. av_write_frame(ctx, NULL);
  416. check(out_size == prev_pos, "More data written?");
  417. close_out();
  418. memcpy(content, hash, HASH_SIZE);
  419. // Ignore the trailer written here
  420. finish();
  421. init_out("delay-moov-header");
  422. av_dict_set(&opts, "movflags", "frag_custom+delay_moov", 0);
  423. init(0, 0);
  424. check(out_size == 0, "Output written during init with delay_moov");
  425. mux_gops(1); // Write 1 second of content
  426. av_write_frame(ctx, NULL); // Force writing the moov
  427. close_out();
  428. check(!memcmp(hash, header, HASH_SIZE), "delay_moov header differs from empty_moov");
  429. init_out("delay-moov-content");
  430. av_write_frame(ctx, NULL); // Flush the first fragment
  431. check(out_size == empty_moov_pos, "Manually flushed content differs from automatically flushed, %d vs %d", out_size, empty_moov_pos);
  432. mux_gops(1); // Write the rest of the content
  433. av_write_frame(ctx, NULL); // Flush the second fragment
  434. close_out();
  435. check(!memcmp(hash, content, HASH_SIZE), "delay_moov content differs from empty_moov");
  436. finish();
  437. // Verify that we can produce an identical second fragment without
  438. // writing the first one. First write the reference fragments that
  439. // we want to reproduce.
  440. av_dict_set(&opts, "movflags", "frag_custom+empty_moov+dash", 0);
  441. init(0, 0);
  442. mux_gops(1);
  443. av_write_frame(ctx, NULL); // Output the first fragment
  444. init_out("empty-moov-second-frag");
  445. mux_gops(1);
  446. av_write_frame(ctx, NULL); // Output the second fragment
  447. close_out();
  448. memcpy(content, hash, HASH_SIZE);
  449. finish();
  450. // Produce the same second fragment without actually writing the first
  451. // one before.
  452. av_dict_set(&opts, "movflags", "frag_custom+empty_moov+dash+frag_discont", 0);
  453. av_dict_set(&opts, "fragment_index", "2", 0);
  454. av_dict_set(&opts, "avoid_negative_ts", "0", 0);
  455. av_dict_set(&opts, "use_editlist", "0", 0);
  456. init(0, 0);
  457. skip_gops(1);
  458. init_out("empty-moov-second-frag-discont");
  459. mux_gops(1);
  460. av_write_frame(ctx, NULL); // Output the second fragment
  461. close_out();
  462. check(!memcmp(hash, content, HASH_SIZE), "discontinuously written fragment differs");
  463. finish();
  464. // Produce the same thing by using delay_moov, which requires a slightly
  465. // different call sequence.
  466. av_dict_set(&opts, "movflags", "frag_custom+delay_moov+dash+frag_discont", 0);
  467. av_dict_set(&opts, "fragment_index", "2", 0);
  468. init(0, 0);
  469. skip_gops(1);
  470. mux_gops(1);
  471. av_write_frame(ctx, NULL); // Output the moov
  472. init_out("delay-moov-second-frag-discont");
  473. av_write_frame(ctx, NULL); // Output the second fragment
  474. close_out();
  475. check(!memcmp(hash, content, HASH_SIZE), "discontinuously written fragment differs");
  476. finish();
  477. // Test VFR content, with sidx atoms (which declare the pts duration
  478. // of a fragment, forcing overriding the start pts of the next one).
  479. // Here, the fragment duration in pts is significantly different from
  480. // the duration in dts. The video stream starts at dts=-10,pts=0, and
  481. // the second fragment starts at dts=155,pts=156. The trun duration sum
  482. // of the first fragment is 165, which also is written as
  483. // baseMediaDecodeTime in the tfdt in the second fragment. The sidx for
  484. // the first fragment says earliest_presentation_time = 0 and
  485. // subsegment_duration = 156, which also matches the sidx in the second
  486. // fragment. For the audio stream, the pts and dts durations also don't
  487. // match - the input stream starts at pts=-2048, but that part is excluded
  488. // by the edit list.
  489. init_out("vfr");
  490. av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov+dash", 0);
  491. init_fps(1, 1, 3);
  492. mux_frames(gop_size/2);
  493. duration /= 10;
  494. mux_frames(gop_size/2);
  495. mux_gops(1);
  496. finish();
  497. close_out();
  498. // Test VFR content, with cleared duration fields. In these cases,
  499. // the muxer must guess the duration of the last packet of each
  500. // fragment. As long as the framerate doesn't vary (too much) at the
  501. // fragment edge, it works just fine. Additionally, when automatically
  502. // cutting fragments, the muxer already know the timestamps of the next
  503. // packet for one stream (in most cases the video stream), avoiding
  504. // having to use guesses for that one.
  505. init_count_warnings();
  506. clear_duration = 1;
  507. init_out("vfr-noduration");
  508. av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov+dash", 0);
  509. init_fps(1, 1, 3);
  510. mux_frames(gop_size/2);
  511. duration /= 10;
  512. mux_frames(gop_size/2);
  513. mux_gops(1);
  514. finish();
  515. close_out();
  516. clear_duration = 0;
  517. reset_count_warnings();
  518. check(num_warnings > 0, "No warnings printed for filled in durations");
  519. av_free(md5);
  520. return check_faults > 0 ? 1 : 0;
  521. }