You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2690 lines
86KB

  1. /*
  2. * Matroska file demuxer (no muxer yet)
  3. * Copyright (c) 2003-2004 The ffmpeg Project
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. */
  19. /**
  20. * @file matroska.c
  21. * Matroska file demuxer
  22. * by Ronald Bultje <rbultje@ronald.bitfreak.net>
  23. * with a little help from Moritz Bunkus <moritz@bunkus.org>
  24. * Specs available on the matroska project page:
  25. * http://www.matroska.org/.
  26. */
  27. #include "avformat.h"
  28. /* For codec_get_bmp_id and codec_get_wav_id. */
  29. #include "avi.h"
  30. /* EBML version supported */
  31. #define EBML_VERSION 1
  32. /* top-level master-IDs */
  33. #define EBML_ID_HEADER 0x1A45DFA3
  34. /* IDs in the HEADER master */
  35. #define EBML_ID_EBMLVERSION 0x4286
  36. #define EBML_ID_EBMLREADVERSION 0x42F7
  37. #define EBML_ID_EBMLMAXIDLENGTH 0x42F2
  38. #define EBML_ID_EBMLMAXSIZELENGTH 0x42F3
  39. #define EBML_ID_DOCTYPE 0x4282
  40. #define EBML_ID_DOCTYPEVERSION 0x4287
  41. #define EBML_ID_DOCTYPEREADVERSION 0x4285
  42. /* general EBML types */
  43. #define EBML_ID_VOID 0xEC
  44. /*
  45. * Matroska element IDs. max. 32-bit.
  46. */
  47. /* toplevel segment */
  48. #define MATROSKA_ID_SEGMENT 0x18538067
  49. /* matroska top-level master IDs */
  50. #define MATROSKA_ID_INFO 0x1549A966
  51. #define MATROSKA_ID_TRACKS 0x1654AE6B
  52. #define MATROSKA_ID_CUES 0x1C53BB6B
  53. #define MATROSKA_ID_TAGS 0x1254C367
  54. #define MATROSKA_ID_SEEKHEAD 0x114D9B74
  55. #define MATROSKA_ID_CLUSTER 0x1F43B675
  56. /* IDs in the info master */
  57. #define MATROSKA_ID_TIMECODESCALE 0x2AD7B1
  58. #define MATROSKA_ID_DURATION 0x4489
  59. #define MATROSKA_ID_WRITINGAPP 0x5741
  60. #define MATROSKA_ID_MUXINGAPP 0x4D80
  61. #define MATROSKA_ID_DATEUTC 0x4461
  62. /* ID in the tracks master */
  63. #define MATROSKA_ID_TRACKENTRY 0xAE
  64. /* IDs in the trackentry master */
  65. #define MATROSKA_ID_TRACKNUMBER 0xD7
  66. #define MATROSKA_ID_TRACKUID 0x73C5
  67. #define MATROSKA_ID_TRACKTYPE 0x83
  68. #define MATROSKA_ID_TRACKAUDIO 0xE1
  69. #define MATROSKA_ID_TRACKVIDEO 0xE0
  70. #define MATROSKA_ID_CODECID 0x86
  71. #define MATROSKA_ID_CODECPRIVATE 0x63A2
  72. #define MATROSKA_ID_CODECNAME 0x258688
  73. #define MATROSKA_ID_CODECINFOURL 0x3B4040
  74. #define MATROSKA_ID_CODECDOWNLOADURL 0x26B240
  75. #define MATROSKA_ID_TRACKNAME 0x536E
  76. #define MATROSKA_ID_TRACKLANGUAGE 0x22B59C
  77. #define MATROSKA_ID_TRACKFLAGENABLED 0xB9
  78. #define MATROSKA_ID_TRACKFLAGDEFAULT 0x88
  79. #define MATROSKA_ID_TRACKFLAGLACING 0x9C
  80. #define MATROSKA_ID_TRACKMINCACHE 0x6DE7
  81. #define MATROSKA_ID_TRACKMAXCACHE 0x6DF8
  82. #define MATROSKA_ID_TRACKDEFAULTDURATION 0x23E383
  83. /* IDs in the trackvideo master */
  84. #define MATROSKA_ID_VIDEOFRAMERATE 0x2383E3
  85. #define MATROSKA_ID_VIDEODISPLAYWIDTH 0x54B0
  86. #define MATROSKA_ID_VIDEODISPLAYHEIGHT 0x54BA
  87. #define MATROSKA_ID_VIDEOPIXELWIDTH 0xB0
  88. #define MATROSKA_ID_VIDEOPIXELHEIGHT 0xBA
  89. #define MATROSKA_ID_VIDEOFLAGINTERLACED 0x9A
  90. #define MATROSKA_ID_VIDEOSTEREOMODE 0x53B9
  91. #define MATROSKA_ID_VIDEOASPECTRATIO 0x54B3
  92. #define MATROSKA_ID_VIDEOCOLOURSPACE 0x2EB524
  93. /* IDs in the trackaudio master */
  94. #define MATROSKA_ID_AUDIOSAMPLINGFREQ 0xB5
  95. #define MATROSKA_ID_AUDIOBITDEPTH 0x6264
  96. #define MATROSKA_ID_AUDIOCHANNELS 0x9F
  97. /* ID in the cues master */
  98. #define MATROSKA_ID_POINTENTRY 0xBB
  99. /* IDs in the pointentry master */
  100. #define MATROSKA_ID_CUETIME 0xB3
  101. #define MATROSKA_ID_CUETRACKPOSITION 0xB7
  102. /* IDs in the cuetrackposition master */
  103. #define MATROSKA_ID_CUETRACK 0xF7
  104. #define MATROSKA_ID_CUECLUSTERPOSITION 0xF1
  105. /* IDs in the tags master */
  106. /* TODO */
  107. /* IDs in the seekhead master */
  108. #define MATROSKA_ID_SEEKENTRY 0x4DBB
  109. /* IDs in the seekpoint master */
  110. #define MATROSKA_ID_SEEKID 0x53AB
  111. #define MATROSKA_ID_SEEKPOSITION 0x53AC
  112. /* IDs in the cluster master */
  113. #define MATROSKA_ID_CLUSTERTIMECODE 0xE7
  114. #define MATROSKA_ID_BLOCKGROUP 0xA0
  115. /* IDs in the blockgroup master */
  116. #define MATROSKA_ID_BLOCK 0xA1
  117. #define MATROSKA_ID_BLOCKDURATION 0x9B
  118. #define MATROSKA_ID_BLOCKREFERENCE 0xFB
  119. typedef enum {
  120. MATROSKA_TRACK_TYPE_VIDEO = 0x1,
  121. MATROSKA_TRACK_TYPE_AUDIO = 0x2,
  122. MATROSKA_TRACK_TYPE_COMPLEX = 0x3,
  123. MATROSKA_TRACK_TYPE_LOGO = 0x10,
  124. MATROSKA_TRACK_TYPE_SUBTITLE = 0x11,
  125. MATROSKA_TRACK_TYPE_CONTROL = 0x20,
  126. } MatroskaTrackType;
  127. typedef enum {
  128. MATROSKA_EYE_MODE_MONO = 0x0,
  129. MATROSKA_EYE_MODE_RIGHT = 0x1,
  130. MATROSKA_EYE_MODE_LEFT = 0x2,
  131. MATROSKA_EYE_MODE_BOTH = 0x3,
  132. } MatroskaEyeMode;
  133. typedef enum {
  134. MATROSKA_ASPECT_RATIO_MODE_FREE = 0x0,
  135. MATROSKA_ASPECT_RATIO_MODE_KEEP = 0x1,
  136. MATROSKA_ASPECT_RATIO_MODE_FIXED = 0x2,
  137. } MatroskaAspectRatioMode;
  138. /*
  139. * These aren't in any way "matroska-form" things,
  140. * it's just something I use in the muxer/demuxer.
  141. */
  142. typedef enum {
  143. MATROSKA_TRACK_ENABLED = (1<<0),
  144. MATROSKA_TRACK_DEFAULT = (1<<1),
  145. MATROSKA_TRACK_LACING = (1<<2),
  146. MATROSKA_TRACK_SHIFT = (1<<16)
  147. } MatroskaTrackFlags;
  148. typedef enum {
  149. MATROSKA_VIDEOTRACK_INTERLACED = (MATROSKA_TRACK_SHIFT<<0)
  150. } MatroskaVideoTrackFlags;
  151. /*
  152. * Matroska Codec IDs. Strings.
  153. */
  154. #define MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC "V_MS/VFW/FOURCC"
  155. #define MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED "V_UNCOMPRESSED"
  156. #define MATROSKA_CODEC_ID_VIDEO_MPEG4_SP "V_MPEG4/ISO/SP"
  157. #define MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP "V_MPEG4/ISO/ASP"
  158. #define MATROSKA_CODEC_ID_VIDEO_MPEG4_AP "V_MPEG4/ISO/AP"
  159. #define MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3 "V_MPEG4/MS/V3"
  160. #define MATROSKA_CODEC_ID_VIDEO_MPEG1 "V_MPEG1"
  161. #define MATROSKA_CODEC_ID_VIDEO_MPEG2 "V_MPEG2"
  162. #define MATROSKA_CODEC_ID_VIDEO_MJPEG "V_MJPEG"
  163. /* TODO: Real/Quicktime */
  164. #define MATROSKA_CODEC_ID_AUDIO_ACM "A_MS/ACM"
  165. #define MATROSKA_CODEC_ID_AUDIO_MPEG1_L1 "A_MPEG/L1"
  166. #define MATROSKA_CODEC_ID_AUDIO_MPEG1_L2 "A_MPEG/L2"
  167. #define MATROSKA_CODEC_ID_AUDIO_MPEG1_L3 "A_MPEG/L3"
  168. #define MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE "A_PCM/INT/BIG"
  169. #define MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE "A_PCM/INT/LIT"
  170. #define MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT "A_PCM/FLOAT/IEEE"
  171. #define MATROSKA_CODEC_ID_AUDIO_AC3 "A_AC3"
  172. #define MATROSKA_CODEC_ID_AUDIO_DTS "A_DTS"
  173. #define MATROSKA_CODEC_ID_AUDIO_VORBIS "A_VORBIS"
  174. #define MATROSKA_CODEC_ID_AUDIO_ACM "A_MS/ACM"
  175. #define MATROSKA_CODEC_ID_AUDIO_MPEG2 "A_AAC/MPEG2/"
  176. #define MATROSKA_CODEC_ID_AUDIO_MPEG4 "A_AAC/MPEG4/"
  177. /* TODO: AC3-9/10 (?), Real, Musepack, Quicktime */
  178. /* max. depth in the EBML tree structure */
  179. #define EBML_MAX_DEPTH 16
  180. typedef struct Track {
  181. MatroskaTrackType type;
  182. /* Unique track number and track ID. stream_index is the index that
  183. * the calling app uses for this track. */
  184. uint32_t num,
  185. uid,
  186. stream_index;
  187. char *name,
  188. *language;
  189. char *codec_id,
  190. *codec_name;
  191. unsigned char *codec_priv;
  192. int codec_priv_size;
  193. int64_t default_duration;
  194. MatroskaTrackFlags flags;
  195. } MatroskaTrack;
  196. typedef struct MatroskaVideoTrack {
  197. MatroskaTrack track;
  198. int pixel_width,
  199. pixel_height,
  200. display_width,
  201. display_height;
  202. uint32_t fourcc;
  203. MatroskaAspectRatioMode ar_mode;
  204. MatroskaEyeMode eye_mode;
  205. //..
  206. } MatroskaVideoTrack;
  207. typedef struct MatroskaAudioTrack {
  208. MatroskaTrack track;
  209. int channels,
  210. bitdepth,
  211. samplerate;
  212. //..
  213. } MatroskaAudioTrack;
  214. typedef struct MatroskaSubtitleTrack {
  215. MatroskaTrack track;
  216. //..
  217. } MatroskaSubtitleTrack;
  218. typedef struct MatroskaLevel {
  219. uint64_t start, length;
  220. } MatroskaLevel;
  221. typedef struct MatroskaDemuxIndex {
  222. uint64_t pos; /* of the corresponding *cluster*! */
  223. uint16_t track; /* reference to 'num' */
  224. uint64_t time; /* in nanoseconds */
  225. } MatroskaDemuxIndex;
  226. typedef struct MatroskaDemuxContext {
  227. AVFormatContext *ctx;
  228. /* ebml stuff */
  229. int num_levels;
  230. MatroskaLevel levels[EBML_MAX_DEPTH];
  231. int level_up;
  232. /* matroska stuff */
  233. char *writing_app,
  234. *muxing_app;
  235. int64_t created;
  236. /* timescale in the file */
  237. int64_t time_scale;
  238. /* length, position (time, ns) */
  239. int64_t duration,
  240. pos;
  241. /* num_streams is the number of streams that av_new_stream() was called
  242. * for ( = that are available to the calling program). */
  243. int num_tracks, num_streams;
  244. MatroskaTrack *tracks[MAX_STREAMS];
  245. /* cache for ID peeking */
  246. uint32_t peek_id;
  247. /* byte position of the segment inside the stream */
  248. offset_t segment_start;
  249. /* The packet queue. */
  250. AVPacket **packets;
  251. int num_packets;
  252. /* have we already parse metadata/cues/clusters? */
  253. int metadata_parsed,
  254. index_parsed,
  255. done;
  256. /* The index for seeking. */
  257. int num_indexes;
  258. MatroskaDemuxIndex *index;
  259. } MatroskaDemuxContext;
/*
 * The first few functions handle EBML file parsing. The rest
 * is the document interpretation. Matroska really is just an
 * EBML file.
 */
  265. /*
  266. * Return: the amount of levels in the hierarchy that the
  267. * current element lies higher than the previous one.
  268. * The opposite isn't done - that's auto-done using master
  269. * element reading.
  270. */
  271. static int
  272. ebml_read_element_level_up (MatroskaDemuxContext *matroska)
  273. {
  274. ByteIOContext *pb = &matroska->ctx->pb;
  275. offset_t pos = url_ftell(pb);
  276. int num = 0;
  277. while (matroska->num_levels > 0) {
  278. MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
  279. if (pos >= level->start + level->length) {
  280. matroska->num_levels--;
  281. num++;
  282. } else {
  283. break;
  284. }
  285. }
  286. return num;
  287. }
  288. /*
  289. * Read: an "EBML number", which is defined as a variable-length
  290. * array of bytes. The first byte indicates the length by giving a
  291. * number of 0-bits followed by a one. The position of the first
  292. * "one" bit inside the first byte indicates the length of this
  293. * number.
  294. * Returns: num. of bytes read. < 0 on error.
  295. */
  296. static int
  297. ebml_read_num (MatroskaDemuxContext *matroska,
  298. int max_size,
  299. uint64_t *number)
  300. {
  301. ByteIOContext *pb = &matroska->ctx->pb;
  302. int len_mask = 0x80, read = 1, n = 1;
  303. int64_t total = 0;
  304. /* the first byte tells us the length in bytes - get_byte() can normally
  305. * return 0, but since that's not a valid first ebmlID byte, we can
  306. * use it safely here to catch EOS. */
  307. if (!(total = get_byte(pb))) {
  308. /* we might encounter EOS here */
  309. if (!url_feof(pb)) {
  310. offset_t pos = url_ftell(pb);
  311. av_log(matroska->ctx, AV_LOG_ERROR,
  312. "Read error at pos. %llu (0x%llx)\n",
  313. pos, pos);
  314. }
  315. return AVERROR_IO; /* EOS or actual I/O error */
  316. }
  317. /* get the length of the EBML number */
  318. while (read <= max_size && !(total & len_mask)) {
  319. read++;
  320. len_mask >>= 1;
  321. }
  322. if (read > max_size) {
  323. offset_t pos = url_ftell(pb) - 1;
  324. av_log(matroska->ctx, AV_LOG_ERROR,
  325. "Invalid EBML number size tag 0x%02x at pos %llu (0x%llx)\n",
  326. (uint8_t) total, pos, pos);
  327. return AVERROR_INVALIDDATA;
  328. }
  329. /* read out length */
  330. total &= ~len_mask;
  331. while (n++ < read)
  332. total = (total << 8) | get_byte(pb);
  333. *number = total;
  334. return read;
  335. }
  336. /*
  337. * Read: the element content data ID.
  338. * Return: the number of bytes read or < 0 on error.
  339. */
  340. static int
  341. ebml_read_element_id (MatroskaDemuxContext *matroska,
  342. uint32_t *id,
  343. int *level_up)
  344. {
  345. int read;
  346. uint64_t total;
  347. /* if we re-call this, use our cached ID */
  348. if (matroska->peek_id != 0) {
  349. if (level_up)
  350. *level_up = 0;
  351. *id = matroska->peek_id;
  352. return 0;
  353. }
  354. /* read out the "EBML number", include tag in ID */
  355. if ((read = ebml_read_num(matroska, 4, &total)) < 0)
  356. return read;
  357. *id = matroska->peek_id = total | (1 << (read * 7));
  358. /* level tracking */
  359. if (level_up)
  360. *level_up = ebml_read_element_level_up(matroska);
  361. return read;
  362. }
  363. /*
  364. * Read: element content length.
  365. * Return: the number of bytes read or < 0 on error.
  366. */
  367. static int
  368. ebml_read_element_length (MatroskaDemuxContext *matroska,
  369. uint64_t *length)
  370. {
  371. /* clear cache since we're now beyond that data point */
  372. matroska->peek_id = 0;
  373. /* read out the "EBML number", include tag in ID */
  374. return ebml_read_num(matroska, 8, length);
  375. }
  376. /*
  377. * Return: the ID of the next element, or 0 on error.
  378. * Level_up contains the amount of levels that this
  379. * next element lies higher than the previous one.
  380. */
  381. static uint32_t
  382. ebml_peek_id (MatroskaDemuxContext *matroska,
  383. int *level_up)
  384. {
  385. uint32_t id;
  386. assert(level_up != NULL);
  387. if (ebml_read_element_id(matroska, &id, level_up) < 0)
  388. return 0;
  389. return id;
  390. }
  391. /*
  392. * Seek to a given offset.
  393. * 0 is success, -1 is failure.
  394. */
  395. static int
  396. ebml_read_seek (MatroskaDemuxContext *matroska,
  397. offset_t offset)
  398. {
  399. ByteIOContext *pb = &matroska->ctx->pb;
  400. /* clear ID cache, if any */
  401. matroska->peek_id = 0;
  402. return (url_fseek(pb, offset, SEEK_SET) == offset) ? 0 : -1;
  403. }
  404. /*
  405. * Skip the next element.
  406. * 0 is success, -1 is failure.
  407. */
  408. static int
  409. ebml_read_skip (MatroskaDemuxContext *matroska)
  410. {
  411. ByteIOContext *pb = &matroska->ctx->pb;
  412. uint32_t id;
  413. uint64_t length;
  414. int res;
  415. if ((res = ebml_read_element_id(matroska, &id, NULL)) < 0 ||
  416. (res = ebml_read_element_length(matroska, &length)) < 0)
  417. return res;
  418. url_fskip(pb, length);
  419. return 0;
  420. }
  421. /*
  422. * Read the next element as an unsigned int.
  423. * 0 is success, < 0 is failure.
  424. */
  425. static int
  426. ebml_read_uint (MatroskaDemuxContext *matroska,
  427. uint32_t *id,
  428. uint64_t *num)
  429. {
  430. ByteIOContext *pb = &matroska->ctx->pb;
  431. int n = 0, size, res;
  432. uint64_t rlength;
  433. if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
  434. (res = ebml_read_element_length(matroska, &rlength)) < 0)
  435. return res;
  436. size = rlength;
  437. if (size < 1 || size > 8) {
  438. offset_t pos = url_ftell(pb);
  439. av_log(matroska->ctx, AV_LOG_ERROR,
  440. "Invalid uint element size %d at position %lld (0x%llx)\n",
  441. size, pos, pos);
  442. return AVERROR_INVALIDDATA;
  443. }
  444. /* big-endian ordening; build up number */
  445. *num = 0;
  446. while (n++ < size)
  447. *num = (*num << 8) | get_byte(pb);
  448. return 0;
  449. }
  450. /*
  451. * Read the next element as a signed int.
  452. * 0 is success, < 0 is failure.
  453. */
  454. static int
  455. ebml_read_sint (MatroskaDemuxContext *matroska,
  456. uint32_t *id,
  457. int64_t *num)
  458. {
  459. ByteIOContext *pb = &matroska->ctx->pb;
  460. int size, n = 1, negative = 0, res;
  461. uint64_t rlength;
  462. if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
  463. (res = ebml_read_element_length(matroska, &rlength)) < 0)
  464. return res;
  465. size = rlength;
  466. if (size < 1 || size > 8) {
  467. offset_t pos = url_ftell(pb);
  468. av_log(matroska->ctx, AV_LOG_ERROR,
  469. "Invalid sint element size %d at position %lld (0x%llx)\n",
  470. size, pos, pos);
  471. return AVERROR_INVALIDDATA;
  472. }
  473. if ((*num = get_byte(pb)) & 0x80) {
  474. negative = 1;
  475. *num &= ~0x80;
  476. }
  477. *num = 0;
  478. while (n++ < size)
  479. *num = (*num << 8) | get_byte(pb);
  480. /* make signed */
  481. if (negative)
  482. *num = *num - (1LL << ((8 * size) - 1));
  483. return 0;
  484. }
  485. /*
  486. * Read the next element as a float.
  487. * 0 is success, < 0 is failure.
  488. */
  489. static int
  490. ebml_read_float (MatroskaDemuxContext *matroska,
  491. uint32_t *id,
  492. double *num)
  493. {
  494. ByteIOContext *pb = &matroska->ctx->pb;
  495. int size, res;
  496. uint64_t rlength;
  497. if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
  498. (res = ebml_read_element_length(matroska, &rlength)) < 0)
  499. return res;
  500. size = rlength;
  501. if (size != 4 && size != 8 && size != 10) {
  502. offset_t pos = url_ftell(pb);
  503. av_log(matroska->ctx, AV_LOG_ERROR,
  504. "Invalid float element size %d at position %llu (0x%llx)\n",
  505. size, pos, pos);
  506. return AVERROR_INVALIDDATA;
  507. }
  508. if (size == 10) {
  509. av_log(matroska->ctx, AV_LOG_ERROR,
  510. "FIXME! 10-byte floats unimplemented\n");
  511. return AVERROR_UNKNOWN;
  512. }
  513. if (size == 4) {
  514. float f;
  515. while (size-- > 0)
  516. #ifdef WORDS_BIGENDIAN
  517. ((uint8_t *) &f)[3 - size] = get_byte(pb);
  518. #else
  519. ((uint8_t *) &f)[size] = get_byte(pb);
  520. #endif
  521. *num = f;
  522. } else {
  523. double d;
  524. while (size-- > 0)
  525. #ifdef WORDS_BIGENDIAN
  526. ((uint8_t *) &d)[7 - size] = get_byte(pb);
  527. #else
  528. ((uint8_t *) &d)[size] = get_byte(pb);
  529. #endif
  530. *num = d;
  531. }
  532. return 0;
  533. }
  534. /*
  535. * Read the next element as an ASCII string.
  536. * 0 is success, < 0 is failure.
  537. */
  538. static int
  539. ebml_read_ascii (MatroskaDemuxContext *matroska,
  540. uint32_t *id,
  541. char **str)
  542. {
  543. ByteIOContext *pb = &matroska->ctx->pb;
  544. int size, res;
  545. uint64_t rlength;
  546. if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
  547. (res = ebml_read_element_length(matroska, &rlength)) < 0)
  548. return res;
  549. size = rlength;
  550. /* ebml strings are usually not 0-terminated, so we allocate one
  551. * byte more, read the string and NULL-terminate it ourselves. */
  552. if (!(*str = av_malloc(size + 1))) {
  553. av_log(matroska->ctx, AV_LOG_ERROR, "Memory allocation failed\n");
  554. return AVERROR_NOMEM;
  555. }
  556. if (get_buffer(pb, (uint8_t *) *str, size) != size) {
  557. offset_t pos = url_ftell(pb);
  558. av_log(matroska->ctx, AV_LOG_ERROR,
  559. "Read error at pos. %llu (0x%llx)\n", pos, pos);
  560. return AVERROR_IO;
  561. }
  562. (*str)[size] = '\0';
  563. return 0;
  564. }
  565. /*
  566. * Read the next element as a UTF-8 string.
  567. * 0 is success, < 0 is failure.
  568. */
  569. static int
  570. ebml_read_utf8 (MatroskaDemuxContext *matroska,
  571. uint32_t *id,
  572. char **str)
  573. {
  574. return ebml_read_ascii(matroska, id, str);
  575. }
  576. /*
  577. * Read the next element as a date (nanoseconds since 1/1/2000).
  578. * 0 is success, < 0 is failure.
  579. */
  580. static int
  581. ebml_read_date (MatroskaDemuxContext *matroska,
  582. uint32_t *id,
  583. int64_t *date)
  584. {
  585. return ebml_read_sint(matroska, id, date);
  586. }
  587. /*
  588. * Read the next element, but only the header. The contents
  589. * are supposed to be sub-elements which can be read separately.
  590. * 0 is success, < 0 is failure.
  591. */
  592. static int
  593. ebml_read_master (MatroskaDemuxContext *matroska,
  594. uint32_t *id)
  595. {
  596. ByteIOContext *pb = &matroska->ctx->pb;
  597. uint64_t length;
  598. MatroskaLevel *level;
  599. int res;
  600. if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
  601. (res = ebml_read_element_length(matroska, &length)) < 0)
  602. return res;
  603. /* protect... (Heaven forbids that the '>' is true) */
  604. if (matroska->num_levels >= EBML_MAX_DEPTH) {
  605. av_log(matroska->ctx, AV_LOG_ERROR,
  606. "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
  607. return AVERROR_NOTSUPP;
  608. }
  609. /* remember level */
  610. level = &matroska->levels[matroska->num_levels++];
  611. level->start = url_ftell(pb);
  612. level->length = length;
  613. return 0;
  614. }
  615. /*
  616. * Read the next element as binary data.
  617. * 0 is success, < 0 is failure.
  618. */
  619. static int
  620. ebml_read_binary (MatroskaDemuxContext *matroska,
  621. uint32_t *id,
  622. uint8_t **binary,
  623. int *size)
  624. {
  625. ByteIOContext *pb = &matroska->ctx->pb;
  626. uint64_t rlength;
  627. int res;
  628. if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
  629. (res = ebml_read_element_length(matroska, &rlength)) < 0)
  630. return res;
  631. *size = rlength;
  632. if (!(*binary = av_malloc(*size))) {
  633. av_log(matroska->ctx, AV_LOG_ERROR,
  634. "Memory allocation error\n");
  635. return AVERROR_NOMEM;
  636. }
  637. if (get_buffer(pb, *binary, *size) != *size) {
  638. offset_t pos = url_ftell(pb);
  639. av_log(matroska->ctx, AV_LOG_ERROR,
  640. "Read error at pos. %llu (0x%llx)\n", pos, pos);
  641. return AVERROR_IO;
  642. }
  643. return 0;
  644. }
  645. /*
  646. * Read signed/unsigned "EBML" numbers.
  647. * Return: number of bytes processed, < 0 on error.
  648. * XXX: use ebml_read_num().
  649. */
  650. static int
  651. matroska_ebmlnum_uint (uint8_t *data,
  652. uint32_t size,
  653. uint64_t *num)
  654. {
  655. int len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
  656. uint64_t total;
  657. if (size <= 0)
  658. return AVERROR_INVALIDDATA;
  659. total = data[0];
  660. while (read <= 8 && !(total & len_mask)) {
  661. read++;
  662. len_mask >>= 1;
  663. }
  664. if (read > 8)
  665. return AVERROR_INVALIDDATA;
  666. if ((total &= (len_mask - 1)) == len_mask - 1)
  667. num_ffs++;
  668. if (size < read)
  669. return AVERROR_INVALIDDATA;
  670. while (n < read) {
  671. if (data[n] == 0xff)
  672. num_ffs++;
  673. total = (total << 8) | data[n];
  674. n++;
  675. }
  676. if (!total)
  677. return AVERROR_INVALIDDATA;
  678. if (read == num_ffs)
  679. *num = (uint64_t)-1;
  680. else
  681. *num = total;
  682. return read;
  683. }
  684. /*
  685. * Same as above, but signed.
  686. */
  687. static int
  688. matroska_ebmlnum_sint (uint8_t *data,
  689. uint32_t size,
  690. int64_t *num)
  691. {
  692. uint64_t unum;
  693. int res;
  694. /* read as unsigned number first */
  695. if ((res = matroska_ebmlnum_uint(data, size, &unum)) < 0)
  696. return res;
  697. /* make signed (weird way) */
  698. if (unum == (uint64_t)-1)
  699. *num = INT64_MAX;
  700. else
  701. *num = unum - ((1LL << ((7 * res) - 1)) - 1);
  702. return res;
  703. }
  704. /*
  705. * Read an EBML header.
  706. * 0 is success, < 0 is failure.
  707. */
  708. static int
  709. ebml_read_header (MatroskaDemuxContext *matroska,
  710. char **doctype,
  711. int *version)
  712. {
  713. uint32_t id;
  714. int level_up, res = 0;
  715. /* default init */
  716. if (doctype)
  717. *doctype = NULL;
  718. if (version)
  719. *version = 1;
  720. if (!(id = ebml_peek_id(matroska, &level_up)) ||
  721. level_up != 0 || id != EBML_ID_HEADER) {
  722. av_log(matroska->ctx, AV_LOG_ERROR,
  723. "This is not an EBML file (id=0x%x/0x%x)\n", id, EBML_ID_HEADER);
  724. return AVERROR_INVALIDDATA;
  725. }
  726. if ((res = ebml_read_master(matroska, &id)) < 0)
  727. return res;
  728. while (res == 0) {
  729. if (!(id = ebml_peek_id(matroska, &level_up)))
  730. return AVERROR_IO;
  731. /* end-of-header */
  732. if (level_up)
  733. break;
  734. switch (id) {
  735. /* is our read version uptodate? */
  736. case EBML_ID_EBMLREADVERSION: {
  737. uint64_t num;
  738. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  739. return res;
  740. if (num > EBML_VERSION) {
  741. av_log(matroska->ctx, AV_LOG_ERROR,
  742. "EBML version %llu (> %d) is not supported\n",
  743. num, EBML_VERSION);
  744. return AVERROR_INVALIDDATA;
  745. }
  746. break;
  747. }
  748. /* we only handle 8 byte lengths at max */
  749. case EBML_ID_EBMLMAXSIZELENGTH: {
  750. uint64_t num;
  751. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  752. return res;
  753. if (num > sizeof(uint64_t)) {
  754. av_log(matroska->ctx, AV_LOG_ERROR,
  755. "Integers of size %llu (> %d) not supported\n",
  756. num, sizeof(uint64_t));
  757. return AVERROR_INVALIDDATA;
  758. }
  759. break;
  760. }
  761. /* we handle 4 byte IDs at max */
  762. case EBML_ID_EBMLMAXIDLENGTH: {
  763. uint64_t num;
  764. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  765. return res;
  766. if (num > sizeof(uint32_t)) {
  767. av_log(matroska->ctx, AV_LOG_ERROR,
  768. "IDs of size %llu (> %u) not supported\n",
  769. num, sizeof(uint32_t));
  770. return AVERROR_INVALIDDATA;
  771. }
  772. break;
  773. }
  774. case EBML_ID_DOCTYPE: {
  775. char *text;
  776. if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
  777. return res;
  778. if (doctype) {
  779. if (*doctype)
  780. av_free(*doctype);
  781. *doctype = text;
  782. } else
  783. av_free(text);
  784. break;
  785. }
  786. case EBML_ID_DOCTYPEREADVERSION: {
  787. uint64_t num;
  788. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  789. return res;
  790. if (version)
  791. *version = num;
  792. break;
  793. }
  794. default:
  795. av_log(matroska->ctx, AV_LOG_INFO,
  796. "Unknown data type 0x%x in EBML header", id);
  797. /* pass-through */
  798. case EBML_ID_VOID:
  799. /* we ignore these two, as they don't tell us anything we
  800. * care about */
  801. case EBML_ID_EBMLVERSION:
  802. case EBML_ID_DOCTYPEVERSION:
  803. res = ebml_read_skip (matroska);
  804. break;
  805. }
  806. }
  807. return 0;
  808. }
  809. /*
  810. * Put one packet in an application-supplied AVPacket struct.
  811. * Returns 0 on success or -1 on failure.
  812. */
  813. static int
  814. matroska_deliver_packet (MatroskaDemuxContext *matroska,
  815. AVPacket *pkt)
  816. {
  817. if (matroska->num_packets > 0) {
  818. memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
  819. av_free(matroska->packets[0]);
  820. if (matroska->num_packets > 1) {
  821. memmove(&matroska->packets[0], &matroska->packets[1],
  822. (matroska->num_packets - 1) * sizeof(AVPacket *));
  823. matroska->packets =
  824. av_realloc(matroska->packets, (matroska->num_packets - 1) *
  825. sizeof(AVPacket *));
  826. } else {
  827. av_free(matroska->packets);
  828. matroska->packets = NULL;
  829. }
  830. matroska->num_packets--;
  831. return 0;
  832. }
  833. return -1;
  834. }
  835. /*
  836. * Put a packet into our internal queue. Will be delivered to the
  837. * user/application during the next get_packet() call.
  838. */
  839. static void
  840. matroska_queue_packet (MatroskaDemuxContext *matroska,
  841. AVPacket *pkt)
  842. {
  843. matroska->packets =
  844. av_realloc(matroska->packets, (matroska->num_packets + 1) *
  845. sizeof(AVPacket *));
  846. matroska->packets[matroska->num_packets] = pkt;
  847. matroska->num_packets++;
  848. }
  849. /*
  850. * Autodetecting...
  851. */
  852. static int
  853. matroska_probe (AVProbeData *p)
  854. {
  855. uint64_t total = 0;
  856. int len_mask = 0x80, size = 1, n = 1;
  857. uint8_t probe_data[] = { 'm', 'a', 't', 'r', 'o', 's', 'k', 'a' };
  858. if (p->buf_size < 5)
  859. return 0;
  860. /* ebml header? */
  861. if ((p->buf[0] << 24 | p->buf[1] << 16 |
  862. p->buf[2] << 8 | p->buf[3]) != EBML_ID_HEADER)
  863. return 0;
  864. /* length of header */
  865. total = p->buf[4];
  866. while (size <= 8 && !(total & len_mask)) {
  867. size++;
  868. len_mask >>= 1;
  869. }
  870. if (size > 8)
  871. return 0;
  872. total &= (len_mask - 1);
  873. while (n < size)
  874. total = (total << 8) | p->buf[4 + n++];
  875. /* does the probe data contain the whole header? */
  876. if (p->buf_size < 4 + size + total)
  877. return 0;
  878. /* the header must contain the document type 'matroska'. For now,
  879. * we don't parse the whole header but simply check for the
  880. * availability of that array of characters inside the header.
  881. * Not fully fool-proof, but good enough. */
  882. for (n = 4 + size; n < 4 + size + total - sizeof(probe_data); n++)
  883. if (!memcmp (&p->buf[n], probe_data, sizeof(probe_data)))
  884. return AVPROBE_SCORE_MAX;
  885. return 0;
  886. }
  887. /*
  888. * From here on, it's all XML-style DTD stuff... Needs no comments.
  889. */
  890. static int
  891. matroska_parse_info (MatroskaDemuxContext *matroska)
  892. {
  893. int res = 0;
  894. uint32_t id;
  895. av_log(matroska->ctx, AV_LOG_DEBUG, "Parsing info...\n");
  896. while (res == 0) {
  897. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  898. res = AVERROR_IO;
  899. break;
  900. } else if (matroska->level_up) {
  901. matroska->level_up--;
  902. break;
  903. }
  904. switch (id) {
  905. /* cluster timecode */
  906. case MATROSKA_ID_TIMECODESCALE: {
  907. uint64_t num;
  908. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  909. break;
  910. matroska->time_scale = num;
  911. break;
  912. }
  913. case MATROSKA_ID_DURATION: {
  914. double num;
  915. if ((res = ebml_read_float(matroska, &id, &num)) < 0)
  916. break;
  917. matroska->duration = num * matroska->time_scale;
  918. break;
  919. }
  920. case MATROSKA_ID_WRITINGAPP: {
  921. char *text;
  922. if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
  923. break;
  924. matroska->writing_app = text;
  925. break;
  926. }
  927. case MATROSKA_ID_MUXINGAPP: {
  928. char *text;
  929. if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
  930. break;
  931. matroska->muxing_app = text;
  932. break;
  933. }
  934. case MATROSKA_ID_DATEUTC: {
  935. int64_t time;
  936. if ((res = ebml_read_date(matroska, &id, &time)) < 0)
  937. break;
  938. matroska->created = time;
  939. break;
  940. }
  941. default:
  942. av_log(matroska->ctx, AV_LOG_INFO,
  943. "Unknown entry 0x%x in info header\n", id);
  944. /* fall-through */
  945. case EBML_ID_VOID:
  946. res = ebml_read_skip(matroska);
  947. break;
  948. }
  949. if (matroska->level_up) {
  950. matroska->level_up--;
  951. break;
  952. }
  953. }
  954. return res;
  955. }
  956. static int
  957. matroska_add_stream (MatroskaDemuxContext *matroska)
  958. {
  959. int res = 0;
  960. uint32_t id;
  961. MatroskaTrack *track;
  962. av_log(matroska->ctx, AV_LOG_DEBUG, "parsing track, adding stream..,\n");
  963. /* Allocate a generic track. As soon as we know its type we'll realloc. */
  964. track = av_mallocz(sizeof(MatroskaTrack));
  965. matroska->num_tracks++;
  966. /* start with the master */
  967. if ((res = ebml_read_master(matroska, &id)) < 0)
  968. return res;
  969. /* try reading the trackentry headers */
  970. while (res == 0) {
  971. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  972. res = AVERROR_IO;
  973. break;
  974. } else if (matroska->level_up > 0) {
  975. matroska->level_up--;
  976. break;
  977. }
  978. switch (id) {
  979. /* track number (unique stream ID) */
  980. case MATROSKA_ID_TRACKNUMBER: {
  981. uint64_t num;
  982. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  983. break;
  984. track->num = num;
  985. break;
  986. }
  987. /* track UID (unique identifier) */
  988. case MATROSKA_ID_TRACKUID: {
  989. uint64_t num;
  990. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  991. break;
  992. track->uid = num;
  993. break;
  994. }
  995. /* track type (video, audio, combined, subtitle, etc.) */
  996. case MATROSKA_ID_TRACKTYPE: {
  997. uint64_t num;
  998. if (track->type != 0) {
  999. av_log(matroska->ctx, AV_LOG_INFO,
  1000. "More than one tracktype in an entry - skip\n");
  1001. break;
  1002. }
  1003. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  1004. break;
  1005. track->type = num;
  1006. /* ok, so we're actually going to reallocate this thing */
  1007. switch (track->type) {
  1008. case MATROSKA_TRACK_TYPE_VIDEO:
  1009. track = (MatroskaTrack *)
  1010. av_realloc(track, sizeof(MatroskaVideoTrack));
  1011. break;
  1012. case MATROSKA_TRACK_TYPE_AUDIO:
  1013. track = (MatroskaTrack *)
  1014. av_realloc(track, sizeof(MatroskaAudioTrack));
  1015. ((MatroskaAudioTrack *)track)->channels = 1;
  1016. ((MatroskaAudioTrack *)track)->samplerate = 8000;
  1017. break;
  1018. case MATROSKA_TRACK_TYPE_SUBTITLE:
  1019. track = (MatroskaTrack *)
  1020. av_realloc(track, sizeof(MatroskaSubtitleTrack));
  1021. break;
  1022. case MATROSKA_TRACK_TYPE_COMPLEX:
  1023. case MATROSKA_TRACK_TYPE_LOGO:
  1024. case MATROSKA_TRACK_TYPE_CONTROL:
  1025. default:
  1026. av_log(matroska->ctx, AV_LOG_INFO,
  1027. "Unknown or unsupported track type 0x%x\n",
  1028. track->type);
  1029. track->type = 0;
  1030. break;
  1031. }
  1032. matroska->tracks[matroska->num_tracks - 1] = track;
  1033. break;
  1034. }
  1035. /* tracktype specific stuff for video */
  1036. case MATROSKA_ID_TRACKVIDEO: {
  1037. MatroskaVideoTrack *videotrack;
  1038. if (track->type != MATROSKA_TRACK_TYPE_VIDEO) {
  1039. av_log(matroska->ctx, AV_LOG_INFO,
  1040. "video data in non-video track - ignoring\n");
  1041. res = AVERROR_INVALIDDATA;
  1042. break;
  1043. } else if ((res = ebml_read_master(matroska, &id)) < 0)
  1044. break;
  1045. videotrack = (MatroskaVideoTrack *)track;
  1046. while (res == 0) {
  1047. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1048. res = AVERROR_IO;
  1049. break;
  1050. } else if (matroska->level_up > 0) {
  1051. matroska->level_up--;
  1052. break;
  1053. }
  1054. switch (id) {
  1055. /* fixme, this should be one-up, but I get it here */
  1056. case MATROSKA_ID_TRACKDEFAULTDURATION: {
  1057. uint64_t num;
  1058. if ((res = ebml_read_uint (matroska, &id,
  1059. &num)) < 0)
  1060. break;
  1061. track->default_duration = num;
  1062. break;
  1063. }
  1064. /* video framerate */
  1065. case MATROSKA_ID_VIDEOFRAMERATE: {
  1066. double num;
  1067. if ((res = ebml_read_float(matroska, &id,
  1068. &num)) < 0)
  1069. break;
  1070. track->default_duration = 1000000000 * (1. / num);
  1071. break;
  1072. }
  1073. /* width of the size to display the video at */
  1074. case MATROSKA_ID_VIDEODISPLAYWIDTH: {
  1075. uint64_t num;
  1076. if ((res = ebml_read_uint(matroska, &id,
  1077. &num)) < 0)
  1078. break;
  1079. videotrack->display_width = num;
  1080. break;
  1081. }
  1082. /* height of the size to display the video at */
  1083. case MATROSKA_ID_VIDEODISPLAYHEIGHT: {
  1084. uint64_t num;
  1085. if ((res = ebml_read_uint(matroska, &id,
  1086. &num)) < 0)
  1087. break;
  1088. videotrack->display_height = num;
  1089. break;
  1090. }
  1091. /* width of the video in the file */
  1092. case MATROSKA_ID_VIDEOPIXELWIDTH: {
  1093. uint64_t num;
  1094. if ((res = ebml_read_uint(matroska, &id,
  1095. &num)) < 0)
  1096. break;
  1097. videotrack->pixel_width = num;
  1098. break;
  1099. }
  1100. /* height of the video in the file */
  1101. case MATROSKA_ID_VIDEOPIXELHEIGHT: {
  1102. uint64_t num;
  1103. if ((res = ebml_read_uint(matroska, &id,
  1104. &num)) < 0)
  1105. break;
  1106. videotrack->pixel_height = num;
  1107. break;
  1108. }
  1109. /* whether the video is interlaced */
  1110. case MATROSKA_ID_VIDEOFLAGINTERLACED: {
  1111. uint64_t num;
  1112. if ((res = ebml_read_uint(matroska, &id,
  1113. &num)) < 0)
  1114. break;
  1115. if (num)
  1116. track->flags |=
  1117. MATROSKA_VIDEOTRACK_INTERLACED;
  1118. else
  1119. track->flags &=
  1120. ~MATROSKA_VIDEOTRACK_INTERLACED;
  1121. break;
  1122. }
  1123. /* stereo mode (whether the video has two streams,
  1124. * where one is for the left eye and the other for
  1125. * the right eye, which creates a 3D-like
  1126. * effect) */
  1127. case MATROSKA_ID_VIDEOSTEREOMODE: {
  1128. uint64_t num;
  1129. if ((res = ebml_read_uint(matroska, &id,
  1130. &num)) < 0)
  1131. break;
  1132. if (num != MATROSKA_EYE_MODE_MONO &&
  1133. num != MATROSKA_EYE_MODE_LEFT &&
  1134. num != MATROSKA_EYE_MODE_RIGHT &&
  1135. num != MATROSKA_EYE_MODE_BOTH) {
  1136. av_log(matroska->ctx, AV_LOG_INFO,
  1137. "Ignoring unknown eye mode 0x%x\n",
  1138. (uint32_t) num);
  1139. break;
  1140. }
  1141. videotrack->eye_mode = num;
  1142. break;
  1143. }
  1144. /* aspect ratio behaviour */
  1145. case MATROSKA_ID_VIDEOASPECTRATIO: {
  1146. uint64_t num;
  1147. if ((res = ebml_read_uint(matroska, &id,
  1148. &num)) < 0)
  1149. break;
  1150. if (num != MATROSKA_ASPECT_RATIO_MODE_FREE &&
  1151. num != MATROSKA_ASPECT_RATIO_MODE_KEEP &&
  1152. num != MATROSKA_ASPECT_RATIO_MODE_FIXED) {
  1153. av_log(matroska->ctx, AV_LOG_INFO,
  1154. "Ignoring unknown aspect ratio 0x%x\n",
  1155. (uint32_t) num);
  1156. break;
  1157. }
  1158. videotrack->ar_mode = num;
  1159. break;
  1160. }
  1161. /* colourspace (only matters for raw video)
  1162. * fourcc */
  1163. case MATROSKA_ID_VIDEOCOLOURSPACE: {
  1164. uint64_t num;
  1165. if ((res = ebml_read_uint(matroska, &id,
  1166. &num)) < 0)
  1167. break;
  1168. videotrack->fourcc = num;
  1169. break;
  1170. }
  1171. default:
  1172. av_log(matroska->ctx, AV_LOG_INFO,
  1173. "Unknown video track header entry "
  1174. "0x%x - ignoring\n", id);
  1175. /* pass-through */
  1176. case EBML_ID_VOID:
  1177. res = ebml_read_skip(matroska);
  1178. break;
  1179. }
  1180. if (matroska->level_up) {
  1181. matroska->level_up--;
  1182. break;
  1183. }
  1184. }
  1185. break;
  1186. }
  1187. /* tracktype specific stuff for audio */
  1188. case MATROSKA_ID_TRACKAUDIO: {
  1189. MatroskaAudioTrack *audiotrack;
  1190. if (track->type != MATROSKA_TRACK_TYPE_AUDIO) {
  1191. av_log(matroska->ctx, AV_LOG_INFO,
  1192. "audio data in non-audio track - ignoring\n");
  1193. res = AVERROR_INVALIDDATA;
  1194. break;
  1195. } else if ((res = ebml_read_master(matroska, &id)) < 0)
  1196. break;
  1197. audiotrack = (MatroskaAudioTrack *)track;
  1198. while (res == 0) {
  1199. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1200. res = AVERROR_IO;
  1201. break;
  1202. } else if (matroska->level_up > 0) {
  1203. matroska->level_up--;
  1204. break;
  1205. }
  1206. switch (id) {
  1207. /* samplerate */
  1208. case MATROSKA_ID_AUDIOSAMPLINGFREQ: {
  1209. double num;
  1210. if ((res = ebml_read_float(matroska, &id,
  1211. &num)) < 0)
  1212. break;
  1213. audiotrack->samplerate = num;
  1214. break;
  1215. }
  1216. /* bitdepth */
  1217. case MATROSKA_ID_AUDIOBITDEPTH: {
  1218. uint64_t num;
  1219. if ((res = ebml_read_uint(matroska, &id,
  1220. &num)) < 0)
  1221. break;
  1222. audiotrack->bitdepth = num;
  1223. break;
  1224. }
  1225. /* channels */
  1226. case MATROSKA_ID_AUDIOCHANNELS: {
  1227. uint64_t num;
  1228. if ((res = ebml_read_uint(matroska, &id,
  1229. &num)) < 0)
  1230. break;
  1231. audiotrack->channels = num;
  1232. break;
  1233. }
  1234. default:
  1235. av_log(matroska->ctx, AV_LOG_INFO,
  1236. "Unknown audio track header entry "
  1237. "0x%x - ignoring\n", id);
  1238. /* pass-through */
  1239. case EBML_ID_VOID:
  1240. res = ebml_read_skip(matroska);
  1241. break;
  1242. }
  1243. if (matroska->level_up) {
  1244. matroska->level_up--;
  1245. break;
  1246. }
  1247. }
  1248. break;
  1249. }
  1250. /* codec identifier */
  1251. case MATROSKA_ID_CODECID: {
  1252. char *text;
  1253. if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
  1254. break;
  1255. track->codec_id = text;
  1256. break;
  1257. }
  1258. /* codec private data */
  1259. case MATROSKA_ID_CODECPRIVATE: {
  1260. uint8_t *data;
  1261. int size;
  1262. if ((res = ebml_read_binary(matroska, &id, &data, &size) < 0))
  1263. break;
  1264. track->codec_priv = data;
  1265. track->codec_priv_size = size;
  1266. break;
  1267. }
  1268. /* name of the codec */
  1269. case MATROSKA_ID_CODECNAME: {
  1270. char *text;
  1271. if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
  1272. break;
  1273. track->codec_name = text;
  1274. break;
  1275. }
  1276. /* name of this track */
  1277. case MATROSKA_ID_TRACKNAME: {
  1278. char *text;
  1279. if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
  1280. break;
  1281. track->name = text;
  1282. break;
  1283. }
  1284. /* language (matters for audio/subtitles, mostly) */
  1285. case MATROSKA_ID_TRACKLANGUAGE: {
  1286. char *text;
  1287. if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
  1288. break;
  1289. track->language = text;
  1290. break;
  1291. }
  1292. /* whether this is actually used */
  1293. case MATROSKA_ID_TRACKFLAGENABLED: {
  1294. uint64_t num;
  1295. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  1296. break;
  1297. if (num)
  1298. track->flags |= MATROSKA_TRACK_ENABLED;
  1299. else
  1300. track->flags &= ~MATROSKA_TRACK_ENABLED;
  1301. break;
  1302. }
  1303. /* whether it's the default for this track type */
  1304. case MATROSKA_ID_TRACKFLAGDEFAULT: {
  1305. uint64_t num;
  1306. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  1307. break;
  1308. if (num)
  1309. track->flags |= MATROSKA_TRACK_DEFAULT;
  1310. else
  1311. track->flags &= ~MATROSKA_TRACK_DEFAULT;
  1312. break;
  1313. }
  1314. /* lacing (like MPEG, where blocks don't end/start on frame
  1315. * boundaries) */
  1316. case MATROSKA_ID_TRACKFLAGLACING: {
  1317. uint64_t num;
  1318. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  1319. break;
  1320. if (num)
  1321. track->flags |= MATROSKA_TRACK_LACING;
  1322. else
  1323. track->flags &= ~MATROSKA_TRACK_LACING;
  1324. break;
  1325. }
  1326. /* default length (in time) of one data block in this track */
  1327. case MATROSKA_ID_TRACKDEFAULTDURATION: {
  1328. uint64_t num;
  1329. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  1330. break;
  1331. track->default_duration = num;
  1332. break;
  1333. }
  1334. default:
  1335. av_log(matroska->ctx, AV_LOG_INFO,
  1336. "Unknown track header entry 0x%x - ignoring\n", id);
  1337. /* pass-through */
  1338. case EBML_ID_VOID:
  1339. /* we ignore these because they're nothing useful. */
  1340. case MATROSKA_ID_CODECINFOURL:
  1341. case MATROSKA_ID_CODECDOWNLOADURL:
  1342. case MATROSKA_ID_TRACKMINCACHE:
  1343. case MATROSKA_ID_TRACKMAXCACHE:
  1344. res = ebml_read_skip(matroska);
  1345. break;
  1346. }
  1347. if (matroska->level_up) {
  1348. matroska->level_up--;
  1349. break;
  1350. }
  1351. }
  1352. return res;
  1353. }
  1354. static int
  1355. matroska_parse_tracks (MatroskaDemuxContext *matroska)
  1356. {
  1357. int res = 0;
  1358. uint32_t id;
  1359. av_log(matroska->ctx, AV_LOG_DEBUG, "parsing tracks...\n");
  1360. while (res == 0) {
  1361. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1362. res = AVERROR_IO;
  1363. break;
  1364. } else if (matroska->level_up) {
  1365. matroska->level_up--;
  1366. break;
  1367. }
  1368. switch (id) {
  1369. /* one track within the "all-tracks" header */
  1370. case MATROSKA_ID_TRACKENTRY:
  1371. res = matroska_add_stream(matroska);
  1372. break;
  1373. default:
  1374. av_log(matroska->ctx, AV_LOG_INFO,
  1375. "Unknown entry 0x%x in track header\n", id);
  1376. /* fall-through */
  1377. case EBML_ID_VOID:
  1378. res = ebml_read_skip(matroska);
  1379. break;
  1380. }
  1381. if (matroska->level_up) {
  1382. matroska->level_up--;
  1383. break;
  1384. }
  1385. }
  1386. return res;
  1387. }
  1388. static int
  1389. matroska_parse_index (MatroskaDemuxContext *matroska)
  1390. {
  1391. int res = 0;
  1392. uint32_t id;
  1393. MatroskaDemuxIndex idx;
  1394. av_log(matroska->ctx, AV_LOG_DEBUG, "parsing index...\n");
  1395. while (res == 0) {
  1396. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1397. res = AVERROR_IO;
  1398. break;
  1399. } else if (matroska->level_up) {
  1400. matroska->level_up--;
  1401. break;
  1402. }
  1403. switch (id) {
  1404. /* one single index entry ('point') */
  1405. case MATROSKA_ID_POINTENTRY:
  1406. if ((res = ebml_read_master(matroska, &id)) < 0)
  1407. break;
  1408. /* in the end, we hope to fill one entry with a
  1409. * timestamp, a file position and a tracknum */
  1410. idx.pos = (uint64_t) -1;
  1411. idx.time = (uint64_t) -1;
  1412. idx.track = (uint16_t) -1;
  1413. while (res == 0) {
  1414. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1415. res = AVERROR_IO;
  1416. break;
  1417. } else if (matroska->level_up) {
  1418. matroska->level_up--;
  1419. break;
  1420. }
  1421. switch (id) {
  1422. /* one single index entry ('point') */
  1423. case MATROSKA_ID_CUETIME: {
  1424. int64_t time;
  1425. if ((res = ebml_read_uint(matroska, &id,
  1426. &time)) < 0)
  1427. break;
  1428. idx.time = time * matroska->time_scale;
  1429. break;
  1430. }
  1431. /* position in the file + track to which it
  1432. * belongs */
  1433. case MATROSKA_ID_CUETRACKPOSITION:
  1434. if ((res = ebml_read_master(matroska, &id)) < 0)
  1435. break;
  1436. while (res == 0) {
  1437. if (!(id = ebml_peek_id (matroska,
  1438. &matroska->level_up))) {
  1439. res = AVERROR_IO;
  1440. break;
  1441. } else if (matroska->level_up) {
  1442. matroska->level_up--;
  1443. break;
  1444. }
  1445. switch (id) {
  1446. /* track number */
  1447. case MATROSKA_ID_CUETRACK: {
  1448. uint64_t num;
  1449. if ((res = ebml_read_uint(matroska,
  1450. &id, &num)) < 0)
  1451. break;
  1452. idx.track = num;
  1453. break;
  1454. }
  1455. /* position in file */
  1456. case MATROSKA_ID_CUECLUSTERPOSITION: {
  1457. uint64_t num;
  1458. if ((res = ebml_read_uint(matroska,
  1459. &id, &num)) < 0)
  1460. break;
  1461. idx.pos = num;
  1462. break;
  1463. }
  1464. default:
  1465. av_log(matroska->ctx, AV_LOG_INFO,
  1466. "Unknown entry 0x%x in "
  1467. "CuesTrackPositions\n", id);
  1468. /* fall-through */
  1469. case EBML_ID_VOID:
  1470. res = ebml_read_skip(matroska);
  1471. break;
  1472. }
  1473. if (matroska->level_up) {
  1474. matroska->level_up--;
  1475. break;
  1476. }
  1477. }
  1478. break;
  1479. default:
  1480. av_log(matroska->ctx, AV_LOG_INFO,
  1481. "Unknown entry 0x%x in cuespoint "
  1482. "index\n", id);
  1483. /* fall-through */
  1484. case EBML_ID_VOID:
  1485. res = ebml_read_skip(matroska);
  1486. break;
  1487. }
  1488. if (matroska->level_up) {
  1489. matroska->level_up--;
  1490. break;
  1491. }
  1492. }
  1493. /* so let's see if we got what we wanted */
  1494. if (idx.pos != (uint64_t) -1 &&
  1495. idx.time != (uint64_t) -1 &&
  1496. idx.track != (uint16_t) -1) {
  1497. if (matroska->num_indexes % 32 == 0) {
  1498. /* re-allocate bigger index */
  1499. matroska->index =
  1500. av_realloc(matroska->index,
  1501. (matroska->num_indexes + 32) *
  1502. sizeof(MatroskaDemuxIndex));
  1503. }
  1504. matroska->index[matroska->num_indexes] = idx;
  1505. matroska->num_indexes++;
  1506. }
  1507. break;
  1508. default:
  1509. av_log(matroska->ctx, AV_LOG_INFO,
  1510. "Unknown entry 0x%x in cues header\n", id);
  1511. /* fall-through */
  1512. case EBML_ID_VOID:
  1513. res = ebml_read_skip(matroska);
  1514. break;
  1515. }
  1516. if (matroska->level_up) {
  1517. matroska->level_up--;
  1518. break;
  1519. }
  1520. }
  1521. return res;
  1522. }
  1523. static int
  1524. matroska_parse_metadata (MatroskaDemuxContext *matroska)
  1525. {
  1526. int res = 0;
  1527. uint32_t id;
  1528. while (res == 0) {
  1529. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1530. res = AVERROR_IO;
  1531. break;
  1532. } else if (matroska->level_up) {
  1533. matroska->level_up--;
  1534. break;
  1535. }
  1536. switch (id) {
  1537. /* Hm, this is unsupported... */
  1538. default:
  1539. av_log(matroska->ctx, AV_LOG_INFO,
  1540. "Unknown entry 0x%x in metadata header\n", id);
  1541. /* fall-through */
  1542. case EBML_ID_VOID:
  1543. res = ebml_read_skip(matroska);
  1544. break;
  1545. }
  1546. if (matroska->level_up) {
  1547. matroska->level_up--;
  1548. break;
  1549. }
  1550. }
  1551. return res;
  1552. }
  1553. static int
  1554. matroska_parse_seekhead (MatroskaDemuxContext *matroska)
  1555. {
  1556. int res = 0;
  1557. uint32_t id;
  1558. av_log(matroska->ctx, AV_LOG_DEBUG, "parsing seekhead...\n");
  1559. while (res == 0) {
  1560. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1561. res = AVERROR_IO;
  1562. break;
  1563. } else if (matroska->level_up) {
  1564. matroska->level_up--;
  1565. break;
  1566. }
  1567. switch (id) {
  1568. case MATROSKA_ID_SEEKENTRY: {
  1569. uint32_t seek_id = 0, peek_id_cache = 0;
  1570. uint64_t seek_pos = (uint64_t) -1, t;
  1571. if ((res = ebml_read_master(matroska, &id)) < 0)
  1572. break;
  1573. while (res == 0) {
  1574. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1575. res = AVERROR_IO;
  1576. break;
  1577. } else if (matroska->level_up) {
  1578. matroska->level_up--;
  1579. break;
  1580. }
  1581. switch (id) {
  1582. case MATROSKA_ID_SEEKID:
  1583. res = ebml_read_uint(matroska, &id, &t);
  1584. seek_id = t;
  1585. break;
  1586. case MATROSKA_ID_SEEKPOSITION:
  1587. res = ebml_read_uint(matroska, &id, &seek_pos);
  1588. break;
  1589. default:
  1590. av_log(matroska->ctx, AV_LOG_INFO,
  1591. "Unknown seekhead ID 0x%x\n", id);
  1592. /* fall-through */
  1593. case EBML_ID_VOID:
  1594. res = ebml_read_skip(matroska);
  1595. break;
  1596. }
  1597. if (matroska->level_up) {
  1598. matroska->level_up--;
  1599. break;
  1600. }
  1601. }
  1602. if (!seek_id || seek_pos == (uint64_t) -1) {
  1603. av_log(matroska->ctx, AV_LOG_INFO,
  1604. "Incomplete seekhead entry (0x%x/%llu)\n",
  1605. seek_id, seek_pos);
  1606. break;
  1607. }
  1608. switch (seek_id) {
  1609. case MATROSKA_ID_CUES:
  1610. case MATROSKA_ID_TAGS: {
  1611. uint32_t level_up = matroska->level_up;
  1612. offset_t before_pos;
  1613. uint64_t length;
  1614. MatroskaLevel level;
  1615. /* remember the peeked ID and the current position */
  1616. peek_id_cache = matroska->peek_id;
  1617. before_pos = url_ftell(&matroska->ctx->pb);
  1618. /* seek */
  1619. if ((res = ebml_read_seek(matroska, seek_pos +
  1620. matroska->segment_start)) < 0)
  1621. return res;
  1622. /* we don't want to lose our seekhead level, so we add
  1623. * a dummy. This is a crude hack. */
  1624. if (matroska->num_levels == EBML_MAX_DEPTH) {
  1625. av_log(matroska->ctx, AV_LOG_INFO,
  1626. "Max EBML element depth (%d) reached, "
  1627. "cannot parse further.\n", EBML_MAX_DEPTH);
  1628. return AVERROR_UNKNOWN;
  1629. }
  1630. level.start = 0;
  1631. level.length = (uint64_t)-1;
  1632. matroska->levels[matroska->num_levels] = level;
  1633. matroska->num_levels++;
  1634. /* check ID */
  1635. if (!(id = ebml_peek_id (matroska,
  1636. &matroska->level_up)))
  1637. break;
  1638. if (id != seek_id) {
  1639. av_log(matroska->ctx, AV_LOG_INFO,
  1640. "We looked for ID=0x%x but got "
  1641. "ID=0x%x (pos=%llu)",
  1642. seek_id, id, seek_pos +
  1643. matroska->segment_start);
  1644. goto finish;
  1645. }
  1646. /* read master + parse */
  1647. if ((res = ebml_read_master(matroska, &id)) < 0)
  1648. break;
  1649. switch (id) {
  1650. case MATROSKA_ID_CUES:
  1651. if (!(res = matroska_parse_index(matroska)) ||
  1652. url_feof(&matroska->ctx->pb)) {
  1653. matroska->index_parsed = 1;
  1654. res = 0;
  1655. }
  1656. break;
  1657. case MATROSKA_ID_TAGS:
  1658. if (!(res = matroska_parse_metadata(matroska)) ||
  1659. url_feof(&matroska->ctx->pb)) {
  1660. matroska->metadata_parsed = 1;
  1661. res = 0;
  1662. }
  1663. break;
  1664. }
  1665. if (res < 0)
  1666. break;
  1667. finish:
  1668. /* remove dummy level */
  1669. while (matroska->num_levels) {
  1670. matroska->num_levels--;
  1671. length =
  1672. matroska->levels[matroska->num_levels].length;
  1673. if (length == (uint64_t)-1)
  1674. break;
  1675. }
  1676. /* seek back */
  1677. if ((res = ebml_read_seek(matroska, before_pos)) < 0)
  1678. return res;
  1679. matroska->peek_id = peek_id_cache;
  1680. matroska->level_up = level_up;
  1681. break;
  1682. }
  1683. default:
  1684. av_log(matroska->ctx, AV_LOG_INFO,
  1685. "Ignoring seekhead entry for ID=0x%x\n",
  1686. seek_id);
  1687. break;
  1688. }
  1689. break;
  1690. }
  1691. default:
  1692. av_log(matroska->ctx, AV_LOG_INFO,
  1693. "Unknown seekhead ID 0x%x\n", id);
  1694. /* fall-through */
  1695. case EBML_ID_VOID:
  1696. res = ebml_read_skip(matroska);
  1697. break;
  1698. }
  1699. if (matroska->level_up) {
  1700. matroska->level_up--;
  1701. break;
  1702. }
  1703. }
  1704. return res;
  1705. }
  1706. static int
  1707. matroska_read_header (AVFormatContext *s,
  1708. AVFormatParameters *ap)
  1709. {
  1710. MatroskaDemuxContext *matroska = s->priv_data;
  1711. char *doctype;
  1712. int version, last_level, res = 0;
  1713. uint32_t id;
  1714. matroska->ctx = s;
  1715. /* First read the EBML header. */
  1716. doctype = NULL;
  1717. if ((res = ebml_read_header(matroska, &doctype, &version)) < 0)
  1718. return res;
  1719. if ((doctype == NULL) || strcmp(doctype, "matroska")) {
  1720. av_log(matroska->ctx, AV_LOG_ERROR,
  1721. "Wrong EBML doctype ('%s' != 'matroska').\n",
  1722. doctype ? doctype : "(none)");
  1723. if (doctype)
  1724. av_free(doctype);
  1725. return AVERROR_NOFMT;
  1726. }
  1727. av_free(doctype);
  1728. if (version != 1) {
  1729. av_log(matroska->ctx, AV_LOG_ERROR,
  1730. "Matroska demuxer version 1 too old for file version %d\n",
  1731. version);
  1732. return AVERROR_NOFMT;
  1733. }
  1734. /* The next thing is a segment. */
  1735. while (1) {
  1736. if (!(id = ebml_peek_id(matroska, &last_level)))
  1737. return AVERROR_IO;
  1738. if (id == MATROSKA_ID_SEGMENT)
  1739. break;
  1740. /* oi! */
  1741. av_log(matroska->ctx, AV_LOG_INFO,
  1742. "Expected a Segment ID (0x%x), but received 0x%x!\n",
  1743. MATROSKA_ID_SEGMENT, id);
  1744. if ((res = ebml_read_skip(matroska)) < 0)
  1745. return res;
  1746. }
  1747. /* We now have a Matroska segment.
  1748. * Seeks are from the beginning of the segment,
  1749. * after the segment ID/length. */
  1750. if ((res = ebml_read_master(matroska, &id)) < 0)
  1751. return res;
  1752. matroska->segment_start = url_ftell(&s->pb);
  1753. matroska->time_scale = 1000000;
  1754. /* we've found our segment, start reading the different contents in here */
  1755. while (res == 0) {
  1756. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1757. res = AVERROR_IO;
  1758. break;
  1759. } else if (matroska->level_up) {
  1760. matroska->level_up--;
  1761. break;
  1762. }
  1763. switch (id) {
  1764. /* stream info */
  1765. case MATROSKA_ID_INFO: {
  1766. if ((res = ebml_read_master(matroska, &id)) < 0)
  1767. break;
  1768. res = matroska_parse_info(matroska);
  1769. break;
  1770. }
  1771. /* track info headers */
  1772. case MATROSKA_ID_TRACKS: {
  1773. if ((res = ebml_read_master(matroska, &id)) < 0)
  1774. break;
  1775. res = matroska_parse_tracks(matroska);
  1776. break;
  1777. }
  1778. /* stream index */
  1779. case MATROSKA_ID_CUES: {
  1780. if (!matroska->index_parsed) {
  1781. if ((res = ebml_read_master(matroska, &id)) < 0)
  1782. break;
  1783. res = matroska_parse_index(matroska);
  1784. } else
  1785. res = ebml_read_skip(matroska);
  1786. break;
  1787. }
  1788. /* metadata */
  1789. case MATROSKA_ID_TAGS: {
  1790. if (!matroska->metadata_parsed) {
  1791. if ((res = ebml_read_master(matroska, &id)) < 0)
  1792. break;
  1793. res = matroska_parse_metadata(matroska);
  1794. } else
  1795. res = ebml_read_skip(matroska);
  1796. break;
  1797. }
  1798. /* file index (if seekable, seek to Cues/Tags to parse it) */
  1799. case MATROSKA_ID_SEEKHEAD: {
  1800. if ((res = ebml_read_master(matroska, &id)) < 0)
  1801. break;
  1802. res = matroska_parse_seekhead(matroska);
  1803. break;
  1804. }
  1805. case MATROSKA_ID_CLUSTER: {
  1806. /* Do not read the master - this will be done in the next
  1807. * call to matroska_read_packet. */
  1808. res = 1;
  1809. break;
  1810. }
  1811. default:
  1812. av_log(matroska->ctx, AV_LOG_INFO,
  1813. "Unknown matroska file header ID 0x%x\n", id);
  1814. /* fall-through */
  1815. case EBML_ID_VOID:
  1816. res = ebml_read_skip(matroska);
  1817. break;
  1818. }
  1819. if (matroska->level_up) {
  1820. matroska->level_up--;
  1821. break;
  1822. }
  1823. }
  1824. if (res < 0)
  1825. return res;
  1826. /* Have we found a cluster? */
  1827. if (res == 1) {
  1828. int i;
  1829. enum CodecID codec_id;
  1830. MatroskaTrack *track;
  1831. AVStream *st;
  1832. av_set_pts_info(s, 24, 1, 1000); /* 24 bit pts in ms */
  1833. for (i = 0; i < matroska->num_tracks; i++) {
  1834. track = matroska->tracks[i];
  1835. /* libavformat does not really support subtitles.
  1836. * Also apply some sanity checks. */
  1837. if ((track->type == MATROSKA_TRACK_TYPE_SUBTITLE) ||
  1838. (track->codec_id == NULL))
  1839. continue;
  1840. /* Set the FourCC from the CodecID. */
  1841. /* This is the MS compatibility mode which stores a
  1842. * BITMAPINFOHEADER in the CodecPrivate. */
  1843. if (!strcmp(track->codec_id,
  1844. MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC) &&
  1845. (track->codec_priv_size >= 40) &&
  1846. (track->codec_priv != NULL)) {
  1847. unsigned char *p;
  1848. /* Offset of biCompression. Stored in LE. */
  1849. p = (unsigned char *)track->codec_priv + 16;
  1850. ((MatroskaVideoTrack *)track)->fourcc = (p[3] << 24) |
  1851. (p[2] << 16) | (p[1] << 8) | p[0];
  1852. codec_id = codec_get_bmp_id(((MatroskaVideoTrack *)track)->fourcc);
  1853. } else if (!strcmp(track->codec_id,
  1854. MATROSKA_CODEC_ID_VIDEO_MPEG4_SP) ||
  1855. !strcmp(track->codec_id,
  1856. MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP) ||
  1857. !strcmp(track->codec_id,
  1858. MATROSKA_CODEC_ID_VIDEO_MPEG4_AP))
  1859. codec_id = CODEC_ID_MPEG4;
  1860. /* else if (!strcmp(track->codec_id, */
  1861. /* MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED)) */
  1862. /* codec_id = CODEC_ID_???; */
  1863. else if (!strcmp(track->codec_id,
  1864. MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3))
  1865. codec_id = CODEC_ID_MSMPEG4V3;
  1866. else if (!strcmp(track->codec_id,
  1867. MATROSKA_CODEC_ID_VIDEO_MPEG1) ||
  1868. !strcmp(track->codec_id,
  1869. MATROSKA_CODEC_ID_VIDEO_MPEG2))
  1870. codec_id = CODEC_ID_MPEG2VIDEO;
  1871. /* This is the MS compatibility mode which stores a
  1872. * WAVEFORMATEX in the CodecPrivate. */
  1873. else if (!strcmp(track->codec_id,
  1874. MATROSKA_CODEC_ID_AUDIO_ACM) &&
  1875. (track->codec_priv_size >= 18) &&
  1876. (track->codec_priv != NULL)) {
  1877. unsigned char *p;
  1878. uint16_t tag;
  1879. /* Offset of wFormatTag. Stored in LE. */
  1880. p = (unsigned char *)track->codec_priv;
  1881. tag = (p[1] << 8) | p[0];
  1882. codec_id = codec_get_wav_id(tag);
  1883. } else if (!strcmp(track->codec_id,
  1884. MATROSKA_CODEC_ID_AUDIO_MPEG1_L1) ||
  1885. !strcmp(track->codec_id,
  1886. MATROSKA_CODEC_ID_AUDIO_MPEG1_L2) ||
  1887. !strcmp(track->codec_id,
  1888. MATROSKA_CODEC_ID_AUDIO_MPEG1_L3))
  1889. codec_id = CODEC_ID_MP3;
  1890. else if (!strcmp(track->codec_id,
  1891. MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE))
  1892. codec_id = CODEC_ID_PCM_U16BE;
  1893. else if (!strcmp(track->codec_id,
  1894. MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE))
  1895. codec_id = CODEC_ID_PCM_U16LE;
  1896. /* else if (!strcmp(track->codec_id, */
  1897. /* MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT)) */
  1898. /* codec_id = CODEC_ID_PCM_???; */
  1899. else if (!strcmp(track->codec_id,
  1900. MATROSKA_CODEC_ID_AUDIO_AC3))
  1901. codec_id = CODEC_ID_AC3;
  1902. /* No such codec id so far. */
  1903. /* else if (!strcmp(track->codec_id, */
  1904. /* MATROSKA_CODEC_ID_AUDIO_DTS)) */
  1905. /* codec_id = CODEC_ID_DTS; */
  1906. else if (!strcmp(track->codec_id,
  1907. MATROSKA_CODEC_ID_AUDIO_VORBIS))
  1908. codec_id = CODEC_ID_VORBIS;
  1909. else if (!strcmp(track->codec_id,
  1910. MATROSKA_CODEC_ID_AUDIO_MPEG2) ||
  1911. !strcmp(track->codec_id,
  1912. MATROSKA_CODEC_ID_AUDIO_MPEG4))
  1913. codec_id = CODEC_ID_AAC;
  1914. else
  1915. codec_id = CODEC_ID_NONE;
  1916. if (codec_id == CODEC_ID_NONE) {
  1917. av_log(matroska->ctx, AV_LOG_INFO,
  1918. "Unknown/unsupported CodecID %s.\n",
  1919. track->codec_id);
  1920. }
  1921. track->stream_index = matroska->num_streams;
  1922. matroska->num_streams++;
  1923. st = av_new_stream(s, track->stream_index);
  1924. if (st == NULL)
  1925. return AVERROR_NOMEM;
  1926. st->codec.codec_id = codec_id;
  1927. if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
  1928. MatroskaVideoTrack *videotrack = (MatroskaVideoTrack *)track;
  1929. st->codec.codec_type = CODEC_TYPE_VIDEO;
  1930. st->codec.codec_tag = videotrack->fourcc;
  1931. st->codec.width = videotrack->pixel_width;
  1932. st->codec.height = videotrack->pixel_height;
  1933. if (videotrack->display_width == 0)
  1934. st->codec.sample_aspect_ratio.num =
  1935. videotrack->pixel_width;
  1936. else
  1937. st->codec.sample_aspect_ratio.num =
  1938. videotrack->display_width;
  1939. if (videotrack->display_height == 0)
  1940. st->codec.sample_aspect_ratio.num =
  1941. videotrack->pixel_height;
  1942. else
  1943. st->codec.sample_aspect_ratio.num =
  1944. videotrack->display_height;
  1945. } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
  1946. MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
  1947. st->codec.codec_type = CODEC_TYPE_AUDIO;
  1948. st->codec.sample_rate = audiotrack->samplerate;
  1949. st->codec.channels = audiotrack->channels;
  1950. }
  1951. /* What do we do with private data? E.g. for Vorbis. */
  1952. }
  1953. }
  1954. return 0;
  1955. }
  1956. static int
  1957. matroska_find_track_by_num (MatroskaDemuxContext *matroska,
  1958. int num)
  1959. {
  1960. int i;
  1961. for (i = 0; i < matroska->num_tracks; i++)
  1962. if (matroska->tracks[i]->num == num)
  1963. return i;
  1964. return -1;
  1965. }
  1966. static int
  1967. matroska_parse_blockgroup (MatroskaDemuxContext *matroska,
  1968. uint64_t cluster_time)
  1969. {
  1970. int res = 0;
  1971. uint32_t id;
  1972. AVPacket *pkt;
  1973. int is_keyframe = PKT_FLAG_KEY, last_num_packets = matroska->num_packets;
  1974. av_log(matroska->ctx, AV_LOG_DEBUG, "parsing blockgroup...\n");
  1975. while (res == 0) {
  1976. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  1977. res = AVERROR_IO;
  1978. break;
  1979. } else if (matroska->level_up) {
  1980. matroska->level_up--;
  1981. break;
  1982. }
  1983. switch (id) {
  1984. /* one block inside the group. Note, block parsing is one
  1985. * of the harder things, so this code is a bit complicated.
  1986. * See http://www.matroska.org/ for documentation. */
  1987. case MATROSKA_ID_BLOCK: {
  1988. uint8_t *data, *origdata;
  1989. int size;
  1990. uint64_t time;
  1991. uint32_t *lace_size = NULL;
  1992. int n, track, flags, laces = 0;
  1993. uint64_t num;
  1994. if ((res = ebml_read_binary(matroska, &id, &data, &size)) < 0)
  1995. break;
  1996. origdata = data;
  1997. /* first byte(s): blocknum */
  1998. if ((n = matroska_ebmlnum_uint(data, size, &num)) < 0) {
  1999. av_log(matroska->ctx, AV_LOG_ERROR,
  2000. "EBML block data error\n");
  2001. av_free(origdata);
  2002. break;
  2003. }
  2004. data += n;
  2005. size -= n;
  2006. /* fetch track from num */
  2007. track = matroska_find_track_by_num(matroska, num);
  2008. if (size <= 3 || track < 0 || track >= matroska->num_tracks) {
  2009. av_log(matroska->ctx, AV_LOG_INFO,
  2010. "Invalid stream %d or size %u\n", track, size);
  2011. av_free(origdata);
  2012. break;
  2013. }
  2014. /* time (relative to cluster time) */
  2015. time = ((data[0] << 8) | data[1]) * matroska->time_scale;
  2016. data += 2;
  2017. size -= 2;
  2018. flags = *data;
  2019. data += 1;
  2020. size -= 1;
  2021. switch ((flags & 0x06) >> 1) {
  2022. case 0x0: /* no lacing */
  2023. laces = 1;
  2024. lace_size = av_mallocz(sizeof(int));
  2025. lace_size[0] = size;
  2026. break;
  2027. case 0x1: /* xiph lacing */
  2028. case 0x2: /* fixed-size lacing */
  2029. case 0x3: /* EBML lacing */
  2030. if (size == 0) {
  2031. res = -1;
  2032. break;
  2033. }
  2034. laces = (*data) + 1;
  2035. data += 1;
  2036. size -= 1;
  2037. lace_size = av_mallocz(laces * sizeof(int));
  2038. switch ((flags & 0x06) >> 1) {
  2039. case 0x1: /* xiph lacing */ {
  2040. uint8_t temp;
  2041. uint32_t total = 0;
  2042. for (n = 0; res == 0 && n < laces - 1; n++) {
  2043. while (1) {
  2044. if (size == 0) {
  2045. res = -1;
  2046. break;
  2047. }
  2048. temp = *data;
  2049. lace_size[n] += temp;
  2050. data += 1;
  2051. size -= 1;
  2052. if (temp != 0xff)
  2053. break;
  2054. }
  2055. total += lace_size[n];
  2056. }
  2057. lace_size[n] = size - total;
  2058. break;
  2059. }
  2060. case 0x2: /* fixed-size lacing */
  2061. for (n = 0; n < laces; n++)
  2062. lace_size[n] = size / laces;
  2063. break;
  2064. case 0x3: /* EBML lacing */ {
  2065. uint32_t total;
  2066. n = matroska_ebmlnum_uint(data, size, &num);
  2067. if (n < 0) {
  2068. av_log(matroska->ctx, AV_LOG_INFO,
  2069. "EBML block data error\n");
  2070. break;
  2071. }
  2072. data += n;
  2073. size -= n;
  2074. total = lace_size[0] = num;
  2075. for (n = 1; res == 0 && n < laces - 1; n++) {
  2076. int64_t snum;
  2077. int r;
  2078. r = matroska_ebmlnum_sint (data, size,
  2079. &snum);
  2080. if (r < 0) {
  2081. av_log(matroska->ctx, AV_LOG_INFO,
  2082. "EBML block data error\n");
  2083. break;
  2084. }
  2085. data += r;
  2086. size -= r;
  2087. lace_size[n] = lace_size[n - 1] + snum;
  2088. total += lace_size[n];
  2089. }
  2090. lace_size[n] = size - total;
  2091. break;
  2092. }
  2093. }
  2094. break;
  2095. }
  2096. if (res == 0) {
  2097. for (n = 0; n < laces; n++) {
  2098. uint64_t timecode = 0;
  2099. pkt = av_mallocz(sizeof(AVPacket));
  2100. /* XXX: prevent data copy... */
  2101. if (av_new_packet(pkt,lace_size[n]) < 0) {
  2102. res = AVERROR_NOMEM;
  2103. break;
  2104. }
  2105. if (cluster_time != (uint64_t)-1) {
  2106. if (time < 0 && (-time) > cluster_time)
  2107. timecode = cluster_time;
  2108. else
  2109. timecode = cluster_time + time;
  2110. }
  2111. /* FIXME: duration */
  2112. memcpy(pkt->data, data, lace_size[n]);
  2113. data += lace_size[n];
  2114. if (n == 0)
  2115. pkt->flags = is_keyframe;
  2116. pkt->stream_index =
  2117. matroska->tracks[track]->stream_index;
  2118. pkt->pts = timecode / 1000000; /* ns to ms */
  2119. matroska_queue_packet(matroska, pkt);
  2120. }
  2121. }
  2122. av_free(lace_size);
  2123. av_free(origdata);
  2124. break;
  2125. }
  2126. case MATROSKA_ID_BLOCKDURATION: {
  2127. uint64_t num;
  2128. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  2129. break;
  2130. av_log(matroska->ctx, AV_LOG_INFO,
  2131. "FIXME: implement support for BlockDuration\n");
  2132. break;
  2133. }
  2134. case MATROSKA_ID_BLOCKREFERENCE:
  2135. /* We've found a reference, so not even the first frame in
  2136. * the lace is a key frame. */
  2137. is_keyframe = 0;
  2138. if (last_num_packets != matroska->num_packets)
  2139. matroska->packets[last_num_packets]->flags = 0;
  2140. res = ebml_read_skip(matroska);
  2141. break;
  2142. default:
  2143. av_log(matroska->ctx, AV_LOG_INFO,
  2144. "Unknown entry 0x%x in blockgroup data\n", id);
  2145. /* fall-through */
  2146. case EBML_ID_VOID:
  2147. res = ebml_read_skip(matroska);
  2148. break;
  2149. }
  2150. if (matroska->level_up) {
  2151. matroska->level_up--;
  2152. break;
  2153. }
  2154. }
  2155. return res;
  2156. }
  2157. static int
  2158. matroska_parse_cluster (MatroskaDemuxContext *matroska)
  2159. {
  2160. int res = 0;
  2161. uint32_t id;
  2162. uint64_t cluster_time = 0;
  2163. av_log(matroska->ctx, AV_LOG_DEBUG,
  2164. "parsing cluster at %lld\n", url_ftell(&matroska->ctx->pb));
  2165. while (res == 0) {
  2166. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  2167. res = AVERROR_IO;
  2168. break;
  2169. } else if (matroska->level_up) {
  2170. matroska->level_up--;
  2171. break;
  2172. }
  2173. switch (id) {
  2174. /* cluster timecode */
  2175. case MATROSKA_ID_CLUSTERTIMECODE: {
  2176. uint64_t num;
  2177. if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
  2178. break;
  2179. cluster_time = num * matroska->time_scale;
  2180. break;
  2181. }
  2182. /* a group of blocks inside a cluster */
  2183. case MATROSKA_ID_BLOCKGROUP:
  2184. if ((res = ebml_read_master(matroska, &id)) < 0)
  2185. break;
  2186. res = matroska_parse_blockgroup(matroska, cluster_time);
  2187. break;
  2188. default:
  2189. av_log(matroska->ctx, AV_LOG_INFO,
  2190. "Unknown entry 0x%x in cluster data\n", id);
  2191. /* fall-through */
  2192. case EBML_ID_VOID:
  2193. res = ebml_read_skip(matroska);
  2194. break;
  2195. }
  2196. if (matroska->level_up) {
  2197. matroska->level_up--;
  2198. break;
  2199. }
  2200. }
  2201. return res;
  2202. }
  2203. static int
  2204. matroska_read_packet (AVFormatContext *s,
  2205. AVPacket *pkt)
  2206. {
  2207. MatroskaDemuxContext *matroska = s->priv_data;
  2208. int res = 0;
  2209. uint32_t id;
  2210. /* Do we still have a packet queued? */
  2211. if (matroska_deliver_packet(matroska, pkt) == 0)
  2212. return 0;
  2213. /* Have we already reached the end? */
  2214. if (matroska->done)
  2215. return AVERROR_IO;
  2216. while (res == 0) {
  2217. if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
  2218. res = AVERROR_IO;
  2219. break;
  2220. } else if (matroska->level_up) {
  2221. matroska->level_up--;
  2222. break;
  2223. }
  2224. switch (id) {
  2225. case MATROSKA_ID_CLUSTER:
  2226. if ((res = ebml_read_master(matroska, &id)) < 0)
  2227. break;
  2228. if ((res = matroska_parse_cluster(matroska)) == 0)
  2229. res = 1; /* Parsed one cluster, let's get out. */
  2230. break;
  2231. default:
  2232. case EBML_ID_VOID:
  2233. res = ebml_read_skip(matroska);
  2234. break;
  2235. }
  2236. if (matroska->level_up) {
  2237. matroska->level_up--;
  2238. break;
  2239. }
  2240. }
  2241. if (res == -1)
  2242. matroska->done = 1;
  2243. return matroska_deliver_packet(matroska, pkt);
  2244. }
  2245. static int
  2246. matroska_read_close (AVFormatContext *s)
  2247. {
  2248. MatroskaDemuxContext *matroska = s->priv_data;
  2249. int n = 0;
  2250. if (matroska->writing_app)
  2251. av_free(matroska->writing_app);
  2252. if (matroska->muxing_app)
  2253. av_free(matroska->muxing_app);
  2254. if (matroska->index)
  2255. av_free(matroska->index);
  2256. if (matroska->packets != NULL) {
  2257. for (n = 0; n < matroska->num_packets; n++) {
  2258. av_free_packet(matroska->packets[n]);
  2259. av_free(matroska->packets[n]);
  2260. }
  2261. av_free(matroska->packets);
  2262. }
  2263. for (n = 0; n < matroska->num_tracks; n++) {
  2264. MatroskaTrack *track = matroska->tracks[n];
  2265. if (track->codec_id)
  2266. av_free(track->codec_id);
  2267. if (track->codec_name)
  2268. av_free(track->codec_name);
  2269. if (track->codec_priv)
  2270. av_free(track->codec_priv);
  2271. if (track->name)
  2272. av_free(track->name);
  2273. if (track->language)
  2274. av_free(track->language);
  2275. av_free(track);
  2276. }
  2277. memset(matroska, 0, sizeof(MatroskaDemuxContext));
  2278. return 0;
  2279. }
  2280. static AVInputFormat matroska_iformat = {
  2281. "matroska",
  2282. "Matroska file format",
  2283. sizeof(MatroskaDemuxContext),
  2284. matroska_probe,
  2285. matroska_read_header,
  2286. matroska_read_packet,
  2287. matroska_read_close,
  2288. };
  2289. int
  2290. matroska_init(void)
  2291. {
  2292. av_register_input_format(&matroska_iformat);
  2293. return 0;
  2294. }