/*
 * FLV demuxer
 * Copyright (c) 2003 The Libav Project
 *
 * This demuxer will generate a 1 byte extradata for VP6F content.
 * It is composed of:
 *  - upper 4 bits: difference between encoded width and visible width
 *  - lower 4 bits: difference between encoded height and visible height
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/dict.h"
#include "libavutil/opt.h"
#include "libavutil/intfloat.h"
#include "libavutil/mathematics.h"
#include "libavcodec/bytestream.h"
#include "libavcodec/mpeg4audio.h"
#include "avformat.h"
#include "internal.h"
#include "avio_internal.h"
#include "flv.h"

#define KEYFRAMES_TAG            "keyframes"
#define KEYFRAMES_TIMESTAMP_TAG  "times"
#define KEYFRAMES_BYTEOFFSET_TAG "filepositions"

#define VALIDATE_INDEX_TS_THRESH 2500

typedef struct FLVContext {
    const AVClass *class; ///< Class for private options.
    int trust_metadata;   ///< configure streams according to onMetaData
    int wrong_dts;        ///< wrong dts due to negative cts
    uint8_t *new_extradata[2];
    int new_extradata_size[2];
    int last_sample_rate;
    int last_channels;
    struct {
        int64_t dts;
        int64_t pos;
    } validate_index[2];
    int validate_next;
    int validate_count;
    int searched_for_end;

    AVRational framerate;
} FLVContext;
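
/* Probe: accept input that starts with the "FLV" signature, a version
 * below 5 and a data offset of at least the 9-byte file header. */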
static int flv_probe(AVProbeData *p)
{
    const uint8_t *d;

    d = p->buf;
    if (d[0] == 'F' &&
        d[1] == 'L' &&
        d[2] == 'V' &&
        d[3] < 5 && d[5] == 0 &&
        AV_RB32(d + 5) > 8) {
        return AVPROBE_SCORE_MAX;
    }
    return 0;
}
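
/* Allocate a new stream of the given type, apply the framerate seen in
 * the metadata to video streams and use the FLV timebase of 1 ms. */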
static AVStream *create_stream(AVFormatContext *s, int codec_type)
{
    FLVContext *flv = s->priv_data;
    AVStream *st    = avformat_new_stream(s, NULL);
    if (!st)
        return NULL;
    st->codecpar->codec_type = codec_type;
    if (codec_type == AVMEDIA_TYPE_VIDEO)
        st->avg_frame_rate = flv->framerate;

    avpriv_set_pts_info(st, 32, 1, 1000); /* 32 bit pts in ms */
    return st;
}
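
/* Check whether the audio codec described by the FLV tag flags matches the
 * parameters already set on the stream, so packets carrying a different
 * codec can be routed to a newly created stream. */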
static int flv_same_audio_codec(AVCodecParameters *apar, int flags)
{
    int bits_per_coded_sample = (flags & FLV_AUDIO_SAMPLESIZE_MASK) ? 16 : 8;
    int flv_codecid           = flags & FLV_AUDIO_CODECID_MASK;
    int codec_id;

    if (!apar->codec_id && !apar->codec_tag)
        return 1;

    if (apar->bits_per_coded_sample != bits_per_coded_sample)
        return 0;

    switch (flv_codecid) {
    // no distinction between S16 and S8 PCM codec flags
    case FLV_CODECID_PCM:
        codec_id = bits_per_coded_sample == 8
                   ? AV_CODEC_ID_PCM_U8
#if HAVE_BIGENDIAN
                   : AV_CODEC_ID_PCM_S16BE;
#else
                   : AV_CODEC_ID_PCM_S16LE;
#endif
        return codec_id == apar->codec_id;
    case FLV_CODECID_PCM_LE:
        codec_id = bits_per_coded_sample == 8
                   ? AV_CODEC_ID_PCM_U8
                   : AV_CODEC_ID_PCM_S16LE;
        return codec_id == apar->codec_id;
    case FLV_CODECID_AAC:
        return apar->codec_id == AV_CODEC_ID_AAC;
    case FLV_CODECID_ADPCM:
        return apar->codec_id == AV_CODEC_ID_ADPCM_SWF;
    case FLV_CODECID_SPEEX:
        return apar->codec_id == AV_CODEC_ID_SPEEX;
    case FLV_CODECID_MP3:
        return apar->codec_id == AV_CODEC_ID_MP3;
    case FLV_CODECID_NELLYMOSER_8KHZ_MONO:
    case FLV_CODECID_NELLYMOSER_16KHZ_MONO:
    case FLV_CODECID_NELLYMOSER:
        return apar->codec_id == AV_CODEC_ID_NELLYMOSER;
    case FLV_CODECID_PCM_MULAW:
        return apar->sample_rate == 8000 &&
               apar->codec_id    == AV_CODEC_ID_PCM_MULAW;
    case FLV_CODECID_PCM_ALAW:
        return apar->sample_rate == 8000 &&
               apar->codec_id    == AV_CODEC_ID_PCM_ALAW;
    default:
        return apar->codec_tag == (flv_codecid >> FLV_AUDIO_CODECID_OFFSET);
    }
}
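
/* Map the FLV audio codec id from the tag flags onto AVCodecParameters,
 * including the sample rates implied by some codec ids (Speex, the
 * Nellymoser variants, mu-law/A-law). */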
static void flv_set_audio_codec(AVFormatContext *s, AVStream *astream,
                                AVCodecParameters *apar, int flv_codecid)
{
    switch (flv_codecid) {
    // no distinction between S16 and S8 PCM codec flags
    case FLV_CODECID_PCM:
        apar->codec_id = apar->bits_per_coded_sample == 8
                         ? AV_CODEC_ID_PCM_U8
#if HAVE_BIGENDIAN
                         : AV_CODEC_ID_PCM_S16BE;
#else
                         : AV_CODEC_ID_PCM_S16LE;
#endif
        break;
    case FLV_CODECID_PCM_LE:
        apar->codec_id = apar->bits_per_coded_sample == 8
                         ? AV_CODEC_ID_PCM_U8
                         : AV_CODEC_ID_PCM_S16LE;
        break;
    case FLV_CODECID_AAC:
        apar->codec_id = AV_CODEC_ID_AAC;
        break;
    case FLV_CODECID_ADPCM:
        apar->codec_id = AV_CODEC_ID_ADPCM_SWF;
        break;
    case FLV_CODECID_SPEEX:
        apar->codec_id    = AV_CODEC_ID_SPEEX;
        apar->sample_rate = 16000;
        break;
    case FLV_CODECID_MP3:
        apar->codec_id        = AV_CODEC_ID_MP3;
        astream->need_parsing = AVSTREAM_PARSE_FULL;
        break;
    case FLV_CODECID_NELLYMOSER_8KHZ_MONO:
        // in case metadata does not otherwise declare samplerate
        apar->sample_rate = 8000;
        apar->codec_id    = AV_CODEC_ID_NELLYMOSER;
        break;
    case FLV_CODECID_NELLYMOSER_16KHZ_MONO:
        apar->sample_rate = 16000;
        apar->codec_id    = AV_CODEC_ID_NELLYMOSER;
        break;
    case FLV_CODECID_NELLYMOSER:
        apar->codec_id = AV_CODEC_ID_NELLYMOSER;
        break;
    case FLV_CODECID_PCM_MULAW:
        apar->sample_rate = 8000;
        apar->codec_id    = AV_CODEC_ID_PCM_MULAW;
        break;
    case FLV_CODECID_PCM_ALAW:
        apar->sample_rate = 8000;
        apar->codec_id    = AV_CODEC_ID_PCM_ALAW;
        break;
    default:
        av_log(s, AV_LOG_INFO, "Unsupported audio codec (%x)\n",
               flv_codecid >> FLV_AUDIO_CODECID_OFFSET);
        apar->codec_tag = flv_codecid >> FLV_AUDIO_CODECID_OFFSET;
    }
}
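
/* Same idea as flv_same_audio_codec(), but for the video codec id carried
 * in the lower bits of the video tag flags. */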
static int flv_same_video_codec(AVCodecParameters *vpar, int flags)
{
    int flv_codecid = flags & FLV_VIDEO_CODECID_MASK;

    if (!vpar->codec_id && !vpar->codec_tag)
        return 1;

    switch (flv_codecid) {
    case FLV_CODECID_H263:
        return vpar->codec_id == AV_CODEC_ID_FLV1;
    case FLV_CODECID_SCREEN:
        return vpar->codec_id == AV_CODEC_ID_FLASHSV;
    case FLV_CODECID_SCREEN2:
        return vpar->codec_id == AV_CODEC_ID_FLASHSV2;
    case FLV_CODECID_VP6:
        return vpar->codec_id == AV_CODEC_ID_VP6F;
    case FLV_CODECID_VP6A:
        return vpar->codec_id == AV_CODEC_ID_VP6A;
    case FLV_CODECID_H264:
        return vpar->codec_id == AV_CODEC_ID_H264;
    default:
        return vpar->codec_tag == flv_codecid;
    }
}
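
/* Map the FLV video codec id onto AVCodecParameters. Returns the number of
 * codec-specific header bytes that flv_read_packet() must subtract from the
 * tag body size; for VP6/VP6A the size-adjustment byte is stored as the
 * 1-byte extradata described at the top of this file. */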
static int flv_set_video_codec(AVFormatContext *s, AVStream *vstream,
                               int flv_codecid, int read)
{
    AVCodecParameters *par = vstream->codecpar;
    switch (flv_codecid) {
    case FLV_CODECID_H263:
        par->codec_id = AV_CODEC_ID_FLV1;
        break;
    case FLV_CODECID_SCREEN:
        par->codec_id = AV_CODEC_ID_FLASHSV;
        break;
    case FLV_CODECID_SCREEN2:
        par->codec_id = AV_CODEC_ID_FLASHSV2;
        break;
    case FLV_CODECID_VP6:
        par->codec_id = AV_CODEC_ID_VP6F;
        /* fall through: VP6 and VP6A share the extradata byte handling */
    case FLV_CODECID_VP6A:
        if (flv_codecid == FLV_CODECID_VP6A)
            par->codec_id = AV_CODEC_ID_VP6A;
        if (read) {
            if (par->extradata_size != 1) {
                par->extradata = av_malloc(1);
                if (par->extradata)
                    par->extradata_size = 1;
            }
            if (par->extradata)
                par->extradata[0] = avio_r8(s->pb);
            else
                avio_skip(s->pb, 1);
        }
        return 1; // 1 byte body size adjustment for flv_read_packet()
    case FLV_CODECID_H264:
        par->codec_id = AV_CODEC_ID_H264;
        return 3; // not 4, reading packet type will consume one byte
    default:
        av_log(s, AV_LOG_INFO, "Unsupported video codec (%x)\n", flv_codecid);
        par->codec_tag = flv_codecid;
    }

    return 0;
}
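
/* Read a length-prefixed AMF string into buffer; if it does not fit,
 * skip it and return -1. */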
static int amf_get_string(AVIOContext *ioc, char *buffer, int buffsize)
{
    int length = avio_rb16(ioc);
    if (length >= buffsize) {
        avio_skip(ioc, length);
        return -1;
    }

    avio_read(ioc, buffer, length);

    buffer[length] = '\0';

    return length;
}
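
/* Parse the 'keyframes' object from onMetaData ('times' and 'filepositions'
 * arrays) into the stream index. The first two entries are also remembered
 * in validate_index[] so flv_read_packet() can check the index against the
 * tags it actually encounters. */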
static int parse_keyframes_index(AVFormatContext *s, AVIOContext *ioc,
                                 AVStream *vstream, int64_t max_pos)
{
    FLVContext *flv       = s->priv_data;
    unsigned int arraylen = 0, timeslen = 0, fileposlen = 0, i;
    double num_val;
    char str_val[256];
    int64_t *times         = NULL;
    int64_t *filepositions = NULL;
    int ret                = AVERROR(ENOSYS);
    int64_t initial_pos    = avio_tell(ioc);

    if (s->flags & AVFMT_FLAG_IGNIDX)
        return 0;

    while (avio_tell(ioc) < max_pos - 2 &&
           amf_get_string(ioc, str_val, sizeof(str_val)) > 0) {
        int64_t *current_array;

        // Expect array object in context
        if (avio_r8(ioc) != AMF_DATA_TYPE_ARRAY)
            break;

        arraylen = avio_rb32(ioc);
        if (arraylen >> 28)
            break;

        /* Expect only 'times' or 'filepositions' sub-arrays in other
         * case refuse to use such metadata for indexing. */
        if (!strcmp(KEYFRAMES_TIMESTAMP_TAG, str_val) && !times) {
            if (!(times = av_mallocz(sizeof(*times) * arraylen))) {
                ret = AVERROR(ENOMEM);
                goto finish;
            }
            timeslen      = arraylen;
            current_array = times;
        } else if (!strcmp(KEYFRAMES_BYTEOFFSET_TAG, str_val) &&
                   !filepositions) {
            if (!(filepositions = av_mallocz(sizeof(*filepositions) * arraylen))) {
                ret = AVERROR(ENOMEM);
                goto finish;
            }
            fileposlen    = arraylen;
            current_array = filepositions;
        } else
            // unexpected metatag inside keyframes, will not use such
            // metadata for indexing
            break;

        for (i = 0; i < arraylen && avio_tell(ioc) < max_pos - 1; i++) {
            if (avio_r8(ioc) != AMF_DATA_TYPE_NUMBER)
                goto finish;
            num_val          = av_int2double(avio_rb64(ioc));
            current_array[i] = num_val;
        }
        if (times && filepositions) {
            // All done, exiting at a position allowing amf_parse_object
            // to finish parsing the object
            ret = 0;
            break;
        }
    }

    if (!ret && timeslen == fileposlen) {
        for (i = 0; i < fileposlen; i++) {
            av_add_index_entry(vstream, filepositions[i], times[i] * 1000,
                               0, 0, AVINDEX_KEYFRAME);
            if (i < 2) {
                flv->validate_index[i].pos = filepositions[i];
                flv->validate_index[i].dts = times[i] * 1000;
                flv->validate_count        = i + 1;
            }
        }
    } else
        av_log(s, AV_LOG_WARNING, "Invalid keyframes object, skipping.\n");

finish:
    av_freep(&times);
    av_freep(&filepositions);
    // If we got unexpected data, but successfully reset back to
    // the start pos, the caller can continue parsing
    if (ret < 0 && avio_seek(ioc, initial_pos, SEEK_SET) > 0)
        return 0;
    return ret;
}
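
/* Recursively parse a single AMF value. At depth 1 of onMetaData, known
 * keys are applied to the streams (duration, dimensions, data rates, codec
 * ids when the flv_metadata option is set); everything else is exported as
 * demuxer metadata. */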
static int amf_parse_object(AVFormatContext *s, AVStream *astream,
                            AVStream *vstream, const char *key,
                            int64_t max_pos, int depth)
{
    AVCodecParameters *apar, *vpar;
    FLVContext *flv = s->priv_data;
    AVIOContext *ioc;
    AMFDataType amf_type;
    char str_val[256];
    double num_val;

    num_val  = 0;
    ioc      = s->pb;
    amf_type = avio_r8(ioc);

    switch (amf_type) {
    case AMF_DATA_TYPE_NUMBER:
        num_val = av_int2double(avio_rb64(ioc));
        break;
    case AMF_DATA_TYPE_BOOL:
        num_val = avio_r8(ioc);
        break;
    case AMF_DATA_TYPE_STRING:
        if (amf_get_string(ioc, str_val, sizeof(str_val)) < 0)
            return -1;
        break;
    case AMF_DATA_TYPE_OBJECT:
        if ((vstream || astream) && key &&
            !strcmp(KEYFRAMES_TAG, key) && depth == 1)
            if (parse_keyframes_index(s, ioc, vstream ? vstream : astream,
                                      max_pos) < 0)
                return -1;

        while (avio_tell(ioc) < max_pos - 2 &&
               amf_get_string(ioc, str_val, sizeof(str_val)) > 0)
            if (amf_parse_object(s, astream, vstream, str_val, max_pos,
                                 depth + 1) < 0)
                return -1;     // if we couldn't skip, bomb out.
        if (avio_r8(ioc) != AMF_END_OF_OBJECT)
            return -1;
        break;
    case AMF_DATA_TYPE_NULL:
    case AMF_DATA_TYPE_UNDEFINED:
    case AMF_DATA_TYPE_UNSUPPORTED:
        break;     // these take up no additional space
    case AMF_DATA_TYPE_MIXEDARRAY:
        avio_skip(ioc, 4);     // skip 32-bit max array index
        while (avio_tell(ioc) < max_pos - 2 &&
               amf_get_string(ioc, str_val, sizeof(str_val)) > 0)
            // this is the only case in which we would want a nested
            // parse to not skip over the object
            if (amf_parse_object(s, astream, vstream, str_val, max_pos,
                                 depth + 1) < 0)
                return -1;
        if (avio_r8(ioc) != AMF_END_OF_OBJECT)
            return -1;
        break;
    case AMF_DATA_TYPE_ARRAY:
    {
        unsigned int arraylen, i;

        arraylen = avio_rb32(ioc);
        for (i = 0; i < arraylen && avio_tell(ioc) < max_pos - 1; i++)
            if (amf_parse_object(s, NULL, NULL, NULL, max_pos,
                                 depth + 1) < 0)
                return -1;     // if we couldn't skip, bomb out.
    }
    break;
    case AMF_DATA_TYPE_DATE:
        avio_skip(ioc, 8 + 2); // timestamp (double) and UTC offset (int16)
        break;
    default:                   // unsupported type, we couldn't skip
        return -1;
    }

    if (key) {
        // stream info doesn't live any deeper than the first object
        if (depth == 1) {
            apar = astream ? astream->codecpar : NULL;
            vpar = vstream ? vstream->codecpar : NULL;

            if (amf_type == AMF_DATA_TYPE_NUMBER ||
                amf_type == AMF_DATA_TYPE_BOOL) {
                if (!strcmp(key, "duration"))
                    s->duration = num_val * AV_TIME_BASE;
                else if (!strcmp(key, "videodatarate") && vpar &&
                         0 <= (int)(num_val * 1024.0))
                    vpar->bit_rate = num_val * 1024.0;
                else if (!strcmp(key, "audiodatarate") && apar &&
                         0 <= (int)(num_val * 1024.0))
                    apar->bit_rate = num_val * 1024.0;
                else if (!strcmp(key, "datastream")) {
                    AVStream *st = create_stream(s, AVMEDIA_TYPE_DATA);
                    if (!st)
                        return AVERROR(ENOMEM);
                    st->codecpar->codec_id = AV_CODEC_ID_TEXT;
                } else if (!strcmp(key, "framerate")) {
                    flv->framerate = av_d2q(num_val, 1000);
                    if (vstream)
                        vstream->avg_frame_rate = flv->framerate;
                } else if (flv->trust_metadata) {
                    if (!strcmp(key, "videocodecid") && vpar) {
                        flv_set_video_codec(s, vstream, num_val, 0);
                    } else if (!strcmp(key, "audiocodecid") && apar) {
                        int id = ((int)num_val) << FLV_AUDIO_CODECID_OFFSET;
                        flv_set_audio_codec(s, astream, apar, id);
                    } else if (!strcmp(key, "audiosamplerate") && apar) {
                        apar->sample_rate = num_val;
                    } else if (!strcmp(key, "audiosamplesize") && apar) {
                        apar->bits_per_coded_sample = num_val;
                    } else if (!strcmp(key, "stereo") && apar) {
                        apar->channels       = num_val + 1;
                        apar->channel_layout = apar->channels == 2 ?
                                               AV_CH_LAYOUT_STEREO :
                                               AV_CH_LAYOUT_MONO;
                    } else if (!strcmp(key, "width") && vpar) {
                        vpar->width = num_val;
                    } else if (!strcmp(key, "height") && vpar) {
                        vpar->height = num_val;
                    }
                }
            }
        }

        if (!strcmp(key, "duration")        ||
            !strcmp(key, "filesize")        ||
            !strcmp(key, "width")           ||
            !strcmp(key, "height")          ||
            !strcmp(key, "videodatarate")   ||
            !strcmp(key, "framerate")       ||
            !strcmp(key, "videocodecid")    ||
            !strcmp(key, "audiodatarate")   ||
            !strcmp(key, "audiosamplerate") ||
            !strcmp(key, "audiosamplesize") ||
            !strcmp(key, "stereo")          ||
            !strcmp(key, "audiocodecid")    ||
            !strcmp(key, "datastream"))
            return 0;

        s->event_flags |= AVFMT_EVENT_FLAG_METADATA_UPDATED;
        if (amf_type == AMF_DATA_TYPE_BOOL) {
            av_strlcpy(str_val, num_val > 0 ? "true" : "false",
                       sizeof(str_val));
            av_dict_set(&s->metadata, key, str_val, 0);
        } else if (amf_type == AMF_DATA_TYPE_NUMBER) {
            snprintf(str_val, sizeof(str_val), "%.f", num_val);
            av_dict_set(&s->metadata, key, str_val, 0);
        } else if (amf_type == AMF_DATA_TYPE_STRING)
            av_dict_set(&s->metadata, key, str_val, 0);
    }

    return 0;
}
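
/* Handle a script data tag: verify the leading "onMetaData"/"onCuePoint"
 * string, look up the audio and video streams once, then parse the
 * following AMF object. Returns 1 for onTextData so the caller can turn
 * it into a data packet. */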
static int flv_read_metabody(AVFormatContext *s, int64_t next_pos)
{
    AMFDataType type;
    AVStream *stream, *astream, *vstream;
    AVIOContext *ioc;
    int i;
    // only needs to hold the string "onMetaData".
    // Anything longer is something we don't want.
    char buffer[11];

    astream = NULL;
    vstream = NULL;
    ioc     = s->pb;

    // first object needs to be "onMetaData" string
    type = avio_r8(ioc);
    if (type != AMF_DATA_TYPE_STRING ||
        amf_get_string(ioc, buffer, sizeof(buffer)) < 0)
        return -1;

    if (!strcmp(buffer, "onTextData"))
        return 1;

    if (strcmp(buffer, "onMetaData") && strcmp(buffer, "onCuePoint"))
        return -1;

    // find the streams now so that amf_parse_object doesn't need to do
    // the lookup every time it is called.
    for (i = 0; i < s->nb_streams; i++) {
        stream = s->streams[i];
        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
            astream = stream;
        else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
            vstream = stream;
    }

    // parse the second object (we want a mixed array)
    if (amf_parse_object(s, astream, vstream, buffer, next_pos, 0) < 0)
        return -1;

    return 0;
}
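
/* The FLV file header only carries a flags byte and the data offset;
 * streams are created on demand while reading tags (AVFMTCTX_NOHEADER). */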
static int flv_read_header(AVFormatContext *s)
{
    int offset;

    avio_skip(s->pb, 4);
    avio_r8(s->pb); // flags

    s->ctx_flags |= AVFMTCTX_NOHEADER;

    offset = avio_rb32(s->pb);
    avio_seek(s->pb, offset, SEEK_SET);
    avio_skip(s->pb, 4);

    s->start_time = 0;

    return 0;
}

static int flv_read_close(AVFormatContext *s)
{
    FLVContext *flv = s->priv_data;
    av_freep(&flv->new_extradata[0]);
    av_freep(&flv->new_extradata[1]);
    return 0;
}
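
/* Read codec extradata from the current tag directly into the stream. */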
static int flv_get_extradata(AVFormatContext *s, AVStream *st, int size)
{
    av_free(st->codecpar->extradata);
    st->codecpar->extradata = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
    if (!st->codecpar->extradata)
        return AVERROR(ENOMEM);
    st->codecpar->extradata_size = size;
    avio_read(s->pb, st->codecpar->extradata, st->codecpar->extradata_size);
    return 0;
}

static int flv_queue_extradata(FLVContext *flv, AVIOContext *pb, int stream,
                               int size)
{
    av_free(flv->new_extradata[stream]);
    flv->new_extradata[stream] = av_mallocz(size +
                                            AV_INPUT_BUFFER_PADDING_SIZE);
    if (!flv->new_extradata[stream])
        return AVERROR(ENOMEM);
    flv->new_extradata_size[stream] = size;
    avio_read(pb, flv->new_extradata[stream], size);
    return 0;
}

static void clear_index_entries(AVFormatContext *s, int64_t pos)
{
    int i, j, out;
    av_log(s, AV_LOG_WARNING,
           "Found invalid index entries, clearing the index.\n");
    for (i = 0; i < s->nb_streams; i++) {
        AVStream *st = s->streams[i];
        /* Remove all index entries that point to >= pos */
        out = 0;
        for (j = 0; j < st->nb_index_entries; j++)
            if (st->index_entries[j].pos < pos)
                st->index_entries[out++] = st->index_entries[j];
        st->nb_index_entries = out;
    }
}
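
/* Skip over a single AMF value of the given type without interpreting it. */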
static int amf_skip_tag(AVIOContext *pb, AMFDataType type)
{
    int nb = -1, ret, parse_name = 1;

    switch (type) {
    case AMF_DATA_TYPE_NUMBER:
        avio_skip(pb, 8);
        break;
    case AMF_DATA_TYPE_BOOL:
        avio_skip(pb, 1);
        break;
    case AMF_DATA_TYPE_STRING:
        avio_skip(pb, avio_rb16(pb));
        break;
    case AMF_DATA_TYPE_ARRAY:
        parse_name = 0;
        /* fall through */
    case AMF_DATA_TYPE_MIXEDARRAY:
        nb = avio_rb32(pb);
        /* fall through */
    case AMF_DATA_TYPE_OBJECT:
        while (!pb->eof_reached && (nb-- > 0 || type != AMF_DATA_TYPE_ARRAY)) {
            if (parse_name) {
                int size = avio_rb16(pb);
                if (!size) {
                    avio_skip(pb, 1);
                    break;
                }
                avio_skip(pb, size);
            }
            if ((ret = amf_skip_tag(pb, avio_r8(pb))) < 0)
                return ret;
        }
        break;
    case AMF_DATA_TYPE_NULL:
    case AMF_DATA_TYPE_OBJECT_END:
        break;
    default:
        return AVERROR_INVALIDDATA;
    }
    return 0;
}
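
/* Turn an onTextData script tag into a text packet on the data stream,
 * creating that stream if necessary. */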
static int flv_data_packet(AVFormatContext *s, AVPacket *pkt,
                           int64_t dts, int64_t next)
{
    AVIOContext *pb = s->pb;
    AVStream *st    = NULL;
    char buf[20];
    int ret = AVERROR_INVALIDDATA;
    int i, length = -1;

    switch (avio_r8(pb)) {
    case AMF_DATA_TYPE_MIXEDARRAY:
        avio_seek(pb, 4, SEEK_CUR);
        /* fall through */
    case AMF_DATA_TYPE_OBJECT:
        break;
    default:
        goto skip;
    }

    while ((ret = amf_get_string(pb, buf, sizeof(buf))) > 0) {
        AMFDataType type = avio_r8(pb);
        if (type == AMF_DATA_TYPE_STRING && !strcmp(buf, "text")) {
            length = avio_rb16(pb);
            ret    = av_get_packet(pb, pkt, length);
            if (ret < 0)
                goto skip;
            else
                break;
        } else {
            if ((ret = amf_skip_tag(pb, type)) < 0)
                goto skip;
        }
    }

    if (length < 0) {
        ret = AVERROR_INVALIDDATA;
        goto skip;
    }

    for (i = 0; i < s->nb_streams; i++) {
        st = s->streams[i];
        if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA)
            break;
    }

    if (i == s->nb_streams) {
        st = create_stream(s, AVMEDIA_TYPE_DATA);
        if (!st)
            return AVERROR(ENOMEM);
        st->codecpar->codec_id = AV_CODEC_ID_TEXT;
    }

    pkt->dts  = dts;
    pkt->pts  = dts;
    pkt->size = ret;

    pkt->stream_index = st->index;
    pkt->flags       |= AV_PKT_FLAG_KEY;

skip:
    avio_seek(s->pb, next + 4, SEEK_SET);

    return ret;
}
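
/* Main demux loop: walk FLV tags, validate the keyframe index, determine
 * the duration from the last tag if needed, create/configure streams from
 * the tag headers, handle AAC/H.264 sequence headers as (new) extradata
 * and finally return the payload as a packet. */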
static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    FLVContext *flv = s->priv_data;
    int ret, i, size, flags, is_audio;
    enum FlvTagType type;
    int64_t next, pos;
    int64_t dts, pts = AV_NOPTS_VALUE;
    int sample_rate = 0, channels = 0;
    AVStream *st    = NULL;

    /* pkt size is repeated at end. skip it */
    for (;; avio_skip(s->pb, 4)) {
        pos  = avio_tell(s->pb);
        type = avio_r8(s->pb);
        size = avio_rb24(s->pb);
        dts  = avio_rb24(s->pb);
        dts |= avio_r8(s->pb) << 24;
        av_log(s, AV_LOG_TRACE, "type:%d, size:%d, dts:%"PRId64"\n",
               type, size, dts);
        if (s->pb->eof_reached)
            return AVERROR_EOF;
        avio_skip(s->pb, 3); /* stream id, always 0 */
        flags = 0;

        if (flv->validate_next < flv->validate_count) {
            int64_t validate_pos = flv->validate_index[flv->validate_next].pos;
            if (pos == validate_pos) {
                if (FFABS(dts - flv->validate_index[flv->validate_next].dts) <=
                    VALIDATE_INDEX_TS_THRESH) {
                    flv->validate_next++;
                } else {
                    clear_index_entries(s, validate_pos);
                    flv->validate_count = 0;
                }
            } else if (pos > validate_pos) {
                clear_index_entries(s, validate_pos);
                flv->validate_count = 0;
            }
        }

        if (size == 0)
            continue;

        next = size + avio_tell(s->pb);

        if (type == FLV_TAG_TYPE_AUDIO) {
            is_audio = 1;
            flags    = avio_r8(s->pb);
            size--;
        } else if (type == FLV_TAG_TYPE_VIDEO) {
            is_audio = 0;
            flags    = avio_r8(s->pb);
            size--;
            if ((flags & 0xf0) == 0x50) /* video info / command frame */
                goto skip;
        } else {
            if (type == FLV_TAG_TYPE_META && size > 13 + 1 + 4)
                if (flv_read_metabody(s, next) > 0) {
                    return flv_data_packet(s, pkt, dts, next);
                } else /* skip packet */
                    av_log(s, AV_LOG_DEBUG,
                           "Skipping flv packet: type %d, size %d, flags %d.\n",
                           type, size, flags);
skip:
            avio_seek(s->pb, next, SEEK_SET);
            continue;
        }

        /* skip empty data packets */
        if (!size)
            continue;

        /* now find stream */
        for (i = 0; i < s->nb_streams; i++) {
            st = s->streams[i];
            if (is_audio && st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
                if (flv_same_audio_codec(st->codecpar, flags))
                    break;
            } else if (!is_audio &&
                       st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
                if (flv_same_video_codec(st->codecpar, flags))
                    break;
            }
        }
        if (i == s->nb_streams) {
            st = create_stream(s, is_audio ? AVMEDIA_TYPE_AUDIO
                                           : AVMEDIA_TYPE_VIDEO);
            if (!st)
                return AVERROR(ENOMEM);
        }
        av_log(s, AV_LOG_TRACE, "%d %X %d \n", is_audio, flags, st->discard);
        if ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY ||
            is_audio)
            av_add_index_entry(st, pos, dts, size, 0, AVINDEX_KEYFRAME);
        if ((st->discard >= AVDISCARD_NONKEY &&
             !((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY || is_audio)) ||
            (st->discard >= AVDISCARD_BIDIR &&
             ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_DISP_INTER && !is_audio)) ||
            st->discard >= AVDISCARD_ALL) {
            avio_seek(s->pb, next, SEEK_SET);
            continue;
        }
        break;
    }

    // if not streamed and no duration from metadata then seek to end to find
    // the duration from the timestamps
    if ((s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
        (!s->duration || s->duration == AV_NOPTS_VALUE) &&
        !flv->searched_for_end) {
        int size;
        const int64_t pos   = avio_tell(s->pb);
        // Read the last 4 bytes of the file, this should be the size of the
        // previous FLV tag. Use the timestamp of its payload as duration.
        const int64_t fsize = avio_size(s->pb);
        avio_seek(s->pb, fsize - 4, SEEK_SET);
        size = avio_rb32(s->pb);
        if (size > 0 && size < fsize) {
            // Seek to the start of the last FLV tag at position (fsize - 4 - size)
            // but skip the byte indicating the type.
            avio_seek(s->pb, fsize - 3 - size, SEEK_SET);
            if (size == avio_rb24(s->pb) + 11) {
                uint32_t ts = avio_rb24(s->pb);
                ts         |= avio_r8(s->pb) << 24;
                s->duration = ts * (int64_t)AV_TIME_BASE / 1000;
            }
        }
        avio_seek(s->pb, pos, SEEK_SET);
        flv->searched_for_end = 1;
    }

    if (is_audio) {
        int bits_per_coded_sample;
        channels    = (flags & FLV_AUDIO_CHANNEL_MASK) == FLV_STEREO ? 2 : 1;
        sample_rate = 44100 << ((flags & FLV_AUDIO_SAMPLERATE_MASK) >>
                                FLV_AUDIO_SAMPLERATE_OFFSET) >> 3;
        bits_per_coded_sample = (flags & FLV_AUDIO_SAMPLESIZE_MASK) ? 16 : 8;
        if (!st->codecpar->channels || !st->codecpar->sample_rate ||
            !st->codecpar->bits_per_coded_sample) {
            st->codecpar->channels       = channels;
            st->codecpar->channel_layout = channels == 1
                                           ? AV_CH_LAYOUT_MONO
                                           : AV_CH_LAYOUT_STEREO;
            st->codecpar->sample_rate           = sample_rate;
            st->codecpar->bits_per_coded_sample = bits_per_coded_sample;
        }
        if (!st->codecpar->codec_id) {
            flv_set_audio_codec(s, st, st->codecpar,
                                flags & FLV_AUDIO_CODECID_MASK);
            flv->last_sample_rate =
            sample_rate           = st->codecpar->sample_rate;
            flv->last_channels    =
            channels              = st->codecpar->channels;
        } else {
            AVCodecParameters *par = avcodec_parameters_alloc();
            if (!par) {
                ret = AVERROR(ENOMEM);
                goto leave;
            }
            par->sample_rate           = sample_rate;
            par->bits_per_coded_sample = bits_per_coded_sample;
            flv_set_audio_codec(s, st, par, flags & FLV_AUDIO_CODECID_MASK);
            sample_rate = par->sample_rate;
            avcodec_parameters_free(&par);
        }
    } else {
        size -= flv_set_video_codec(s, st, flags & FLV_VIDEO_CODECID_MASK, 1);
    }

    if (st->codecpar->codec_id == AV_CODEC_ID_AAC ||
        st->codecpar->codec_id == AV_CODEC_ID_H264) {
        int type = avio_r8(s->pb);
        size--;
        if (size < 0) {
            ret = AVERROR_INVALIDDATA;
            goto leave;
        }
        if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
            // sign extension
            int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^ 0xff800000;
            pts = dts + cts;
            if (cts < 0 && !flv->wrong_dts) { // dts might be wrong
                flv->wrong_dts = 1;
                av_log(s, AV_LOG_WARNING,
                       "Negative cts, previous timestamps might be wrong.\n");
            }
        }
        if (type == 0) {
            if (st->codecpar->extradata) {
                if ((ret = flv_queue_extradata(flv, s->pb, is_audio, size)) < 0)
                    return ret;
                ret = AVERROR(EAGAIN);
                goto leave;
            }
            if ((ret = flv_get_extradata(s, st, size)) < 0)
                return ret;
            if (st->codecpar->codec_id == AV_CODEC_ID_AAC) {
                MPEG4AudioConfig cfg;

                /* Workaround for buggy Omnia A/XE encoder */
                AVDictionaryEntry *t = av_dict_get(s->metadata, "Encoder", NULL, 0);
                if (t && !strcmp(t->value, "Omnia A/XE"))
                    st->codecpar->extradata_size = 2;

                avpriv_mpeg4audio_get_config(&cfg, st->codecpar->extradata,
                                             st->codecpar->extradata_size * 8, 1);
                st->codecpar->channels       = cfg.channels;
                st->codecpar->channel_layout = 0;
                if (cfg.ext_sample_rate)
                    st->codecpar->sample_rate = cfg.ext_sample_rate;
                else
                    st->codecpar->sample_rate = cfg.sample_rate;
                av_log(s, AV_LOG_TRACE, "mp4a config channels %d sample rate %d\n",
                       st->codecpar->channels, st->codecpar->sample_rate);
            }

            ret = AVERROR(EAGAIN);
            goto leave;
        }
    }

    /* skip empty data packets */
    if (!size) {
        ret = AVERROR(EAGAIN);
        goto leave;
    }

    ret = av_get_packet(s->pb, pkt, size);
    if (ret < 0)
        return AVERROR(EIO);
    /* note: we need to modify the packet size here to handle the last
     * packet */
    pkt->size         = ret;
    pkt->dts          = dts;
    pkt->pts          = pts == AV_NOPTS_VALUE ? dts : pts;
    pkt->stream_index = st->index;

    if (flv->new_extradata[is_audio]) {
        uint8_t *side = av_packet_new_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
                                                flv->new_extradata_size[is_audio]);
        if (side) {
            memcpy(side, flv->new_extradata[is_audio],
                   flv->new_extradata_size[is_audio]);
            av_freep(&flv->new_extradata[is_audio]);
            flv->new_extradata_size[is_audio] = 0;
        }
    }
    if (is_audio && (sample_rate != flv->last_sample_rate ||
                     channels != flv->last_channels)) {
        flv->last_sample_rate = sample_rate;
        flv->last_channels    = channels;
        ff_add_param_change(pkt, channels, 0, sample_rate, 0, 0);
    }

    if (is_audio || ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY))
        pkt->flags |= AV_PKT_FLAG_KEY;

leave:
    avio_skip(s->pb, 4);
    return ret;
}
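
/* Seeking is delegated to the protocol layer (e.g. RTMP) via
 * avio_seek_time(); the keyframe-index validation state is reset. */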
static int flv_read_seek(AVFormatContext *s, int stream_index,
                         int64_t ts, int flags)
{
    FLVContext *flv = s->priv_data;
    flv->validate_count = 0;
    return avio_seek_time(s->pb, stream_index, ts, flags);
}

#define OFFSET(x) offsetof(FLVContext, x)
#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
static const AVOption options[] = {
    { "flv_metadata", "Allocate streams according to the onMetaData array", OFFSET(trust_metadata), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VD },
    { NULL }
};

static const AVClass class = {
    .class_name = "flvdec",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

AVInputFormat ff_flv_demuxer = {
    .name           = "flv",
    .long_name      = NULL_IF_CONFIG_SMALL("FLV (Flash Video)"),
    .priv_data_size = sizeof(FLVContext),
    .read_probe     = flv_probe,
    .read_header    = flv_read_header,
    .read_packet    = flv_read_packet,
    .read_seek      = flv_read_seek,
    .read_close     = flv_read_close,
    .extensions     = "flv",
    .priv_class     = &class,
};