/*
 * WebM DASH Manifest XML muxer
 * Copyright (c) 2014 Vignesh Venkatasubramanian
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * WebM DASH Specification:
 * https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification
 */

#include <stdint.h>
#include <string.h>

#include "avformat.h"
#include "avio_internal.h"
#include "matroska.h"

#include "libavutil/avstring.h"
#include "libavutil/dict.h"
#include "libavutil/opt.h"

typedef struct AdaptationSet {
    char id[10];
    int *streams;
    int nb_streams;
} AdaptationSet;

typedef struct WebMDashMuxContext {
    const AVClass *class;
    char *adaptation_sets;
    AdaptationSet *as;
    int nb_as;
    int representation_id;
} WebMDashMuxContext;
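/*
 * Returns the codec name string used in the "codecs" attribute of the
 * manifest, or NULL if the codec is not one of the supported WebM codecs.
 */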
static const char *get_codec_name(int codec_id)
{
    switch (codec_id) {
        case AV_CODEC_ID_VP8:
            return "vp8";
        case AV_CODEC_ID_VP9:
            return "vp9";
        case AV_CODEC_ID_VORBIS:
            return "vorbis";
        case AV_CODEC_ID_OPUS:
            return "opus";
    }
    return NULL;
}
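/*
 * Returns the largest DURATION metadata value across all streams, divided by
 * 1000 to convert it to seconds.
 */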
static double get_duration(AVFormatContext *s)
{
    int i = 0;
    double max = 0.0;
    for (i = 0; i < s->nb_streams; i++) {
        AVDictionaryEntry *duration = av_dict_get(s->streams[i]->metadata,
                                                  DURATION, NULL, 0);
        if (!duration || atof(duration->value) < 0) continue;
        if (atof(duration->value) > max) max = atof(duration->value);
    }
    return max / 1000;
}
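/*
 * Writes the XML declaration and the opening <MPD> element. The generated
 * manifest has the following overall structure:
 *
 *   <MPD ...>
 *     <Period ...>
 *       <AdaptationSet ...>
 *         <Representation ...>
 *           <BaseURL>...</BaseURL>
 *           <SegmentBase indexRange="...">
 *             <Initialization range="0-..." />
 *           </SegmentBase>
 *         </Representation>
 *       </AdaptationSet>
 *     </Period>
 *   </MPD>
 */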
static void write_header(AVFormatContext *s)
{
    double min_buffer_time = 1.0;
    avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    avio_printf(s->pb, "<MPD\n");
    avio_printf(s->pb, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
    avio_printf(s->pb, " xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
    avio_printf(s->pb, " xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
    avio_printf(s->pb, " type=\"static\"\n");
    avio_printf(s->pb, " mediaPresentationDuration=\"PT%gS\"\n",
                get_duration(s));
    avio_printf(s->pb, " minBufferTime=\"PT%gS\"\n",
                min_buffer_time);
    avio_printf(s->pb, " profiles=\"urn:webm:dash:profile:webm-on-demand:2012\"");
    avio_printf(s->pb, ">\n");
}

static void write_footer(AVFormatContext *s)
{
    avio_printf(s->pb, "</MPD>\n");
}
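/*
 * Returns 1 if all streams in the adaptation set have identical cue
 * timestamps (i.e. their subsegments are aligned), 0 otherwise.
 */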
static int subsegment_alignment(AVFormatContext *s, AdaptationSet *as) {
    int i;
    AVDictionaryEntry *gold = av_dict_get(s->streams[as->streams[0]]->metadata,
                                          CUE_TIMESTAMPS, NULL, 0);
    if (!gold) return 0;
    for (i = 1; i < as->nb_streams; i++) {
        AVDictionaryEntry *ts = av_dict_get(s->streams[as->streams[i]]->metadata,
                                            CUE_TIMESTAMPS, NULL, 0);
        if (!ts || strncmp(gold->value, ts->value, strlen(gold->value))) return 0;
    }
    return 1;
}
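/*
 * Returns 1 if all streams in the adaptation set share the same track number,
 * codec and codec extradata, so that a player can switch between
 * Representations without reinitializing its decoder; 0 otherwise.
 */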
static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) {
    int i;
    AVDictionaryEntry *gold_track_num = av_dict_get(s->streams[as->streams[0]]->metadata,
                                                    TRACK_NUMBER, NULL, 0);
    AVCodecContext *gold_codec = s->streams[as->streams[0]]->codec;
    if (!gold_track_num) return 0;
    for (i = 1; i < as->nb_streams; i++) {
        AVDictionaryEntry *track_num = av_dict_get(s->streams[as->streams[i]]->metadata,
                                                   TRACK_NUMBER, NULL, 0);
        AVCodecContext *codec = s->streams[as->streams[i]]->codec;
        if (!track_num ||
            strncmp(gold_track_num->value, track_num->value, strlen(gold_track_num->value)) ||
            gold_codec->codec_id != codec->codec_id ||
            gold_codec->extradata_size != codec->extradata_size ||
            memcmp(gold_codec->extradata, codec->extradata, codec->extradata_size)) {
            return 0;
        }
    }
    return 1;
}
/*
 * Writes a Representation within an Adaptation Set. Returns 0 on success and
 * < 0 on failure.
 */
static int write_representation(AVFormatContext *s, AVStream *stream, int id,
                                int output_width, int output_height,
                                int output_sample_rate) {
    AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0);
    AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0);
    AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0);
    AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0);
    AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0);
    if (!irange || !cues_start || !cues_end || !filename || !bandwidth) {
        return -1;
    }
    avio_printf(s->pb, "<Representation id=\"%d\"", id);
    avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value);
    if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_width)
        avio_printf(s->pb, " width=\"%d\"", stream->codec->width);
    if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_height)
        avio_printf(s->pb, " height=\"%d\"", stream->codec->height);
    if (stream->codec->codec_type == AVMEDIA_TYPE_AUDIO && output_sample_rate)
        avio_printf(s->pb, " audioSamplingRate=\"%d\"", stream->codec->sample_rate);
    avio_printf(s->pb, ">\n");
    avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
    avio_printf(s->pb, "<SegmentBase\n");
    avio_printf(s->pb, " indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
    avio_printf(s->pb, "<Initialization\n");
    avio_printf(s->pb, " range=\"0-%s\" />\n", irange->value);
    avio_printf(s->pb, "</SegmentBase>\n");
    avio_printf(s->pb, "</Representation>\n");
    return 0;
}
/*
 * Checks if the width of all streams is the same. Returns 1 if true, 0 otherwise.
 */
static int check_matching_width(AVFormatContext *s, AdaptationSet *as) {
    int first_width, i;
    if (as->nb_streams < 2) return 1;
    first_width = s->streams[as->streams[0]]->codec->width;
    for (i = 1; i < as->nb_streams; i++)
        if (first_width != s->streams[as->streams[i]]->codec->width)
            return 0;
    return 1;
}

/*
 * Checks if the height of all streams is the same. Returns 1 if true, 0 otherwise.
 */
static int check_matching_height(AVFormatContext *s, AdaptationSet *as) {
    int first_height, i;
    if (as->nb_streams < 2) return 1;
    first_height = s->streams[as->streams[0]]->codec->height;
    for (i = 1; i < as->nb_streams; i++)
        if (first_height != s->streams[as->streams[i]]->codec->height)
            return 0;
    return 1;
}

/*
 * Checks if the sample rate of all streams is the same. Returns 1 if true, 0 otherwise.
 */
static int check_matching_sample_rate(AVFormatContext *s, AdaptationSet *as) {
    int first_sample_rate, i;
    if (as->nb_streams < 2) return 1;
    first_sample_rate = s->streams[as->streams[0]]->codec->sample_rate;
    for (i = 1; i < as->nb_streams; i++)
        if (first_sample_rate != s->streams[as->streams[i]]->codec->sample_rate)
            return 0;
    return 1;
}

/*
 * Writes an Adaptation Set. Returns 0 on success and < 0 on failure.
 */
static int write_adaptation_set(AVFormatContext *s, int as_index)
{
    WebMDashMuxContext *w = s->priv_data;
    AdaptationSet *as = &w->as[as_index];
    AVCodecContext *codec = s->streams[as->streams[0]]->codec;
    AVDictionaryEntry *lang;
    int i;
    static const char boolean[2][6] = { "false", "true" };
    int subsegmentStartsWithSAP = 1;

    // Width, Height and Sample Rate will go in the AdaptationSet tag if they
    // are the same for all contained Representations. Otherwise, they will go
    // on their respective Representation tag.
    int width_in_as = 1, height_in_as = 1, sample_rate_in_as = 1;
    if (codec->codec_type == AVMEDIA_TYPE_VIDEO) {
        width_in_as = check_matching_width(s, as);
        height_in_as = check_matching_height(s, as);
    } else {
        sample_rate_in_as = check_matching_sample_rate(s, as);
    }

    avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id);
    avio_printf(s->pb, " mimeType=\"%s/webm\"",
                codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
    avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(codec->codec_id));
    lang = av_dict_get(s->streams[as->streams[0]]->metadata, "language", NULL, 0);
    if (lang) avio_printf(s->pb, " lang=\"%s\"", lang->value);

    if (codec->codec_type == AVMEDIA_TYPE_VIDEO && width_in_as)
        avio_printf(s->pb, " width=\"%d\"", codec->width);
    if (codec->codec_type == AVMEDIA_TYPE_VIDEO && height_in_as)
        avio_printf(s->pb, " height=\"%d\"", codec->height);
    if (codec->codec_type == AVMEDIA_TYPE_AUDIO && sample_rate_in_as)
        avio_printf(s->pb, " audioSamplingRate=\"%d\"", codec->sample_rate);

    avio_printf(s->pb, " bitstreamSwitching=\"%s\"",
                boolean[bitstream_switching(s, as)]);
    avio_printf(s->pb, " subsegmentAlignment=\"%s\"",
                boolean[subsegment_alignment(s, as)]);

    for (i = 0; i < as->nb_streams; i++) {
        AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata,
                                            CLUSTER_KEYFRAME, NULL, 0);
        if (!kf || !strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0;
    }
    avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP);
    avio_printf(s->pb, ">\n");

    for (i = 0; i < as->nb_streams; i++) {
        write_representation(s, s->streams[as->streams[i]], w->representation_id++,
                             !width_in_as, !height_in_as, !sample_rate_in_as);
    }
    avio_printf(s->pb, "</AdaptationSet>\n");
    return 0;
}
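/*
 * Parses the first len - 1 characters of p as a decimal integer. Returns the
 * parsed value, or -1 if the temporary buffer cannot be allocated.
 */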
static int to_integer(char *p, int len)
{
    int ret;
    char *q = av_malloc(sizeof(char) * len);
    if (!q) return -1;
    av_strlcpy(q, p, len);
    ret = atoi(q);
    av_free(q);
    return ret;
}
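/*
 * Parses the adaptation_sets option string, e.g.
 * "id=0,streams=0,1,2 id=1,streams=3,4", into the AdaptationSet array of the
 * muxer context. Returns 0 on success and < 0 on failure.
 */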
static int parse_adaptation_sets(AVFormatContext *s)
{
    WebMDashMuxContext *w = s->priv_data;
    char *p = w->adaptation_sets;
    char *q;
    enum { new_set, parsed_id, parsing_streams } state;
    // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
    state = new_set;
    while (p < w->adaptation_sets + strlen(w->adaptation_sets)) {
        if (*p == ' ') {
            p++; // advance past the separator so the loop makes progress
            continue;
        }
        else if (state == new_set && !strncmp(p, "id=", 3)) {
            w->as = av_realloc(w->as, sizeof(*w->as) * ++w->nb_as);
            if (w->as == NULL) return -1;
            w->as[w->nb_as - 1].nb_streams = 0;
            w->as[w->nb_as - 1].streams = NULL;
            p += 3; // consume "id="
            q = w->as[w->nb_as - 1].id;
            while (*p != ',') *q++ = *p++;
            *q = 0;
            p++;
            state = parsed_id;
        } else if (state == parsed_id && !strncmp(p, "streams=", 8)) {
            p += 8; // consume "streams="
            state = parsing_streams;
        } else if (state == parsing_streams) {
            struct AdaptationSet *as = &w->as[w->nb_as - 1];
            q = p;
            while (*q != '\0' && *q != ',' && *q != ' ') q++;
            as->streams = av_realloc(as->streams, sizeof(*as->streams) * ++as->nb_streams);
            if (as->streams == NULL) return -1;
            as->streams[as->nb_streams - 1] = to_integer(p, q - p + 1);
            if (as->streams[as->nb_streams - 1] < 0) return -1;
            if (*q == '\0') break;
            if (*q == ' ') state = new_set;
            p = ++q;
        } else {
            return -1;
        }
    }
    return 0;
}
static int webm_dash_manifest_write_header(AVFormatContext *s)
{
    int i;
    double start = 0.0;
    WebMDashMuxContext *w = s->priv_data;
    parse_adaptation_sets(s);
    write_header(s);
    avio_printf(s->pb, "<Period id=\"0\"");
    avio_printf(s->pb, " start=\"PT%gS\"", start);
    avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s));
    avio_printf(s->pb, " >\n");
    for (i = 0; i < w->nb_as; i++) {
        if (write_adaptation_set(s, i) < 0) return -1;
    }
    avio_printf(s->pb, "</Period>\n");
    write_footer(s);
    return 0;
}
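/*
 * The manifest is written entirely from stream metadata in write_header, so
 * this muxer does not accept any packets.
 */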
static int webm_dash_manifest_write_packet(AVFormatContext *s, AVPacket *pkt)
{
    return AVERROR_EOF;
}

static int webm_dash_manifest_write_trailer(AVFormatContext *s)
{
    WebMDashMuxContext *w = s->priv_data;
    int i;
    for (i = 0; i < w->nb_as; i++) {
        av_freep(&w->as[i].streams);
    }
    av_freep(&w->as);
    return 0;
}
#define OFFSET(x) offsetof(WebMDashMuxContext, x)
static const AVOption options[] = {
    { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
    { NULL },
};
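/*
 * Illustrative invocation (file names and stream mapping are examples only);
 * the inputs are WebM files read back through the webm_dash_manifest demuxer,
 * which provides the cue/range metadata this muxer needs:
 *
 *   ffmpeg -f webm_dash_manifest -i video_360p.webm \
 *          -f webm_dash_manifest -i audio.webm \
 *          -c copy -map 0 -map 1 \
 *          -f webm_dash_manifest \
 *          -adaptation_sets "id=0,streams=0 id=1,streams=1" \
 *          manifest.xml
 */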
#if CONFIG_WEBM_DASH_MANIFEST_MUXER
static const AVClass webm_dash_class = {
    .class_name = "WebM DASH Manifest muxer",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

AVOutputFormat ff_webm_dash_manifest_muxer = {
    .name              = "webm_dash_manifest",
    .long_name         = NULL_IF_CONFIG_SMALL("WebM DASH Manifest"),
    .mime_type         = "application/xml",
    .extensions        = "xml",
    .priv_data_size    = sizeof(WebMDashMuxContext),
    .write_header      = webm_dash_manifest_write_header,
    .write_packet      = webm_dash_manifest_write_packet,
    .write_trailer     = webm_dash_manifest_write_trailer,
    .priv_class        = &webm_dash_class,
};
#endif