You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

386 lines
14KB

  1. /*
  2. * WebM DASH Manifest XML muxer
  3. * Copyright (c) 2014 Vignesh Venkatasubramanian
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /*
  22. * WebM DASH Specification:
  23. * https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification
  24. */
  25. #include <stdint.h>
  26. #include <string.h>
  27. #include "avformat.h"
  28. #include "avio_internal.h"
  29. #include "matroska.h"
  30. #include "libavutil/avstring.h"
  31. #include "libavutil/dict.h"
  32. #include "libavutil/opt.h"
  /* One adaptation set described by the "adaptation_sets" option string. */
  typedef struct AdaptationSet {
      char id[10];    /* NUL-terminated identifier taken from the "id=" field */
      int *streams;   /* heap-allocated list of indexes into the muxer's streams */
      int nb_streams; /* number of valid entries in streams */
  } AdaptationSet;
  38. typedef struct WebMDashMuxContext {
  39. const AVClass *class;
  40. char *adaptation_sets;
  41. AdaptationSet *as;
  42. int nb_as;
  43. } WebMDashMuxContext;
  44. static const char *get_codec_name(int codec_id)
  45. {
  46. switch (codec_id) {
  47. case AV_CODEC_ID_VP8:
  48. return "vp8";
  49. case AV_CODEC_ID_VP9:
  50. return "vp9";
  51. case AV_CODEC_ID_VORBIS:
  52. return "vorbis";
  53. case AV_CODEC_ID_OPUS:
  54. return "opus";
  55. }
  56. return NULL;
  57. }
  58. static double get_duration(AVFormatContext *s)
  59. {
  60. int i = 0;
  61. double max = 0.0;
  62. for (i = 0; i < s->nb_streams; i++) {
  63. AVDictionaryEntry *duration = av_dict_get(s->streams[i]->metadata,
  64. DURATION, NULL, 0);
  65. if (!duration || atof(duration->value) < 0) continue;
  66. if (atof(duration->value) > max) max = atof(duration->value);
  67. }
  68. return max / 1000;
  69. }
  70. static void write_header(AVFormatContext *s)
  71. {
  72. double min_buffer_time = 1.0;
  73. avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  74. avio_printf(s->pb, "<MPD\n");
  75. avio_printf(s->pb, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
  76. avio_printf(s->pb, " xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
  77. avio_printf(s->pb, " xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
  78. avio_printf(s->pb, " type=\"static\"\n");
  79. avio_printf(s->pb, " mediaPresentationDuration=\"PT%gS\"\n",
  80. get_duration(s));
  81. avio_printf(s->pb, " minBufferTime=\"PT%gS\"\n",
  82. min_buffer_time);
  83. avio_printf(s->pb, " profiles=\"urn:webm:dash:profile:webm-on-demand:2012\"");
  84. avio_printf(s->pb, ">\n");
  85. }
  86. static void write_footer(AVFormatContext *s)
  87. {
  88. avio_printf(s->pb, "</MPD>\n");
  89. }
  90. static int subsegment_alignment(AVFormatContext *s, AdaptationSet *as) {
  91. int i;
  92. AVDictionaryEntry *gold = av_dict_get(s->streams[as->streams[0]]->metadata,
  93. CUE_TIMESTAMPS, NULL, 0);
  94. if (!gold) return 0;
  95. for (i = 1; i < as->nb_streams; i++) {
  96. AVDictionaryEntry *ts = av_dict_get(s->streams[as->streams[i]]->metadata,
  97. CUE_TIMESTAMPS, NULL, 0);
  98. if (!ts || strncmp(gold->value, ts->value, strlen(gold->value))) return 0;
  99. }
  100. return 1;
  101. }
  102. static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) {
  103. int i;
  104. AVDictionaryEntry *gold_track_num = av_dict_get(s->streams[as->streams[0]]->metadata,
  105. TRACK_NUMBER, NULL, 0);
  106. AVCodecContext *gold_codec = s->streams[as->streams[0]]->codec;
  107. if (!gold_track_num) return 0;
  108. for (i = 1; i < as->nb_streams; i++) {
  109. AVDictionaryEntry *track_num = av_dict_get(s->streams[as->streams[i]]->metadata,
  110. TRACK_NUMBER, NULL, 0);
  111. AVCodecContext *codec = s->streams[as->streams[i]]->codec;
  112. if (!track_num ||
  113. strncmp(gold_track_num->value, track_num->value, strlen(gold_track_num->value)) ||
  114. gold_codec->codec_id != codec->codec_id ||
  115. gold_codec->extradata_size != codec->extradata_size ||
  116. memcmp(gold_codec->extradata, codec->extradata, codec->extradata_size)) {
  117. return 0;
  118. }
  119. }
  120. return 1;
  121. }
  122. /*
  123. * Writes a Representation within an Adaptation Set. Returns 0 on success and
  124. * < 0 on failure.
  125. */
  126. static int write_representation(AVFormatContext *s, AVStream *stream, int id,
  127. int output_width, int output_height,
  128. int output_sample_rate) {
  129. AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0);
  130. AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0);
  131. AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0);
  132. AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0);
  133. AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0);
  134. if (!irange || cues_start == NULL || cues_end == NULL || filename == NULL ||
  135. !bandwidth) {
  136. return -1;
  137. }
  138. avio_printf(s->pb, "<Representation id=\"%d\"", id);
  139. avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value);
  140. if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_width)
  141. avio_printf(s->pb, " width=\"%d\"", stream->codec->width);
  142. if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_height)
  143. avio_printf(s->pb, " height=\"%d\"", stream->codec->height);
  144. if (stream->codec->codec_type = AVMEDIA_TYPE_AUDIO && output_sample_rate)
  145. avio_printf(s->pb, " audioSamplingRate=\"%d\"", stream->codec->sample_rate);
  146. avio_printf(s->pb, ">\n");
  147. avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
  148. avio_printf(s->pb, "<SegmentBase\n");
  149. avio_printf(s->pb, " indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
  150. avio_printf(s->pb, "<Initialization\n");
  151. avio_printf(s->pb, " range=\"0-%s\" />\n", irange->value);
  152. avio_printf(s->pb, "</SegmentBase>\n");
  153. avio_printf(s->pb, "</Representation>\n");
  154. return 0;
  155. }
  156. /*
  157. * Checks if width of all streams are the same. Returns 1 if true, 0 otherwise.
  158. */
  159. static int check_matching_width(AVFormatContext *s, AdaptationSet *as) {
  160. int first_width, i;
  161. if (as->nb_streams < 2) return 1;
  162. first_width = s->streams[as->streams[0]]->codec->width;
  163. for (i = 1; i < as->nb_streams; i++)
  164. if (first_width != s->streams[as->streams[i]]->codec->width)
  165. return 0;
  166. return 1;
  167. }
  168. /*
  169. * Checks if height of all streams are the same. Returns 1 if true, 0 otherwise.
  170. */
  171. static int check_matching_height(AVFormatContext *s, AdaptationSet *as) {
  172. int first_height, i;
  173. if (as->nb_streams < 2) return 1;
  174. first_height = s->streams[as->streams[0]]->codec->height;
  175. for (i = 1; i < as->nb_streams; i++)
  176. if (first_height != s->streams[as->streams[i]]->codec->height)
  177. return 0;
  178. return 1;
  179. }
  180. /*
  181. * Checks if sample rate of all streams are the same. Returns 1 if true, 0 otherwise.
  182. */
  183. static int check_matching_sample_rate(AVFormatContext *s, AdaptationSet *as) {
  184. int first_sample_rate, i;
  185. if (as->nb_streams < 2) return 1;
  186. first_sample_rate = s->streams[as->streams[0]]->codec->sample_rate;
  187. for (i = 1; i < as->nb_streams; i++)
  188. if (first_sample_rate != s->streams[as->streams[i]]->codec->sample_rate)
  189. return 0;
  190. return 1;
  191. }
  192. /*
  193. * Writes an Adaptation Set. Returns 0 on success and < 0 on failure.
  194. */
  195. static int write_adaptation_set(AVFormatContext *s, int as_index)
  196. {
  197. WebMDashMuxContext *w = s->priv_data;
  198. AdaptationSet *as = &w->as[as_index];
  199. AVCodecContext *codec = s->streams[as->streams[0]]->codec;
  200. AVDictionaryEntry *lang;
  201. int i;
  202. static const char boolean[2][6] = { "false", "true" };
  203. int subsegmentStartsWithSAP = 1;
  204. // Width, Height and Sample Rate will go in the AdaptationSet tag if they
  205. // are the same for all contained Representations. otherwise, they will go
  206. // on their respective Representation tag.
  207. int width_in_as = 1, height_in_as = 1, sample_rate_in_as = 1;
  208. if (codec->codec_type == AVMEDIA_TYPE_VIDEO) {
  209. width_in_as = check_matching_width(s, as);
  210. height_in_as = check_matching_height(s, as);
  211. } else {
  212. sample_rate_in_as = check_matching_sample_rate(s, as);
  213. }
  214. avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id);
  215. avio_printf(s->pb, " mimeType=\"%s/webm\"",
  216. codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
  217. avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(codec->codec_id));
  218. lang = av_dict_get(s->streams[as->streams[0]]->metadata, "language", NULL, 0);
  219. if (lang) avio_printf(s->pb, " lang=\"%s\"", lang->value);
  220. if (codec->codec_type == AVMEDIA_TYPE_VIDEO && width_in_as)
  221. avio_printf(s->pb, " width=\"%d\"", codec->width);
  222. if (codec->codec_type == AVMEDIA_TYPE_VIDEO && height_in_as)
  223. avio_printf(s->pb, " height=\"%d\"", codec->height);
  224. if (codec->codec_type == AVMEDIA_TYPE_AUDIO && sample_rate_in_as)
  225. avio_printf(s->pb, " audioSamplingRate=\"%d\"", codec->sample_rate);
  226. avio_printf(s->pb, " bitstreamSwitching=\"%s\"",
  227. boolean[bitstream_switching(s, as)]);
  228. avio_printf(s->pb, " subsegmentAlignment=\"%s\"",
  229. boolean[subsegment_alignment(s, as)]);
  230. for (i = 0; i < as->nb_streams; i++) {
  231. AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata,
  232. CLUSTER_KEYFRAME, NULL, 0);
  233. if (!kf || !strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0;
  234. }
  235. avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP);
  236. avio_printf(s->pb, ">\n");
  237. for (i = 0; i < as->nb_streams; i++) {
  238. write_representation(s, s->streams[as->streams[i]], i,
  239. !width_in_as, !height_in_as, !sample_rate_in_as);
  240. }
  241. avio_printf(s->pb, "</AdaptationSet>\n");
  242. return 0;
  243. }
  /*
   * Parses the first len - 1 characters of p (or fewer if a NUL is met first)
   * as a non-negative decimal integer.
   *
   * Returns the parsed value, or -1 when p is NULL, the token is empty, it
   * contains anything other than the digits 0-9, or the value does not fit in
   * 32 bits. No memory is allocated and p is not modified.
   */
  static int to_integer(char *p, int len)
  {
      int64_t value = 0;
      int i;

      /* len counts the token plus one, so anything below 2 is an empty token. */
      if (!p || len < 2)
          return -1;
      for (i = 0; i < len - 1 && p[i] != '\0'; i++) {
          if (p[i] < '0' || p[i] > '9')
              return -1;
          value = value * 10 + (p[i] - '0');
          /* Checked on every digit, so value never grows beyond ten times
           * INT32_MAX and the 64-bit accumulator cannot overflow. */
          if (value > INT32_MAX)
              return -1;
      }
      if (i == 0)
          return -1;
      return (int)value;
  }
  254. static int parse_adaptation_sets(AVFormatContext *s)
  255. {
  256. WebMDashMuxContext *w = s->priv_data;
  257. char *p = w->adaptation_sets;
  258. char *q;
  259. enum { new_set, parsed_id, parsing_streams } state;
  260. // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
  261. state = new_set;
  262. while (p < w->adaptation_sets + strlen(w->adaptation_sets)) {
  263. if (*p == ' ')
  264. continue;
  265. else if (state == new_set && !strncmp(p, "id=", 3)) {
  266. w->as = av_realloc(w->as, sizeof(*w->as) * ++w->nb_as);
  267. if (w->as == NULL) return -1;
  268. w->as[w->nb_as - 1].nb_streams = 0;
  269. w->as[w->nb_as - 1].streams = NULL;
  270. p += 3; // consume "id="
  271. q = w->as[w->nb_as - 1].id;
  272. while (*p != ',') *q++ = *p++;
  273. *q = 0;
  274. p++;
  275. state = parsed_id;
  276. } else if (state == parsed_id && !strncmp(p, "streams=", 8)) {
  277. p += 8; // consume "streams="
  278. state = parsing_streams;
  279. } else if (state == parsing_streams) {
  280. struct AdaptationSet *as = &w->as[w->nb_as - 1];
  281. q = p;
  282. while (*q != '\0' && *q != ',' && *q != ' ') q++;
  283. as->streams = av_realloc(as->streams, sizeof(*as->streams) * ++as->nb_streams);
  284. if (as->streams == NULL) return -1;
  285. as->streams[as->nb_streams - 1] = to_integer(p, q - p + 1);
  286. if (as->streams[as->nb_streams - 1] < 0) return -1;
  287. if (*q == '\0') break;
  288. if (*q == ' ') state = new_set;
  289. p = ++q;
  290. } else {
  291. return -1;
  292. }
  293. }
  294. return 0;
  295. }
  296. static int webm_dash_manifest_write_header(AVFormatContext *s)
  297. {
  298. int i;
  299. double start = 0.0;
  300. WebMDashMuxContext *w = s->priv_data;
  301. parse_adaptation_sets(s);
  302. write_header(s);
  303. avio_printf(s->pb, "<Period id=\"0\"");
  304. avio_printf(s->pb, " start=\"PT%gS\"", start);
  305. avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s));
  306. avio_printf(s->pb, " >\n");
  307. for (i = 0; i < w->nb_as; i++) {
  308. if (write_adaptation_set(s, i) < 0) return -1;
  309. }
  310. avio_printf(s->pb, "</Period>\n");
  311. write_footer(s);
  312. return 0;
  313. }
  314. static int webm_dash_manifest_write_packet(AVFormatContext *s, AVPacket *pkt)
  315. {
  316. return AVERROR_EOF;
  317. }
  318. static int webm_dash_manifest_write_trailer(AVFormatContext *s)
  319. {
  320. WebMDashMuxContext *w = s->priv_data;
  321. int i;
  322. for (i = 0; i < w->nb_as; i++) {
  323. av_freep(&w->as[i].streams);
  324. }
  325. av_freep(&w->as);
  326. return 0;
  327. }
  328. #define OFFSET(x) offsetof(WebMDashMuxContext, x)
  329. static const AVOption options[] = {
  330. { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
  331. { NULL },
  332. };
  #if CONFIG_WEBM_DASH_MANIFEST_MUXER
  /* AVClass tying the option table to the muxer's private context. */
  static const AVClass webm_dash_class = {
      .class_name = "WebM DASH Manifest muxer",
      .item_name  = av_default_item_name,
      .option     = options,
      .version    = LIBAVUTIL_VERSION_INT,
  };

  /* Muxer definition: the whole manifest is produced by the write_header
   * callback. */
  AVOutputFormat ff_webm_dash_manifest_muxer = {
      .name           = "webm_dash_manifest",
      .long_name      = NULL_IF_CONFIG_SMALL("WebM DASH Manifest"),
      .mime_type      = "application/xml",
      .extensions     = "xml",
      .priv_data_size = sizeof(WebMDashMuxContext),
      .write_header   = webm_dash_manifest_write_header,
      .write_packet   = webm_dash_manifest_write_packet,
      .write_trailer  = webm_dash_manifest_write_trailer,
      .priv_class     = &webm_dash_class,
  };
  #endif