webmdashenc: Support for live stream manifests

This patch adds support for creating DASH manifests for WebM Live Streams. It also updates the documentation and adds a fate test to verify the behavior of the new muxer flag. Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
10 years ago · 26f2e2f3f7
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -1210,7 +1210,17 @@ is the @option{global_header} flag.

 WebM DASH Manifest muxer.

 This muxer implements the WebM DASH Manifest specification to generate the DASH manifest XML.
 This muxer implements the WebM DASH Manifest specification to generate the DASH
 manifest XML. It also supports manifest generation for DASH live streams.

 For more information see:

@itemize @bullet
@item
 WebM DASH Specification: @url{https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification}
@item
 ISO DASH Specification: @url{http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip}
@end itemize

@subsection Options

@@ -1221,6 +1231,28 @@ This muxer supports the following options:
 This option has the following syntax: "id=x,streams=a,b,c id=y,streams=d,e" where x and y are the
 unique identifiers of the adaptation sets and a,b,c,d and e are the indices of the corresponding
 audio and video streams. Any number of adaptation sets can be added using this option.

@item live
 Set this to 1 to create a live stream DASH Manifest. Default: 0.

@item chunk_start_index
 Start index of the first chunk. This will go in the @samp{startNumber} attribute
 of the @samp{SegmentTemplate} element in the manifest. Default: 0.

@item chunk_duration_ms
 Duration of each chunk in milliseconds. This will go in the @samp{duration}
 attribute of the @samp{SegmentTemplate} element in the manifest. Default: 1000.

@item utc_timing_url
 URL of the page that will return the UTC timestamp in ISO format. This will go
 in the @samp{value} attribute of the @samp{UTCTiming} element in the manifest.
 Default: None.

@item time_shift_buffer_depth
 Smallest time-shifting buffer depth (in seconds) for which any Representation
 is guaranteed to be available. This will go in the @samp{timeShiftBufferDepth}
 attribute of the @samp{MPD} element. Default: 60.

@end table

@subsection Example
--- a/libavformat/webmdashenc.c
+++ b/libavformat/webmdashenc.c
@@ -22,8 +22,11 @@
 /*
 * WebM DASH Specification:
 * https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification
 * ISO DASH Specification:
 * http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
 */

 #include <float.h>
 #include <stdint.h>
 #include <string.h>

@@ -34,6 +37,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/dict.h"
 #include "libavutil/opt.h"
 #include "libavutil/time_internal.h"

 typedef struct AdaptationSet {
    char id[10];
@@ -47,6 +51,12 @@ typedef struct WebMDashMuxContext {
    AdaptationSet *as;
    int nb_as;
    int representation_id;
    int is_live;
    int chunk_start_index;
    int chunk_duration;
    char *utc_timing_url;
    double time_shift_buffer_depth;
    int debug_mode;
 } WebMDashMuxContext;

 static const char *get_codec_name(int codec_id)
@@ -79,19 +89,42 @@ static double get_duration(AVFormatContext *s)

 /*
 * Writes the XML declaration and the opening <MPD> element to s->pb. For live
 * manifests it also writes the availabilityStartTime and timeShiftBufferDepth
 * attributes and a <UTCTiming> element. The current UTC time is used as the
 * timestamp unless debug_mode is set, in which case an empty string is
 * written instead.
 */
 static void write_header(AVFormatContext *s)
 {
    WebMDashMuxContext *w = s->priv_data;
    double min_buffer_time = 1.0;
    time_t local_time;
    struct tm *gmt, gmt_buffer;
    // "YYYY-MM-DDTHH:MM:SSZ" is 20 characters plus the terminating NUL. A
    // fixed-size local buffer cannot fail to allocate and needs no free.
    char gmt_iso[21];
    avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    avio_printf(s->pb, "<MPD\n");
    avio_printf(s->pb, "  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
    avio_printf(s->pb, "  xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
    avio_printf(s->pb, "  xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
    avio_printf(s->pb, "  type=\"static\"\n");
    avio_printf(s->pb, "  mediaPresentationDuration=\"PT%gS\"\n",
                get_duration(s));
    avio_printf(s->pb, "  minBufferTime=\"PT%gS\"\n",
                min_buffer_time);
    avio_printf(s->pb, "  profiles=\"urn:webm:dash:profile:webm-on-demand:2012\"");
    avio_printf(s->pb, ">\n");
    avio_printf(s->pb, "  type=\"%s\"\n", w->is_live ? "dynamic" : "static");
    if (!w->is_live) {
        avio_printf(s->pb, "  mediaPresentationDuration=\"PT%gS\"\n",
                    get_duration(s));
    }
    avio_printf(s->pb, "  minBufferTime=\"PT%gS\"\n", min_buffer_time);
    avio_printf(s->pb, "  profiles=\"%s\"%s",
                w->is_live ? "urn:mpeg:dash:profile:isoff-live:2011" : "urn:webm:dash:profile:webm-on-demand:2012",
                w->is_live ? "\n" : ">\n");
    time(&local_time);
    gmt = gmtime_r(&local_time, &gmt_buffer);
    // gmtime_r() may return NULL and strftime() returns 0 when the result does
    // not fit; fall back to an empty timestamp rather than using an undefined
    // buffer. The spelled-out format yields the same text as "%FT%TZ" but does
    // not rely on the C99-only %F and %T conversions.
    if (!gmt || !strftime(gmt_iso, sizeof(gmt_iso), "%Y-%m-%dT%H:%M:%SZ", gmt)) {
        gmt_iso[0] = '\0';
    }
    if (w->debug_mode) {
        // Deterministic output for tests: blank out the timestamp.
        av_strlcpy(gmt_iso, "", 1);
    }
    if (w->is_live) {
        avio_printf(s->pb, "  availabilityStartTime=\"%s\"\n", gmt_iso);
        avio_printf(s->pb, "  timeShiftBufferDepth=\"PT%gS\"", w->time_shift_buffer_depth);
        avio_printf(s->pb, ">\n");
        avio_printf(s->pb, "<UTCTiming\n");
        avio_printf(s->pb, "  schemeIdUri=\"%s\"\n",
                    w->utc_timing_url ? "urn:mpeg:dash:utc:http-iso:2014" : "urn:mpeg:dash:utc:direct:2012");
        avio_printf(s->pb, "  value=\"%s\"/>\n",
                    w->utc_timing_url ? w->utc_timing_url : gmt_iso);
    }
 }

 static void write_footer(AVFormatContext *s)
@@ -137,33 +170,47 @@ static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) {
 * Writes a Representation within an Adaptation Set. Returns 0 on success and
 * < 0 on failure.
 */
 static int write_representation(AVFormatContext *s, AVStream *stream, int id,
 static int write_representation(AVFormatContext *s, AVStream *stream, char *id,
                                 int output_width, int output_height,
                                 int output_sample_rate) {
    // id is written verbatim as the Representation id attribute.
    // output_width, output_height and output_sample_rate are flags: when
    // non-zero, the matching attribute of the stream is written on the
    // Representation element. Returns -1 when required stream metadata is
    // missing: the filename for live manifests; otherwise any of the
    // initialization range, cues start, cues end, filename or bandwidth.
    WebMDashMuxContext *w = s->priv_data;
    AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0);
    AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0);
    AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0);
    AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0);
    AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0);
    if (!irange || cues_start == NULL || cues_end == NULL || filename == NULL ||
        !bandwidth) {
    if ((w->is_live && (!filename)) ||
        (!w->is_live && (!irange || !cues_start || !cues_end || !filename || !bandwidth))) {
        return -1;
    }
    avio_printf(s->pb, "<Representation id=\"%d\"", id);
    avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value);
    avio_printf(s->pb, "<Representation id=\"%s\"", id);
    // FIXME: For live, This should be obtained from the input file or as an AVOption.
    avio_printf(s->pb, " bandwidth=\"%s\"",
                w->is_live ? (stream->codec->codec_type == AVMEDIA_TYPE_AUDIO ? "128000" : "1000000") : bandwidth->value);
    if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_width)
        avio_printf(s->pb, " width=\"%d\"", stream->codec->width);
    if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_height)
        avio_printf(s->pb, " height=\"%d\"", stream->codec->height);
    // This must be a comparison, not an assignment: codec_type belongs to the
    // stream and is read again below when the mimeType is chosen.
    if (stream->codec->codec_type == AVMEDIA_TYPE_AUDIO && output_sample_rate)
        avio_printf(s->pb, " audioSamplingRate=\"%d\"", stream->codec->sample_rate);
    avio_printf(s->pb, ">\n");
    avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
    avio_printf(s->pb, "<SegmentBase\n");
    avio_printf(s->pb, "  indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
    avio_printf(s->pb, "<Initialization\n");
    avio_printf(s->pb, "  range=\"0-%s\" />\n", irange->value);
    avio_printf(s->pb, "</SegmentBase>\n");
    if (w->is_live) {
        // For live streams, Codec and Mime Type always go in the Representation tag.
        avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(stream->codec->codec_id));
        avio_printf(s->pb, " mimeType=\"%s/webm\"",
                    stream->codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
        // For live streams, subsegments always start with key frames. So this
        // is always 1.
        avio_printf(s->pb, " startsWithSAP=\"1\"");
        avio_printf(s->pb, ">");
    } else {
        avio_printf(s->pb, ">\n");
        avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
        avio_printf(s->pb, "<SegmentBase\n");
        avio_printf(s->pb, "  indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
        avio_printf(s->pb, "<Initialization\n");
        avio_printf(s->pb, "  range=\"0-%s\" />\n", irange->value);
        avio_printf(s->pb, "</SegmentBase>\n");
    }
    avio_printf(s->pb, "</Representation>\n");
    return 0;
 }
@@ -207,6 +254,51 @@ static int check_matching_sample_rate(AVFormatContext *s, AdaptationSet *as) {
    return 1;
 }

 /*
 * Parses a live header filename and computes the representation id,
 * initialization pattern and the media pattern. Pass NULL for any of the 3
 * output pointers you don't need. Returns 0 on success and non-zero on failure.
 *
 * Name of the header file should conform to the following pattern:
 * <file_description>_<representation_id>.hdr where <file_description> can be
 * anything. The chunks should be named according to the following pattern:
 * <file_description>_<representation_id>_<chunk_number>.chk
 *
 * On success every requested output is a newly allocated string that the
 * caller must release with av_free(). On failure no output is written and
 * nothing is left allocated.
 */
 static int parse_filename(char *filename, char **representation_id,
                           char **initialization_pattern, char **media_pattern) {
    char *last_underscore = NULL;
    char *id_start = NULL;
    char *period_pos = NULL;
    char *filename_str = NULL;
    // Results are built in locals and only handed to the caller once all of
    // them succeeded, so a late failure cannot leak an earlier allocation.
    char *rep_id = NULL;
    char *init_pattern = NULL;
    char *chunk_pattern = NULL;
    size_t id_len;
    int ret = 0;

    if (!filename) return AVERROR(EINVAL);
    // Work on a private copy: the string is cut at the last underscore below.
    filename_str = av_strdup(filename);
    if (!filename_str) return AVERROR(ENOMEM);

    // The representation id sits between the last '_' and the first '.' that
    // follows it.
    last_underscore = strrchr(filename_str, '_');
    if (!last_underscore) {
        ret = -1;
        goto end;
    }
    id_start = last_underscore + 1;
    period_pos = strchr(id_start, '.');
    if (!period_pos) {
        ret = -1;
        goto end;
    }
    id_len = period_pos - id_start;
    // Terminate the copy at the underscore so that it holds only
    // <file_description>.
    *last_underscore = 0;

    if (representation_id) {
        rep_id = av_malloc(id_len + 1);
        if (!rep_id) {
            ret = AVERROR(ENOMEM);
            goto end;
        }
        av_strlcpy(rep_id, id_start, id_len + 1);
    }
    if (initialization_pattern) {
        init_pattern = av_asprintf("%s_$RepresentationID$.hdr", filename_str);
        if (!init_pattern) {
            ret = AVERROR(ENOMEM);
            goto end;
        }
    }
    if (media_pattern) {
        chunk_pattern = av_asprintf("%s_$RepresentationID$_$Number$.chk",
                                    filename_str);
        if (!chunk_pattern) {
            ret = AVERROR(ENOMEM);
            goto end;
        }
    }

    // Success: transfer ownership to the caller and clear the locals so the
    // cleanup below does not release what was just handed out.
    if (representation_id) {
        *representation_id = rep_id;
        rep_id = NULL;
    }
    if (initialization_pattern) {
        *initialization_pattern = init_pattern;
        init_pattern = NULL;
    }
    if (media_pattern) {
        *media_pattern = chunk_pattern;
        chunk_pattern = NULL;
    }

 end:
    // av_free(NULL) is a no-op, so this is safe on every path.
    av_free(rep_id);
    av_free(init_pattern);
    av_free(chunk_pattern);
    av_free(filename_str);
    return ret;
 }

 /*
 * Writes an Adaptation Set. Returns 0 on success and < 0 on failure.
 */
@@ -222,13 +314,14 @@ static int write_adaptation_set(AVFormatContext *s, int as_index)

    // Width, Height and Sample Rate will go in the AdaptationSet tag if they
    // are the same for all contained Representations. otherwise, they will go
    // on their respective Representation tag.
    // on their respective Representation tag. For live streams, they always go
    // in the Representation tag.
    int width_in_as = 1, height_in_as = 1, sample_rate_in_as = 1;
    if (codec->codec_type == AVMEDIA_TYPE_VIDEO) {
      width_in_as = check_matching_width(s, as);
      height_in_as = check_matching_height(s, as);
      width_in_as = !w->is_live && check_matching_width(s, as);
      height_in_as = !w->is_live && check_matching_height(s, as);
    } else {
      sample_rate_in_as = check_matching_sample_rate(s, as);
      sample_rate_in_as = !w->is_live && check_matching_sample_rate(s, as);
    }

    avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id);
@@ -249,19 +342,53 @@ static int write_adaptation_set(AVFormatContext *s, int as_index)
    avio_printf(s->pb, " bitstreamSwitching=\"%s\"",
                boolean[bitstream_switching(s, as)]);
    avio_printf(s->pb, " subsegmentAlignment=\"%s\"",
                boolean[subsegment_alignment(s, as)]);
                boolean[w->is_live || subsegment_alignment(s, as)]);

    for (i = 0; i < as->nb_streams; i++) {
        AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata,
                                            CLUSTER_KEYFRAME, NULL, 0);
        if (!kf || !strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0;
        if (!w->is_live && (!kf || !strncmp(kf->value, "0", 1))) subsegmentStartsWithSAP = 0;
    }
    avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP);
    avio_printf(s->pb, ">\n");

    if (w->is_live) {
        AVDictionaryEntry *filename =
            av_dict_get(s->streams[as->streams[0]]->metadata, FILENAME, NULL, 0);
        char *initialization_pattern = NULL;
        char *media_pattern = NULL;
        int ret = parse_filename(filename->value, NULL, &initialization_pattern,
                                 &media_pattern);
        if (ret) return ret;
        avio_printf(s->pb, "<ContentComponent id=\"1\" type=\"%s\"/>\n",
                    codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
        avio_printf(s->pb, "<SegmentTemplate");
        avio_printf(s->pb, " timescale=\"1000\"");
        avio_printf(s->pb, " duration=\"%d\"", w->chunk_duration);
        avio_printf(s->pb, " media=\"%s\"", media_pattern);
        avio_printf(s->pb, " startNumber=\"%d\"", w->chunk_start_index);
        avio_printf(s->pb, " initialization=\"%s\"", initialization_pattern);
        avio_printf(s->pb, "/>\n");
        av_free(initialization_pattern);
        av_free(media_pattern);
    }

    for (i = 0; i < as->nb_streams; i++) {
        write_representation(s, s->streams[as->streams[i]], w->representation_id++,
        char *representation_id = NULL;
        if (w->is_live) {
            AVDictionaryEntry *filename =
                av_dict_get(s->streams[as->streams[i]]->metadata, FILENAME, NULL, 0);
            if (!filename ||
                parse_filename(filename->value, &representation_id, NULL, NULL)) {
                return -1;
            }
        } else {
            representation_id = av_asprintf("%d", w->representation_id++);
            if (!representation_id) return -1;
        }
        write_representation(s, s->streams[as->streams[i]], representation_id,
                             !width_in_as, !height_in_as, !sample_rate_in_as);
        av_free(representation_id);
    }
    avio_printf(s->pb, "</AdaptationSet>\n");
    return 0;
@@ -333,7 +460,9 @@ static int webm_dash_manifest_write_header(AVFormatContext *s)
    write_header(s);
    avio_printf(s->pb, "<Period id=\"0\"");
    avio_printf(s->pb, " start=\"PT%gS\"", start);
    avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s));
    if (!w->is_live) {
        avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s));
    }
    avio_printf(s->pb, " >\n");

    for (i = 0; i < w->nb_as; i++) {
@@ -364,6 +493,12 @@ static int webm_dash_manifest_write_trailer(AVFormatContext *s)
 #define OFFSET(x) offsetof(WebMDashMuxContext, x)
 static const AVOption options[] = {
    { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
    { "debug_mode", "[private option - users should never set this]. set this to 1 to create deterministic output", OFFSET(debug_mode), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
    { "live", "set this to 1 to create a live stream manifest", OFFSET(is_live), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
    { "chunk_start_index",  "start index of the chunk", OFFSET(chunk_start_index), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
    { "chunk_duration_ms", "duration of each chunk (in milliseconds)", OFFSET(chunk_duration), AV_OPT_TYPE_INT, {.i64 = 1000}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
    { "utc_timing_url", "URL of the page that will return the UTC timestamp in ISO format", OFFSET(utc_timing_url), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
    { "time_shift_buffer_depth", "Smallest time (in seconds) shifting buffer for which any Representation is guaranteed to be available.", OFFSET(time_shift_buffer_depth), AV_OPT_TYPE_DOUBLE, { .dbl = 60.0 }, 1.0, DBL_MAX, AV_OPT_FLAG_ENCODING_PARAM },
    { NULL },
 };

--- a/tests/fate/vpx.mak
+++ b/tests/fate/vpx.mak
@@ -43,6 +43,9 @@ fate-webm-dash-manifest-unaligned-audio-streams: CMD = run ffmpeg -f webm_dash_m
 FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-representations
 fate-webm-dash-manifest-representations: CMD = run ffmpeg -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video4.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -

 FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-live
 fate-webm-dash-manifest-live: CMD = run ffmpeg -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_video_360.hdr -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_audio_171.hdr -c copy -map 0 -map 1 -f webm_dash_manifest -live 1 -adaptation_sets "id=0,streams=0 id=1,streams=1" -chunk_start_index 1 -chunk_duration_ms 5000 -time_shift_buffer_depth 7200 -debug_mode 1 -

 FATE_SAMPLES_AVCONV += $(FATE_VP6-yes)
 fate-vp6: $(FATE_VP6-yes)

--- a/tests/ref/fate/webm-dash-manifest-live
+++ b/tests/ref/fate/webm-dash-manifest-live
@@ -0,0 +1,26 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <MPD
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns="urn:mpeg:DASH:schema:MPD:2011"
  xsi:schemaLocation="urn:mpeg:DASH:schema:MPD:2011"
  type="dynamic"
  minBufferTime="PT1S"
  profiles="urn:mpeg:dash:profile:isoff-live:2011"
  availabilityStartTime=""
  timeShiftBufferDepth="PT7200S">
 <UTCTiming
  schemeIdUri="urn:mpeg:dash:utc:direct:2012"
  value=""/>
 <Period id="0" start="PT0S" >
 <AdaptationSet id="0" mimeType="video/webm" codecs="vp9" bitstreamSwitching="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
 <ContentComponent id="1" type="video"/>
 <SegmentTemplate timescale="1000" duration="5000" media="dash_live_video_$RepresentationID$_$Number$.chk" startNumber="1" initialization="dash_live_video_$RepresentationID$.hdr"/>
 <Representation id="360" bandwidth="1000000" width="640" height="360" codecs="vp9" mimeType="video/webm" startsWithSAP="1"></Representation>
 </AdaptationSet>
 <AdaptationSet id="1" mimeType="audio/webm" codecs="vorbis" bitstreamSwitching="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
 <ContentComponent id="1" type="audio"/>
 <SegmentTemplate timescale="1000" duration="5000" media="dash_live_audio_$RepresentationID$_$Number$.chk" startNumber="1" initialization="dash_live_audio_$RepresentationID$.hdr"/>
 <Representation id="171" bandwidth="128000" audioSamplingRate="32000" codecs="vorbis" mimeType="audio/webm" startsWithSAP="1"></Representation>
 </AdaptationSet>
 </Period>
 </MPD>