Merge remote-tracking branch 'qatar/master'

* qatar/master:
  id3v2: fix doxy comment - 'machine byte order' makes no sense on char arrays
  VC1: restore mistakenly removed code
  twinvq: check output buffer size before decoding
  twinvq: return an error when the packet size is too small
  lavf: export some forgotten symbols with non-av prefixes.
  swscale: update altivec yuv2planeX asm to new per-plane API.
  swscale: make yuv2yuvX_10_sse2/avx 8/9/16-bits aware.
  yuv2planeX10 SIMD
  swscale: decide whether to use yuv2plane1/X on a per-plane basis.
  swscale: reintroduce full precision in 16-bit output.
  Split up yuv2yuvX functions
  Split out yuv2yuv1 luma and chroma in order to make them generic DSP functions
  lavc: replace references to deprecated AVCodecContext.error_recognition to use AVCodecContext.err_recognition
  lavc: translate non-flag-based er options into flag-based ef options at codec open
  add -err_filter AVOptions to access flag-based error recognition
  h264_weight: initialize "height" function argument properly.
  presets: spelling error in libvpx 1080p50_60
  avplay: fix fullscreen behaviour with SDL 1.2.14 on Mac OS X

Conflicts:
  ffplay.c
  libavformat/libavformat.v
  libswscale/swscale.c
  libswscale/x86/swscale_template.c
  tests/ref/lavfi/pixfmts_scale

Merged-by: Michael Niedermayer <michaelni@gmx.at>
14 years ago · f97faf6751
--- a/ffplay.c
+++ b/ffplay.c
@@ -1349,7 +1349,7 @@ static int queue_picture(VideoState *is, AVFrame *src_frame, double pts1, int64_
 #endif
        SDL_Event event;

        vp->allocated = 0;
        vp->allocated  = 0;
        vp->reallocate = 0;

        /* the allocation must be done in the main thread to avoid
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -596,7 +596,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
            int ret = set_default_channel_config(avctx, new_che_pos, ac->m4ac.chan_config);
            if (!ret)
                output_configure(ac, ac->che_pos, new_che_pos, ac->m4ac.chan_config, OC_GLOBAL_HDR);
            else if (avctx->error_recognition >= FF_ER_EXPLODE)
            else if (avctx->err_recognition & AV_EF_EXPLODE)
                return AVERROR_INVALIDDATA;
        }
    }
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -1359,7 +1359,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
        if (s->frame_size > buf_size) {
            av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
            err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
        } else if (avctx->error_recognition >= FF_ER_CAREFUL) {
        } else if (avctx->err_recognition & AV_EF_CRCCHECK) {
            /* check for crc mismatch */
            if (av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, &buf[2], s->frame_size-2)) {
                av_log(avctx, AV_LOG_ERROR, "frame CRC mismatch\n");
--- a/libavcodec/alsdec.c
+++ b/libavcodec/alsdec.c
@@ -393,7 +393,7 @@ static av_cold int read_specific_config(ALSDecContext *ctx)
        if (get_bits_left(&gb) < 32)
            return -1;

        if (avctx->error_recognition >= FF_ER_CAREFUL) {
        if (avctx->err_recognition & AV_EF_CRCCHECK) {
            ctx->crc_table = av_crc_get_table(AV_CRC_32_IEEE_LE);
            ctx->crc       = 0xFFFFFFFF;
            ctx->crc_org   = ~get_bits_long(&gb, 32);
@@ -1476,7 +1476,7 @@ static int decode_frame(AVCodecContext *avctx,
    }

    // update CRC
    if (sconf->crc_enabled && avctx->error_recognition >= FF_ER_CAREFUL) {
    if (sconf->crc_enabled && (avctx->err_recognition & AV_EF_CRCCHECK)) {
        int swap = HAVE_BIGENDIAN != sconf->msb_first;

        if (ctx->avctx->bits_per_raw_sample == 24) {
@@ -1710,7 +1710,7 @@ static av_cold int decode_init(AVCodecContext *avctx)

    // allocate crc buffer
    if (HAVE_BIGENDIAN != sconf->msb_first && sconf->crc_enabled &&
        avctx->error_recognition >= FF_ER_CAREFUL) {
        (avctx->err_recognition & AV_EF_CRCCHECK)) {
        ctx->crc_buffer = av_malloc(sizeof(*ctx->crc_buffer) *
                                    ctx->cur_frame_length *
                                    avctx->channels *
--- a/libavcodec/h261dec.c
+++ b/libavcodec/h261dec.c
@@ -136,7 +136,7 @@ static int h261_decode_gob_header(H261Context *h){

    if(s->qscale==0) {
        av_log(s->avctx, AV_LOG_ERROR, "qscale has forbidden 0 value\n");
        if (s->avctx->error_recognition >= FF_ER_COMPLIANT)
        if (s->avctx->err_recognition & AV_EF_BITSTREAM)
            return -1;
    }

--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -732,7 +732,7 @@ intrax8_decoded:
 av_log(avctx, AV_LOG_DEBUG, "%"PRId64"\n", rdtsc()-time);
 #endif

    return (ret && avctx->error_recognition >= FF_ER_EXPLODE)?ret:get_consumed_bytes(s, buf_size);
    return (ret && (avctx->err_recognition & AV_EF_EXPLODE))?ret:get_consumed_bytes(s, buf_size);
 }

 AVCodec ff_h263_decoder = {
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2893,7 +2893,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
            ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
            ff_generate_sliding_window_mmcos(h);
            if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
                s->avctx->error_recognition >= FF_ER_EXPLODE)
                (s->avctx->err_recognition & AV_EF_EXPLODE))
                return AVERROR_INVALIDDATA;
            /* Error concealment: if a ref is missing, copy the previous ref in its place.
             * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
@@ -3072,7 +3072,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
    }

    if(h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
       s->avctx->error_recognition >= FF_ER_EXPLODE)
       (s->avctx->err_recognition & AV_EF_EXPLODE))
        return AVERROR_INVALIDDATA;

    if(FRAME_MBAFF){
--- a/libavcodec/h264_refs.c
+++ b/libavcodec/h264_refs.c
@@ -654,7 +654,7 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){

    print_short_term(h);
    print_long_term(h);
    return h->s.avctx->error_recognition >= FF_ER_EXPLODE ? err : 0;
    return (h->s.avctx->err_recognition & AV_EF_EXPLODE) ? err : 0;
 }

 int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
--- a/libavcodec/mjpegbdec.c
+++ b/libavcodec/mjpegbdec.c
@@ -82,7 +82,7 @@ read_header:
        init_get_bits(&s->gb, buf_ptr+dqt_offs, (buf_end - (buf_ptr+dqt_offs))*8);
        s->start_code = DQT;
        if (ff_mjpeg_decode_dqt(s) < 0 &&
            avctx->error_recognition >= FF_ER_EXPLODE)
            (avctx->err_recognition & AV_EF_EXPLODE))
          return AVERROR_INVALIDDATA;
    }

@@ -116,7 +116,7 @@ read_header:
        s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
        s->start_code = SOS;
        if (ff_mjpeg_decode_sos(s, NULL, NULL) < 0 &&
            avctx->error_recognition >= FF_ER_EXPLODE)
            (avctx->err_recognition & AV_EF_EXPLODE))
          return AVERROR_INVALIDDATA;
    }

--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -1522,7 +1522,7 @@ eoi_parser:
                        break;
                    }
                    if (ff_mjpeg_decode_sos(s, NULL, NULL) < 0 &&
                        avctx->error_recognition >= FF_ER_EXPLODE)
                        (avctx->err_recognition & AV_EF_EXPLODE))
                      return AVERROR_INVALIDDATA;
                    break;
                case DRI:
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -1378,7 +1378,7 @@ static int mpeg1_decode_picture(AVCodecContext *avctx,
    if (s->pict_type == AV_PICTURE_TYPE_P || s->pict_type == AV_PICTURE_TYPE_B) {
        s->full_pel[0] = get_bits1(&s->gb);
        f_code = get_bits(&s->gb, 3);
        if (f_code == 0 && avctx->error_recognition >= FF_ER_COMPLIANT)
        if (f_code == 0 && (avctx->err_recognition & AV_EF_BITSTREAM))
            return -1;
        s->mpeg_f_code[0][0] = f_code;
        s->mpeg_f_code[0][1] = f_code;
@@ -1386,7 +1386,7 @@ static int mpeg1_decode_picture(AVCodecContext *avctx,
    if (s->pict_type == AV_PICTURE_TYPE_B) {
        s->full_pel[1] = get_bits1(&s->gb);
        f_code = get_bits(&s->gb, 3);
        if (f_code == 0 && avctx->error_recognition >= FF_ER_COMPLIANT)
        if (f_code == 0 && (avctx->err_recognition & AV_EF_BITSTREAM))
            return -1;
        s->mpeg_f_code[1][0] = f_code;
        s->mpeg_f_code[1][1] = f_code;
@@ -1819,7 +1819,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
                             && s->progressive_frame == 0 /* vbv_delay == 0xBBB || 0xE10*/;

                if (left < 0 || (left && show_bits(&s->gb, FFMIN(left, 23)) && !is_d10)
                    || (avctx->error_recognition >= FF_ER_AGGRESSIVE && left > 8)) {
                    || ((avctx->err_recognition & AV_EF_BUFFER) && left > 8)) {
                    av_log(avctx, AV_LOG_ERROR, "end mismatch left=%d %0X\n", left, show_bits(&s->gb, FFMIN(left, 23)));
                    return -1;
                } else
@@ -1911,7 +1911,7 @@ static int slice_decode_thread(AVCodecContext *c, void *arg)
 //av_log(c, AV_LOG_DEBUG, "ret:%d resync:%d/%d mb:%d/%d ts:%d/%d ec:%d\n",
 //ret, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, s->start_mb_y, s->end_mb_y, s->error_count);
        if (ret < 0) {
            if (c->error_recognition >= FF_ER_EXPLODE)
            if (c->err_recognition & AV_EF_EXPLODE)
                return ret;
            if (s->resync_mb_x >= 0 && s->resync_mb_y >= 0)
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, AC_ERROR | DC_ERROR | MV_ERROR);
@@ -1999,7 +1999,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
    s->aspect_ratio_info = get_bits(&s->gb, 4);
    if (s->aspect_ratio_info == 0) {
        av_log(avctx, AV_LOG_ERROR, "aspect ratio has forbidden 0 value\n");
        if (avctx->error_recognition >= FF_ER_COMPLIANT)
        if (avctx->err_recognition & AV_EF_BITSTREAM)
            return -1;
    }
    s->frame_rate_index = get_bits(&s->gb, 4);
@@ -2287,7 +2287,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx,

    if (avctx->extradata && !avctx->frame_number) {
        int ret = decode_chunks(avctx, picture, data_size, avctx->extradata, avctx->extradata_size);
        if (ret < 0 && avctx->error_recognition >= FF_ER_EXPLODE)
        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
            return ret;
    }

@@ -2347,7 +2347,7 @@ static int decode_chunks(AVCodecContext *avctx,
                    s->sync=1;
            } else {
                av_log(avctx, AV_LOG_ERROR, "ignoring SEQ_START_CODE after %X\n", last_code);
                if (avctx->error_recognition >= FF_ER_EXPLODE)
                if (avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_INVALIDDATA;
            }
            break;
@@ -2381,7 +2381,7 @@ static int decode_chunks(AVCodecContext *avctx,
                last_code = PICTURE_START_CODE;
            } else {
                av_log(avctx, AV_LOG_ERROR, "ignoring pic after %X\n", last_code);
                if (avctx->error_recognition >= FF_ER_EXPLODE)
                if (avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_INVALIDDATA;
            }
            break;
@@ -2394,7 +2394,7 @@ static int decode_chunks(AVCodecContext *avctx,
                mpeg_decode_sequence_extension(s);
                } else {
                    av_log(avctx, AV_LOG_ERROR, "ignoring seq ext after %X\n", last_code);
                    if (avctx->error_recognition >= FF_ER_EXPLODE)
                    if (avctx->err_recognition & AV_EF_EXPLODE)
                        return AVERROR_INVALIDDATA;
                }
                break;
@@ -2412,7 +2412,7 @@ static int decode_chunks(AVCodecContext *avctx,
                    mpeg_decode_picture_coding_extension(s);
                } else {
                    av_log(avctx, AV_LOG_ERROR, "ignoring pic cod ext after %X\n", last_code);
                    if (avctx->error_recognition >= FF_ER_EXPLODE)
                    if (avctx->err_recognition & AV_EF_EXPLODE)
                        return AVERROR_INVALIDDATA;
                }
                break;
@@ -2428,7 +2428,7 @@ static int decode_chunks(AVCodecContext *avctx,
                s->sync=1;
            } else {
                av_log(avctx, AV_LOG_ERROR, "ignoring GOP_START_CODE after %X\n", last_code);
                if (avctx->error_recognition >= FF_ER_EXPLODE)
                if (avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_INVALIDDATA;
            }
            break;
@@ -2475,7 +2475,7 @@ static int decode_chunks(AVCodecContext *avctx,

                if (!s2->pict_type) {
                    av_log(avctx, AV_LOG_ERROR, "Missing picture start code\n");
                    if (avctx->error_recognition >= FF_ER_EXPLODE)
                    if (avctx->err_recognition & AV_EF_EXPLODE)
                        return AVERROR_INVALIDDATA;
                    break;
                }
@@ -2516,7 +2516,7 @@ static int decode_chunks(AVCodecContext *avctx,
                    emms_c();

                    if (ret < 0) {
                        if (avctx->error_recognition >= FF_ER_EXPLODE)
                        if (avctx->err_recognition & AV_EF_EXPLODE)
                            return ret;
                        if (s2->resync_mb_x >= 0 && s2->resync_mb_y >= 0)
                            ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR | DC_ERROR | MV_ERROR);
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -79,7 +79,7 @@ typedef struct MPADecodeContext {
 #endif
    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
    int dither_state;
    int error_recognition;
    int err_recognition;
    AVCodecContext* avctx;
    MPADSPContext mpadsp;
 } MPADecodeContext;
@@ -280,7 +280,7 @@ static av_cold int decode_init(AVCodecContext * avctx)
    ff_mpadsp_init(&s->mpadsp);

    avctx->sample_fmt= OUT_FMT;
    s->error_recognition= avctx->error_recognition;
    s->err_recognition = avctx->err_recognition;

    if (!init && !avctx->parse_only) {
        int offset;
@@ -1104,7 +1104,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
                s_index -= 4;
                skip_bits_long(&s->gb, last_pos - pos);
                av_log(s->avctx, AV_LOG_INFO, "overread, skip %d enddists: %d %d\n", last_pos - pos, end_pos-pos, end_pos2-pos);
                if(s->error_recognition >= FF_ER_COMPLIANT)
                if(s->err_recognition & AV_EF_BITSTREAM)
                    s_index=0;
                break;
            }
@@ -1134,10 +1134,10 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
    /* skip extension bits */
    bits_left = end_pos2 - get_bits_count(&s->gb);
 //av_log(NULL, AV_LOG_ERROR, "left:%d buf:%p\n", bits_left, s->in_gb.buffer);
    if (bits_left < 0 && s->error_recognition >= FF_ER_COMPLIANT) {
    if (bits_left < 0 && (s->err_recognition & AV_EF_BITSTREAM)) {
        av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
        s_index=0;
    }else if(bits_left > 0 && s->error_recognition >= FF_ER_AGGRESSIVE){
    }else if(bits_left > 0 && (s->err_recognition & AV_EF_BUFFER)){
        av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
        s_index=0;
    }
--- a/libavcodec/mxpegdec.c
+++ b/libavcodec/mxpegdec.c
@@ -275,11 +275,11 @@ static int mxpeg_decode_frame(AVCodecContext *avctx,
                    }

                    ret = ff_mjpeg_decode_sos(jpg, s->mxm_bitmask, reference_ptr);
                    if (ret < 0 && avctx->error_recognition >= FF_ER_EXPLODE)
                    if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
                        return ret;
                } else {
                    ret = ff_mjpeg_decode_sos(jpg, NULL, NULL);
                    if (ret < 0 && avctx->error_recognition >= FF_ER_EXPLODE)
                    if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
                        return ret;
                }

--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -201,14 +201,19 @@ static const AVOption options[]={
 {"unofficial", "allow unofficial extensions", 0, AV_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_UNOFFICIAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
 {"experimental", "allow non standardized experimental things", 0, AV_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_EXPERIMENTAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
 {"b_qoffset", "qp offset between P and B frames", OFFSET(b_quant_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
 #if FF_API_ER
 {"er", "set error detection aggressivity", OFFSET(error_recognition), AV_OPT_TYPE_INT, {.dbl = FF_ER_CAREFUL }, INT_MIN, INT_MAX, A|V|D, "er"},
 {"careful", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_CAREFUL }, INT_MIN, INT_MAX, V|D, "er"},
 {"compliant", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_COMPLIANT }, INT_MIN, INT_MAX, V|D, "er"},
 {"aggressive", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_AGGRESSIVE }, INT_MIN, INT_MAX, V|D, "er"},
 #if FF_API_ER
 {"very_aggressive", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_VERY_AGGRESSIVE }, INT_MIN, INT_MAX, V|D, "er"},
 #endif /* FF_API_ER */
 {"explode", "abort decoding on error recognition", 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_EXPLODE }, INT_MIN, INT_MAX, V|D, "er"},
 #endif /* FF_API_ER */
 {"err_filter", "set error detection filter flags", OFFSET(err_recognition), AV_OPT_TYPE_FLAGS, {.dbl = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, A|V|D, "err_filter"},
 {"crccheck", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, V|D, "err_filter"},
 {"bitstream", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = AV_EF_BITSTREAM }, INT_MIN, INT_MAX, V|D, "err_filter"},
 {"buffer", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = AV_EF_BUFFER }, INT_MIN, INT_MAX, V|D, "err_filter"},
 {"explode", "abort decoding on minor error recognition", 0, AV_OPT_TYPE_CONST, {.dbl = AV_EF_EXPLODE }, INT_MIN, INT_MAX, V|D, "err_filter"},
 {"has_b_frames", NULL, OFFSET(has_b_frames), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
 {"block_align", NULL, OFFSET(block_align), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
 {"parse_only", NULL, OFFSET(parse_only), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
--- a/libavcodec/twinvq.c
+++ b/libavcodec/twinvq.c
@@ -822,7 +822,7 @@ static int twin_decode_frame(AVCodecContext * avctx, void *data,
    const ModeTab *mtab = tctx->mtab;
    float *out = data;
    enum FrameType ftype;
    int window_type;
    int window_type, out_size;
    static const enum FrameType wtype_to_ftype_table[] = {
        FT_LONG,   FT_LONG, FT_SHORT, FT_LONG,
        FT_MEDIUM, FT_LONG, FT_LONG,  FT_MEDIUM, FT_MEDIUM
@@ -831,8 +831,14 @@ static int twin_decode_frame(AVCodecContext * avctx, void *data,
    if (buf_size*8 < avctx->bit_rate*mtab->size/avctx->sample_rate + 8) {
        av_log(avctx, AV_LOG_ERROR,
               "Frame too small (%d bytes). Truncated file?\n", buf_size);
        *data_size = 0;
        return buf_size;
        return AVERROR(EINVAL);
    }

    out_size = mtab->size * avctx->channels *
               av_get_bytes_per_sample(avctx->sample_fmt);
    if (*data_size < out_size) {
        av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n");
        return AVERROR(EINVAL);
    }

    init_get_bits(&gb, buf, buf_size * 8);
@@ -857,7 +863,7 @@ static int twin_decode_frame(AVCodecContext * avctx, void *data,
        return buf_size;
    }

    *data_size = mtab->size*avctx->channels*4;
    *data_size = out_size;

    return buf_size;
 }
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -610,6 +610,16 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD
        goto free_and_end;
    }
    avctx->frame_number = 0;
 #if FF_API_ER

    av_log(avctx, AV_LOG_DEBUG, "err{or,}_recognition separate: %d; %d\n",
           avctx->error_recognition, avctx->err_recognition);
    /* FF_ER_CAREFUL (==1) implies AV_EF_CRCCHECK (== (1<<1) - 1),
       FF_ER_COMPLIANT (==2) implies AV_EF_{CRCCHECK,BITSTREAM} (== (1<<2) - 1), et cetera */
    avctx->err_recognition |= (1<<(avctx->error_recognition-(avctx->error_recognition>=FF_ER_VERY_AGGRESSIVE))) - 1;
    av_log(avctx, AV_LOG_DEBUG, "err{or,}_recognition combined: %d; %d\n",
           avctx->error_recognition, avctx->err_recognition);
 #endif

    if (!HAVE_THREADS)
        av_log(avctx, AV_LOG_WARNING, "Warning: not compiled with thread support, using thread emulation\n");
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -930,6 +930,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
    if (!v->field_mode || (v->field_mode && !v->numref)) {
        valid_count = get_chroma_mv(mvx, mvy, intra, 0, &tx, &ty);
        if (!valid_count) {
            s->current_picture.f.motion_val[1][s->block_index[0]][0] = 0;
            s->current_picture.f.motion_val[1][s->block_index[0]][1] = 0;
            v->luma_mv[s->mb_x][0] = v->luma_mv[s->mb_x][1] = 0;
            return; //no need to do MC for intra blocks
        }
@@ -941,6 +943,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
        if (dominant)
            chroma_ref_type = !v->cur_field_type;
    }
    s->current_picture.f.motion_val[1][s->block_index[0]][0] = tx;
    s->current_picture.f.motion_val[1][s->block_index[0]][1] = ty;
    uvmx = (tx + ((tx & 3) == 3)) >> 1;
    uvmy = (ty + ((ty & 3) == 3)) >> 1;

--- a/libavcodec/x86/h264_weight_10bit.asm
+++ b/libavcodec/x86/h264_weight_10bit.asm
@@ -44,6 +44,7 @@ SECTION .text
    PROLOGUE 0,6,8
    movifnidn  r0, r0mp
    movifnidn r1d, r1m
    movifnidn r2d, r2m
    movifnidn r4d, r4m
    movifnidn r5d, r5m
 %endmacro
--- a/libavformat/id3v2.h
+++ b/libavformat/id3v2.h
@@ -62,7 +62,7 @@ typedef struct ID3v2ExtraMetaGEOB {
 /**
 * Detect ID3v2 Header.
 * @param buf   must be ID3v2_HEADER_SIZE byte long
 * @param magic magic bytes to identify the header, machine byte order.
 * @param magic magic bytes to identify the header.
 * If in doubt, use ID3v2_DEFAULT_MAGIC.
 */
 int ff_id3v2_match(const uint8_t *buf, const char *magic);
--- a/libavformat/libavformat.v
+++ b/libavformat/libavformat.v
@@ -23,5 +23,10 @@ LIBAVFORMAT_$MAJOR {
                ff_timefilter_new;
                ff_timefilter_update;
                ff_timefilter_reset;
                get_*;
                put_*;
                udp_set_remote_url;
                udp_get_local_port;
                init_checksum;
        local: *;
 };
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -94,34 +94,29 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)

 //FIXME remove the usage of scratch buffers.
 static void
 yuv2yuvX_altivec_real(SwsContext *c,
                      const int16_t *lumFilter, const int16_t **lumSrc,
                      int lumFilterSize, const int16_t *chrFilter,
                      const int16_t **chrUSrc, const int16_t **chrVSrc,
                      int chrFilterSize, const int16_t **alpSrc,
                      uint8_t *dest[4], int dstW, int chrDstW)
 yuv2planeX_altivec(const int16_t *filter, int filterSize,
                   const int16_t **src, uint8_t *dest, int dstW,
                   const uint8_t *dither, int offset)
 {
    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2];
    const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
    register int i, j;
    {
        DECLARE_ALIGNED(16, int, val)[dstW];

        for (i=0; i<dstW; i++)
            val[i] = lumDither[i & 7] << 12;
            val[i] = dither[(i + offset) & 7] << 12;

        for (j = 0; j < lumFilterSize; j++) {
            vector signed short l1, vLumFilter = vec_ld(j << 1, lumFilter);
            vector unsigned char perm, perm0 = vec_lvsl(j << 1, lumFilter);
        for (j = 0; j < filterSize; j++) {
            vector signed short l1, vLumFilter = vec_ld(j << 1, filter);
            vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter);
            vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
            vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter

            perm = vec_lvsl(0, lumSrc[j]);
            l1 = vec_ld(0, lumSrc[j]);
            perm = vec_lvsl(0, src[j]);
            l1 = vec_ld(0, src[j]);

            for (i = 0; i < (dstW - 7); i+=8) {
                int offset = i << 2;
                vector signed short l2 = vec_ld((i << 1) + 16, lumSrc[j]);
                vector signed short l2 = vec_ld((i << 1) + 16, src[j]);

                vector signed int v1 = vec_ld(offset, val);
                vector signed int v2 = vec_ld(offset + 16, val);
@@ -143,73 +138,10 @@ yuv2yuvX_altivec_real(SwsContext *c,
                l1 = l2;
            }
            for ( ; i < dstW; i++) {
                val[i] += lumSrc[j][i] * lumFilter[j];
                val[i] += src[j][i] * filter[j];
            }
        }
        altivec_packIntArrayToCharArray(val, yDest, dstW);
    }
    if (uDest != 0) {
        DECLARE_ALIGNED(16, int, u)[chrDstW];
        DECLARE_ALIGNED(16, int, v)[chrDstW];

        for (i=0; i<chrDstW; i++) {
            u[i] = chrDither[i & 7] << 12;
            v[i] = chrDither[(i + 3) & 7] << 12;
        }

        for (j = 0; j < chrFilterSize; j++) {
            vector signed short l1, l1_V, vChrFilter = vec_ld(j << 1, chrFilter);
            vector unsigned char perm, perm0 = vec_lvsl(j << 1, chrFilter);
            vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
            vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter

            perm = vec_lvsl(0, chrUSrc[j]);
            l1 = vec_ld(0, chrUSrc[j]);
            l1_V = vec_ld(0, chrVSrc[j]);

            for (i = 0; i < (chrDstW - 7); i+=8) {
                int offset = i << 2;
                vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]);
                vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]);

                vector signed int v1 = vec_ld(offset, u);
                vector signed int v2 = vec_ld(offset + 16, u);
                vector signed int v1_V = vec_ld(offset, v);
                vector signed int v2_V = vec_ld(offset + 16, v);

                vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7]
                vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i]

                vector signed int i1 = vec_mule(vChrFilter, ls);
                vector signed int i2 = vec_mulo(vChrFilter, ls);
                vector signed int i1_V = vec_mule(vChrFilter, ls_V);
                vector signed int i2_V = vec_mulo(vChrFilter, ls_V);

                vector signed int vf1 = vec_mergeh(i1, i2);
                vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j]
                vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
                vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j]

                vector signed int vo1 = vec_add(v1, vf1);
                vector signed int vo2 = vec_add(v2, vf2);
                vector signed int vo1_V = vec_add(v1_V, vf1_V);
                vector signed int vo2_V = vec_add(v2_V, vf2_V);

                vec_st(vo1, offset, u);
                vec_st(vo2, offset + 16, u);
                vec_st(vo1_V, offset, v);
                vec_st(vo2_V, offset + 16, v);

                l1 = l2;
                l1_V = l2_V;
            }
            for ( ; i < chrDstW; i++) {
                u[i] += chrUSrc[j][i] * chrFilter[j];
                v[i] += chrVSrc[j][i] * chrFilter[j];
            }
        }
        altivec_packIntArrayToCharArray(u, uDest, chrDstW);
        altivec_packIntArrayToCharArray(v, vDest, chrDstW);
        altivec_packIntArrayToCharArray(val, dest, dstW);
    }
 }

@@ -405,7 +337,7 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
        dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 &&
        !c->alpPixBuf) {
        c->yuv2yuvX     = yuv2yuvX_altivec_real;
        c->yuv2planeX = yuv2planeX_altivec;
    }

    /* The following list of supported dstFormat values should
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -290,252 +290,150 @@ const uint16_t dither_scale[15][16]={
 {    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
 };

 /*
  * Store one 16-bit sample at pos: val is shifted right by `shift`, clipped
  * with av_clip_<signedness>16(), offset by bias, and written with AV_WB16()
  * or AV_WL16() depending on `big_endian`.
  * Relies on `shift` and `big_endian` being defined in the expanding scope.
  */
 #define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }

 /*
  * yuv2plane1_16_c_template: write one line of a single plane to 16-bit
  * output without vertical filtering.
  *
  * Each of the dstW samples in src is rounded and stored to dest through
  * output_pixel() (unsigned 16-bit clip, no bias), in the byte order selected
  * by big_endian.
  */
 static av_always_inline void
 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
                      int lumFilterSize, const int16_t *chrFilter,
                      const int32_t **chrUSrc, const int32_t **chrVSrc,
                      int chrFilterSize, const int32_t **alpSrc,
                      uint16_t *dest[4], int dstW, int chrDstW,
                      int big_endian, int output_bits)
 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
 {
    //FIXME Optimize (just quickly written not optimized..)
    int i;
    int dword= output_bits == 16;
    uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
    int shift = 11 + 4*dword + 16 - output_bits - 1;
    int shift = 19 - output_bits;

 #define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uint16(val >> shift)); \
    } else { \
        AV_WL16(pos, av_clip_uint16(val >> shift)); \
    for (i = 0; i < dstW; i++) {
        /* add half of (1 << shift) so the shift in output_pixel() rounds to nearest */
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val, 0, uint);
    }
 }

 /*
  * yuv2planeX_16_c_template: vertically filter filterSize input lines into one
  * line of a single 16-bit output plane.
  *
  * For every output pixel the products src[j][i] * filter[j] are summed into
  * a 32-bit accumulator that has been offset by -0x40000000 (see the comment
  * in the loop), then stored through output_pixel() with a signed 16-bit clip
  * and a bias of 0x8000.
  */
 static av_always_inline void
 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
                         const int32_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
 {
    int i;
    int dword= output_bits == 16;
    int shift = 15 + 16 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = 1 << (26-output_bits + 4*dword - 1);
        int val = 1 << (26-output_bits + 4*dword);
        int j;

        for (j = 0; j < lumFilterSize; j++)
            val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
        /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
         * filters (or anything with negative coeffs), the range can be slightly
         * wider in both directions. To account for this overflow, we subtract
         * a constant so it always fits in the signed range (assuming a
         * reasonable filterSize), and re-add that at the end. */
        val -= 0x40000000;
        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&yDest[i], val);
        /* for 16-bit output shift is 15, so the 0x8000 bias equals
         * 0x40000000 >> shift, i.e. it re-adds the constant removed above */
        output_pixel(&dest[i], val, 0x8000, int);
    }
 }

    if (uDest) {
        for (i = 0; i < chrDstW; i++) {
            int u = 1 << (26-output_bits + 4*dword - 1);
            int v = 1 << (26-output_bits + 4*dword - 1);
            int j;

            for (j = 0; j < chrFilterSize; j++) {
                u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
                v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
            }
 #undef output_pixel

            output_pixel(&uDest[i], u);
            output_pixel(&vDest[i], v);
        }
 /*
  * Store one sample at pos as a 16-bit word: val is shifted right by `shift`,
  * clipped to an unsigned output_bits-wide value with av_clip_uintp2(), and
  * written with AV_WB16() or AV_WL16() depending on `big_endian`.
  * Relies on `shift`, `output_bits` and `big_endian` from the expanding scope.
  */
 #define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }

    if (CONFIG_SWSCALE_ALPHA && aDest) {
        for (i = 0; i < dstW; i++) {
            int val = 1 << (26-output_bits + 4*dword - 1);
            int j;

            for (j = 0; j < lumFilterSize; j++)
                val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
 /*
  * yuv2plane1_10_c_template: write one line of a single plane to 9/10-bit
  * output (stored in 16-bit words) without vertical filtering.
  *
  * Each of the dstW samples in src is rounded, shifted right by
  * 15 - output_bits and stored to dest through output_pixel().
  */
 static av_always_inline void
 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
 {
    int i;
    int shift = 15 - output_bits;

            output_pixel(&aDest[i], val);
        }
    for (i = 0; i < dstW; i++) {
        /* add half of (1 << shift) so the shift in output_pixel() rounds to nearest */
        int val = src[i] + (1 << (shift - 1));
        output_pixel(&dest[i], val);
    }
 #undef output_pixel
 }

 /*
  * yuv2planeX_10_c_template: vertically filter filterSize input lines into one
  * line of a single 9/10-bit output plane (stored in 16-bit words).
  *
  * For every output pixel the accumulator starts at 1 << (26 - output_bits),
  * which is half of 1 << shift, the products src[j][i] * filter[j] are added,
  * and the result is stored through output_pixel().
  */
 static av_always_inline void
 yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
                      int lumFilterSize, const int16_t *chrFilter,
                      const int16_t **chrUSrc, const int16_t **chrVSrc,
                      int chrFilterSize, const int16_t **alpSrc,
                      uint16_t *dest[4], int dstW, int chrDstW,
                      int big_endian, int output_bits)
 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
                         const int16_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
 {
    //FIXME Optimize (just quickly written not optimized..)
    int i;
    uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
    int shift = 11 + 16 - output_bits;

 #define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }
    for (i = 0; i < dstW; i++) {
        int val = 1 << (26-output_bits);
        int j;

        for (j = 0; j < lumFilterSize; j++)
            val += lumSrc[j][i] * lumFilter[j];
        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&yDest[i], val);
        output_pixel(&dest[i], val);
    }
 }

    if (uDest) {
        for (i = 0; i < chrDstW; i++) {
            int u = 1 << (26-output_bits);
            int v = 1 << (26-output_bits);
            int j;

            for (j = 0; j < chrFilterSize; j++) {
                u += chrUSrc[j][i] * chrFilter[j];
                v += chrVSrc[j][i] * chrFilter[j];
            }

            output_pixel(&uDest[i], u);
            output_pixel(&vDest[i], v);
        }
    }

    if (CONFIG_SWSCALE_ALPHA && aDest) {
        for (i = 0; i < dstW; i++) {
            int val = 1 << (26-output_bits);
            int j;

            for (j = 0; j < lumFilterSize; j++)
                val += alpSrc[j][i] * lumFilter[j];

            output_pixel(&aDest[i], val);
        }
    }
 #undef output_pixel
 }

 /*
  * yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) instantiates the
  * per-plane writers yuv2plane1_<bits><BE_LE>_c() and
  * yuv2planeX_<bits><BE_LE>_c(). Both take the generic int16_t/uint8_t
  * signature (including the dither/offset arguments, which they do not use),
  * cast src to typeX_t and dest to uint16_t, and forward to
  * yuv2plane1_<template_size>_c_template() /
  * yuv2planeX_<template_size>_c_template() with is_be and bits.
  */
 #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                              const int16_t **_lumSrc, int lumFilterSize, \
                              const int16_t *chrFilter, const int16_t **_chrUSrc, \
                              const int16_t **_chrVSrc, \
                              int chrFilterSize, const int16_t **_alpSrc, \
                              uint8_t *_dest[4], int dstW, int chrDstW) \
 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
 { \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
 }\
 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
 { \
    const typeX_t **lumSrc  = (const typeX_t **) _lumSrc, \
                  **chrUSrc = (const typeX_t **) _chrUSrc, \
                  **chrVSrc = (const typeX_t **) _chrVSrc, \
                  **alpSrc  = (const typeX_t **) _alpSrc; \
    yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
                         chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                         alpSrc, (uint16_t **) _dest, \
                         dstW, chrDstW, is_be, bits); \
 }
 yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
 yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
 yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
 yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
 yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
 yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);

 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int16_t **chrUSrc,
                        const int16_t **chrVSrc,
                        int chrFilterSize, const int16_t **alpSrc,
                        uint8_t *dest[4], int dstW, int chrDstW)
 {
    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
            *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
    int i;
    const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;

    //FIXME Optimize (just quickly written not optimized..)
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
 }
 yuv2NBPS( 9, BE, 1, 10, int16_t);
 yuv2NBPS( 9, LE, 0, 10, int16_t);
 yuv2NBPS(10, BE, 1, 10, int16_t);
 yuv2NBPS(10, LE, 0, 10, int16_t);
 yuv2NBPS(16, BE, 1, 16, int32_t);
 yuv2NBPS(16, LE, 0, 16, int32_t);

 /*
  * yuv2planeX_8_c: vertically filter filterSize input lines into one line of
  * a single 8-bit output plane.
  *
  * For every output pixel i the accumulator starts at the dither value
  * dither[(i + offset) & 7] shifted left by 12, the products
  * src[j][i] * filter[j] are added, and the sum is shifted right by 19 and
  * clipped to 8 bits.
  */
 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
 {
    int i;
    for (i=0; i<dstW; i++) {
        int val = lumDither[i & 7] << 12;
        int val = dither[(i + offset) & 7] << 12;
        int j;
        for (j=0; j<lumFilterSize; j++)
            val += lumSrc[j][i] * lumFilter[j];
        for (j=0; j<filterSize; j++)
            val += src[j][i] * filter[j];

        yDest[i]= av_clip_uint8(val>>19);
        dest[i]= av_clip_uint8(val>>19);
    }

    if (uDest)
        for (i=0; i<chrDstW; i++) {
            int u = chrDither[i & 7] << 12;
            int v = chrDither[(i + 3) & 7] << 12;
            int j;
            for (j=0; j<chrFilterSize; j++) {
                u += chrUSrc[j][i] * chrFilter[j];
                v += chrVSrc[j][i] * chrFilter[j];
            }

            uDest[i]= av_clip_uint8(u>>19);
            vDest[i]= av_clip_uint8(v>>19);
        }

    if (CONFIG_SWSCALE_ALPHA && aDest)
        for (i=0; i<dstW; i++) {
            int val = lumDither[i & 7] << 12;
            int j;
            for (j=0; j<lumFilterSize; j++)
                val += alpSrc[j][i] * lumFilter[j];

            aDest[i]= av_clip_uint8(val>>19);
        }
 }

 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
                       const int16_t *chrUSrc, const int16_t *chrVSrc,
                       const int16_t *alpSrc,
                       uint8_t *dest[4], int dstW, int chrDstW)
 /*
  * yuv2plane1_8_c: write one line of a single plane to 8-bit output without
  * vertical filtering.
  *
  * For every output pixel i, dither[(i + offset) & 7] is added to src[i], the
  * sum is shifted right by 7 and clipped to 8 bits.
  */
 static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
 {
    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
            *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
    int i;
    const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;

    for (i=0; i<dstW; i++) {
        int val = (lumSrc[i]+  lumDither[i & 7]) >> 7;
        yDest[i]= av_clip_uint8(val);
        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
        dest[i]= av_clip_uint8(val);
    }

    if (uDest)
        for (i=0; i<chrDstW; i++) {
            int u = (chrUSrc[i] + chrDither[i & 7])       >> 7;
            int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
            uDest[i]= av_clip_uint8(u);
            vDest[i]= av_clip_uint8(v);
        }

    if (CONFIG_SWSCALE_ALPHA && aDest)
        for (i=0; i<dstW; i++) {
            int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
            aDest[i]= av_clip_uint8(val);
        }
 }

 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int16_t **chrUSrc,
                        const int16_t **chrVSrc, int chrFilterSize,
                        const int16_t **alpSrc, uint8_t *dest[4],
                        int dstW, int chrDstW)
 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
                        const int16_t **chrUSrc, const int16_t **chrVSrc,
                        uint8_t *dest, int chrDstW)
 {
    uint8_t *yDest = dest[0], *uDest = dest[1];
    enum PixelFormat dstFormat = c->dstFormat;
    const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;

    //FIXME Optimize (just quickly written not optimized..)
    const uint8_t *chrDither = c->chrDither8;
    int i;
    for (i=0; i<dstW; i++) {
        int val = lumDither[i & 7] << 12;
        int j;
        for (j=0; j<lumFilterSize; j++)
            val += lumSrc[j][i] * lumFilter[j];

        yDest[i]= av_clip_uint8(val>>19);
    }

    if (!uDest)
        return;

    if (dstFormat == PIX_FMT_NV12)
        for (i=0; i<chrDstW; i++) {
@@ -547,8 +445,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                v += chrVSrc[j][i] * chrFilter[j];
            }

            uDest[2*i]= av_clip_uint8(u>>19);
            uDest[2*i+1]= av_clip_uint8(v>>19);
            dest[2*i]= av_clip_uint8(u>>19);
            dest[2*i+1]= av_clip_uint8(v>>19);
        }
    else
        for (i=0; i<chrDstW; i++) {
@@ -560,8 +458,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                v += chrVSrc[j][i] * chrFilter[j];
            }

            uDest[2*i]= av_clip_uint8(v>>19);
            uDest[2*i+1]= av_clip_uint8(u>>19);
            dest[2*i]= av_clip_uint8(v>>19);
            dest[2*i+1]= av_clip_uint8(u>>19);
        }
 }

@@ -2310,26 +2208,31 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2

 static av_always_inline void
 find_c_packed_planar_out_funcs(SwsContext *c,
                               yuv2planar1_fn *yuv2yuv1,    yuv2planarX_fn *yuv2yuvX,
                               yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
                               yuv2interleavedX_fn *yuv2nv12cX,
                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
 {
    enum PixelFormat dstFormat = c->dstFormat;

    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
        *yuv2yuvX     = yuv2nv12X_c;
    } else if (is16BPS(dstFormat)) {
        *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
    if (is16BPS(dstFormat)) {
        *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
        *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
    } else if (is9_OR_10BPS(dstFormat)) {
        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
        } else {
            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
        }
    } else {
        *yuv2yuv1     = yuv2yuv1_c;
        *yuv2yuvX     = yuv2yuvX_c;
        *yuv2plane1 = yuv2plane1_8_c;
        *yuv2planeX = yuv2planeX_8_c;
        if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
            *yuv2nv12cX = yuv2nv12cX_c;
    }

    if(c->flags & SWS_FULL_CHR_H_INT) {
        switch (dstFormat) {
            case PIX_FMT_RGBA:
@@ -2591,10 +2494,11 @@ static int swScale(SwsContext *c, const uint8_t* src[],
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    int lastDstY;
    uint32_t *pal=c->pal_yuv;

    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;

    yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
    yuv2planarX_fn yuv2planeX = c->yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
@@ -2748,9 +2652,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
        }
        if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
                                           &yuv2packed1, &yuv2packed2,
                                           &yuv2packedX);
            find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX,  &yuv2nv12cX,
                                           &yuv2packed1, &yuv2packed2, &yuv2packedX);
        }

        {
@@ -2761,18 +2664,35 @@ static int swScale(SwsContext *c, const uint8_t* src[],

            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat))
                    dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
                             dest, dstW, chrDstW);
                } else { //General YV12
                    yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
                             lumSrcPtr, vLumFilterSize,
                             vChrFilter + chrDstY * vChrFilterSize,
                             chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                             alpSrcPtr, dest, dstW, chrDstW);

                if (vLumFilterSize == 1) {
                    yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
                } else {
                    yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
                               lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
                }

                if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
                    if (yuv2nv12cX) {
                        yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
                    } else if (vChrFilterSize == 1) {
                        yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
                        yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
                    } else {
                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
                                   chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
                                   chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
                    }
                }

                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
                    if (vLumFilterSize == 1) {
                        yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
                    } else {
                        yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
                                   alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
                    }
                }
            } else {
                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
@@ -2826,8 +2746,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
 {
    enum PixelFormat srcFormat = c->srcFormat;

    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
                                   &c->yuv2packed1, &c->yuv2packed2,
    find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                                   &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
                                   &c->yuv2packedX);

    c->chrToYV12 = NULL;
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -61,56 +61,58 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
                       int srcStride[], int srcSliceY, int srcSliceH,
                       uint8_t* dst[], int dstStride[]);


 /**
 * Write one line of horizontally scaled Y/U/V/A to planar output
 * Write one line of horizontally scaled data to planar output
 * without any additional vertical scaling (or point-scaling).
 *
 * @param c       SWS scaling context
 * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
 *                19-bit for 16bit output (in int32_t)
 * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
 *                19-bit for 16bit output (in int32_t)
 * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
 * @param src     scaled source data, 15bit for 8-10bit output,
 *                19-bit for 16bit output (in int32_t)
 * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
 *                19-bit for 16bit output (in int32_t)
 * @param dest    pointer to the 4 output planes (Y/U/V/A). For >8bit
 * @param dest    pointer to the output plane. For >8bit
 *                output, this is in uint16_t
 * @param dstW    width of dest[0], dest[3], lumSrc and alpSrc in pixels
 * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
 * @param dstW    width of destination in pixels
 * @param dither  ordered dither array of type uint8_t and size 8
 * @param offset  Dither offset
 */
 typedef void (*yuv2planar1_fn) (struct SwsContext *c,
                                const int16_t *lumSrc, const int16_t *chrUSrc,
                                const int16_t *chrVSrc, const int16_t *alpSrc,
                                uint8_t *dest[4], int dstW, int chrDstW);
 typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW,
                                const uint8_t *dither, int offset);

 /**
 * Write one line of horizontally scaled Y/U/V/A to planar output
 * Write one line of horizontally scaled data to planar output
 * with multi-point vertical scaling between input pixels.
 *
 * @param c             SWS scaling context
 * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
 * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
 * @param filter        vertical scaling coefficients, 12bit [0,4096]
 * @param src           scaled source data (one plane), 15bit for 8-10bit output,
 *                      19-bit for 16bit output (in int32_t)
 * @param lumFilterSize number of vertical luma/alpha input lines to scale
 * @param filterSize    number of vertical input lines to scale
 * @param dest          pointer to output plane. For >8bit
 *                      output, this is in uint16_t
 * @param dstW          width of destination in pixels
 * @param dither        ordered dither array of type uint8_t and size 8
 * @param offset        Dither offset
 */
 typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize,
                                const int16_t **src, uint8_t *dest, int dstW,
                                const uint8_t *dither, int offset);

 /**
 * Write one line of horizontally scaled chroma to interleaved output
 * with multi-point vertical scaling between input pixels.
 *
 * @param c             SWS scaling context
 * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
 * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
 *                      19-bit for 16bit output (in int32_t)
 * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
 *                      19-bit for 16bit output (in int32_t)
 * @param chrFilterSize number of vertical chroma input lines to scale
 * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
 *                      19-bit for 16bit output (in int32_t)
 * @param dest          pointer to the 4 output planes (Y/U/V/A). For >8bit
 * @param dest          pointer to the output plane. For >8bit
 *                      output, this is in uint16_t
 * @param dstW          width of dest[0], dest[3], lumSrc and alpSrc in pixels
 * @param chrDstW       width of dest[1], dest[2], chrUSrc and chrVSrc
 * @param dstW          width of chroma planes
 */
 typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
                                const int16_t **lumSrc, int lumFilterSize,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
                                const int16_t **chrVSrc,  int chrFilterSize,
                                const int16_t **alpSrc, uint8_t *dest[4],
                                int dstW, int chrDstW);
 typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
                                     const int16_t **chrUSrc, const int16_t **chrVSrc,
                                     uint8_t *dest, int dstW);

 /**
 * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
 * output without any additional vertical scaling (or point-scaling). Note
@@ -412,8 +414,9 @@ typedef struct SwsContext {
 #endif

    /* function pointers for swScale() */
    yuv2planar1_fn yuv2yuv1;
    yuv2planarX_fn yuv2yuvX;
    yuv2planar1_fn yuv2plane1;
    yuv2planarX_fn yuv2planeX;
    yuv2interleavedX_fn yuv2nv12cX;
    yuv2packed1_fn yuv2packed1;
    yuv2packed2_fn yuv2packed2;
    yuv2packedX_fn yuv2packedX;
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -1,6 +1,7 @@
 ;******************************************************************************
 ;* x86-optimized horizontal line scaling functions
 ;* x86-optimized horizontal/vertical line scaling functions
 ;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
 ;*                    Kieran Kunhya <kieran@kunhya.com>
 ;*
 ;* This file is part of Libav.
 ;*
@@ -28,6 +29,11 @@ max_19bit_int: times 4 dd 0x7ffff
 max_19bit_flt: times 4 dd 524287.0
 minshort:      times 8 dw 0x8000
 unicoeff:      times 4 dd 0x20000000
 ; Constants for yuv2planeX_fn.
 ; *_start: initial accumulator value per output depth. Each holds half of the
 ;          final right shift (the rounding term); the 16-bit one additionally
 ;          subtracts 0x40000000, which is undone by adding minshort after
 ;          packing.
 ; *_upper: largest sample value, used with pminsw to clamp 9/10-bit output.
 yuv2yuvX_16_start:  times 4 dd 0x4000 - 0x40000000
 yuv2yuvX_10_start:  times 4 dd 0x10000
 yuv2yuvX_9_start:   times 4 dd 0x20000
 yuv2yuvX_10_upper:  times 8 dw 0x3ff
 yuv2yuvX_9_upper:   times 8 dw 0x1ff

 SECTION .text

@@ -429,3 +435,233 @@ INIT_XMM
 SCALE_FUNCS2 sse2,  6, 7, 8
 SCALE_FUNCS2 ssse3, 6, 6, 8
 SCALE_FUNCS2 sse4,  6, 6, 8

 ;-----------------------------------------------------------------------------
 ; vertical line scaling
 ;
 ; void yuv2plane1_<output_size>_<opt>(const int16_t *src, uint8_t *dst, int dstW,
 ;                                     const uint8_t *dither, int offset)
 ; and
 ; void yuv2planeX_<output_size>_<opt>(const int16_t *filter, int filterSize,
 ;                                     const int16_t **src, uint8_t *dst, int dstW,
 ;                                     const uint8_t *dither, int offset)
 ;
 ; Scale one or $filterSize lines of source data to generate one line of output
 ; data. The input is 15-bit in int16_t if $output_size is [8,10] and 19-bit in
 ; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
 ; of 2. $offset is either 0 or 3. $dither holds 8 values.
 ;-----------------------------------------------------------------------------

 ; yuv2planeX_fn %1, %2, %3, %4
 ;   %1 = instruction-set suffix used in the function name (mmx, sse2, ...)
 ;   %2 = output bit depth (8, 9, 10 or 16); selects the code paths below
 ;   %3, %4 = forwarded to cglobal (presumably the vector register count and
 ;            the number of arguments to load - confirm against x86inc.asm)
 %macro yuv2planeX_fn 4

 %ifdef ARCH_X86_32
 %define cntr_reg r1
 %define movsx mov
 %else
 %define cntr_reg r11
 %define movsx movsxd
 %endif

 cglobal yuv2planeX_%2_%1, %4, 7, %3
 %if %2 == 8 || %2 == 9 || %2 == 10
    ; m6 = 0, used for unpacking the dither bytes (8-bit) and as the lower
    ; clamp bound in the 9/10-bit path without packusdw
    pxor            m6,  m6
 %endif ; %2 == 8/9/10

 %if %2 == 8
 %ifdef ARCH_X86_32
 %assign pad 0x2c - (stack_offset & 15)
    SUB             rsp, pad
 %define m_dith m7
 %else ; x86-64
 %define m_dith m9
 %endif ; x86-32

    ; create registers holding dither
    movq        m_dith, [r5]             ; dither
    ; a non-zero offset rotates the 8 dither bytes by 3 positions
    test            r6d, r6d
    jz              .no_rot
 %if mmsize == 16
    punpcklqdq  m_dith,  m_dith
 %endif ; mmsize == 16
    PALIGNR     m_dith,  m_dith,  3,  m0
 .no_rot:
    ; widen the dither bytes to dwords and pre-shift them left by 12; on
    ; x86-32 the results are kept on the stack, on x86-64 in m8 and m_dith
 %if mmsize == 16
    punpcklbw   m_dith,  m6
 %ifdef ARCH_X86_64
    punpcklwd       m8,  m_dith,  m6
    pslld           m8,  12
 %else ; x86-32
    punpcklwd       m5,  m_dith,  m6
    pslld           m5,  12
 %endif ; x86-32/64
    punpckhwd   m_dith,  m6
    pslld       m_dith,  12
 %ifdef ARCH_X86_32
    mova      [rsp+ 0],  m5
    mova      [rsp+16],  m_dith
 %endif
 %else ; mmsize == 8
    punpcklbw       m5,  m_dith,  m6
    punpckhbw   m_dith,  m6
    punpcklwd       m4,  m5,  m6
    punpckhwd       m5,  m6
    punpcklwd       m3,  m_dith,  m6
    punpckhwd   m_dith,  m6
    pslld           m4,  12
    pslld           m5,  12
    pslld           m3,  12
    pslld       m_dith,  12
    mova      [rsp+ 0],  m4
    mova      [rsp+ 8],  m5
    mova      [rsp+16],  m3
    mova      [rsp+24],  m_dith
 %endif ; mmsize == 8/16
 %endif ; %2 == 8

    ; r5 (the dither pointer until here) is reused as the output pixel index
    xor             r5,  r5

 .pixelloop
 %assign %%i 0
    ; the rep here is for the 8bit output mmx case, where dither covers
    ; 8 pixels but we can only handle 2 pixels per register, and thus 4
    ; pixels per iteration. In order to not have to keep track of where
    ; we are w.r.t. dithering, we unroll the mmx/8bit loop x2.
 %if %2 == 8
 %rep 16/mmsize
 %endif ; %2 == 8

    ; m2/m1 = accumulators for the low/high half of this group of pixels,
    ; initialised with the dither values (8-bit) or the rounding constant
 %if %2 == 8
 %ifdef ARCH_X86_32
    mova            m2, [rsp+mmsize*(0+%%i)]
    mova            m1, [rsp+mmsize*(1+%%i)]
 %else ; x86-64
    mova            m2,  m8
    mova            m1,  m_dith
 %endif ; x86-32/64
 %else ; %2 == 9/10/16
    mova            m1, [yuv2yuvX_%2_start]
    mova            m2,  m1
 %endif ; %2 == 8/9/10/16
    ; cntr_reg = filterSize; the filter loop consumes two taps per iteration,
    ; walking the src pointer array and the coefficients from the end
    movsx     cntr_reg,  r1m
 .filterloop_ %+ %%i
    ; input pixels
    mov             r6, [r2+gprsize*cntr_reg-2*gprsize]
 %if %2 == 16
    mova            m3, [r6+r5*4]
    mova            m5, [r6+r5*4+mmsize]
 %else ; %2 == 8/9/10
    mova            m3, [r6+r5*2]
 %endif ; %2 == 8/9/10/16
    mov             r6, [r2+gprsize*cntr_reg-gprsize]
 %if %2 == 16
    mova            m4, [r6+r5*4]
    mova            m6, [r6+r5*4+mmsize]
 %else ; %2 == 8/9/10
    mova            m4, [r6+r5*2]
 %endif ; %2 == 8/9/10/16

    ; coefficients
    movd            m0, [r0+2*cntr_reg-4]; coeff[0], coeff[1]
 %if %2 == 16
    pshuflw         m7,  m0,  0          ; coeff[0]
    pshuflw         m0,  m0,  0x55       ; coeff[1]
    pmovsxwd        m7,  m7              ; word -> dword
    pmovsxwd        m0,  m0              ; word -> dword

    pmulld          m3,  m7
    pmulld          m5,  m7
    pmulld          m4,  m0
    pmulld          m6,  m0

    paddd           m2,  m3
    paddd           m1,  m5
    paddd           m2,  m4
    paddd           m1,  m6
 %else ; %2 == 10/9/8
    ; interleave the two source lines so that one pmaddwd applies both
    ; coefficients and sums the pair
    punpcklwd       m5,  m3,  m4
    punpckhwd       m3,  m4
    SPLATD          m0,  m0

    pmaddwd         m5,  m0
    pmaddwd         m3,  m0

    paddd           m2,  m5
    paddd           m1,  m3
 %endif ; %2 == 8/9/10/16

    sub       cntr_reg,  2
    jg .filterloop_ %+ %%i

    ; scale the accumulated sums down to the output bit depth
 %if %2 == 16
    psrad           m2,  31 - %2
    psrad           m1,  31 - %2
 %else ; %2 == 10/9/8
    psrad           m2,  27 - %2
    psrad           m1,  27 - %2
 %endif ; %2 == 8/9/10/16

 %if %2 == 8
    packssdw        m2,  m1
    packuswb        m2,  m2
    movh     [r3+r5*1],  m2
 %else ; %2 == 9/10/16
 %if %2 == 16
    packssdw        m2,  m1
    ; adding 0x8000 per word undoes the 0x40000000 bias in yuv2yuvX_16_start
    paddw           m2, [minshort]
 %else ; %2 == 9/10
 %ifidn %1, sse4
    packusdw        m2,  m1
 %elifidn %1, avx
    packusdw        m2,  m1
 %else ; mmx2/sse2
    ; no packusdw here: pack signed, then clamp negatives to 0 (m6 is zero)
    packssdw        m2,  m1
    pmaxsw          m2,  m6
 %endif ; mmx2/sse2/sse4/avx
    pminsw          m2, [yuv2yuvX_%2_upper]
 %endif ; %2 == 9/10/16
    mova     [r3+r5*2],  m2
 %endif ; %2 == 8/9/10/16

    ; advance by the mmsize/2 pixels just written; r4d counts the remaining width
    add             r5,  mmsize/2
    sub             r4d, mmsize/2
 %if %2 == 8
 %assign %%i %%i+2
 %endrep
 %endif ; %2 == 8
    jg .pixelloop

 %if %2 == 8
 %ifdef ARCH_X86_32
    ADD             rsp, pad
    RET
 %else ; x86-64
    REP_RET
 %endif ; x86-32/64
 %else ; %2 == 9/10/16
    REP_RET
 %endif ; %2 == 8/9/10/16
 %endmacro

 %define PALIGNR PALIGNR_MMX
 %ifdef ARCH_X86_32
 INIT_MMX
 yuv2planeX_fn mmx,   8,  0, 7
 yuv2planeX_fn mmx2,  9,  0, 5
 yuv2planeX_fn mmx2, 10,  0, 5
 %endif

 INIT_XMM
 yuv2planeX_fn sse2,  8, 10, 7
 yuv2planeX_fn sse2,  9,  7, 5
 yuv2planeX_fn sse2, 10,  7, 5

 %define PALIGNR PALIGNR_SSSE3
 yuv2planeX_fn sse4,  8, 10, 7
 yuv2planeX_fn sse4,  9,  7, 5
 yuv2planeX_fn sse4, 10,  7, 5
 yuv2planeX_fn sse4, 16,  8, 5

 INIT_AVX
 yuv2planeX_fn avx,   8, 10, 7
 yuv2planeX_fn avx,   9,  7, 5
 yuv2planeX_fn avx,  10,  7, 5
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -213,6 +213,23 @@ SCALE_FUNCS_SSE(sse2);
 SCALE_FUNCS_SSE(ssse3);
 SCALE_FUNCS_SSE(sse4);

 /*
  * Prototype generators for the external vertical-scaler routines.
  *
  * VSCALEX_FUNC(depth, cpuext) declares ff_yuv2planeX_<depth>_<cpuext>().
  * VSCALEX_FUNCS(cpuext8, cpuext_hi) declares the 8-bit routine for cpuext8
  * and the 9- and 10-bit routines for cpuext_hi.
  * Neither macro supplies the final ';' -- the user of the macro does.
  */
 #define VSCALEX_FUNC(depth, cpuext) \
 extern void ff_yuv2planeX_ ## depth ## _ ## cpuext( \
     const int16_t *filter, int filterSize, \
     const int16_t **src, uint8_t *dest, int dstW, \
     const uint8_t *dither, int offset)
 #define VSCALEX_FUNCS(cpuext8, cpuext_hi) \
     VSCALEX_FUNC(8,  cpuext8);   \
     VSCALEX_FUNC(9,  cpuext_hi); \
     VSCALEX_FUNC(10, cpuext_hi)

 /* MMX (8 bit) and MMX2 (9/10 bit) prototypes exist on 32-bit x86 only. */
 #if ARCH_X86_32
 VSCALEX_FUNCS(mmx, mmx2);
 #endif
 VSCALEX_FUNCS(sse2, sse2);
 VSCALEX_FUNCS(sse4, sse4);
 /* The 16-bit routine is declared for SSE4 only. */
 VSCALEX_FUNC(16, sse4);
 VSCALEX_FUNCS(avx, avx);

 void ff_sws_init_swScale_mmx(SwsContext *c)
 {
    int cpu_flags = av_get_cpu_flags();
@@ -249,10 +266,18 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
    }
 /*
  * Select the vertical scaler for the destination bit depth (c->dstBpc).
  *
  *   vscalefn    lvalue that receives the chosen function
  *   opt1        instruction-set suffix of the 8-bit routine (default case)
  *   opt2        instruction-set suffix of the 9/10-bit routines
  *   opt2chk     expression that must be non-zero for the 9/10-bit routines
  *               to be used; they are also skipped for big-endian output
  *   do_16_case  statement executed for 16-bit output (may be empty)
  *
  * When a 9/10-bit condition fails, vscalefn is left untouched.
  * opt2chk is parenthesized so that arguments containing low-precedence
  * operators (e.g. "a || b") cannot bypass the !isBE() guard.
  * Relies on a variable named 'c' being in scope at the point of use.
  */
 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt1, opt2, opt2chk, do_16_case) \
 switch(c->dstBpc){ \
    case 16:                                       do_16_case;                           break; \
    case 10: if (!isBE(c->dstFormat) && (opt2chk)) vscalefn = ff_yuv2planeX_10_ ## opt2; break; \
    case 9:  if (!isBE(c->dstFormat) && (opt2chk)) vscalefn = ff_yuv2planeX_9_  ## opt2; break; \
    default:                                       vscalefn = ff_yuv2planeX_8_  ## opt1; break; \
    }
 #if ARCH_X86_32
    if (cpu_flags & AV_CPU_FLAG_MMX) {
        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2,);
    }
 #endif
 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
@@ -266,6 +291,7 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
    if (cpu_flags & AV_CPU_FLAG_SSE2) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, sse2, 1,);
    }
    if (cpu_flags & AV_CPU_FLAG_SSSE3) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
@@ -275,6 +301,12 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
        /* Xto15 don't need special sse4 functions */
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4, sse4, 1,
                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4);
    }

    if (cpu_flags & AV_CPU_FLAG_AVX) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, avx, 1,);
    }
 #endif
 }
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -35,41 +35,6 @@
 #endif
 #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)

 /*
  * Vertical filter producing 8-bit output, 8 pixels per outer iteration.
  *
  * Operands: %0 = &c->redDither (base for DITHER16 and 'offset'),
  *           %1 = dest, %2 = end, %3 = pos (running pixel index).
  *
  * 'offset'(%0) addresses a list of 16-byte entries: a source row pointer at
  * byte 0 and the packed coefficient at byte 8. The list ends at the first
  * entry whose pointer is NULL (test/jnz).
  *
  * For each group of 8 pixels mm3/mm4 start from the two quadwords at
  * DITHER16+0/+8, every list entry adds pmulhw(src, coeff) for 8 16-bit
  * samples read at src + 2*pos, and the sums are shifted right by 3, packed
  * to unsigned bytes and written to dest + pos through MOVNTQ. pos then
  * advances by 8 and the loop repeats while pos < end (unsigned compare).
  * The body always runs at least once.
  *
  * Uses the variable 'c' from the enclosing scope.
  * NOTE(review): %3 is declared as an input operand yet is modified by the
  * asm, and no "memory" clobber is listed although dest is written --
  * confirm this is safe with the compilers in use.
  */
 #define YSCALEYUV2YV12X(offset, dest, end, pos) \
    __asm__ volatile(\
        "movq                  "DITHER16"+0(%0), %%mm3      \n\t"\
        "movq                  "DITHER16"+8(%0), %%mm4      \n\t"\
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
        ".p2align                             4             \n\t" /* FIXME Unroll? */\
        "1:                                                 \n\t"\
        "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
        "movq                (%%"REG_S", %3, 2), %%mm2      \n\t" /* srcData */\
        "movq               8(%%"REG_S", %3, 2), %%mm5      \n\t" /* srcData */\
        "add                                $16, %%"REG_d"  \n\t"\
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
        "test                         %%"REG_S", %%"REG_S"  \n\t"\
        "pmulhw                           %%mm0, %%mm2      \n\t"\
        "pmulhw                           %%mm0, %%mm5      \n\t"\
        "paddw                            %%mm2, %%mm3      \n\t"\
        "paddw                            %%mm5, %%mm4      \n\t"\
        " jnz                                1b             \n\t"\
        "psraw                               $3, %%mm3      \n\t"\
        "psraw                               $3, %%mm4      \n\t"\
        "packuswb                         %%mm4, %%mm3      \n\t"\
        MOVNTQ(%%mm3, (%1, %3))\
        "add                                 $8, %3         \n\t"\
        "cmp                                 %2, %3         \n\t"\
        "movq                  "DITHER16"+0(%0), %%mm3      \n\t"\
        "movq                  "DITHER16"+8(%0), %%mm4      \n\t"\
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
        "jb                                  1b             \n\t"\
        :: "r" (&c->redDither),\
           "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
        : "%"REG_d, "%"REG_S\
    );

 #if !COMPILE_TEMPLATE_MMX2
 static av_always_inline void
 dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
@@ -106,170 +71,6 @@ dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
 }
 #endif

 /*
  * Vertically filter into planar 8-bit output using YSCALEYUV2YV12X.
  *
  * The filter and source parameters are not read here directly; the asm
  * macro takes its filter lists from the context, at the *_MMX_FILTER_OFFSET
  * offsets relative to &c->redDither.
  *
  * Plane order is chroma U, chroma V (both only if dest[1] is set), alpha
  * (only if compiled in and dest[3] is set), then luma. The dither values
  * are reloaded with dither_8to16() before U, before V (rot = 1) and once
  * before the alpha and luma passes.
  */
 static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                              const int16_t **lumSrc, int lumFilterSize,
                              const int16_t *chrFilter, const int16_t **chrUSrc,
                              const int16_t **chrVSrc,
                              int chrFilterSize, const int16_t **alpSrc,
                              uint8_t *dest[4], int dstW, int chrDstW)
 {
    uint8_t *luma_out  = dest[0];
    uint8_t *cb_out    = dest[1];
    uint8_t *cr_out    = dest[2];
    uint8_t *alpha_out = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
    const uint8_t *luma_dither   = c->lumDither8;
    const uint8_t *chroma_dither = c->chrDither8;

    if (cb_out) {
        /* V pass: the index starts at v_shift, so the destination pointer
         * and the end index are biased by the same amount. */
        x86_reg v_shift = c->uv_offx2 >> 1;

        dither_8to16(c, chroma_dither, 0);
        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, cb_out, chrDstW, 0)

        dither_8to16(c, chroma_dither, 1);
        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, cr_out - v_shift, chrDstW + v_shift, v_shift)
    }

    dither_8to16(c, luma_dither, 0);

    if (CONFIG_SWSCALE_ALPHA && alpha_out) {
        YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, alpha_out, dstW, 0)
    }

    YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, luma_out, dstW, 0)
 }

 /*
  * Vertical filter producing 8-bit output with 32-bit accumulation,
  * 8 pixels per outer iteration.
  *
  * Operands: %0 = &c->redDither (base for DITHER32 and 'offset'),
  *           %1 = dest, %2 = end, %3 = pos (running pixel index).
  *
  * 'offset'(%0) addresses a list of APCK_SIZE-byte entries. Each entry
  * supplies two source row pointers (at byte 0 and at APCK_PTR2) and one
  * coefficient quadword (at APCK_COEF); the pointer at APCK_SIZE is the first
  * pointer of the next entry, and a NULL there ends the list (test/jnz).
  *
  * For each group of 8 pixels mm4..mm7 start from the four quadwords at
  * DITHER32+0..+24. Per entry, the 16-bit samples of the two rows are
  * interleaved (punpcklwd/punpckhwd) and multiplied-and-added with the
  * coefficient quadword by pmaddwd, giving one 32-bit sum per pixel
  * (mm4/mm5: pixels 0-3, mm6/mm7: pixels 4-7). The sums are shifted right
  * by 19, packed with signed then unsigned saturation and written to
  * dest + pos through MOVNTQ. pos then advances by 8 and the loop repeats
  * while pos < end (unsigned compare). The body always runs at least once.
  *
  * Uses the variable 'c' from the enclosing scope.
  * NOTE(review): %3 is declared as an input operand yet is modified by the
  * asm, no "memory" clobber is listed, and REG_a is clobbered although the
  * instructions visible here never name it -- confirm.
  */
 #define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
    __asm__ volatile(\
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
        "movq                  "DITHER32"+0(%0), %%mm4      \n\t"\
        "movq                  "DITHER32"+8(%0), %%mm5      \n\t"\
        "movq                 "DITHER32"+16(%0), %%mm6      \n\t"\
        "movq                 "DITHER32"+24(%0), %%mm7      \n\t"\
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
        ".p2align                             4             \n\t"\
        "1:                                                 \n\t"\
        "movq                (%%"REG_S", %3, 2), %%mm0      \n\t" /* srcData */\
        "movq               8(%%"REG_S", %3, 2), %%mm2      \n\t" /* srcData */\
        "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
        "movq                (%%"REG_S", %3, 2), %%mm1      \n\t" /* srcData */\
        "movq                             %%mm0, %%mm3      \n\t"\
        "punpcklwd                        %%mm1, %%mm0      \n\t"\
        "punpckhwd                        %%mm1, %%mm3      \n\t"\
        "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
        "pmaddwd                          %%mm1, %%mm0      \n\t"\
        "pmaddwd                          %%mm1, %%mm3      \n\t"\
        "paddd                            %%mm0, %%mm4      \n\t"\
        "paddd                            %%mm3, %%mm5      \n\t"\
        "movq               8(%%"REG_S", %3, 2), %%mm3      \n\t" /* srcData */\
        "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
        "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
        "test                         %%"REG_S", %%"REG_S"  \n\t"\
        "movq                             %%mm2, %%mm0      \n\t"\
        "punpcklwd                        %%mm3, %%mm2      \n\t"\
        "punpckhwd                        %%mm3, %%mm0      \n\t"\
        "pmaddwd                          %%mm1, %%mm2      \n\t"\
        "pmaddwd                          %%mm1, %%mm0      \n\t"\
        "paddd                            %%mm2, %%mm6      \n\t"\
        "paddd                            %%mm0, %%mm7      \n\t"\
        " jnz                                1b             \n\t"\
        "psrad                              $19, %%mm4      \n\t"\
        "psrad                              $19, %%mm5      \n\t"\
        "psrad                              $19, %%mm6      \n\t"\
        "psrad                              $19, %%mm7      \n\t"\
        "packssdw                         %%mm5, %%mm4      \n\t"\
        "packssdw                         %%mm7, %%mm6      \n\t"\
        "packuswb                         %%mm6, %%mm4      \n\t"\
        MOVNTQ(%%mm4, (%1, %3))\
        "add                                 $8, %3         \n\t"\
        "cmp                                 %2, %3         \n\t"\
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
        "movq                  "DITHER32"+0(%0), %%mm4      \n\t"\
        "movq                  "DITHER32"+8(%0), %%mm5      \n\t"\
        "movq                 "DITHER32"+16(%0), %%mm6      \n\t"\
        "movq                 "DITHER32"+24(%0), %%mm7      \n\t"\
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
        "jb                                  1b             \n\t"\
        :: "r" (&c->redDither),\
        "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
        : "%"REG_a, "%"REG_d, "%"REG_S\
    );

 #if !COMPILE_TEMPLATE_MMX2
 /*
  * Expand 8 dither bytes into eight 32-bit values stored in the context.
  *
  * Reads one quadword from srcDither, zero-extends each byte to 32 bits,
  * shifts it left by 12 and stores the eight results at DITHER32+0..+24
  * relative to &c->redDither.
  *
  * @param c         context whose DITHER32 area is overwritten
  * @param srcDither source of the 8 dither bytes
  * @param rot       if non-zero, the quadword is rotated right by 24 bits
  *                  (three bytes) before it is expanded
  *
  * NOTE(review): the asm statements list no clobbers, although they modify
  * mm0 and mm4-mm7 and write to memory -- confirm the callers are safe.
  */
 static av_always_inline void
 dither_8to32(SwsContext *c, const uint8_t *srcDither, int rot)
 {
    if (rot) {
        /* psrlq 24 | psllq 40 rotates the 64-bit dither word right by 24
         * bits; the rest matches the non-rotated path below. */
        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
                         "movq       (%0), %%mm4\n\t"
                         "movq      %%mm4, %%mm5\n\t"
                         "psrlq       $24, %%mm4\n\t"
                         "psllq       $40, %%mm5\n\t"
                         "por       %%mm5, %%mm4\n\t"
                         "movq      %%mm4, %%mm6\n\t"
                         "punpcklbw %%mm0, %%mm4\n\t"
                         "punpckhbw %%mm0, %%mm6\n\t"
                         "movq      %%mm4, %%mm5\n\t"
                         "movq      %%mm6, %%mm7\n\t"
                         "punpcklwd %%mm0, %%mm4\n\t"
                         "punpckhwd %%mm0, %%mm5\n\t"
                         "punpcklwd %%mm0, %%mm6\n\t"
                         "punpckhwd %%mm0, %%mm7\n\t"
                         "pslld       $12, %%mm4\n\t"
                         "pslld       $12, %%mm5\n\t"
                         "pslld       $12, %%mm6\n\t"
                         "pslld       $12, %%mm7\n\t"
                         "movq      %%mm4, "DITHER32"+0(%1)\n\t"
                         "movq      %%mm5, "DITHER32"+8(%1)\n\t"
                         "movq      %%mm6, "DITHER32"+16(%1)\n\t"
                         "movq      %%mm7, "DITHER32"+24(%1)\n\t"
                         :: "r"(srcDither), "r"(&c->redDither)
                         );
    } else {
        /* Unpack bytes -> words -> dwords against the zero register mm0,
         * scale by 1 << 12 and store all eight dwords. */
        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
                         "movq       (%0), %%mm4\n\t"
                         "movq      %%mm4, %%mm6\n\t"
                         "punpcklbw %%mm0, %%mm4\n\t"
                         "punpckhbw %%mm0, %%mm6\n\t"
                         "movq      %%mm4, %%mm5\n\t"
                         "movq      %%mm6, %%mm7\n\t"
                         "punpcklwd %%mm0, %%mm4\n\t"
                         "punpckhwd %%mm0, %%mm5\n\t"
                         "punpcklwd %%mm0, %%mm6\n\t"
                         "punpckhwd %%mm0, %%mm7\n\t"
                         "pslld       $12, %%mm4\n\t"
                         "pslld       $12, %%mm5\n\t"
                         "pslld       $12, %%mm6\n\t"
                         "pslld       $12, %%mm7\n\t"
                         "movq      %%mm4, "DITHER32"+0(%1)\n\t"
                         "movq      %%mm5, "DITHER32"+8(%1)\n\t"
                         "movq      %%mm6, "DITHER32"+16(%1)\n\t"
                         "movq      %%mm7, "DITHER32"+24(%1)\n\t"
                         :: "r"(srcDither), "r"(&c->redDither)
                         );
    }
 }
 #endif

 /*
  * Vertically filter into planar 8-bit output using the 32-bit accumulating
  * YSCALEYUV2YV12X_ACCURATE macro.
  *
  * The filter and source parameters are not read here directly; the asm
  * macro takes its filter lists from the context, at the *_MMX_FILTER_OFFSET
  * offsets relative to &c->redDither.
  *
  * Plane order is chroma U, chroma V (both only if dest[1] is set), alpha
  * (only if compiled in and dest[3] is set), then luma. The dither values
  * are reloaded with dither_8to32() before U, before V (rot = 1) and once
  * before the alpha and luma passes.
  */
 static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                 const int16_t **lumSrc, int lumFilterSize,
                                 const int16_t *chrFilter, const int16_t **chrUSrc,
                                 const int16_t **chrVSrc,
                                 int chrFilterSize, const int16_t **alpSrc,
                                 uint8_t *dest[4], int dstW, int chrDstW)
 {
    uint8_t *luma_out  = dest[0];
    uint8_t *cb_out    = dest[1];
    uint8_t *cr_out    = dest[2];
    uint8_t *alpha_out = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
    const uint8_t *luma_dither   = c->lumDither8;
    const uint8_t *chroma_dither = c->chrDither8;

    if (cb_out) {
        /* V pass: the index starts at v_shift, so the destination pointer
         * and the end index are biased by the same amount. */
        x86_reg v_shift = c->uv_offx2 >> 1;

        dither_8to32(c, chroma_dither, 0);
        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, cb_out, chrDstW, 0)

        dither_8to32(c, chroma_dither, 1);
        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, cr_out - v_shift, chrDstW + v_shift, v_shift)
    }

    dither_8to32(c, luma_dither, 0);

    if (CONFIG_SWSCALE_ALPHA && alpha_out) {
        YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, alpha_out, dstW, 0)
    }

    YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, luma_out, dstW, 0)
 }

 static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                             const int16_t *chrUSrc, const int16_t *chrVSrc,
                             const int16_t *alpSrc,
@@ -2095,8 +1896,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
        && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
            if (c->flags & SWS_ACCURATE_RND) {
                c->yuv2yuv1 = RENAME(yuv2yuv1_ar    );
                c->yuv2yuvX = RENAME(yuv2yuvX_ar    );
                //c->yuv2yuv1 = RENAME(yuv2yuv1_ar    );
                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
                    switch (c->dstFormat) {
                    case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
@@ -2108,9 +1908,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
                    }
                }
            } else {
                int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
                c->yuv2yuv1 = should_dither ? RENAME(yuv2yuv1_ar    ) : RENAME(yuv2yuv1    );
                c->yuv2yuvX = RENAME(yuv2yuvX    );
                //c->yuv2yuv1 = RENAME(yuv2yuv1    );
                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
                    switch (c->dstFormat) {
                    case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
--- a/tests/ref/lavfi/pixdesc
+++ b/tests/ref/lavfi/pixdesc
@@ -40,16 +40,16 @@ yuv420p9le          9ed4b1dfabc53fd9e586ff6c4c43af80
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
 yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
 yuv422p10le         d0607c260a45c973e6639f4e449730ad
 yuv422p16be         4e9b3b3467aeebb6a528cee5966800ed
 yuv422p16le         f87c81bf16916b64d201359be0b4b6f4
 yuv422p16be         5499502e1c29534a158a1fe60e889f60
 yuv422p16le         e3d61fde6978591596bc36b914386623
 yuv422p9be          29b71579946940a8c00fa844c9dff507
 yuv422p9le          062b7f9cbb972bf36b5bdb1a7623701a
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
 yuv444p             0a98447b78fd476aa39686da6a74fa2e
 yuv444p10be         e65cbae7e4f1892c23defbc8e8052cf6
 yuv444p10le         767179dd82846cf00ee4c340c9c1ab74
 yuv444p16be         3ad639fff73e56f3b09dd20c335478d6
 yuv444p16le         8a7e66dc91ab7971fd24a9105ff2699b
 yuv444p16be         1c6ea2c2f5e539006112ceec3d4e7d90
 yuv444p16le         20f86bc2f68d2b3f1f2b48b97b2189f4
 yuv444p9be          6ab31f4c12b533ce318ecdff83cdd054
 yuv444p9le          f0606604a5c08becab6ba500124c4b7c
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
--- a/tests/ref/lavfi/pixfmts_copy
+++ b/tests/ref/lavfi/pixfmts_copy
@@ -40,16 +40,16 @@ yuv420p9le          9ed4b1dfabc53fd9e586ff6c4c43af80
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
 yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
 yuv422p10le         d0607c260a45c973e6639f4e449730ad
 yuv422p16be         4e9b3b3467aeebb6a528cee5966800ed
 yuv422p16le         f87c81bf16916b64d201359be0b4b6f4
 yuv422p16be         5499502e1c29534a158a1fe60e889f60
 yuv422p16le         e3d61fde6978591596bc36b914386623
 yuv422p9be          29b71579946940a8c00fa844c9dff507
 yuv422p9le          062b7f9cbb972bf36b5bdb1a7623701a
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
 yuv444p             0a98447b78fd476aa39686da6a74fa2e
 yuv444p10be         e65cbae7e4f1892c23defbc8e8052cf6
 yuv444p10le         767179dd82846cf00ee4c340c9c1ab74
 yuv444p16be         3ad639fff73e56f3b09dd20c335478d6
 yuv444p16le         8a7e66dc91ab7971fd24a9105ff2699b
 yuv444p16be         1c6ea2c2f5e539006112ceec3d4e7d90
 yuv444p16le         20f86bc2f68d2b3f1f2b48b97b2189f4
 yuv444p9be          6ab31f4c12b533ce318ecdff83cdd054
 yuv444p9le          f0606604a5c08becab6ba500124c4b7c
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
--- a/tests/ref/lavfi/pixfmts_crop
+++ b/tests/ref/lavfi/pixfmts_crop
@@ -33,8 +33,8 @@ yuv422p16be         167e4338811a7d272925a4c6417d60da
 yuv422p16le         3359395d5875d581fa1e975013d30114
 yuv440p             2472417d980e395ad6843cbb8b633b29
 yuv444p             1f151980486848c96bc5585ced99003e
 yuv444p16be         5d0c0ea66ab43c0c590d8c2a9256e43f
 yuv444p16le         3c0a747c1b64feb0ab8dfba92f92579a
 yuv444p16be         1ce8fcd4712d525af983e6179d6a4f9e
 yuv444p16le         5f1441e18345aadb3f881dac99c6c08a
 yuva420p            7536753dfbc7932560fb50c921369a0e
 yuvj420p            21f891093006d42d7683b0e1d773a657
 yuvj422p            9a43d474c407590ad8f213880586b45e
--- a/tests/ref/lavfi/pixfmts_hflip
+++ b/tests/ref/lavfi/pixfmts_hflip
@@ -29,12 +29,12 @@ yuv420p             2d5c80f9ba2ddd85b2aeda3564cc7d64
 yuv420p16be         1c4fa93d0744de3cdc6d34ab55db3fb4
 yuv420p16le         92c74f5759068c381e4a066fe7faf2e0
 yuv422p             6e728f4eb9eae287c224f396d84be6ea
 yuv422p16be         69cf0605496c321546899a8442ee64fb
 yuv422p16le         f0b443fea72f4b6f462859a73b159664
 yuv422p16be         a05d43cd62b790087bd37083174557de
 yuv422p16le         6954abebcbc62d81068d58d0c62bdd5b
 yuv440p             a99e2b57ed601f39852715c9d675d0d3
 yuv444p             947e47f7bb5fdccc659d19b7df2b6fc3
 yuv444p16be         bc7d53923cff1d7e98d24540845fb64b
 yuv444p16le         5df206a93f85ef8b77f5bdc81d9b0a0b
 yuv444p16be         58c012e5ab73b066ef3c2b6411a395f1
 yuv444p16le         32c12794e184042a59738ab2de608c8d
 yuva420p            d83ec0c01498189f179ec574918185f1
 yuvj420p            df3aaaec3bb157c3bde5f0365af30f4f
 yuvj422p            d113871528d510a192797af59df9c05c
--- a/tests/ref/lavfi/pixfmts_null
+++ b/tests/ref/lavfi/pixfmts_null
@@ -40,16 +40,16 @@ yuv420p9le          9ed4b1dfabc53fd9e586ff6c4c43af80
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
 yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
 yuv422p10le         d0607c260a45c973e6639f4e449730ad
 yuv422p16be         4e9b3b3467aeebb6a528cee5966800ed
 yuv422p16le         f87c81bf16916b64d201359be0b4b6f4
 yuv422p16be         5499502e1c29534a158a1fe60e889f60
 yuv422p16le         e3d61fde6978591596bc36b914386623
 yuv422p9be          29b71579946940a8c00fa844c9dff507
 yuv422p9le          062b7f9cbb972bf36b5bdb1a7623701a
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
 yuv444p             0a98447b78fd476aa39686da6a74fa2e
 yuv444p10be         e65cbae7e4f1892c23defbc8e8052cf6
 yuv444p10le         767179dd82846cf00ee4c340c9c1ab74
 yuv444p16be         3ad639fff73e56f3b09dd20c335478d6
 yuv444p16le         8a7e66dc91ab7971fd24a9105ff2699b
 yuv444p16be         1c6ea2c2f5e539006112ceec3d4e7d90
 yuv444p16le         20f86bc2f68d2b3f1f2b48b97b2189f4
 yuv444p9be          6ab31f4c12b533ce318ecdff83cdd054
 yuv444p9le          f0606604a5c08becab6ba500124c4b7c
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
--- a/tests/ref/lavfi/pixfmts_scale
+++ b/tests/ref/lavfi/pixfmts_scale
@@ -33,23 +33,23 @@ yuv411p             1143e7c5cc28fe0922b051b17733bc4c
 yuv420p             fdad2d8df8985e3d17e73c71f713cb14
 yuv420p10be         6d335e75b553da590135cf8bb999610c
 yuv420p10le         d510ddbabefd03ef39ec943fcb51b709
 yuv420p16be         2a75942af24fbdc1fdfe189c6e7bf589
 yuv420p16le         c4264d92a7c273967a778f4f5daddbe3
 yuv420p16be         31988e9a5d6acacaa710f67bc1172f3a
 yuv420p16le         f5390ce399f88e0e4e2621ed7833b250
 yuv420p9be          ec4983b7a949c0472110a7a2c58e278a
 yuv420p9le          c136dce5913a722eee44ab72cff664b2
 yuv422p             918e37701ee7377d16a8a6c119c56a40
 yuv422p10be         cea7ca6b0e66d6f29539885896c88603
 yuv422p10le         a10c4a5837547716f13cd61918b145f9
 yuv422p16be         285993ee0c0f4f8e511ee46f93c5f38c
 yuv422p16le         61bfcee8e54465f760164f5a75d40b5e
 yuv422p16be         e7e34fe9264784763ab6cb406524c0f3
 yuv422p16le         c435b76b08204dda6908640fb5fd4621
 yuv422p9be          82494823944912f73cebc58ad2979bbd
 yuv422p9le          fc69c8a21f473916a4b4225636b97e06
 yuv440p             461503fdb9b90451020aa3b25ddf041c
 yuv444p             81b2eba962d12e8d64f003ac56f6faf2
 yuv444p10be         e9d3c8e744b8b0d8187ca092fa203fc9
 yuv444p10le         02f0a336e9da062a64df1ba487e102c5
 yuv444p16be         2677f3074d255f9dab625e9e2e092ca5
 yuv444p16le         65fa92521ef97088599ea83f9508cd5b
 yuv444p16be         0da9bed80f5542682ab286f3261cf24c
 yuv444p16le         a0c5d3c7bf3f181db503cf8e450d1335
 yuv444p9be          9ac2643ce7f7e5c4e17c8c9fd8494d4a
 yuv444p9le          896a1cc9cccca1ba410dd53942d33cc4
 yuva420p            8673a9131fb47de69788863f93a50eb7
--- a/tests/ref/lavfi/pixfmts_vflip
+++ b/tests/ref/lavfi/pixfmts_vflip
@@ -40,16 +40,16 @@ yuv420p9le          0f1e371a1374d3cba2205b70cc7cac90
 yuv422p             d7f5cb44d9b0210d66d6a8762640ab34
 yuv422p10be         588fe319b96513c32e21d3e32b45447f
 yuv422p10le         11b57f2bd9661024153f3973b9090cdb
 yuv422p16be         c092d083548c2a144c372a98c46875c7
 yuv422p16le         c071b9397a416d51cbe339345cbcba84
 yuv422p16be         9bd8f8c961822b586fa4cf992be54acc
 yuv422p16le         9c4a1239605c7952b736ac3130163f14
 yuv422p9be          7c6f1e140b3999ee7d923854e507752a
 yuv422p9le          51f10d79c07989060dd06e767e6d7d60
 yuv440p             876385e96165acf51271b20e5d85a416
 yuv444p             9c3c667d1613b72d15bc6d851c5eb8f7
 yuv444p10be         944a4997c4edb3a8dd0f0493cfd5a1fd
 yuv444p10le         2d0947ae89ecc6a501eee6832cb27e06
 yuv444p16be         6a954614fd2a8ae0df53e4fd76937af8
 yuv444p16le         65613965fb58cc4c3cd480a68b6540ea
 yuv444p16be         de2dedfc6f12073ffead113f86e07ecf
 yuv444p16le         8e83323cf102d6c823a03ae8a7b7e033
 yuv444p9be          6ac92b7dc9ab2fc59bee99204886899a
 yuv444p9le          85aef13a654953d3455d89770b0d74bd
 yuva420p            c705d1cf061d8c6580ac690b55f92276