From e11a5fccb237b54da884e19c3f09986b780bd618 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Fri, 29 Jul 2011 14:45:04 -0700
Subject: [PATCH 01/16] bink: Eliminate unnecessary shadow declaration.

---
 libavcodec/bink.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/bink.c b/libavcodec/bink.c
index 2b5bc75078..17683730d4 100644
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c
@@ -624,7 +624,6 @@ static int read_dct_coeffs(GetBitContext *gb, int32_t block[64], const uint8_t *
                         coef_list[--list_start] = ccoef;
                         mode_list[  list_start] = 3;
                     } else {
-                        int t;
                         if (!bits) {
                             t = 1 - (get_bits1(gb) << 1);
                         } else {

From a6c49f18abacb9bf52d4d808a2a56561a5b5445c Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Fri, 29 Jul 2011 15:49:11 -0700
Subject: [PATCH 02/16] aac: Remove some suspicious illegal memcpy()s from LTP.

---
 libavcodec/aacdec.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 6bf14275f9..783bf99fba 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -1789,12 +1789,10 @@ static void windowing_and_mdct_ltp(AACContext *ac, float *out,
     } else {
         memset(in, 0, 448 * sizeof(float));
         ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
-        memcpy(in + 576, in + 576, 448 * sizeof(float));
     }
     if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
         ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
     } else {
-        memcpy(in + 1024, in + 1024, 448 * sizeof(float));
         ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
         memset(in + 1024 + 576, 0, 448 * sizeof(float));
     }

From 4326f6f525063e92d40e7a8f39cf7ccd7759aea7 Mon Sep 17 00:00:00 2001
From: Dustin Brody <libav@parsoma.net>
Date: Wed, 27 Jul 2011 20:44:58 -0400
Subject: [PATCH 03/16] mpeg4videodec: remove dead code that would have
 detected erroneous encoding

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/mpeg4videodec.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index 57d55a1424..0d061acfb9 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -990,28 +990,6 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                         SKIP_COUNTER(re, &s->gb, 1+12+1);
                     }
 
-#if 0
-                    if(s->error_recognition >= FF_ER_COMPLIANT){
-                        const int abs_level= FFABS(level);
-                        if(abs_level<=MAX_LEVEL && run<=MAX_RUN){
-                            const int run1= run - rl->max_run[last][abs_level] - 1;
-                            if(abs_level <= rl->max_level[last][run]){
-                                av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n");
-                                return -1;
-                            }
-                            if(s->error_recognition > FF_ER_COMPLIANT){
-                                if(abs_level <= rl->max_level[last][run]*2){
-                                    av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n");
-                                    return -1;
-                                }
-                                if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){
-                                    av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n");
-                                    return -1;
-                                }
-                            }
-                        }
-                    }
-#endif
                     if (level>0) level= level * qmul + qadd;
                     else         level= level * qmul - qadd;
 

From 9d39cbf6cfecfda88e2f822b0437ee22ffa6d745 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Mon, 1 Aug 2011 17:41:44 +0200
Subject: [PATCH 04/16] avpacket: signal possibly corrupted packets

Use AV_PKT_FLAG_CORRUPT flag to mark packets that might be incomplete
or have errors. Formats that have a mean to validate their packets
should use this flag to deliver such information instead of silently
dropping the packets.
---
 libavcodec/avcodec.h | 6 +++++-
 libavcodec/version.h | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index e1fd475ca6..eaf63e8dc1 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -791,6 +791,9 @@ typedef struct AVPacket {
     uint8_t *data;
     int   size;
     int   stream_index;
+    /**
+     * A combination of AV_PKT_FLAG values
+     */
     int   flags;
     /**
      * Additional packet data that can be provided by the container.
@@ -831,7 +834,8 @@ typedef struct AVPacket {
      */
     int64_t convergence_duration;
 } AVPacket;
-#define AV_PKT_FLAG_KEY   0x0001
+#define AV_PKT_FLAG_KEY     0x0001 ///< The packet contains a keyframe
+#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted
 
 /**
  * Audio Video Frame.
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 46e54990b1..24d2488249 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -22,7 +22,7 @@
 
 #define LIBAVCODEC_VERSION_MAJOR 53
 #define LIBAVCODEC_VERSION_MINOR  7
-#define LIBAVCODEC_VERSION_MICRO  0
+#define LIBAVCODEC_VERSION_MICRO  1
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \

From 5038f9b2684c4be1d9543e34e76282c6935815ba Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Mon, 1 Aug 2011 17:46:29 +0200
Subject: [PATCH 05/16] apichange: add an entry for AV_PKT_FLAG_CORRUPT

---
 doc/APIchanges | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/APIchanges b/doc/APIchanges
index c533818efa..09a12fe3d2 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2011-08-02 - 9d39cbf - lavc 53.7.1
+  Add AV_PKT_FLAG_CORRUPT AVPacket flag.
+
 2011-07-10 - a67c061 - lavf 53.3.0
   Add avformat_find_stream_info(), deprecate av_find_stream_info().
 

From be9c00615b5c2cb858b9905854726ebe578c007b Mon Sep 17 00:00:00 2001
From: Zohar Kelrich <lumimies@gmail.com>
Date: Sun, 24 Jul 2011 11:13:50 +0300
Subject: [PATCH 06/16] mpegts: Silence "can't seek" warning on unseekable

Do not try to seek when we already know we are not allowed to.
Silences warning that always happens when streaming.

Signed-off-by: Zohar Kelrich <lumimies@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavformat/mpegts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index e9b984dc11..410507db6d 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -1488,7 +1488,7 @@ static int mpegts_read_header(AVFormatContext *s,
         /* normal demux */
 
         /* first do a scaning to get all the services */
-        if (avio_seek(pb, pos, SEEK_SET) < 0)
+        if (pb->seekable && avio_seek(pb, pos, SEEK_SET) < 0)
             av_log(s, AV_LOG_ERROR, "Unable to seek back to the start\n");
 
         mpegts_open_section_filter(ts, SDT_PID, sdt_cb, ts, 1);

From 8b9df201dfb2fa557b5dfc4e04c927d3f13a0dd9 Mon Sep 17 00:00:00 2001
From: Zohar Kelrich <lumimies@gmail.com>
Date: Sun, 24 Jul 2011 11:13:50 +0300
Subject: [PATCH 07/16] mpegts: Fix for continuity counter

Make continuity counter respect discontinuity flag
and null packets. Unpack the adaptation_field_control field.

Signed-off-by: Zohar Kelrich <lumimies@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavformat/mpegts.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 410507db6d..13b31177a5 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -1248,7 +1248,8 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet)
 {
     AVFormatContext *s = ts->stream;
     MpegTSFilter *tss;
-    int len, pid, cc, expected_cc, cc_ok, afc, is_start;
+    int len, pid, cc, expected_cc, cc_ok, afc, is_start, is_discontinuity,
+        has_adaptation, has_payload;
     const uint8_t *p, *p_end;
     int64_t pos;
 
@@ -1264,20 +1265,29 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet)
     if (!tss)
         return 0;
 
+    afc = (packet[3] >> 4) & 3;
+    if (afc == 0) /* reserved value */
+        return 0;
+    has_adaptation = afc & 2;
+    has_payload = afc & 1;
+    is_discontinuity = has_adaptation
+                && packet[4] != 0 /* with length > 0 */
+                && (packet[5] & 0x80); /* and discontinuity indicated */
+
     /* continuity check (currently not used) */
     cc = (packet[3] & 0xf);
-    expected_cc = (packet[3] & 0x10) ? (tss->last_cc + 1) & 0x0f : tss->last_cc;
-    cc_ok = (tss->last_cc < 0) || (expected_cc == cc);
+    expected_cc = has_payload ? (tss->last_cc + 1) & 0x0f : tss->last_cc;
+    cc_ok = pid == 0x1FFF // null packet PID
+            || is_discontinuity
+            || tss->last_cc < 0
+            || expected_cc == cc;
+
     tss->last_cc = cc;
 
-    /* skip adaptation field */
-    afc = (packet[3] >> 4) & 3;
-    p = packet + 4;
-    if (afc == 0) /* reserved value */
+    if (!has_payload)
         return 0;
-    if (afc == 2) /* adaptation field only */
-        return 0;
-    if (afc == 3) {
+    p = packet + 4;
+    if (has_adaptation) {
         /* skip adapation field */
         p += p[0] + 1;
     }

From c64f80b0e85ad138fca300f05278263bd713d27a Mon Sep 17 00:00:00 2001
From: Zohar Kelrich <lumimies@gmail.com>
Date: Sun, 24 Jul 2011 11:13:50 +0300
Subject: [PATCH 08/16] mpegts: Reset continuity counter on seek

Signed-off-by: Zohar Kelrich <lumimies@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavformat/mpegts.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 13b31177a5..f3b76aa264 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -1625,6 +1625,7 @@ static int mpegts_read_packet(AVFormatContext *s,
             if (ts->pids[i] && ts->pids[i]->type == MPEGTS_PES) {
                 PESContext *pes = ts->pids[i]->u.pes_filter.opaque;
                 av_freep(&pes->buffer);
+                ts->pids[i]->last_cc = -1;
                 pes->data_index = 0;
                 pes->state = MPEGTS_SKIP; /* skip until pes header */
             }

From ce9e31655e5b8f8db3bb4f13f436fc836062a514 Mon Sep 17 00:00:00 2001
From: Zohar Kelrich <lumimies@gmail.com>
Date: Sun, 24 Jul 2011 11:13:50 +0300
Subject: [PATCH 09/16] mpegts: Mark corrupted packets

Signed-off-by: Zohar Kelrich <lumimies@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavformat/mpegts.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index f3b76aa264..d4688aebfd 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -164,6 +164,7 @@ typedef struct PESContext {
     enum MpegTSState state;
     /* used to get the format */
     int data_index;
+    int flags; /**< copied to the AVPacket flags */
     int total_size;
     int pes_header_size;
     int extended_stream_id;
@@ -635,12 +636,14 @@ static void new_pes_packet(PESContext *pes, AVPacket *pkt)
     pkt->dts = pes->dts;
     /* store position of first TS packet of this PES packet */
     pkt->pos = pes->ts_packet_pos;
+    pkt->flags = pes->flags;
 
     /* reset pts values */
     pes->pts = AV_NOPTS_VALUE;
     pes->dts = AV_NOPTS_VALUE;
     pes->buffer = NULL;
     pes->data_index = 0;
+    pes->flags = 0;
 }
 
 /* return non zero if a packet could be constructed */
@@ -808,6 +811,7 @@ static int mpegts_push_data(MpegTSFilter *filter,
                     // pes packet size is < ts size packet and pes data is padded with 0xff
                     // not sure if this is legal in ts but see issue #2392
                     buf_size = pes->total_size;
+                    pes->flags |= AV_PKT_FLAG_CORRUPT;
                 }
                 memcpy(pes->buffer+pes->data_index, p, buf_size);
                 pes->data_index += buf_size;
@@ -1283,6 +1287,13 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet)
             || expected_cc == cc;
 
     tss->last_cc = cc;
+    if (!cc_ok) {
+        av_log(NULL, AV_LOG_WARNING, "Continuity Check Failed\n");
+        if(tss->type == MPEGTS_PES) {
+            PESContext *pc = tss->u.pes_filter.opaque;
+            pc->flags |= AV_PKT_FLAG_CORRUPT;
+        }
+    }
 
     if (!has_payload)
         return 0;

From cdb9884a636987061d549ac23a248aabc4a91140 Mon Sep 17 00:00:00 2001
From: Zohar Kelrich <lumimies@gmail.com>
Date: Sun, 24 Jul 2011 11:13:50 +0300
Subject: [PATCH 10/16] mpegts: Move scan test to handle_packets

This fixes an issue where packets which start being read
while reading the header stick around after a seek.

Signed-off-by: Zohar Kelrich <lumimies@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavformat/mpegts.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index d4688aebfd..a5a8ce2a67 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -1399,7 +1399,22 @@ static int handle_packets(MpegTSContext *ts, int nb_packets)
 {
     AVFormatContext *s = ts->stream;
     uint8_t packet[TS_PACKET_SIZE];
-    int packet_num, ret;
+    int packet_num, ret = 0;
+
+    if (avio_tell(s->pb) != ts->last_pos) {
+        int i;
+        av_dlog("Skipping after seek\n");
+        /* seek detected, flush pes buffer */
+        for (i = 0; i < NB_PID_MAX; i++) {
+            if (ts->pids[i] && ts->pids[i]->type == MPEGTS_PES) {
+                PESContext *pes = ts->pids[i]->u.pes_filter.opaque;
+                av_freep(&pes->buffer);
+                ts->pids[i]->last_cc = -1;
+                pes->data_index = 0;
+                pes->state = MPEGTS_SKIP; /* skip until pes header */
+            }
+        }
+    }
 
     ts->stop_parse = 0;
     packet_num = 0;
@@ -1411,12 +1426,13 @@ static int handle_packets(MpegTSContext *ts, int nb_packets)
             break;
         ret = read_packet(s, packet, ts->raw_packet_size);
         if (ret != 0)
-            return ret;
+            break;
         ret = handle_packet(ts, packet);
         if (ret != 0)
-            return ret;
+            break;
     }
-    return 0;
+    ts->last_pos = avio_tell(s->pb);
+    return ret;
 }
 
 static int mpegts_probe(AVProbeData *p)
@@ -1630,19 +1646,6 @@ static int mpegts_read_packet(AVFormatContext *s,
     MpegTSContext *ts = s->priv_data;
     int ret, i;
 
-    if (avio_tell(s->pb) != ts->last_pos) {
-        /* seek detected, flush pes buffer */
-        for (i = 0; i < NB_PID_MAX; i++) {
-            if (ts->pids[i] && ts->pids[i]->type == MPEGTS_PES) {
-                PESContext *pes = ts->pids[i]->u.pes_filter.opaque;
-                av_freep(&pes->buffer);
-                ts->pids[i]->last_cc = -1;
-                pes->data_index = 0;
-                pes->state = MPEGTS_SKIP; /* skip until pes header */
-            }
-        }
-    }
-
     ts->pkt = pkt;
     ret = handle_packets(ts, 0);
     if (ret < 0) {
@@ -1660,8 +1663,6 @@ static int mpegts_read_packet(AVFormatContext *s,
         }
     }
 
-    ts->last_pos = avio_tell(s->pb);
-
     return ret;
 }
 

From 5081514269a17809f8a8ff71e6b26e4b761e8266 Mon Sep 17 00:00:00 2001
From: Zohar Kelrich <lumimies@gmail.com>
Date: Sun, 24 Jul 2011 11:13:51 +0300
Subject: [PATCH 11/16] mpegts: Mark wrongly-sized packets as corrupted

Signed-off-by: Zohar Kelrich <lumimies@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavformat/mpegts.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index a5a8ce2a67..2bc66ac685 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -625,6 +625,12 @@ static void new_pes_packet(PESContext *pes, AVPacket *pkt)
     pkt->destruct = av_destruct_packet;
     pkt->data = pes->buffer;
     pkt->size = pes->data_index;
+
+    if(pes->total_size != MAX_PES_PAYLOAD &&
+       pes->pes_header_size + pes->data_index != pes->total_size + 6) {
+        av_log(pes->ts, AV_LOG_WARNING, "PES packet size mismatch\n");
+        pes->flags |= AV_PKT_FLAG_CORRUPT;
+    }
     memset(pkt->data+pkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 
     // Separate out the AC3 substream from an HDMV combined TrueHD/AC3 PID
@@ -811,7 +817,6 @@ static int mpegts_push_data(MpegTSFilter *filter,
                     // pes packet size is < ts size packet and pes data is padded with 0xff
                     // not sure if this is legal in ts but see issue #2392
                     buf_size = pes->total_size;
-                    pes->flags |= AV_PKT_FLAG_CORRUPT;
                 }
                 memcpy(pes->buffer+pes->data_index, p, buf_size);
                 pes->data_index += buf_size;
@@ -1288,7 +1293,7 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet)
 
     tss->last_cc = cc;
     if (!cc_ok) {
-        av_log(NULL, AV_LOG_WARNING, "Continuity Check Failed\n");
+        av_log(ts, AV_LOG_WARNING, "Continuity Check Failed\n");
         if(tss->type == MPEGTS_PES) {
             PESContext *pc = tss->u.pes_filter.opaque;
             pc->flags |= AV_PKT_FLAG_CORRUPT;

From 73e8e8dbf969b9a0bc1591abcfeba474a42e47bc Mon Sep 17 00:00:00 2001
From: Zohar Kelrich <lumimies@gmail.com>
Date: Sun, 24 Jul 2011 17:28:33 +0300
Subject: [PATCH 12/16] lavf: Add an option to discard corrupted frames

Signed-off-by: Zohar Kelrich <lumimies@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavformat/avformat.h | 1 +
 libavformat/options.c  | 1 +
 libavformat/utils.c    | 9 +++++++++
 3 files changed, 11 insertions(+)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index cfdbd11b36..2f68abab25 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -750,6 +750,7 @@ typedef struct AVFormatContext {
 #define AVFMT_FLAG_RTP_HINT     0x0040 ///< Deprecated, use the -movflags rtphint muxer specific AVOption instead
 #endif
 #define AVFMT_FLAG_CUSTOM_IO    0x0080 ///< The caller has supplied a custom AVIOContext, don't avio_close() it.
+#define AVFMT_FLAG_DISCARD_CORRUPT  0x0100 ///< Discard frames marked corrupted
 
 #if FF_API_LOOP_INPUT
     /**
diff --git a/libavformat/options.c b/libavformat/options.c
index c2729b75d9..961162876f 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -79,6 +79,7 @@ static const AVOption options[]={
 #if FF_API_FLAG_RTP_HINT
 {"rtphint", "add rtp hinting (deprecated, use the -movflags rtphint option instead)", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"},
 #endif
+{"discardcorrupt", "discard corrupted frames", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_DISCARD_CORRUPT }, INT_MIN, INT_MAX, D, "fflags"},
 {"analyzeduration", "how many microseconds are analyzed to estimate duration", OFFSET(max_analyze_duration), FF_OPT_TYPE_INT, {.dbl = 5*AV_TIME_BASE }, 0, INT_MAX, D},
 {"cryptokey", "decryption key", OFFSET(key), FF_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D},
 {"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), FF_OPT_TYPE_INT, {.dbl = 1<<20 }, 0, INT_MAX, D},
diff --git a/libavformat/utils.c b/libavformat/utils.c
index c4d7623c8c..b848ebb827 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -719,6 +719,15 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt)
                 s->streams[i]->probe_packets = 0;
             continue;
         }
+
+        if ((s->flags & AVFMT_FLAG_DISCARD_CORRUPT) &&
+            (pkt->flags & AV_PKT_FLAG_CORRUPT)) {
+            av_log(s, AV_LOG_WARNING,
+                   "Dropped corrupted packet (stream = %d)\n",
+                   pkt->stream_index);
+            continue;
+        }
+
         st= s->streams[pkt->stream_index];
 
         switch(st->codec->codec_type){

From 6f7fe4723b9bfbb52341568906e6168966f486b3 Mon Sep 17 00:00:00 2001
From: Kostya Shishkov <kostya.shishkov@gmail.com>
Date: Tue, 2 Aug 2011 13:45:28 +0200
Subject: [PATCH 13/16] Correct chroma vector calculation for RealVideo 3.

Old version divided it wrong, which resulted in chroma drift (visible on FATE
sample too as dirty trails left by clouds).

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/rv34.c   |  4 ++--
 tests/ref/fate/rv30 | 56 ++++++++++++++++++++++-----------------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 9162da5194..1aec571ad3 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -737,8 +737,8 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
         my = (s->current_picture_ptr->f.motion_val[dir][mv_pos][1] + (3 << 24)) / 3 - (1 << 24);
         lx = (s->current_picture_ptr->f.motion_val[dir][mv_pos][0] + (3 << 24)) % 3;
         ly = (s->current_picture_ptr->f.motion_val[dir][mv_pos][1] + (3 << 24)) % 3;
-        chroma_mx = (s->current_picture_ptr->f.motion_val[dir][mv_pos][0] + 1) >> 1;
-        chroma_my = (s->current_picture_ptr->f.motion_val[dir][mv_pos][1] + 1) >> 1;
+        chroma_mx = s->current_picture_ptr->f.motion_val[dir][mv_pos][0] / 2;
+        chroma_my = s->current_picture_ptr->f.motion_val[dir][mv_pos][1] / 2;
         umx = (chroma_mx + (3 << 24)) / 3 - (1 << 24);
         umy = (chroma_my + (3 << 24)) / 3 - (1 << 24);
         uvmx = chroma_coeffs[(chroma_mx + (3 << 24)) % 3];
diff --git a/tests/ref/fate/rv30 b/tests/ref/fate/rv30
index 5b43588bb4..6c99871eb6 100644
--- a/tests/ref/fate/rv30
+++ b/tests/ref/fate/rv30
@@ -16,31 +16,31 @@
 0, 112500, 126720, 0xe572dfc9
 0, 120000, 126720, 0xbc3cc34f
 0, 127500, 126720, 0xcf8cb0e2
-0, 135000, 126720, 0x6d1c630d
-0, 142500, 126720, 0x4338e469
-0, 150000, 126720, 0x9d82ea38
-0, 157500, 126720, 0x55e0b559
-0, 165000, 126720, 0x5eefb5ef
-0, 172500, 126720, 0x4b10b746
-0, 180000, 126720, 0x8b07a1db
-0, 187500, 126720, 0x8c639b34
-0, 195000, 126720, 0x63eb0b9f
-0, 202500, 126720, 0x31c80c83
-0, 210000, 126720, 0x78495352
-0, 217500, 126720, 0x63d609c4
-0, 225000, 126720, 0xcd2a62d8
-0, 232500, 126720, 0x4aea732d
-0, 240000, 126720, 0xe3bb352c
-0, 247500, 126720, 0x4b9036ad
-0, 255000, 126720, 0x88b66e2d
-0, 262500, 126720, 0x4a8a1b16
-0, 270000, 126720, 0x2e014eac
-0, 277500, 126720, 0x83212c67
-0, 285000, 126720, 0x4937e897
-0, 292500, 126720, 0x2d38babe
-0, 300000, 126720, 0xbcb43c09
-0, 307500, 126720, 0x955ffaf4
-0, 315000, 126720, 0x3337d4a2
-0, 322500, 126720, 0xe8f58c33
-0, 330000, 126720, 0x3a7f771f
-0, 337500, 126720, 0xb67c39b9
+0, 135000, 126720, 0x75ae61b6
+0, 142500, 126720, 0x554fe3e4
+0, 150000, 126720, 0x72ecea95
+0, 157500, 126720, 0x5d00b5fe
+0, 165000, 126720, 0xe39bba0d
+0, 172500, 126720, 0x9c21bad8
+0, 180000, 126720, 0x72f2a47d
+0, 187500, 126720, 0x4f639ebe
+0, 195000, 126720, 0x534a10cc
+0, 202500, 126720, 0xfdca11d3
+0, 210000, 126720, 0x0c735615
+0, 217500, 126720, 0x0eaf0c1b
+0, 225000, 126720, 0xce5e6794
+0, 232500, 126720, 0x14cf7974
+0, 240000, 126720, 0xbc513f2a
+0, 247500, 126720, 0xbc303fae
+0, 255000, 126720, 0xd9f67585
+0, 262500, 126720, 0x3378251f
+0, 270000, 126720, 0xb3ed5911
+0, 277500, 126720, 0xc15a3577
+0, 285000, 126720, 0x0a24f256
+0, 292500, 126720, 0xfab9c45d
+0, 300000, 126720, 0x45464610
+0, 307500, 126720, 0xfe2e057d
+0, 315000, 126720, 0x23efdc35
+0, 322500, 126720, 0x4d888b2e
+0, 330000, 126720, 0xdd0d74df
+0, 337500, 126720, 0x08382b8e

From 18b131de0473a3110c63966cd7c6cd2ab118d401 Mon Sep 17 00:00:00 2001
From: Vitor Sessak <vitor1001@gmail.com>
Date: Sat, 30 Jul 2011 18:39:25 +0200
Subject: [PATCH 14/16] dct32: Add SSE2 ASM optimizations

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/dct32_sse.asm | 39 +++++++++++++++++++++++++++---------
 libavcodec/x86/fft.c         |  2 ++
 libavcodec/x86/fft.h         |  1 +
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index 46daa43d8c..720a061078 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -63,6 +63,13 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
     mulps  %1, %3
 %endmacro
 
+%macro BUTTERFLY0_SSE2 5
+    pshufd %4, %1, %5
+    xorps  %1, %2
+    addps  %1, %4
+    mulps  %1, %3
+%endmacro
+
 %macro BUTTERFLY0_AVX 5
     vshufps %4, %1, %1, %5
     vxorps  %1, %1, %2
@@ -405,18 +412,17 @@ INIT_XMM
 
 
 INIT_XMM
+%macro DCT32_FUNC 1
 ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_sse, 2,3,16, out, in, tmp
+cglobal dct32_float_%1, 2,3,16, out, in, tmp
     ; pass 1
 
     movaps      m0, [inq+0]
-    movaps      m1, [inq+112]
-    shufps      m1, m1, 0x1b
+    LOAD_INV    m1, [inq+112]
     BUTTERFLY   m0, m1, [ps_cos_vec], m3
 
     movaps      m7, [inq+64]
-    movaps      m4, [inq+48]
-    shufps      m4, m4, 0x1b
+    LOAD_INV    m4, [inq+48]
     BUTTERFLY   m7, m4, [ps_cos_vec+32], m3
 
     ; pass 2
@@ -427,13 +433,11 @@ cglobal dct32_float_sse, 2,3,16, out, in, tmp
 
     ; pass 1
     movaps      m1, [inq+16]
-    movaps      m6, [inq+96]
-    shufps      m6, m6, 0x1b
+    LOAD_INV    m6, [inq+96]
     BUTTERFLY   m1, m6, [ps_cos_vec+16], m3
 
     movaps      m4, [inq+80]
-    movaps      m5, [inq+32]
-    shufps      m5, m5, 0x1b
+    LOAD_INV    m5, [inq+32]
     BUTTERFLY   m4, m5, [ps_cos_vec+48], m3
 
     ; pass 2
@@ -492,3 +496,20 @@ cglobal dct32_float_sse, 2,3,16, out, in, tmp
     PASS5
     PASS6
     RET
+%endmacro
+
+%macro LOAD_INV_SSE 2
+    movaps      %1, %2
+    shufps      %1, %1, 0x1b
+%endmacro
+
+%define LOAD_INV LOAD_INV_SSE
+DCT32_FUNC sse
+
+%macro LOAD_INV_SSE2 2
+    pshufd      %1, %2, 0x1b
+%endmacro
+
+%define LOAD_INV LOAD_INV_SSE2
+%define BUTTERFLY0 BUTTERFLY0_SSE2
+DCT32_FUNC sse2
diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index 899f0f7ad5..f7308cca32 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -60,6 +60,8 @@ av_cold void ff_dct_init_mmx(DCTContext *s)
     int has_vectors = av_get_cpu_flags();
     if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
         s->dct32 = ff_dct32_float_avx;
+    else if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE)
+        s->dct32 = ff_dct32_float_sse2;
     else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
         s->dct32 = ff_dct32_float_sse;
 #endif
diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h
index 0ade2b2e7b..9d68d5b219 100644
--- a/libavcodec/x86/fft.h
+++ b/libavcodec/x86/fft.h
@@ -35,6 +35,7 @@ void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
 void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_dct32_float_sse(FFTSample *out, const FFTSample *in);
+void ff_dct32_float_sse2(FFTSample *out, const FFTSample *in);
 void ff_dct32_float_avx(FFTSample *out, const FFTSample *in);
 
 #endif /* AVCODEC_X86_FFT_H */

From ac0fb5934893be554a44d2a1eb7a3bc7bf39da4a Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 1 Aug 2011 21:04:19 -0700
Subject: [PATCH 15/16] swscale: use 15-bit intermediates for 9/10-bit scaling.

---
 libswscale/swscale.c          | 163 ++++++++++++++++++++++++----------
 libswscale/swscale_internal.h |  75 ++++++++++------
 libswscale/utils.c            |  12 ++-
 tests/ref/lavfi/pixdesc       |   4 +-
 tests/ref/lavfi/pixfmts_copy  |   4 +-
 tests/ref/lavfi/pixfmts_null  |   4 +-
 tests/ref/lavfi/pixfmts_scale |  12 +--
 tests/ref/lavfi/pixfmts_vflip |   4 +-
 8 files changed, 190 insertions(+), 88 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index dd9f4a108f..6fc65a8b2a 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -211,17 +211,9 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
 
 #define output_pixel(pos, val) \
     if (big_endian) { \
-        if (output_bits == 16) { \
-            AV_WB16(pos, av_clip_uint16(val >> shift)); \
-        } else { \
-            AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
-        } \
+        AV_WB16(pos, av_clip_uint16(val >> shift)); \
     } else { \
-        if (output_bits == 16) { \
-            AV_WL16(pos, av_clip_uint16(val >> shift)); \
-        } else { \
-            AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
-        } \
+        AV_WL16(pos, av_clip_uint16(val >> shift)); \
     }
     for (i = 0; i < dstW; i++) {
         int val = 1 << (30-output_bits - 1);
@@ -263,7 +255,67 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
 #undef output_pixel
 }
 
-#define yuv2NBPS(bits, BE_LE, is_be) \
+static av_always_inline void
+yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
+                      int lumFilterSize, const int16_t *chrFilter,
+                      const int16_t **chrUSrc, const int16_t **chrVSrc,
+                      int chrFilterSize, const int16_t **alpSrc,
+                      uint16_t *dest[4], int dstW, int chrDstW,
+                      int big_endian, int output_bits)
+{
+    //FIXME Optimize (just quickly written not optimized..)
+    int i;
+    uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+    *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
+    int shift = 15 + 16 - output_bits - 1;
+
+#define output_pixel(pos, val) \
+    if (big_endian) { \
+        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+    } else { \
+        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+    }
+    for (i = 0; i < dstW; i++) {
+        int val = 1 << (30-output_bits - 1);
+        int j;
+
+        for (j = 0; j < lumFilterSize; j++)
+            val += (lumSrc[j][i] * lumFilter[j]) >> 1;
+
+        output_pixel(&yDest[i], val);
+    }
+
+    if (uDest) {
+        for (i = 0; i < chrDstW; i++) {
+            int u = 1 << (30-output_bits - 1);
+            int v = 1 << (30-output_bits - 1);
+            int j;
+
+            for (j = 0; j < chrFilterSize; j++) {
+                u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
+                v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
+            }
+
+            output_pixel(&uDest[i], u);
+            output_pixel(&vDest[i], v);
+        }
+    }
+
+    if (CONFIG_SWSCALE_ALPHA && aDest) {
+        for (i = 0; i < dstW; i++) {
+            int val = 1 << (30-output_bits - 1);
+            int j;
+
+            for (j = 0; j < lumFilterSize; j++)
+                val += (alpSrc[j][i] * lumFilter[j]) >> 1;
+
+            output_pixel(&aDest[i], val);
+        }
+    }
+#undef output_pixel
+}
+
+#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                               const int16_t **_lumSrc, int lumFilterSize, \
                               const int16_t *chrFilter, const int16_t **_chrUSrc, \
@@ -271,21 +323,21 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil
                               int chrFilterSize, const int16_t **_alpSrc, \
                               uint8_t *_dest[4], int dstW, int chrDstW) \
 { \
-    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
-                  **chrUSrc = (const int32_t **) _chrUSrc, \
-                  **chrVSrc = (const int32_t **) _chrVSrc, \
-                  **alpSrc  = (const int32_t **) _alpSrc; \
-    yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
-                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                          alpSrc, (uint16_t **) _dest, \
-                          dstW, chrDstW, is_be, bits); \
-}
-yuv2NBPS( 9, BE, 1);
-yuv2NBPS( 9, LE, 0);
-yuv2NBPS(10, BE, 1);
-yuv2NBPS(10, LE, 0);
-yuv2NBPS(16, BE, 1);
-yuv2NBPS(16, LE, 0);
+    const typeX_t **lumSrc  = (const typeX_t **) _lumSrc, \
+                  **chrUSrc = (const typeX_t **) _chrUSrc, \
+                  **chrVSrc = (const typeX_t **) _chrVSrc, \
+                  **alpSrc  = (const typeX_t **) _alpSrc; \
+    yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
+                         chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                         alpSrc, (uint16_t **) _dest, \
+                         dstW, chrDstW, is_be, bits); \
+}
+yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
+yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
+yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
+yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
+yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
+yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
 
 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **lumSrc, int lumFilterSize,
@@ -1880,6 +1932,27 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_s
     }
 }
 
+static void hScale10_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
+                       const int16_t *filter,
+                       const int16_t *filterPos, int filterSize)
+{
+    int i;
+    const uint16_t *src = (const uint16_t *) _src;
+    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+
+    for (i = 0; i < dstW; i++) {
+        int j;
+        int srcPos = filterPos[i];
+        int val = 0;
+
+        for (j = 0; j < filterSize; j++) {
+            val += src[srcPos + j] * filter[filterSize * i + j];
+        }
+        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
+        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
+    }
+}
+
 // bilinear / bicubic scaling
 static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                      const int16_t *filter, const int16_t *filterPos,
@@ -2025,7 +2098,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
     if (convertRange)
         convertRange(dst, dstWidth);
 
-    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
+    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) {
         c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
     }
 }
@@ -2052,7 +2125,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
                                      uint8_t *formatConvBuffer, uint32_t *pal)
 {
     if (c->chrToYV12) {
-        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16);
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->scalingBpp, 8) >> 3, 16);
         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
         src1= formatConvBuffer;
         src2= buf2;
@@ -2076,7 +2149,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
     if (c->chrConvertRange)
         c->chrConvertRange(dst1, dst2, dstWidth);
 
-    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
+    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) {
         c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
         c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
     }
@@ -2735,27 +2808,27 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     }
 
     if (c->scalingBpp == 8) {
-    c->hScale       = hScale_c;
-    if (c->flags & SWS_FAST_BILINEAR) {
-        c->hyscale_fast = hyscale_fast_c;
-        c->hcscale_fast = hcscale_fast_c;
-    }
-
-    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
-        if (c->srcRange) {
-            c->lumConvertRange = lumRangeFromJpeg_c;
-            c->chrConvertRange = chrRangeFromJpeg_c;
-        } else {
-            c->lumConvertRange = lumRangeToJpeg_c;
-            c->chrConvertRange = chrRangeToJpeg_c;
+        c->hScale       = hScale_c;
+        if (c->flags & SWS_FAST_BILINEAR) {
+            c->hyscale_fast = hyscale_fast_c;
+            c->hcscale_fast = hcscale_fast_c;
         }
-    }
     } else {
-        c->hScale = hScale16_c;
+        c->hScale = c->scalingBpp == 16 ? hScale16_c : hScale10_c;
         c->scale19To15Fw = scale19To15Fw_c;
         c->scale8To16Rv  = scale8To16Rv_c;
+    }
 
-        if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+        if (c->scalingBpp <= 10) {
+            if (c->srcRange) {
+                c->lumConvertRange = lumRangeFromJpeg_c;
+                c->chrConvertRange = chrRangeFromJpeg_c;
+            } else {
+                c->lumConvertRange = lumRangeToJpeg_c;
+                c->chrConvertRange = chrRangeToJpeg_c;
+            }
+        } else {
             if (c->srcRange) {
                 c->lumConvertRange = lumRangeFromJpeg16_c;
                 c->chrConvertRange = chrRangeFromJpeg16_c;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 9492303301..60787aed91 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -64,11 +64,16 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
  * without any additional vertical scaling (or point-scaling).
  *
  * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
- * @param dest    pointer to the 4 output planes (Y/U/V/A)
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param dest    pointer to the 4 output planes (Y/U/V/A). For >8bit
+ *                output, this is in uint16_t
  * @param dstW    width of dest[0], dest[3], lumSrc and alpSrc in pixels
  * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
  */
@@ -82,14 +87,19 @@ typedef void (*yuv2planar1_fn) (struct SwsContext *c,
  *
  * @param c             SWS scaling context
  * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
  * @param lumFilterSize number of vertical luma/alpha input lines to scale
  * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
  * @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
- * @param dest          pointer to the 4 output planes (Y/U/V/A)
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
+ * @param dest          pointer to the 4 output planes (Y/U/V/A). For >8bit
+ *                      output, this is in uint16_t
  * @param dstW          width of dest[0], dest[3], lumSrc and alpSrc in pixels
  * @param chrDstW       width of dest[1], dest[2], chrUSrc and chrVSrc
  */
@@ -105,11 +115,16 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
  * that this function may do chroma scaling, see the "uvalpha" argument.
  *
  * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
- * @param dest    pointer to the output plane
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param dest    pointer to the output plane. For 16bit output, this is
+ *                uint16_t
  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
  *                to write into dest[]
  * @param uvalpha chroma scaling coefficient for the second line of chroma
@@ -132,11 +147,16 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c,  const int16_t *lumSrc,
  * output by doing bilinear scaling between two input lines.
  *
  * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
- * @param dest    pointer to the output plane
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param dest    pointer to the output plane. For 16bit output, this is
+ *                uint16_t
  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
  *                to write into dest[]
  * @param yalpha  luma/alpha scaling coefficients for the second input line.
@@ -160,14 +180,19 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c,  const int16_t *lumSrc[2],
  *
  * @param c             SWS scaling context
  * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
  * @param lumFilterSize number of vertical luma/alpha input lines to scale
  * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
  * @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
- * @param dest          pointer to the output plane
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
+ * @param dest          pointer to the output plane. For 16bit output, this is
+ *                      uint16_t
  * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
  *                      to write into dest[]
  * @param y             vertical line number for this output. This does not need
diff --git a/libswscale/utils.c b/libswscale/utils.c
index c6abb6b446..18d1227c83 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -896,11 +896,15 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         }
     }
 
-    c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
-                          av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8;
+    c->scalingBpp = 1 + FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
+                             av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1);
+    if (c->scalingBpp <= 8)
+        c->scalingBpp = 8;
     if (c->scalingBpp == 16)
         dst_stride <<= 1;
-    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2 * c->scalingBpp >> 3, fail);
+    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
+                     FFALIGN(srcW, 16) * 2 * FFALIGN(c->scalingBpp, 8) >> 3,
+                     fail);
     if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) {
         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
@@ -1055,7 +1059,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
     }
     // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
-    c->uv_off_px   = dst_stride_px + 64 / c->scalingBpp;
+    c->uv_off_px   = dst_stride_px + 64 / (c->scalingBpp &~ 7);
     c->uv_off_byte = dst_stride + 16;
     for (i=0; i<c->vChrBufSize; i++) {
         FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail);
diff --git a/tests/ref/lavfi/pixdesc b/tests/ref/lavfi/pixdesc
index 3730988777..0da94f861e 100644
--- a/tests/ref/lavfi/pixdesc
+++ b/tests/ref/lavfi/pixdesc
@@ -38,8 +38,8 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
-yuv422p10le         3f478be644add24b6cc77e718a6e2afa
+yuv422p10be         a4a83d0811280eff7405d94a7de21596
+yuv422p10le         23717b6c73a59912c605f27877ae2fb6
 yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
 yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
diff --git a/tests/ref/lavfi/pixfmts_copy b/tests/ref/lavfi/pixfmts_copy
index 3730988777..0da94f861e 100644
--- a/tests/ref/lavfi/pixfmts_copy
+++ b/tests/ref/lavfi/pixfmts_copy
@@ -38,8 +38,8 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
-yuv422p10le         3f478be644add24b6cc77e718a6e2afa
+yuv422p10be         a4a83d0811280eff7405d94a7de21596
+yuv422p10le         23717b6c73a59912c605f27877ae2fb6
 yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
 yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
diff --git a/tests/ref/lavfi/pixfmts_null b/tests/ref/lavfi/pixfmts_null
index 3730988777..0da94f861e 100644
--- a/tests/ref/lavfi/pixfmts_null
+++ b/tests/ref/lavfi/pixfmts_null
@@ -38,8 +38,8 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
-yuv422p10le         3f478be644add24b6cc77e718a6e2afa
+yuv422p10be         a4a83d0811280eff7405d94a7de21596
+yuv422p10le         23717b6c73a59912c605f27877ae2fb6
 yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
 yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale
index 392b9ce601..d68cdd41d0 100644
--- a/tests/ref/lavfi/pixfmts_scale
+++ b/tests/ref/lavfi/pixfmts_scale
@@ -31,15 +31,15 @@ uyvy422             314bd486277111a95d9369b944fa0400
 yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
 yuv411p             1143e7c5cc28fe0922b051b17733bc4c
 yuv420p             fdad2d8df8985e3d17e73c71f713cb14
-yuv420p10be         af5429f27b9f95bf955e795921c65cdc
-yuv420p10le         d0b47e6a8a44e6b5ca0fe4349a4e393b
+yuv420p10be         04663d400e44692fe8a622a067f838da
+yuv420p10le         6171850f66df7a727b4bed1c87ef9188
 yuv420p16be         9688e33e03b8c8275ab2fb1df0f06bee
 yuv420p16le         cba8b390ad5e7b8678e419b8ce79c008
-yuv420p9be          a073b2d93b2a7dce2069ba252bc43175
-yuv420p9le          b67233c3c7d93763d07d88f697c145e1
+yuv420p9be          ab163dfef03c4d563aca99b24276b9fd
+yuv420p9le          cd56c5a76ce74e504dd59d25a5e4389c
 yuv422p             918e37701ee7377d16a8a6c119c56a40
-yuv422p10be         533fd21e7943c20a1026b19069b3b867
-yuv422p10le         59b20a4a8609f5da2dc54c78aea11e6c
+yuv422p10be         e8e80fed7121f3afac994f2afac42cd2
+yuv422p10le         370866666f4889ee0928345b16d68fb4
 yuv422p16be         2cf502d7d386db1f1b3b946679d897b1
 yuv422p16le         3002a4e47520731dcee5929aff49eb74
 yuv440p             461503fdb9b90451020aa3b25ddf041c
diff --git a/tests/ref/lavfi/pixfmts_vflip b/tests/ref/lavfi/pixfmts_vflip
index 2b62518a28..a2b11690ab 100644
--- a/tests/ref/lavfi/pixfmts_vflip
+++ b/tests/ref/lavfi/pixfmts_vflip
@@ -38,8 +38,8 @@ yuv420p16le         0f609e588e5a258644ef85170d70e030
 yuv420p9be          be40ec975fb2873891643cbbbddbc3b0
 yuv420p9le          7e606310d3f5ff12badf911e8f333471
 yuv422p             d7f5cb44d9b0210d66d6a8762640ab34
-yuv422p10be         a28b051168af49435c04af5f58dce47b
-yuv422p10le         35936ffff30df2697f47b9b8d2cb7dea
+yuv422p10be         51d9ef13fe43ea9549b3792bfd449bf7
+yuv422p10le         4b286a243ee0e715b2961bc1b469e629
 yuv422p16be         51d9aa4e78d121c226d919ce97976fe4
 yuv422p16le         12965c54bda8932ca72da194419a9908
 yuv440p             876385e96165acf51271b20e5d85a416

From 62ee0e6a977e1990c9853630c7dea1415b38bb28 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 2 Aug 2011 12:27:43 -0700
Subject: [PATCH 16/16] Revert "swscale: use 15-bit intermediates for 9/10-bit
 scaling."

This reverts commit ac0fb5934893be554a44d2a1eb7a3bc7bf39da4a. It
causes valgrind errors which I'll want to investigate before
resubmitting this.
---
 libswscale/swscale.c          | 163 ++++++++++------------------------
 libswscale/swscale_internal.h |  75 ++++++----------
 libswscale/utils.c            |  12 +--
 tests/ref/lavfi/pixdesc       |   4 +-
 tests/ref/lavfi/pixfmts_copy  |   4 +-
 tests/ref/lavfi/pixfmts_null  |   4 +-
 tests/ref/lavfi/pixfmts_scale |  12 +--
 tests/ref/lavfi/pixfmts_vflip |   4 +-
 8 files changed, 88 insertions(+), 190 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 6fc65a8b2a..dd9f4a108f 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -211,69 +211,17 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
 
 #define output_pixel(pos, val) \
     if (big_endian) { \
-        AV_WB16(pos, av_clip_uint16(val >> shift)); \
-    } else { \
-        AV_WL16(pos, av_clip_uint16(val >> shift)); \
-    }
-    for (i = 0; i < dstW; i++) {
-        int val = 1 << (30-output_bits - 1);
-        int j;
-
-        for (j = 0; j < lumFilterSize; j++)
-            val += (lumSrc[j][i] * lumFilter[j]) >> 1;
-
-        output_pixel(&yDest[i], val);
-    }
-
-    if (uDest) {
-        for (i = 0; i < chrDstW; i++) {
-            int u = 1 << (30-output_bits - 1);
-            int v = 1 << (30-output_bits - 1);
-            int j;
-
-            for (j = 0; j < chrFilterSize; j++) {
-                u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
-                v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
-            }
-
-            output_pixel(&uDest[i], u);
-            output_pixel(&vDest[i], v);
-        }
-    }
-
-    if (CONFIG_SWSCALE_ALPHA && aDest) {
-        for (i = 0; i < dstW; i++) {
-            int val = 1 << (30-output_bits - 1);
-            int j;
-
-            for (j = 0; j < lumFilterSize; j++)
-                val += (alpSrc[j][i] * lumFilter[j]) >> 1;
-
-            output_pixel(&aDest[i], val);
-        }
-    }
-#undef output_pixel
-}
-
-static av_always_inline void
-yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
-                      int lumFilterSize, const int16_t *chrFilter,
-                      const int16_t **chrUSrc, const int16_t **chrVSrc,
-                      int chrFilterSize, const int16_t **alpSrc,
-                      uint16_t *dest[4], int dstW, int chrDstW,
-                      int big_endian, int output_bits)
-{
-    //FIXME Optimize (just quickly written not optimized..)
-    int i;
-    uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
-    *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
-    int shift = 15 + 16 - output_bits - 1;
-
-#define output_pixel(pos, val) \
-    if (big_endian) { \
-        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+        if (output_bits == 16) { \
+            AV_WB16(pos, av_clip_uint16(val >> shift)); \
+        } else { \
+            AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+        } \
     } else { \
-        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+        if (output_bits == 16) { \
+            AV_WL16(pos, av_clip_uint16(val >> shift)); \
+        } else { \
+            AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+        } \
     }
     for (i = 0; i < dstW; i++) {
         int val = 1 << (30-output_bits - 1);
@@ -315,7 +263,7 @@ yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
 #undef output_pixel
 }
 
-#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
+#define yuv2NBPS(bits, BE_LE, is_be) \
 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                               const int16_t **_lumSrc, int lumFilterSize, \
                               const int16_t *chrFilter, const int16_t **_chrUSrc, \
@@ -323,21 +271,21 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil
                               int chrFilterSize, const int16_t **_alpSrc, \
                               uint8_t *_dest[4], int dstW, int chrDstW) \
 { \
-    const typeX_t **lumSrc  = (const typeX_t **) _lumSrc, \
-                  **chrUSrc = (const typeX_t **) _chrUSrc, \
-                  **chrVSrc = (const typeX_t **) _chrVSrc, \
-                  **alpSrc  = (const typeX_t **) _alpSrc; \
-    yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
-                         chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                         alpSrc, (uint16_t **) _dest, \
-                         dstW, chrDstW, is_be, bits); \
-}
-yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
-yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
-yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
-yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
-yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
-yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
+    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
+                  **chrUSrc = (const int32_t **) _chrUSrc, \
+                  **chrVSrc = (const int32_t **) _chrVSrc, \
+                  **alpSrc  = (const int32_t **) _alpSrc; \
+    yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
+                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                          alpSrc, (uint16_t **) _dest, \
+                          dstW, chrDstW, is_be, bits); \
+}
+yuv2NBPS( 9, BE, 1);
+yuv2NBPS( 9, LE, 0);
+yuv2NBPS(10, BE, 1);
+yuv2NBPS(10, LE, 0);
+yuv2NBPS(16, BE, 1);
+yuv2NBPS(16, LE, 0);
 
 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **lumSrc, int lumFilterSize,
@@ -1932,27 +1880,6 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_s
     }
 }
 
-static void hScale10_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
-                       const int16_t *filter,
-                       const int16_t *filterPos, int filterSize)
-{
-    int i;
-    const uint16_t *src = (const uint16_t *) _src;
-    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
-
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int srcPos = filterPos[i];
-        int val = 0;
-
-        for (j = 0; j < filterSize; j++) {
-            val += src[srcPos + j] * filter[filterSize * i + j];
-        }
-        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
-        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
-    }
-}
-
 // bilinear / bicubic scaling
 static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
                      const int16_t *filter, const int16_t *filterPos,
@@ -2098,7 +2025,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
     if (convertRange)
         convertRange(dst, dstWidth);
 
-    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) {
+    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
         c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
     }
 }
@@ -2125,7 +2052,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
                                      uint8_t *formatConvBuffer, uint32_t *pal)
 {
     if (c->chrToYV12) {
-        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->scalingBpp, 8) >> 3, 16);
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16);
         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
         src1= formatConvBuffer;
         src2= buf2;
@@ -2149,7 +2076,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
     if (c->chrConvertRange)
         c->chrConvertRange(dst1, dst2, dstWidth);
 
-    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) {
+    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
         c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
         c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
     }
@@ -2808,27 +2735,27 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     }
 
     if (c->scalingBpp == 8) {
-        c->hScale       = hScale_c;
-        if (c->flags & SWS_FAST_BILINEAR) {
-            c->hyscale_fast = hyscale_fast_c;
-            c->hcscale_fast = hcscale_fast_c;
+    c->hScale       = hScale_c;
+    if (c->flags & SWS_FAST_BILINEAR) {
+        c->hyscale_fast = hyscale_fast_c;
+        c->hcscale_fast = hcscale_fast_c;
+    }
+
+    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+        if (c->srcRange) {
+            c->lumConvertRange = lumRangeFromJpeg_c;
+            c->chrConvertRange = chrRangeFromJpeg_c;
+        } else {
+            c->lumConvertRange = lumRangeToJpeg_c;
+            c->chrConvertRange = chrRangeToJpeg_c;
         }
+    }
     } else {
-        c->hScale = c->scalingBpp == 16 ? hScale16_c : hScale10_c;
+        c->hScale = hScale16_c;
         c->scale19To15Fw = scale19To15Fw_c;
         c->scale8To16Rv  = scale8To16Rv_c;
-    }
 
-    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
-        if (c->scalingBpp <= 10) {
-            if (c->srcRange) {
-                c->lumConvertRange = lumRangeFromJpeg_c;
-                c->chrConvertRange = chrRangeFromJpeg_c;
-            } else {
-                c->lumConvertRange = lumRangeToJpeg_c;
-                c->chrConvertRange = chrRangeToJpeg_c;
-            }
-        } else {
+        if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
             if (c->srcRange) {
                 c->lumConvertRange = lumRangeFromJpeg16_c;
                 c->chrConvertRange = chrRangeFromJpeg16_c;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 60787aed91..9492303301 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -64,16 +64,11 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
  * without any additional vertical scaling (or point-scaling).
  *
  * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param dest    pointer to the 4 output planes (Y/U/V/A). For >8bit
- *                output, this is in uint16_t
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the 4 output planes (Y/U/V/A)
  * @param dstW    width of dest[0], dest[3], lumSrc and alpSrc in pixels
  * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
  */
@@ -87,19 +82,14 @@ typedef void (*yuv2planar1_fn) (struct SwsContext *c,
  *
  * @param c             SWS scaling context
  * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
  * @param lumFilterSize number of vertical luma/alpha input lines to scale
  * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
  * @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param dest          pointer to the 4 output planes (Y/U/V/A). For >8bit
- *                      output, this is in uint16_t
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest          pointer to the 4 output planes (Y/U/V/A)
  * @param dstW          width of dest[0], dest[3], lumSrc and alpSrc in pixels
  * @param chrDstW       width of dest[1], dest[2], chrUSrc and chrVSrc
  */
@@ -115,16 +105,11 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
  * that this function may do chroma scaling, see the "uvalpha" argument.
  *
  * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param dest    pointer to the output plane. For 16bit output, this is
- *                uint16_t
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the output plane
  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
  *                to write into dest[]
  * @param uvalpha chroma scaling coefficient for the second line of chroma
@@ -147,16 +132,11 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c,  const int16_t *lumSrc,
  * output by doing bilinear scaling between two input lines.
  *
  * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
- *                19-bit for 16bit output (in int32_t)
- * @param dest    pointer to the output plane. For 16bit output, this is
- *                uint16_t
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the output plane
  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
  *                to write into dest[]
  * @param yalpha  luma/alpha scaling coefficients for the second input line.
@@ -180,19 +160,14 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c,  const int16_t *lumSrc[2],
  *
  * @param c             SWS scaling context
  * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
  * @param lumFilterSize number of vertical luma/alpha input lines to scale
  * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
  * @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
- *                      19-bit for 16bit output (in int32_t)
- * @param dest          pointer to the output plane. For 16bit output, this is
- *                      uint16_t
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest          pointer to the output plane
  * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
  *                      to write into dest[]
  * @param y             vertical line number for this output. This does not need
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 18d1227c83..c6abb6b446 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -896,15 +896,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         }
     }
 
-    c->scalingBpp = 1 + FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
-                             av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1);
-    if (c->scalingBpp <= 8)
-        c->scalingBpp = 8;
+    c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
+                          av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8;
     if (c->scalingBpp == 16)
         dst_stride <<= 1;
-    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
-                     FFALIGN(srcW, 16) * 2 * FFALIGN(c->scalingBpp, 8) >> 3,
-                     fail);
+    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2 * c->scalingBpp >> 3, fail);
     if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) {
         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
@@ -1059,7 +1055,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
     }
     // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
-    c->uv_off_px   = dst_stride_px + 64 / (c->scalingBpp &~ 7);
+    c->uv_off_px   = dst_stride_px + 64 / c->scalingBpp;
     c->uv_off_byte = dst_stride + 16;
     for (i=0; i<c->vChrBufSize; i++) {
         FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail);
diff --git a/tests/ref/lavfi/pixdesc b/tests/ref/lavfi/pixdesc
index 0da94f861e..3730988777 100644
--- a/tests/ref/lavfi/pixdesc
+++ b/tests/ref/lavfi/pixdesc
@@ -38,8 +38,8 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         a4a83d0811280eff7405d94a7de21596
-yuv422p10le         23717b6c73a59912c605f27877ae2fb6
+yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
+yuv422p10le         3f478be644add24b6cc77e718a6e2afa
 yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
 yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
diff --git a/tests/ref/lavfi/pixfmts_copy b/tests/ref/lavfi/pixfmts_copy
index 0da94f861e..3730988777 100644
--- a/tests/ref/lavfi/pixfmts_copy
+++ b/tests/ref/lavfi/pixfmts_copy
@@ -38,8 +38,8 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         a4a83d0811280eff7405d94a7de21596
-yuv422p10le         23717b6c73a59912c605f27877ae2fb6
+yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
+yuv422p10le         3f478be644add24b6cc77e718a6e2afa
 yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
 yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
diff --git a/tests/ref/lavfi/pixfmts_null b/tests/ref/lavfi/pixfmts_null
index 0da94f861e..3730988777 100644
--- a/tests/ref/lavfi/pixfmts_null
+++ b/tests/ref/lavfi/pixfmts_null
@@ -38,8 +38,8 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         a4a83d0811280eff7405d94a7de21596
-yuv422p10le         23717b6c73a59912c605f27877ae2fb6
+yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
+yuv422p10le         3f478be644add24b6cc77e718a6e2afa
 yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
 yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale
index d68cdd41d0..392b9ce601 100644
--- a/tests/ref/lavfi/pixfmts_scale
+++ b/tests/ref/lavfi/pixfmts_scale
@@ -31,15 +31,15 @@ uyvy422             314bd486277111a95d9369b944fa0400
 yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
 yuv411p             1143e7c5cc28fe0922b051b17733bc4c
 yuv420p             fdad2d8df8985e3d17e73c71f713cb14
-yuv420p10be         04663d400e44692fe8a622a067f838da
-yuv420p10le         6171850f66df7a727b4bed1c87ef9188
+yuv420p10be         af5429f27b9f95bf955e795921c65cdc
+yuv420p10le         d0b47e6a8a44e6b5ca0fe4349a4e393b
 yuv420p16be         9688e33e03b8c8275ab2fb1df0f06bee
 yuv420p16le         cba8b390ad5e7b8678e419b8ce79c008
-yuv420p9be          ab163dfef03c4d563aca99b24276b9fd
-yuv420p9le          cd56c5a76ce74e504dd59d25a5e4389c
+yuv420p9be          a073b2d93b2a7dce2069ba252bc43175
+yuv420p9le          b67233c3c7d93763d07d88f697c145e1
 yuv422p             918e37701ee7377d16a8a6c119c56a40
-yuv422p10be         e8e80fed7121f3afac994f2afac42cd2
-yuv422p10le         370866666f4889ee0928345b16d68fb4
+yuv422p10be         533fd21e7943c20a1026b19069b3b867
+yuv422p10le         59b20a4a8609f5da2dc54c78aea11e6c
 yuv422p16be         2cf502d7d386db1f1b3b946679d897b1
 yuv422p16le         3002a4e47520731dcee5929aff49eb74
 yuv440p             461503fdb9b90451020aa3b25ddf041c
diff --git a/tests/ref/lavfi/pixfmts_vflip b/tests/ref/lavfi/pixfmts_vflip
index a2b11690ab..2b62518a28 100644
--- a/tests/ref/lavfi/pixfmts_vflip
+++ b/tests/ref/lavfi/pixfmts_vflip
@@ -38,8 +38,8 @@ yuv420p16le         0f609e588e5a258644ef85170d70e030
 yuv420p9be          be40ec975fb2873891643cbbbddbc3b0
 yuv420p9le          7e606310d3f5ff12badf911e8f333471
 yuv422p             d7f5cb44d9b0210d66d6a8762640ab34
-yuv422p10be         51d9ef13fe43ea9549b3792bfd449bf7
-yuv422p10le         4b286a243ee0e715b2961bc1b469e629
+yuv422p10be         a28b051168af49435c04af5f58dce47b
+yuv422p10le         35936ffff30df2697f47b9b8d2cb7dea
 yuv422p16be         51d9aa4e78d121c226d919ce97976fe4
 yuv422p16le         12965c54bda8932ca72da194419a9908
 yuv440p             876385e96165acf51271b20e5d85a416