From e11a5fccb237b54da884e19c3f09986b780bd618 Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Fri, 29 Jul 2011 14:45:04 -0700 Subject: [PATCH 01/16] bink: Eliminate unnecessary shadow declaration. --- libavcodec/bink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libavcodec/bink.c b/libavcodec/bink.c index 2b5bc75078..17683730d4 100644 --- a/libavcodec/bink.c +++ b/libavcodec/bink.c @@ -624,7 +624,6 @@ static int read_dct_coeffs(GetBitContext *gb, int32_t block[64], const uint8_t * coef_list[--list_start] = ccoef; mode_list[ list_start] = 3; } else { - int t; if (!bits) { t = 1 - (get_bits1(gb) << 1); } else { From a6c49f18abacb9bf52d4d808a2a56561a5b5445c Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Fri, 29 Jul 2011 15:49:11 -0700 Subject: [PATCH 02/16] aac: Remove some suspicious illegal memcpy()s from LTP. --- libavcodec/aacdec.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 6bf14275f9..783bf99fba 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -1789,12 +1789,10 @@ static void windowing_and_mdct_ltp(AACContext *ac, float *out, } else { memset(in, 0, 448 * sizeof(float)); ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128); - memcpy(in + 576, in + 576, 448 * sizeof(float)); } if (ics->window_sequence[0] != LONG_START_SEQUENCE) { ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); } else { - memcpy(in + 1024, in + 1024, 448 * sizeof(float)); ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); memset(in + 1024 + 576, 0, 448 * sizeof(float)); } From 4326f6f525063e92d40e7a8f39cf7ccd7759aea7 Mon Sep 17 00:00:00 2001 From: Dustin Brody Date: Wed, 27 Jul 2011 20:44:58 -0400 Subject: [PATCH 03/16] mpeg4videodec: remove dead code that would have detected erroneous encoding Signed-off-by: Diego Biurrun --- libavcodec/mpeg4videodec.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/libavcodec/mpeg4videodec.c 
b/libavcodec/mpeg4videodec.c index 57d55a1424..0d061acfb9 100644 --- a/libavcodec/mpeg4videodec.c +++ b/libavcodec/mpeg4videodec.c @@ -990,28 +990,6 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, SKIP_COUNTER(re, &s->gb, 1+12+1); } -#if 0 - if(s->error_recognition >= FF_ER_COMPLIANT){ - const int abs_level= FFABS(level); - if(abs_level<=MAX_LEVEL && run<=MAX_RUN){ - const int run1= run - rl->max_run[last][abs_level] - 1; - if(abs_level <= rl->max_level[last][run]){ - av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n"); - return -1; - } - if(s->error_recognition > FF_ER_COMPLIANT){ - if(abs_level <= rl->max_level[last][run]*2){ - av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n"); - return -1; - } - if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){ - av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n"); - return -1; - } - } - } - } -#endif if (level>0) level= level * qmul + qadd; else level= level * qmul - qadd; From 9d39cbf6cfecfda88e2f822b0437ee22ffa6d745 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Mon, 1 Aug 2011 17:41:44 +0200 Subject: [PATCH 04/16] avpacket: signal possibly corrupted packets Use AV_PKT_FLAG_CORRUPT flag to mark packets that might be incomplete or have errors. Formats that have a means to validate their packets should use this flag to deliver such information instead of silently dropping the packets. --- libavcodec/avcodec.h | 6 +++++- libavcodec/version.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index e1fd475ca6..eaf63e8dc1 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -791,6 +791,9 @@ typedef struct AVPacket { uint8_t *data; int size; int stream_index; + /** + * A combination of AV_PKT_FLAG values + */ int flags; /** * Additional packet data that can be provided by the container.
@@ -831,7 +834,8 @@ typedef struct AVPacket { */ int64_t convergence_duration; } AVPacket; -#define AV_PKT_FLAG_KEY 0x0001 +#define AV_PKT_FLAG_KEY 0x0001 ///< The packet contains a keyframe +#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted /** * Audio Video Frame. diff --git a/libavcodec/version.h b/libavcodec/version.h index 46e54990b1..24d2488249 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -22,7 +22,7 @@ #define LIBAVCODEC_VERSION_MAJOR 53 #define LIBAVCODEC_VERSION_MINOR 7 -#define LIBAVCODEC_VERSION_MICRO 0 +#define LIBAVCODEC_VERSION_MICRO 1 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ From 5038f9b2684c4be1d9543e34e76282c6935815ba Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Mon, 1 Aug 2011 17:46:29 +0200 Subject: [PATCH 05/16] apichange: add an entry for AV_PKT_FLAG_CORRUPT --- doc/APIchanges | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/APIchanges b/doc/APIchanges index c533818efa..09a12fe3d2 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,6 +13,9 @@ libavutil: 2011-04-18 API changes, most recent first: +2011-08-02 - 9d39cbf - lavc 53.7.1 + Add AV_PKT_FLAG_CORRUPT AVPacket flag. + 2011-07-10 - a67c061 - lavf 53.3.0 Add avformat_find_stream_info(), deprecate av_find_stream_info(). From be9c00615b5c2cb858b9905854726ebe578c007b Mon Sep 17 00:00:00 2001 From: Zohar Kelrich Date: Sun, 24 Jul 2011 11:13:50 +0300 Subject: [PATCH 06/16] mpegts: Silence "can't seek" warning on unseekable Do not try to seek when we already know we are not allowed to. Silences warning that always happens when streaming. 
Signed-off-by: Zohar Kelrich Signed-off-by: Luca Barbato --- libavformat/mpegts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index e9b984dc11..410507db6d 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -1488,7 +1488,7 @@ static int mpegts_read_header(AVFormatContext *s, /* normal demux */ /* first do a scaning to get all the services */ - if (avio_seek(pb, pos, SEEK_SET) < 0) + if (pb->seekable && avio_seek(pb, pos, SEEK_SET) < 0) av_log(s, AV_LOG_ERROR, "Unable to seek back to the start\n"); mpegts_open_section_filter(ts, SDT_PID, sdt_cb, ts, 1); From 8b9df201dfb2fa557b5dfc4e04c927d3f13a0dd9 Mon Sep 17 00:00:00 2001 From: Zohar Kelrich Date: Sun, 24 Jul 2011 11:13:50 +0300 Subject: [PATCH 07/16] mpegts: Fix for continuity counter Make continuity counter respect discontinuity flag and null packets. Unpack the adaptation_field_control field. Signed-off-by: Zohar Kelrich Signed-off-by: Luca Barbato --- libavformat/mpegts.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index 410507db6d..13b31177a5 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -1248,7 +1248,8 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet) { AVFormatContext *s = ts->stream; MpegTSFilter *tss; - int len, pid, cc, expected_cc, cc_ok, afc, is_start; + int len, pid, cc, expected_cc, cc_ok, afc, is_start, is_discontinuity, + has_adaptation, has_payload; const uint8_t *p, *p_end; int64_t pos; @@ -1264,20 +1265,29 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet) if (!tss) return 0; + afc = (packet[3] >> 4) & 3; + if (afc == 0) /* reserved value */ + return 0; + has_adaptation = afc & 2; + has_payload = afc & 1; + is_discontinuity = has_adaptation + && packet[4] != 0 /* with length > 0 */ + && (packet[5] & 0x80); /* and discontinuity indicated */ + /* continuity check 
(currently not used) */ cc = (packet[3] & 0xf); - expected_cc = (packet[3] & 0x10) ? (tss->last_cc + 1) & 0x0f : tss->last_cc; - cc_ok = (tss->last_cc < 0) || (expected_cc == cc); + expected_cc = has_payload ? (tss->last_cc + 1) & 0x0f : tss->last_cc; + cc_ok = pid == 0x1FFF // null packet PID + || is_discontinuity + || tss->last_cc < 0 + || expected_cc == cc; + tss->last_cc = cc; - /* skip adaptation field */ - afc = (packet[3] >> 4) & 3; - p = packet + 4; - if (afc == 0) /* reserved value */ + if (!has_payload) return 0; - if (afc == 2) /* adaptation field only */ - return 0; - if (afc == 3) { + p = packet + 4; + if (has_adaptation) { /* skip adapation field */ p += p[0] + 1; } From c64f80b0e85ad138fca300f05278263bd713d27a Mon Sep 17 00:00:00 2001 From: Zohar Kelrich Date: Sun, 24 Jul 2011 11:13:50 +0300 Subject: [PATCH 08/16] mpegts: Reset continuity counter on seek Signed-off-by: Zohar Kelrich Signed-off-by: Luca Barbato --- libavformat/mpegts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index 13b31177a5..f3b76aa264 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -1625,6 +1625,7 @@ static int mpegts_read_packet(AVFormatContext *s, if (ts->pids[i] && ts->pids[i]->type == MPEGTS_PES) { PESContext *pes = ts->pids[i]->u.pes_filter.opaque; av_freep(&pes->buffer); + ts->pids[i]->last_cc = -1; pes->data_index = 0; pes->state = MPEGTS_SKIP; /* skip until pes header */ } From ce9e31655e5b8f8db3bb4f13f436fc836062a514 Mon Sep 17 00:00:00 2001 From: Zohar Kelrich Date: Sun, 24 Jul 2011 11:13:50 +0300 Subject: [PATCH 09/16] mpegts: Mark corrupted packets Signed-off-by: Zohar Kelrich Signed-off-by: Luca Barbato --- libavformat/mpegts.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index f3b76aa264..d4688aebfd 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -164,6 +164,7 @@ typedef struct PESContext { enum MpegTSState state; /* used 
to get the format */ int data_index; + int flags; /**< copied to the AVPacket flags */ int total_size; int pes_header_size; int extended_stream_id; @@ -635,12 +636,14 @@ static void new_pes_packet(PESContext *pes, AVPacket *pkt) pkt->dts = pes->dts; /* store position of first TS packet of this PES packet */ pkt->pos = pes->ts_packet_pos; + pkt->flags = pes->flags; /* reset pts values */ pes->pts = AV_NOPTS_VALUE; pes->dts = AV_NOPTS_VALUE; pes->buffer = NULL; pes->data_index = 0; + pes->flags = 0; } /* return non zero if a packet could be constructed */ @@ -808,6 +811,7 @@ static int mpegts_push_data(MpegTSFilter *filter, // pes packet size is < ts size packet and pes data is padded with 0xff // not sure if this is legal in ts but see issue #2392 buf_size = pes->total_size; + pes->flags |= AV_PKT_FLAG_CORRUPT; } memcpy(pes->buffer+pes->data_index, p, buf_size); pes->data_index += buf_size; @@ -1283,6 +1287,13 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet) || expected_cc == cc; tss->last_cc = cc; + if (!cc_ok) { + av_log(NULL, AV_LOG_WARNING, "Continuity Check Failed\n"); + if(tss->type == MPEGTS_PES) { + PESContext *pc = tss->u.pes_filter.opaque; + pc->flags |= AV_PKT_FLAG_CORRUPT; + } + } if (!has_payload) return 0; From cdb9884a636987061d549ac23a248aabc4a91140 Mon Sep 17 00:00:00 2001 From: Zohar Kelrich Date: Sun, 24 Jul 2011 11:13:50 +0300 Subject: [PATCH 10/16] mpegts: Move scan test to handle_packets This fixes an issue where packets which start being read while reading the header stick around after a seek. 
Signed-off-by: Zohar Kelrich Signed-off-by: Luca Barbato --- libavformat/mpegts.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index d4688aebfd..a5a8ce2a67 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -1399,7 +1399,22 @@ static int handle_packets(MpegTSContext *ts, int nb_packets) { AVFormatContext *s = ts->stream; uint8_t packet[TS_PACKET_SIZE]; - int packet_num, ret; + int packet_num, ret = 0; + + if (avio_tell(s->pb) != ts->last_pos) { + int i; + av_dlog("Skipping after seek\n"); + /* seek detected, flush pes buffer */ + for (i = 0; i < NB_PID_MAX; i++) { + if (ts->pids[i] && ts->pids[i]->type == MPEGTS_PES) { + PESContext *pes = ts->pids[i]->u.pes_filter.opaque; + av_freep(&pes->buffer); + ts->pids[i]->last_cc = -1; + pes->data_index = 0; + pes->state = MPEGTS_SKIP; /* skip until pes header */ + } + } + } ts->stop_parse = 0; packet_num = 0; @@ -1411,12 +1426,13 @@ static int handle_packets(MpegTSContext *ts, int nb_packets) break; ret = read_packet(s, packet, ts->raw_packet_size); if (ret != 0) - return ret; + break; ret = handle_packet(ts, packet); if (ret != 0) - return ret; + break; } - return 0; + ts->last_pos = avio_tell(s->pb); + return ret; } static int mpegts_probe(AVProbeData *p) @@ -1630,19 +1646,6 @@ static int mpegts_read_packet(AVFormatContext *s, MpegTSContext *ts = s->priv_data; int ret, i; - if (avio_tell(s->pb) != ts->last_pos) { - /* seek detected, flush pes buffer */ - for (i = 0; i < NB_PID_MAX; i++) { - if (ts->pids[i] && ts->pids[i]->type == MPEGTS_PES) { - PESContext *pes = ts->pids[i]->u.pes_filter.opaque; - av_freep(&pes->buffer); - ts->pids[i]->last_cc = -1; - pes->data_index = 0; - pes->state = MPEGTS_SKIP; /* skip until pes header */ - } - } - } - ts->pkt = pkt; ret = handle_packets(ts, 0); if (ret < 0) { @@ -1660,8 +1663,6 @@ static int mpegts_read_packet(AVFormatContext *s, } } - ts->last_pos = 
avio_tell(s->pb); - return ret; } From 5081514269a17809f8a8ff71e6b26e4b761e8266 Mon Sep 17 00:00:00 2001 From: Zohar Kelrich Date: Sun, 24 Jul 2011 11:13:51 +0300 Subject: [PATCH 11/16] mpegts: Mark wrongly-sized packets as corrupted Signed-off-by: Zohar Kelrich Signed-off-by: Luca Barbato --- libavformat/mpegts.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index a5a8ce2a67..2bc66ac685 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -625,6 +625,12 @@ static void new_pes_packet(PESContext *pes, AVPacket *pkt) pkt->destruct = av_destruct_packet; pkt->data = pes->buffer; pkt->size = pes->data_index; + + if(pes->total_size != MAX_PES_PAYLOAD && + pes->pes_header_size + pes->data_index != pes->total_size + 6) { + av_log(pes->ts, AV_LOG_WARNING, "PES packet size mismatch\n"); + pes->flags |= AV_PKT_FLAG_CORRUPT; + } memset(pkt->data+pkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE); // Separate out the AC3 substream from an HDMV combined TrueHD/AC3 PID @@ -811,7 +817,6 @@ static int mpegts_push_data(MpegTSFilter *filter, // pes packet size is < ts size packet and pes data is padded with 0xff // not sure if this is legal in ts but see issue #2392 buf_size = pes->total_size; - pes->flags |= AV_PKT_FLAG_CORRUPT; } memcpy(pes->buffer+pes->data_index, p, buf_size); pes->data_index += buf_size; @@ -1288,7 +1293,7 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet) tss->last_cc = cc; if (!cc_ok) { - av_log(NULL, AV_LOG_WARNING, "Continuity Check Failed\n"); + av_log(ts, AV_LOG_WARNING, "Continuity Check Failed\n"); if(tss->type == MPEGTS_PES) { PESContext *pc = tss->u.pes_filter.opaque; pc->flags |= AV_PKT_FLAG_CORRUPT; From 73e8e8dbf969b9a0bc1591abcfeba474a42e47bc Mon Sep 17 00:00:00 2001 From: Zohar Kelrich Date: Sun, 24 Jul 2011 17:28:33 +0300 Subject: [PATCH 12/16] lavf: Add an option to discard corrupted frames Signed-off-by: Zohar Kelrich Signed-off-by: Luca Barbato --- 
libavformat/avformat.h | 1 + libavformat/options.c | 1 + libavformat/utils.c | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/libavformat/avformat.h b/libavformat/avformat.h index cfdbd11b36..2f68abab25 100644 --- a/libavformat/avformat.h +++ b/libavformat/avformat.h @@ -750,6 +750,7 @@ typedef struct AVFormatContext { #define AVFMT_FLAG_RTP_HINT 0x0040 ///< Deprecated, use the -movflags rtphint muxer specific AVOption instead #endif #define AVFMT_FLAG_CUSTOM_IO 0x0080 ///< The caller has supplied a custom AVIOContext, don't avio_close() it. +#define AVFMT_FLAG_DISCARD_CORRUPT 0x0100 ///< Discard frames marked corrupted #if FF_API_LOOP_INPUT /** diff --git a/libavformat/options.c b/libavformat/options.c index c2729b75d9..961162876f 100644 --- a/libavformat/options.c +++ b/libavformat/options.c @@ -79,6 +79,7 @@ static const AVOption options[]={ #if FF_API_FLAG_RTP_HINT {"rtphint", "add rtp hinting (deprecated, use the -movflags rtphint option instead)", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"}, #endif +{"discardcorrupt", "discard corrupted frames", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_DISCARD_CORRUPT }, INT_MIN, INT_MAX, D, "fflags"}, {"analyzeduration", "how many microseconds are analyzed to estimate duration", OFFSET(max_analyze_duration), FF_OPT_TYPE_INT, {.dbl = 5*AV_TIME_BASE }, 0, INT_MAX, D}, {"cryptokey", "decryption key", OFFSET(key), FF_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D}, {"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), FF_OPT_TYPE_INT, {.dbl = 1<<20 }, 0, INT_MAX, D}, diff --git a/libavformat/utils.c b/libavformat/utils.c index c4d7623c8c..b848ebb827 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -719,6 +719,15 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt) s->streams[i]->probe_packets = 0; continue; } + + if ((s->flags & AVFMT_FLAG_DISCARD_CORRUPT) && + (pkt->flags & AV_PKT_FLAG_CORRUPT)) { + av_log(s, AV_LOG_WARNING, + "Dropped 
corrupted packet (stream = %d)\n", + pkt->stream_index); + continue; + } + st= s->streams[pkt->stream_index]; switch(st->codec->codec_type){ From 6f7fe4723b9bfbb52341568906e6168966f486b3 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Tue, 2 Aug 2011 13:45:28 +0200 Subject: [PATCH 13/16] Correct chroma vector calculation for RealVideo 3. Old version divided it wrong, which resulted in chroma drift (visible on FATE sample too as dirty trails left by clouds). Signed-off-by: Ronald S. Bultje --- libavcodec/rv34.c | 4 ++-- tests/ref/fate/rv30 | 56 ++++++++++++++++++++++----------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index 9162da5194..1aec571ad3 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -737,8 +737,8 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type, my = (s->current_picture_ptr->f.motion_val[dir][mv_pos][1] + (3 << 24)) / 3 - (1 << 24); lx = (s->current_picture_ptr->f.motion_val[dir][mv_pos][0] + (3 << 24)) % 3; ly = (s->current_picture_ptr->f.motion_val[dir][mv_pos][1] + (3 << 24)) % 3; - chroma_mx = (s->current_picture_ptr->f.motion_val[dir][mv_pos][0] + 1) >> 1; - chroma_my = (s->current_picture_ptr->f.motion_val[dir][mv_pos][1] + 1) >> 1; + chroma_mx = s->current_picture_ptr->f.motion_val[dir][mv_pos][0] / 2; + chroma_my = s->current_picture_ptr->f.motion_val[dir][mv_pos][1] / 2; umx = (chroma_mx + (3 << 24)) / 3 - (1 << 24); umy = (chroma_my + (3 << 24)) / 3 - (1 << 24); uvmx = chroma_coeffs[(chroma_mx + (3 << 24)) % 3]; diff --git a/tests/ref/fate/rv30 b/tests/ref/fate/rv30 index 5b43588bb4..6c99871eb6 100644 --- a/tests/ref/fate/rv30 +++ b/tests/ref/fate/rv30 @@ -16,31 +16,31 @@ 0, 112500, 126720, 0xe572dfc9 0, 120000, 126720, 0xbc3cc34f 0, 127500, 126720, 0xcf8cb0e2 -0, 135000, 126720, 0x6d1c630d -0, 142500, 126720, 0x4338e469 -0, 150000, 126720, 0x9d82ea38 -0, 157500, 126720, 0x55e0b559 -0, 165000, 126720, 0x5eefb5ef -0, 172500, 126720, 
0x4b10b746 -0, 180000, 126720, 0x8b07a1db -0, 187500, 126720, 0x8c639b34 -0, 195000, 126720, 0x63eb0b9f -0, 202500, 126720, 0x31c80c83 -0, 210000, 126720, 0x78495352 -0, 217500, 126720, 0x63d609c4 -0, 225000, 126720, 0xcd2a62d8 -0, 232500, 126720, 0x4aea732d -0, 240000, 126720, 0xe3bb352c -0, 247500, 126720, 0x4b9036ad -0, 255000, 126720, 0x88b66e2d -0, 262500, 126720, 0x4a8a1b16 -0, 270000, 126720, 0x2e014eac -0, 277500, 126720, 0x83212c67 -0, 285000, 126720, 0x4937e897 -0, 292500, 126720, 0x2d38babe -0, 300000, 126720, 0xbcb43c09 -0, 307500, 126720, 0x955ffaf4 -0, 315000, 126720, 0x3337d4a2 -0, 322500, 126720, 0xe8f58c33 -0, 330000, 126720, 0x3a7f771f -0, 337500, 126720, 0xb67c39b9 +0, 135000, 126720, 0x75ae61b6 +0, 142500, 126720, 0x554fe3e4 +0, 150000, 126720, 0x72ecea95 +0, 157500, 126720, 0x5d00b5fe +0, 165000, 126720, 0xe39bba0d +0, 172500, 126720, 0x9c21bad8 +0, 180000, 126720, 0x72f2a47d +0, 187500, 126720, 0x4f639ebe +0, 195000, 126720, 0x534a10cc +0, 202500, 126720, 0xfdca11d3 +0, 210000, 126720, 0x0c735615 +0, 217500, 126720, 0x0eaf0c1b +0, 225000, 126720, 0xce5e6794 +0, 232500, 126720, 0x14cf7974 +0, 240000, 126720, 0xbc513f2a +0, 247500, 126720, 0xbc303fae +0, 255000, 126720, 0xd9f67585 +0, 262500, 126720, 0x3378251f +0, 270000, 126720, 0xb3ed5911 +0, 277500, 126720, 0xc15a3577 +0, 285000, 126720, 0x0a24f256 +0, 292500, 126720, 0xfab9c45d +0, 300000, 126720, 0x45464610 +0, 307500, 126720, 0xfe2e057d +0, 315000, 126720, 0x23efdc35 +0, 322500, 126720, 0x4d888b2e +0, 330000, 126720, 0xdd0d74df +0, 337500, 126720, 0x08382b8e From 18b131de0473a3110c63966cd7c6cd2ab118d401 Mon Sep 17 00:00:00 2001 From: Vitor Sessak Date: Sat, 30 Jul 2011 18:39:25 +0200 Subject: [PATCH 14/16] dct32: Add SSE2 ASM optimizations Signed-off-by: Ronald S. 
Bultje --- libavcodec/x86/dct32_sse.asm | 39 +++++++++++++++++++++++++++--------- libavcodec/x86/fft.c | 2 ++ libavcodec/x86/fft.h | 1 + 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm index 46daa43d8c..720a061078 100644 --- a/libavcodec/x86/dct32_sse.asm +++ b/libavcodec/x86/dct32_sse.asm @@ -63,6 +63,13 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 mulps %1, %3 %endmacro +%macro BUTTERFLY0_SSE2 5 + pshufd %4, %1, %5 + xorps %1, %2 + addps %1, %4 + mulps %1, %3 +%endmacro + %macro BUTTERFLY0_AVX 5 vshufps %4, %1, %1, %5 vxorps %1, %1, %2 @@ -405,18 +412,17 @@ INIT_XMM INIT_XMM +%macro DCT32_FUNC 1 ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in) -cglobal dct32_float_sse, 2,3,16, out, in, tmp +cglobal dct32_float_%1, 2,3,16, out, in, tmp ; pass 1 movaps m0, [inq+0] - movaps m1, [inq+112] - shufps m1, m1, 0x1b + LOAD_INV m1, [inq+112] BUTTERFLY m0, m1, [ps_cos_vec], m3 movaps m7, [inq+64] - movaps m4, [inq+48] - shufps m4, m4, 0x1b + LOAD_INV m4, [inq+48] BUTTERFLY m7, m4, [ps_cos_vec+32], m3 ; pass 2 @@ -427,13 +433,11 @@ cglobal dct32_float_sse, 2,3,16, out, in, tmp ; pass 1 movaps m1, [inq+16] - movaps m6, [inq+96] - shufps m6, m6, 0x1b + LOAD_INV m6, [inq+96] BUTTERFLY m1, m6, [ps_cos_vec+16], m3 movaps m4, [inq+80] - movaps m5, [inq+32] - shufps m5, m5, 0x1b + LOAD_INV m5, [inq+32] BUTTERFLY m4, m5, [ps_cos_vec+48], m3 ; pass 2 @@ -492,3 +496,20 @@ cglobal dct32_float_sse, 2,3,16, out, in, tmp PASS5 PASS6 RET +%endmacro + +%macro LOAD_INV_SSE 2 + movaps %1, %2 + shufps %1, %1, 0x1b +%endmacro + +%define LOAD_INV LOAD_INV_SSE +DCT32_FUNC sse + +%macro LOAD_INV_SSE2 2 + pshufd %1, %2, 0x1b +%endmacro + +%define LOAD_INV LOAD_INV_SSE2 +%define BUTTERFLY0 BUTTERFLY0_SSE2 +DCT32_FUNC sse2 diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c index 899f0f7ad5..f7308cca32 100644 --- a/libavcodec/x86/fft.c +++ b/libavcodec/x86/fft.c @@ -60,6 +60,8 
@@ av_cold void ff_dct_init_mmx(DCTContext *s) int has_vectors = av_get_cpu_flags(); if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX) s->dct32 = ff_dct32_float_avx; + else if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE) + s->dct32 = ff_dct32_float_sse2; else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) s->dct32 = ff_dct32_float_sse; #endif diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h index 0ade2b2e7b..9d68d5b219 100644 --- a/libavcodec/x86/fft.h +++ b/libavcodec/x86/fft.h @@ -35,6 +35,7 @@ void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input) void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_dct32_float_sse(FFTSample *out, const FFTSample *in); +void ff_dct32_float_sse2(FFTSample *out, const FFTSample *in); void ff_dct32_float_avx(FFTSample *out, const FFTSample *in); #endif /* AVCODEC_X86_FFT_H */ From ac0fb5934893be554a44d2a1eb7a3bc7bf39da4a Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 1 Aug 2011 21:04:19 -0700 Subject: [PATCH 15/16] swscale: use 15-bit intermediates for 9/10-bit scaling. 
--- libswscale/swscale.c | 163 ++++++++++++++++++++++++---------- libswscale/swscale_internal.h | 75 ++++++++++------ libswscale/utils.c | 12 ++- tests/ref/lavfi/pixdesc | 4 +- tests/ref/lavfi/pixfmts_copy | 4 +- tests/ref/lavfi/pixfmts_null | 4 +- tests/ref/lavfi/pixfmts_scale | 12 +-- tests/ref/lavfi/pixfmts_vflip | 4 +- 8 files changed, 190 insertions(+), 88 deletions(-) diff --git a/libswscale/swscale.c b/libswscale/swscale.c index dd9f4a108f..6fc65a8b2a 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -211,17 +211,9 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc, #define output_pixel(pos, val) \ if (big_endian) { \ - if (output_bits == 16) { \ - AV_WB16(pos, av_clip_uint16(val >> shift)); \ - } else { \ - AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ - } \ + AV_WB16(pos, av_clip_uint16(val >> shift)); \ } else { \ - if (output_bits == 16) { \ - AV_WL16(pos, av_clip_uint16(val >> shift)); \ - } else { \ - AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ - } \ + AV_WL16(pos, av_clip_uint16(val >> shift)); \ } for (i = 0; i < dstW; i++) { int val = 1 << (30-output_bits - 1); @@ -263,7 +255,67 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc, #undef output_pixel } -#define yuv2NBPS(bits, BE_LE, is_be) \ +static av_always_inline void +yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc, + int lumFilterSize, const int16_t *chrFilter, + const int16_t **chrUSrc, const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint16_t *dest[4], int dstW, int chrDstW, + int big_endian, int output_bits) +{ + //FIXME Optimize (just quickly written not optimized..) + int i; + uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], + *aDest = CONFIG_SWSCALE_ALPHA ? 
dest[3] : NULL; + int shift = 15 + 16 - output_bits - 1; + +#define output_pixel(pos, val) \ + if (big_endian) { \ + AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + } else { \ + AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + } + for (i = 0; i < dstW; i++) { + int val = 1 << (30-output_bits - 1); + int j; + + for (j = 0; j < lumFilterSize; j++) + val += (lumSrc[j][i] * lumFilter[j]) >> 1; + + output_pixel(&yDest[i], val); + } + + if (uDest) { + for (i = 0; i < chrDstW; i++) { + int u = 1 << (30-output_bits - 1); + int v = 1 << (30-output_bits - 1); + int j; + + for (j = 0; j < chrFilterSize; j++) { + u += (chrUSrc[j][i] * chrFilter[j]) >> 1; + v += (chrVSrc[j][i] * chrFilter[j]) >> 1; + } + + output_pixel(&uDest[i], u); + output_pixel(&vDest[i], v); + } + } + + if (CONFIG_SWSCALE_ALPHA && aDest) { + for (i = 0; i < dstW; i++) { + int val = 1 << (30-output_bits - 1); + int j; + + for (j = 0; j < lumFilterSize; j++) + val += (alpSrc[j][i] * lumFilter[j]) >> 1; + + output_pixel(&aDest[i], val); + } + } +#undef output_pixel +} + +#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \ const int16_t **_lumSrc, int lumFilterSize, \ const int16_t *chrFilter, const int16_t **_chrUSrc, \ @@ -271,21 +323,21 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil int chrFilterSize, const int16_t **_alpSrc, \ uint8_t *_dest[4], int dstW, int chrDstW) \ { \ - const int32_t **lumSrc = (const int32_t **) _lumSrc, \ - **chrUSrc = (const int32_t **) _chrUSrc, \ - **chrVSrc = (const int32_t **) _chrVSrc, \ - **alpSrc = (const int32_t **) _alpSrc; \ - yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \ - chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, (uint16_t **) _dest, \ - dstW, chrDstW, is_be, bits); \ -} -yuv2NBPS( 9, BE, 1); -yuv2NBPS( 9, LE, 0); -yuv2NBPS(10, BE, 1); -yuv2NBPS(10, LE, 0); -yuv2NBPS(16, BE, 1); 
-yuv2NBPS(16, LE, 0); + const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \ + **chrUSrc = (const typeX_t **) _chrUSrc, \ + **chrVSrc = (const typeX_t **) _chrVSrc, \ + **alpSrc = (const typeX_t **) _alpSrc; \ + yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ + alpSrc, (uint16_t **) _dest, \ + dstW, chrDstW, is_be, bits); \ +} +yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t); +yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t); +yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t); +yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t); +yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t); +yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t); static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, @@ -1880,6 +1932,27 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_s } } +static void hScale10_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, + const int16_t *filter, + const int16_t *filterPos, int filterSize) +{ + int i; + const uint16_t *src = (const uint16_t *) _src; + int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + + for (i = 0; i < dstW; i++) { + int j; + int srcPos = filterPos[i]; + int val = 0; + + for (j = 0; j < filterSize; j++) { + val += src[srcPos + j] * filter[filterSize * i + j]; + } + // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit + dst[i] = FFMIN(val >> sh, (1 << 15) - 1); + } +} + // bilinear / bicubic scaling static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int16_t *filterPos, @@ -2025,7 +2098,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, if (convertRange) convertRange(dst, dstWidth); - if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { + if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && 
c->scalingBpp == 16) { c->scale19To15Fw(dst, (int32_t *) dst, dstWidth); } } @@ -2052,7 +2125,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 uint8_t *formatConvBuffer, uint32_t *pal) { if (c->chrToYV12) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->scalingBpp, 8) >> 3, 16); c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); src1= formatConvBuffer; src2= buf2; @@ -2076,7 +2149,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 if (c->chrConvertRange) c->chrConvertRange(dst1, dst2, dstWidth); - if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { + if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) { c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth); c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth); } @@ -2735,27 +2808,27 @@ static av_cold void sws_init_swScale_c(SwsContext *c) } if (c->scalingBpp == 8) { - c->hScale = hScale_c; - if (c->flags & SWS_FAST_BILINEAR) { - c->hyscale_fast = hyscale_fast_c; - c->hcscale_fast = hcscale_fast_c; - } - - if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { - if (c->srcRange) { - c->lumConvertRange = lumRangeFromJpeg_c; - c->chrConvertRange = chrRangeFromJpeg_c; - } else { - c->lumConvertRange = lumRangeToJpeg_c; - c->chrConvertRange = chrRangeToJpeg_c; + c->hScale = hScale_c; + if (c->flags & SWS_FAST_BILINEAR) { + c->hyscale_fast = hyscale_fast_c; + c->hcscale_fast = hcscale_fast_c; } - } } else { - c->hScale = hScale16_c; + c->hScale = c->scalingBpp == 16 ? 
hScale16_c : hScale10_c; c->scale19To15Fw = scale19To15Fw_c; c->scale8To16Rv = scale8To16Rv_c; + } - if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { + if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { + if (c->scalingBpp <= 10) { + if (c->srcRange) { + c->lumConvertRange = lumRangeFromJpeg_c; + c->chrConvertRange = chrRangeFromJpeg_c; + } else { + c->lumConvertRange = lumRangeToJpeg_c; + c->chrConvertRange = chrRangeToJpeg_c; + } + } else { if (c->srcRange) { c->lumConvertRange = lumRangeFromJpeg16_c; c->chrConvertRange = chrRangeFromJpeg16_c; diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 9492303301..60787aed91 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -64,11 +64,16 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], * without any additional vertical scaling (or point-scaling). * * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the 4 output planes (Y/U/V/A) + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the 4 output planes (Y/U/V/A). 
For >8bit + * output, this is in uint16_t * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc */ @@ -82,14 +87,19 @@ typedef void (*yuv2planar1_fn) (struct SwsContext *c, * * @param c SWS scaling context * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) * @param lumFilterSize number of vertical luma/alpha input lines to scale * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) * @param chrFilterSize number of vertical chroma input lines to scale - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the 4 output planes (Y/U/V/A) + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the 4 output planes (Y/U/V/A). For >8bit + * output, this is in uint16_t * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc */ @@ -105,11 +115,16 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter, * that this function may do chroma scaling, see the "uvalpha" argument. 
* * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the output plane + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the output plane. For 16bit output, this is + * uint16_t * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param uvalpha chroma scaling coefficient for the second line of chroma @@ -132,11 +147,16 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, * output by doing bilinear scaling between two input lines. 
* * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the output plane + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the output plane. For 16bit output, this is + * uint16_t * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param yalpha luma/alpha scaling coefficients for the second input line. 
@@ -160,14 +180,19 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], * * @param c SWS scaling context * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) * @param lumFilterSize number of vertical luma/alpha input lines to scale * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) * @param chrFilterSize number of vertical chroma input lines to scale - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the output plane + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the output plane. For 16bit output, this is + * uint16_t * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param y vertical line number for this output. This does not need diff --git a/libswscale/utils.c b/libswscale/utils.c index c6abb6b446..18d1227c83 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -896,11 +896,15 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) } } - c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1, - av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 
16 : 8; + c->scalingBpp = 1 + FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1, + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1); + if (c->scalingBpp <= 8) + c->scalingBpp = 8; if (c->scalingBpp == 16) dst_stride <<= 1; - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2 * c->scalingBpp >> 3, fail); + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, + FFALIGN(srcW, 16) * 2 * FFALIGN(c->scalingBpp, 8) >> 3, + fail); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) { c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { @@ -1055,7 +1059,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; } // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) - c->uv_off_px = dst_stride_px + 64 / c->scalingBpp; + c->uv_off_px = dst_stride_px + 64 / (c->scalingBpp &~ 7); c->uv_off_byte = dst_stride + 16; for (i=0; ivChrBufSize; i++) { FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail); diff --git a/tests/ref/lavfi/pixdesc b/tests/ref/lavfi/pixdesc index 3730988777..0da94f861e 100644 --- a/tests/ref/lavfi/pixdesc +++ b/tests/ref/lavfi/pixdesc @@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 -yuv422p10le 3f478be644add24b6cc77e718a6e2afa +yuv422p10be a4a83d0811280eff7405d94a7de21596 +yuv422p10le 23717b6c73a59912c605f27877ae2fb6 yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_copy b/tests/ref/lavfi/pixfmts_copy index 3730988777..0da94f861e 100644 --- a/tests/ref/lavfi/pixfmts_copy +++ b/tests/ref/lavfi/pixfmts_copy 
@@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 -yuv422p10le 3f478be644add24b6cc77e718a6e2afa +yuv422p10be a4a83d0811280eff7405d94a7de21596 +yuv422p10le 23717b6c73a59912c605f27877ae2fb6 yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_null b/tests/ref/lavfi/pixfmts_null index 3730988777..0da94f861e 100644 --- a/tests/ref/lavfi/pixfmts_null +++ b/tests/ref/lavfi/pixfmts_null @@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 -yuv422p10le 3f478be644add24b6cc77e718a6e2afa +yuv422p10be a4a83d0811280eff7405d94a7de21596 +yuv422p10le 23717b6c73a59912c605f27877ae2fb6 yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale index 392b9ce601..d68cdd41d0 100644 --- a/tests/ref/lavfi/pixfmts_scale +++ b/tests/ref/lavfi/pixfmts_scale @@ -31,15 +31,15 @@ uyvy422 314bd486277111a95d9369b944fa0400 yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5 yuv411p 1143e7c5cc28fe0922b051b17733bc4c yuv420p fdad2d8df8985e3d17e73c71f713cb14 -yuv420p10be af5429f27b9f95bf955e795921c65cdc -yuv420p10le d0b47e6a8a44e6b5ca0fe4349a4e393b +yuv420p10be 04663d400e44692fe8a622a067f838da +yuv420p10le 6171850f66df7a727b4bed1c87ef9188 yuv420p16be 9688e33e03b8c8275ab2fb1df0f06bee yuv420p16le cba8b390ad5e7b8678e419b8ce79c008 -yuv420p9be a073b2d93b2a7dce2069ba252bc43175 -yuv420p9le b67233c3c7d93763d07d88f697c145e1 +yuv420p9be ab163dfef03c4d563aca99b24276b9fd +yuv420p9le 
cd56c5a76ce74e504dd59d25a5e4389c yuv422p 918e37701ee7377d16a8a6c119c56a40 -yuv422p10be 533fd21e7943c20a1026b19069b3b867 -yuv422p10le 59b20a4a8609f5da2dc54c78aea11e6c +yuv422p10be e8e80fed7121f3afac994f2afac42cd2 +yuv422p10le 370866666f4889ee0928345b16d68fb4 yuv422p16be 2cf502d7d386db1f1b3b946679d897b1 yuv422p16le 3002a4e47520731dcee5929aff49eb74 yuv440p 461503fdb9b90451020aa3b25ddf041c diff --git a/tests/ref/lavfi/pixfmts_vflip b/tests/ref/lavfi/pixfmts_vflip index 2b62518a28..a2b11690ab 100644 --- a/tests/ref/lavfi/pixfmts_vflip +++ b/tests/ref/lavfi/pixfmts_vflip @@ -38,8 +38,8 @@ yuv420p16le 0f609e588e5a258644ef85170d70e030 yuv420p9be be40ec975fb2873891643cbbbddbc3b0 yuv420p9le 7e606310d3f5ff12badf911e8f333471 yuv422p d7f5cb44d9b0210d66d6a8762640ab34 -yuv422p10be a28b051168af49435c04af5f58dce47b -yuv422p10le 35936ffff30df2697f47b9b8d2cb7dea +yuv422p10be 51d9ef13fe43ea9549b3792bfd449bf7 +yuv422p10le 4b286a243ee0e715b2961bc1b469e629 yuv422p16be 51d9aa4e78d121c226d919ce97976fe4 yuv422p16le 12965c54bda8932ca72da194419a9908 yuv440p 876385e96165acf51271b20e5d85a416 From 62ee0e6a977e1990c9853630c7dea1415b38bb28 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 2 Aug 2011 12:27:43 -0700 Subject: [PATCH 16/16] Revert "swscale: use 15-bit intermediates for 9/10-bit scaling." This reverts commit ac0fb5934893be554a44d2a1eb7a3bc7bf39da4a. It causes valgrind errors which I'll want to investigate before resubmitting this. 
--- libswscale/swscale.c | 163 ++++++++++------------------------ libswscale/swscale_internal.h | 75 ++++++---------- libswscale/utils.c | 12 +-- tests/ref/lavfi/pixdesc | 4 +- tests/ref/lavfi/pixfmts_copy | 4 +- tests/ref/lavfi/pixfmts_null | 4 +- tests/ref/lavfi/pixfmts_scale | 12 +-- tests/ref/lavfi/pixfmts_vflip | 4 +- 8 files changed, 88 insertions(+), 190 deletions(-) diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 6fc65a8b2a..dd9f4a108f 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -211,69 +211,17 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc, #define output_pixel(pos, val) \ if (big_endian) { \ - AV_WB16(pos, av_clip_uint16(val >> shift)); \ - } else { \ - AV_WL16(pos, av_clip_uint16(val >> shift)); \ - } - for (i = 0; i < dstW; i++) { - int val = 1 << (30-output_bits - 1); - int j; - - for (j = 0; j < lumFilterSize; j++) - val += (lumSrc[j][i] * lumFilter[j]) >> 1; - - output_pixel(&yDest[i], val); - } - - if (uDest) { - for (i = 0; i < chrDstW; i++) { - int u = 1 << (30-output_bits - 1); - int v = 1 << (30-output_bits - 1); - int j; - - for (j = 0; j < chrFilterSize; j++) { - u += (chrUSrc[j][i] * chrFilter[j]) >> 1; - v += (chrVSrc[j][i] * chrFilter[j]) >> 1; - } - - output_pixel(&uDest[i], u); - output_pixel(&vDest[i], v); - } - } - - if (CONFIG_SWSCALE_ALPHA && aDest) { - for (i = 0; i < dstW; i++) { - int val = 1 << (30-output_bits - 1); - int j; - - for (j = 0; j < lumFilterSize; j++) - val += (alpSrc[j][i] * lumFilter[j]) >> 1; - - output_pixel(&aDest[i], val); - } - } -#undef output_pixel -} - -static av_always_inline void -yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc, - int lumFilterSize, const int16_t *chrFilter, - const int16_t **chrUSrc, const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint16_t *dest[4], int dstW, int chrDstW, - int big_endian, int output_bits) -{ - //FIXME Optimize (just quickly written not optimized..) 
- int i; - uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], - *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; - int shift = 15 + 16 - output_bits - 1; - -#define output_pixel(pos, val) \ - if (big_endian) { \ - AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + if (output_bits == 16) { \ + AV_WB16(pos, av_clip_uint16(val >> shift)); \ + } else { \ + AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + } \ } else { \ - AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + if (output_bits == 16) { \ + AV_WL16(pos, av_clip_uint16(val >> shift)); \ + } else { \ + AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + } \ } for (i = 0; i < dstW; i++) { int val = 1 << (30-output_bits - 1); @@ -315,7 +263,7 @@ yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc, #undef output_pixel } -#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \ +#define yuv2NBPS(bits, BE_LE, is_be) \ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \ const int16_t **_lumSrc, int lumFilterSize, \ const int16_t *chrFilter, const int16_t **_chrUSrc, \ @@ -323,21 +271,21 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil int chrFilterSize, const int16_t **_alpSrc, \ uint8_t *_dest[4], int dstW, int chrDstW) \ { \ - const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \ - **chrUSrc = (const typeX_t **) _chrUSrc, \ - **chrVSrc = (const typeX_t **) _chrVSrc, \ - **alpSrc = (const typeX_t **) _alpSrc; \ - yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \ - chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, (uint16_t **) _dest, \ - dstW, chrDstW, is_be, bits); \ -} -yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t); -yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t); -yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t); -yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t); -yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t); -yuv2NBPS(16, 
LE, 0, yuv2yuvX16_c_template, int32_t); + const int32_t **lumSrc = (const int32_t **) _lumSrc, \ + **chrUSrc = (const int32_t **) _chrUSrc, \ + **chrVSrc = (const int32_t **) _chrVSrc, \ + **alpSrc = (const int32_t **) _alpSrc; \ + yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ + alpSrc, (uint16_t **) _dest, \ + dstW, chrDstW, is_be, bits); \ +} +yuv2NBPS( 9, BE, 1); +yuv2NBPS( 9, LE, 0); +yuv2NBPS(10, BE, 1); +yuv2NBPS(10, LE, 0); +yuv2NBPS(16, BE, 1); +yuv2NBPS(16, LE, 0); static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, @@ -1932,27 +1880,6 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_s } } -static void hScale10_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, - const int16_t *filter, - const int16_t *filterPos, int filterSize) -{ - int i; - const uint16_t *src = (const uint16_t *) _src; - int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; - - for (i = 0; i < dstW; i++) { - int j; - int srcPos = filterPos[i]; - int val = 0; - - for (j = 0; j < filterSize; j++) { - val += src[srcPos + j] * filter[filterSize * i + j]; - } - // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit - dst[i] = FFMIN(val >> sh, (1 << 15) - 1); - } -} - // bilinear / bicubic scaling static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int16_t *filterPos, @@ -2098,7 +2025,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, if (convertRange) convertRange(dst, dstWidth); - if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) { + if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { c->scale19To15Fw(dst, (int32_t *) dst, dstWidth); } } @@ -2125,7 +2052,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 
uint8_t *formatConvBuffer, uint32_t *pal) { if (c->chrToYV12) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->scalingBpp, 8) >> 3, 16); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16); c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); src1= formatConvBuffer; src2= buf2; @@ -2149,7 +2076,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 if (c->chrConvertRange) c->chrConvertRange(dst1, dst2, dstWidth); - if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) { + if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth); c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth); } @@ -2808,27 +2735,27 @@ static av_cold void sws_init_swScale_c(SwsContext *c) } if (c->scalingBpp == 8) { - c->hScale = hScale_c; - if (c->flags & SWS_FAST_BILINEAR) { - c->hyscale_fast = hyscale_fast_c; - c->hcscale_fast = hcscale_fast_c; + c->hScale = hScale_c; + if (c->flags & SWS_FAST_BILINEAR) { + c->hyscale_fast = hyscale_fast_c; + c->hcscale_fast = hcscale_fast_c; + } + + if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { + if (c->srcRange) { + c->lumConvertRange = lumRangeFromJpeg_c; + c->chrConvertRange = chrRangeFromJpeg_c; + } else { + c->lumConvertRange = lumRangeToJpeg_c; + c->chrConvertRange = chrRangeToJpeg_c; } + } } else { - c->hScale = c->scalingBpp == 16 ? 
hScale16_c : hScale10_c; + c->hScale = hScale16_c; c->scale19To15Fw = scale19To15Fw_c; c->scale8To16Rv = scale8To16Rv_c; - } - if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { - if (c->scalingBpp <= 10) { - if (c->srcRange) { - c->lumConvertRange = lumRangeFromJpeg_c; - c->chrConvertRange = chrRangeFromJpeg_c; - } else { - c->lumConvertRange = lumRangeToJpeg_c; - c->chrConvertRange = chrRangeToJpeg_c; - } - } else { + if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { if (c->srcRange) { c->lumConvertRange = lumRangeFromJpeg16_c; c->chrConvertRange = chrRangeFromJpeg16_c; diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 60787aed91..9492303301 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -64,16 +64,11 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], * without any additional vertical scaling (or point-scaling). * * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the 4 output planes (Y/U/V/A). 
For >8bit - * output, this is in uint16_t + * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output + * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output + * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output + * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output + * @param dest pointer to the 4 output planes (Y/U/V/A) * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc */ @@ -87,19 +82,14 @@ typedef void (*yuv2planar1_fn) (struct SwsContext *c, * * @param c SWS scaling context * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) + * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output * @param lumFilterSize number of vertical luma/alpha input lines to scale * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) + * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output + * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output * @param chrFilterSize number of vertical chroma input lines to scale - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the 4 output planes (Y/U/V/A). 
For >8bit - * output, this is in uint16_t + * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output + * @param dest pointer to the 4 output planes (Y/U/V/A) * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc */ @@ -115,16 +105,11 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter, * that this function may do chroma scaling, see the "uvalpha" argument. * * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the output plane. For 16bit output, this is - * uint16_t + * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output + * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output + * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output + * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output + * @param dest pointer to the output plane * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param uvalpha chroma scaling coefficient for the second line of chroma @@ -147,16 +132,11 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, * output by doing bilinear scaling between two input lines. 
* * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the output plane. For 16bit output, this is - * uint16_t + * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output + * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output + * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output + * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output + * @param dest pointer to the output plane * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param yalpha luma/alpha scaling coefficients for the second input line. 
@@ -180,19 +160,14 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], * * @param c SWS scaling context * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) + * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output * @param lumFilterSize number of vertical luma/alpha input lines to scale * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) + * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output + * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output * @param chrFilterSize number of vertical chroma input lines to scale - * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, - * 19-bit for 16bit output (in int32_t) - * @param dest pointer to the output plane. For 16bit output, this is - * uint16_t + * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output + * @param dest pointer to the output plane * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param y vertical line number for this output. 
This does not need diff --git a/libswscale/utils.c b/libswscale/utils.c index 18d1227c83..c6abb6b446 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -896,15 +896,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) } } - c->scalingBpp = 1 + FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1, - av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1); - if (c->scalingBpp <= 8) - c->scalingBpp = 8; + c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1, + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8; if (c->scalingBpp == 16) dst_stride <<= 1; - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, - FFALIGN(srcW, 16) * 2 * FFALIGN(c->scalingBpp, 8) >> 3, - fail); + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2 * c->scalingBpp >> 3, fail); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) { c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { @@ -1059,7 +1055,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; } // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) - c->uv_off_px = dst_stride_px + 64 / (c->scalingBpp &~ 7); + c->uv_off_px = dst_stride_px + 64 / c->scalingBpp; c->uv_off_byte = dst_stride + 16; for (i=0; ivChrBufSize; i++) { FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail); diff --git a/tests/ref/lavfi/pixdesc b/tests/ref/lavfi/pixdesc index 0da94f861e..3730988777 100644 --- a/tests/ref/lavfi/pixdesc +++ b/tests/ref/lavfi/pixdesc @@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be a4a83d0811280eff7405d94a7de21596 -yuv422p10le 
23717b6c73a59912c605f27877ae2fb6 +yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 +yuv422p10le 3f478be644add24b6cc77e718a6e2afa yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_copy b/tests/ref/lavfi/pixfmts_copy index 0da94f861e..3730988777 100644 --- a/tests/ref/lavfi/pixfmts_copy +++ b/tests/ref/lavfi/pixfmts_copy @@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be a4a83d0811280eff7405d94a7de21596 -yuv422p10le 23717b6c73a59912c605f27877ae2fb6 +yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 +yuv422p10le 3f478be644add24b6cc77e718a6e2afa yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_null b/tests/ref/lavfi/pixfmts_null index 0da94f861e..3730988777 100644 --- a/tests/ref/lavfi/pixfmts_null +++ b/tests/ref/lavfi/pixfmts_null @@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be a4a83d0811280eff7405d94a7de21596 -yuv422p10le 23717b6c73a59912c605f27877ae2fb6 +yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 +yuv422p10le 3f478be644add24b6cc77e718a6e2afa yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale index d68cdd41d0..392b9ce601 100644 --- a/tests/ref/lavfi/pixfmts_scale +++ b/tests/ref/lavfi/pixfmts_scale @@ -31,15 +31,15 @@ uyvy422 314bd486277111a95d9369b944fa0400 yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5 yuv411p 1143e7c5cc28fe0922b051b17733bc4c yuv420p fdad2d8df8985e3d17e73c71f713cb14 
-yuv420p10be 04663d400e44692fe8a622a067f838da -yuv420p10le 6171850f66df7a727b4bed1c87ef9188 +yuv420p10be af5429f27b9f95bf955e795921c65cdc +yuv420p10le d0b47e6a8a44e6b5ca0fe4349a4e393b yuv420p16be 9688e33e03b8c8275ab2fb1df0f06bee yuv420p16le cba8b390ad5e7b8678e419b8ce79c008 -yuv420p9be ab163dfef03c4d563aca99b24276b9fd -yuv420p9le cd56c5a76ce74e504dd59d25a5e4389c +yuv420p9be a073b2d93b2a7dce2069ba252bc43175 +yuv420p9le b67233c3c7d93763d07d88f697c145e1 yuv422p 918e37701ee7377d16a8a6c119c56a40 -yuv422p10be e8e80fed7121f3afac994f2afac42cd2 -yuv422p10le 370866666f4889ee0928345b16d68fb4 +yuv422p10be 533fd21e7943c20a1026b19069b3b867 +yuv422p10le 59b20a4a8609f5da2dc54c78aea11e6c yuv422p16be 2cf502d7d386db1f1b3b946679d897b1 yuv422p16le 3002a4e47520731dcee5929aff49eb74 yuv440p 461503fdb9b90451020aa3b25ddf041c diff --git a/tests/ref/lavfi/pixfmts_vflip b/tests/ref/lavfi/pixfmts_vflip index a2b11690ab..2b62518a28 100644 --- a/tests/ref/lavfi/pixfmts_vflip +++ b/tests/ref/lavfi/pixfmts_vflip @@ -38,8 +38,8 @@ yuv420p16le 0f609e588e5a258644ef85170d70e030 yuv420p9be be40ec975fb2873891643cbbbddbc3b0 yuv420p9le 7e606310d3f5ff12badf911e8f333471 yuv422p d7f5cb44d9b0210d66d6a8762640ab34 -yuv422p10be 51d9ef13fe43ea9549b3792bfd449bf7 -yuv422p10le 4b286a243ee0e715b2961bc1b469e629 +yuv422p10be a28b051168af49435c04af5f58dce47b +yuv422p10le 35936ffff30df2697f47b9b8d2cb7dea yuv422p16be 51d9aa4e78d121c226d919ce97976fe4 yuv422p16le 12965c54bda8932ca72da194419a9908 yuv440p 876385e96165acf51271b20e5d85a416