From a97ee41bee60b2075c84e2ce6bb441304698744c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Delm=C3=A1s?= Date: Sun, 2 Sep 2012 12:42:01 +0200 Subject: [PATCH 01/11] mss12: move SliceContexts out of the common context into the codec contexts Signed-off-by: Kostya Shishkov --- libavcodec/mss1.c | 9 +++---- libavcodec/mss12.c | 60 ++++++++++++++++++++++------------------------ libavcodec/mss12.h | 9 +++---- libavcodec/mss2.c | 14 +++++++---- 4 files changed, 46 insertions(+), 46 deletions(-) diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c index caf0328ad0..678208b09b 100644 --- a/libavcodec/mss1.c +++ b/libavcodec/mss1.c @@ -30,7 +30,7 @@ typedef struct MSS1Context { MSS12Context ctx; AVFrame pic; - SliceContext sc[2]; + SliceContext sc; } MSS1Context; static void arith_normalise(ArithCoder *c) @@ -162,7 +162,8 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size, c->pal_stride = -ctx->pic.linesize[0]; c->keyframe = !arith_get_bit(&acoder); if (c->keyframe) { - ff_mss12_codec_reset(c); + c->corrupted = 0; + ff_mss12_slicecontext_reset(&ctx->sc); pal_changed = decode_pal(c, &acoder); ctx->pic.key_frame = 1; ctx->pic.pict_type = AV_PICTURE_TYPE_I; @@ -172,7 +173,7 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ctx->pic.key_frame = 0; ctx->pic.pict_type = AV_PICTURE_TYPE_P; } - c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0, + c->corrupted = ff_mss12_decode_rect(&ctx->sc, &acoder, 0, 0, avctx->width, avctx->height); if (c->corrupted) return AVERROR_INVALIDDATA; @@ -194,7 +195,7 @@ static av_cold int mss1_decode_init(AVCodecContext *avctx) c->ctx.avctx = avctx; avctx->coded_frame = &c->pic; - ret = ff_mss12_decode_init(&c->ctx, 0); + ret = ff_mss12_decode_init(&c->ctx, 0, &c->sc, NULL); avctx->pix_fmt = PIX_FMT_PAL8; diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c index 18f2f2808f..ea127696fc 100644 --- a/libavcodec/mss12.c +++ b/libavcodec/mss12.c @@ -435,39 +435,30 @@ static int decode_region_masked(MSS12Context const *c, ArithCoder *acoder, return 0; } -static av_cold void codec_init(MSS12Context *c, int version) +static av_cold void slicecontext_init(SliceContext *sc, + int version, int full_model_syms) { - int i; - for (i = 0; i < (c->slice_split ? 2 : 1); i++) { - c->sc[i].c = c; - model_init(&c->sc[i].intra_region, 2, THRESH_ADAPTIVE); - model_init(&c->sc[i].inter_region, 2, THRESH_ADAPTIVE); - model_init(&c->sc[i].split_mode, 3, THRESH_HIGH); - model_init(&c->sc[i].edge_mode, 2, THRESH_HIGH); - model_init(&c->sc[i].pivot, 3, THRESH_LOW); - - pixctx_init(&c->sc[i].intra_pix_ctx, 8, c->full_model_syms, 0); - - pixctx_init(&c->sc[i].inter_pix_ctx, version ? 3 : 2, - c->full_model_syms, version ? 1 : 0); - } - c->corrupted = 1; + model_init(&sc->intra_region, 2, THRESH_ADAPTIVE); + model_init(&sc->inter_region, 2, THRESH_ADAPTIVE); + model_init(&sc->split_mode, 3, THRESH_HIGH); + model_init(&sc->edge_mode, 2, THRESH_HIGH); + model_init(&sc->pivot, 3, THRESH_LOW); + + pixctx_init(&sc->intra_pix_ctx, 8, full_model_syms, 0); + + pixctx_init(&sc->inter_pix_ctx, version ? 3 : 2, + full_model_syms, version ? 1 : 0); } -void ff_mss12_codec_reset(MSS12Context *c) +void ff_mss12_slicecontext_reset(SliceContext *sc) { - int i; - for (i = 0; i < (c->slice_split ? 2 : 1); i++) { - model_reset(&c->sc[i].intra_region); - model_reset(&c->sc[i].inter_region); - model_reset(&c->sc[i].split_mode); - model_reset(&c->sc[i].edge_mode); - model_reset(&c->sc[i].pivot); - pixctx_reset(&c->sc[i].intra_pix_ctx); - pixctx_reset(&c->sc[i].inter_pix_ctx); - } - - c->corrupted = 0; + model_reset(&sc->intra_region); + model_reset(&sc->inter_region); + model_reset(&sc->split_mode); + model_reset(&sc->edge_mode); + model_reset(&sc->pivot); + pixctx_reset(&sc->intra_pix_ctx); + pixctx_reset(&sc->inter_pix_ctx); } static int decode_pivot(SliceContext *sc, ArithCoder *acoder, int base) @@ -595,7 +586,8 @@ int ff_mss12_decode_rect(SliceContext *sc, ArithCoder *acoder, return 0; } -av_cold int ff_mss12_decode_init(MSS12Context *c, int version) +av_cold int ff_mss12_decode_init(MSS12Context *c, int version, + SliceContext* sc1, SliceContext *sc2) { AVCodecContext *avctx = c->avctx; int i; @@ -690,7 +682,13 @@ av_cold int ff_mss12_decode_init(MSS12Context *c, int version) return AVERROR(ENOMEM); } - codec_init(c, version); + sc1->c = c; + slicecontext_init(sc1, version, c->full_model_syms); + if (c->slice_split) { + sc2->c = c; + slicecontext_init(sc2, version, c->full_model_syms); + } + c->corrupted = 1; return 0; } diff --git a/libavcodec/mss12.h b/libavcodec/mss12.h index 93d1f6146f..97cd25fdf2 100644 --- a/libavcodec/mss12.h +++ b/libavcodec/mss12.h @@ -86,21 +86,18 @@ typedef struct MSS12Context { int rgb_stride; int free_colours; int keyframe; - Model intra_region, inter_region; - Model pivot, edge_mode, split_mode; - PixContext intra_pix_ctx, inter_pix_ctx; int mvX, mvY; int corrupted; int slice_split; int full_model_syms; - SliceContext sc[2]; } MSS12Context; int ff_mss12_decode_rect(SliceContext *ctx, ArithCoder *acoder, int x, int y, int width, int height); void ff_mss12_model_update(Model *m, int val); -void ff_mss12_codec_reset(MSS12Context *ctx); -av_cold int ff_mss12_decode_init(MSS12Context *ctx, int version); +void ff_mss12_slicecontext_reset(SliceContext *sc); +av_cold int ff_mss12_decode_init(MSS12Context *c, int version, + SliceContext* sc1, SliceContext *sc2); av_cold int ff_mss12_decode_end(MSS12Context *ctx); #define ARITH_GET_BIT(VERSION) \ diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c index 2a0bf47cde..e4c854a01e 100644 --- a/libavcodec/mss2.c +++ b/libavcodec/mss2.c @@ -671,14 +671,18 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *data_size, buf += get_bits_count(&gb) >> 3; buf_size -= get_bits_count(&gb) >> 3; } else { - if (keyframe) - ff_mss12_codec_reset(c); + if (keyframe) { + c->corrupted = 0; + ff_mss12_slicecontext_reset(&ctx->sc[0]); + if (c->slice_split) + ff_mss12_slicecontext_reset(&ctx->sc[1]); + } else if (c->corrupted) return AVERROR_INVALIDDATA; bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING); arith2_init(&acoder, &gB); c->keyframe = keyframe; - if (c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0, + if (c->corrupted = ff_mss12_decode_rect(&ctx->sc[0], &acoder, 0, 0, avctx->width, ctx->split_position)) return AVERROR_INVALIDDATA; @@ -690,7 +694,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *data_size, return AVERROR_INVALIDDATA; bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING); arith2_init(&acoder, &gB); - if (c->corrupted = ff_mss12_decode_rect(&c->sc[1], &acoder, 0, + if (c->corrupted = ff_mss12_decode_rect(&ctx->sc[1], &acoder, 0, ctx->split_position, avctx->width, avctx->height - ctx->split_position)) @@ -830,7 +834,7 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx) int ret; c->avctx = avctx; avctx->coded_frame = &ctx->pic; - if (ret = ff_mss12_decode_init(c, 1)) + if (ret = ff_mss12_decode_init(c, 1, &ctx->sc[0], &ctx->sc[1])) return ret; c->pal_stride = c->mask_stride; c->pal_pic = av_malloc(c->pal_stride * avctx->height); From 626c1a33ed43c943b142f3357aaf369239cfe54a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Delm=C3=A1s?= Date: Sun, 2 Sep 2012 12:44:21 +0200 Subject: [PATCH 02/11] mss12: reduce SliceContext size from 1067 to 164 KB Signed-off-by: Kostya Shishkov --- libavcodec/mss1.c | 2 +- libavcodec/mss12.c | 54 +++++++++++++++++++++------------------------- libavcodec/mss12.h | 9 ++++---- libavcodec/mss2.c | 2 +- 4 files changed, 30 insertions(+), 37 deletions(-) diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c index 678208b09b..1591bba535 100644 --- a/libavcodec/mss1.c +++ b/libavcodec/mss1.c @@ -89,7 +89,7 @@ static int arith_get_number(ArithCoder *c, int mod_val) return val; } -static int arith_get_prob(ArithCoder *c, int *probs) +static int arith_get_prob(ArithCoder *c, int16_t *probs) { int range = c->high - c->low + 1; int val = ((c->value - c->low + 1) * probs[0] - 1) / range; diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c index ea127696fc..1059f66a8c 100644 --- a/libavcodec/mss12.c +++ b/libavcodec/mss12.c @@ -61,13 +61,9 @@ static void model_reset(Model *m) m->weights[i] = 1; m->cum_prob[i] = m->num_syms - i; } - m->weights[0] = -1; - m->idx2sym[0] = -1; - m->sym2idx[m->num_syms] = -1; - for (i = 0; i < m->num_syms; i++) { - m->sym2idx[i] = i + 1; + m->weights[0] = 0; + for (i = 0; i < m->num_syms; i++) m->idx2sym[i + 1] = i; - } } static av_cold void model_init(Model *m, int num_syms, int thr_weight) @@ -75,7 +71,6 @@ static av_cold void model_init(Model *m, int num_syms, int thr_weight) m->num_syms = num_syms; m->thr_weight = thr_weight; m->threshold = num_syms * thr_weight; - model_reset(m); } static void model_rescale_weights(Model *m) @@ -109,8 +104,6 @@ void ff_mss12_model_update(Model *m, int val) m->idx2sym[val] = sym2; m->idx2sym[i] = sym1; - m->sym2idx[sym1] = i; - m->sym2idx[sym2] = val; val = i; } @@ -123,7 +116,7 @@ void ff_mss12_model_update(Model *m, int val) static void pixctx_reset(PixContext *ctx) { - int i, j, k; + int i, j; if (!ctx->special_initial_cache) for (i = 0; i < ctx->cache_size; i++) @@ -137,16 +130,15 @@ static void pixctx_reset(PixContext *ctx) model_reset(&ctx->cache_model); model_reset(&ctx->full_model); - for (i = 0; i < 4; i++) - for (j = 0; j < sec_order_sizes[i]; j++) - for (k = 0; k < 4; k++) - model_reset(&ctx->sec_models[i][j][k]); + for (i = 0; i < 15; i++) + for (j = 0; j < 4; j++) + model_reset(&ctx->sec_models[i][j]); } static av_cold void pixctx_init(PixContext *ctx, int cache_size, int full_model_syms, int special_initial_cache) { - int i, j, k; + int i, j, k, idx; ctx->cache_size = cache_size + 4; ctx->num_syms = cache_size; @@ -155,10 +147,10 @@ static av_cold void pixctx_init(PixContext *ctx, int cache_size, model_init(&ctx->cache_model, ctx->num_syms + 1, THRESH_LOW); model_init(&ctx->full_model, full_model_syms, THRESH_HIGH); - for (i = 0; i < 4; i++) - for (j = 0; j < sec_order_sizes[i]; j++) + for (i = 0, idx = 0; i < 4; i++) + for (j = 0; j < sec_order_sizes[i]; j++, idx++) for (k = 0; k < 4; k++) - model_init(&ctx->sec_models[i][j][k], 2 + i, + model_init(&ctx->sec_models[idx][k], 2 + i, i ? THRESH_LOW : THRESH_ADAPTIVE); } @@ -268,46 +260,48 @@ static int decode_pixel_in_context(ArithCoder *acoder, PixContext *pctx, switch (nlen) { case 1: - case 4: layer = 0; break; case 2: if (neighbours[TOP] == neighbours[TOP_LEFT]) { if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) - layer = 3; + layer = 1; else if (neighbours[LEFT] == neighbours[TOP_LEFT]) layer = 2; else - layer = 4; + layer = 3; } else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) { if (neighbours[LEFT] == neighbours[TOP_LEFT]) - layer = 1; + layer = 4; else layer = 5; } else if (neighbours[LEFT] == neighbours[TOP_LEFT]) { layer = 6; } else { - layer = 0; + layer = 7; } break; case 3: if (neighbours[TOP] == neighbours[TOP_LEFT]) - layer = 0; + layer = 8; else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) - layer = 1; + layer = 9; else if (neighbours[LEFT] == neighbours[TOP_LEFT]) - layer = 2; + layer = 10; else if (neighbours[TOP_RIGHT] == neighbours[TOP]) - layer = 3; + layer = 11; else if (neighbours[TOP] == neighbours[LEFT]) - layer = 4; + layer = 12; else - layer = 5; + layer = 13; + break; + case 4: + layer = 14; break; } pix = acoder->get_model_sym(acoder, - &pctx->sec_models[nlen - 1][layer][sub]); + &pctx->sec_models[layer][sub]); if (pix < nlen) return ref_pix[pix]; else diff --git a/libavcodec/mss12.h b/libavcodec/mss12.h index 97cd25fdf2..678a0c0dfb 100644 --- a/libavcodec/mss12.h +++ b/libavcodec/mss12.h @@ -38,10 +38,9 @@ #define THRESH_HIGH 50 typedef struct Model { - int cum_prob[MODEL_MAX_SYMS + 1]; - int weights[MODEL_MAX_SYMS + 1]; - int idx2sym[MODEL_MAX_SYMS + 1]; - int sym2idx[MODEL_MAX_SYMS + 1]; + int16_t cum_prob[MODEL_MAX_SYMS + 1]; + int16_t weights[MODEL_MAX_SYMS + 1]; + uint8_t idx2sym[MODEL_MAX_SYMS + 1]; int num_syms; int thr_weight, threshold; } Model; @@ -60,7 +59,7 @@ typedef struct PixContext { int cache_size, num_syms; uint8_t cache[12]; Model cache_model, full_model; - Model sec_models[4][8][4]; + Model sec_models[15][4]; int special_initial_cache; } PixContext; diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c index e4c854a01e..ce3cfb8a7a 100644 --- a/libavcodec/mss2.c +++ b/libavcodec/mss2.c @@ -106,7 +106,7 @@ static int arith2_get_number(ArithCoder *c, int n) return val; } -static int arith2_get_prob(ArithCoder *c, int *probs) +static int arith2_get_prob(ArithCoder *c, int16_t *probs) { int range = c->high - c->low + 1, n = *probs; int scale = av_log2(range) - av_log2(n); From 344fbc47c7b3b20f953babff65fffba8a38b8c01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Delm=C3=A1s?= Date: Fri, 31 Aug 2012 11:24:26 +0200 Subject: [PATCH 03/11] mss12: merge decode_pixel() and decode_top_left_pixel() No meaningful generated code differences using gcc -O3. Signed-off-by: Kostya Shishkov --- libavcodec/mss12.c | 63 ++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c index 1059f66a8c..360197870b 100644 --- a/libavcodec/mss12.c +++ b/libavcodec/mss12.c @@ -154,50 +154,29 @@ static av_cold void pixctx_init(PixContext *ctx, int cache_size, i ? THRESH_LOW : THRESH_ADAPTIVE); } -static int decode_top_left_pixel(ArithCoder *acoder, PixContext *pctx) +static av_always_inline int decode_pixel(ArithCoder *acoder, PixContext *pctx, + uint8_t *ngb, int num_ngb, int any_ngb) { int i, val, pix; val = acoder->get_model_sym(acoder, &pctx->cache_model); if (val < pctx->num_syms) { - pix = pctx->cache[val]; - } else { - pix = acoder->get_model_sym(acoder, &pctx->full_model); - for (i = 0; i < pctx->cache_size - 1; i++) - if (pctx->cache[i] == pix) - break; - val = i; - } - if (val) { - for (i = val; i > 0; i--) - pctx->cache[i] = pctx->cache[i - 1]; - pctx->cache[0] = pix; - } - - return pix; -} - -static int decode_pixel(ArithCoder *acoder, PixContext *pctx, - uint8_t *ngb, int num_ngb) -{ - int i, val, pix; - - val = acoder->get_model_sym(acoder, &pctx->cache_model); - if (val < pctx->num_syms) { - int idx, j; - - idx = 0; - for (i = 0; i < pctx->cache_size; i++) { - for (j = 0; j < num_ngb; j++) - if (pctx->cache[i] == ngb[j]) - break; - if (j == num_ngb) { - if (idx == val) - break; - idx++; + if (any_ngb) { + int idx, j; + + idx = 0; + for (i = 0; i < pctx->cache_size; i++) { + for (j = 0; j < num_ngb; j++) + if (pctx->cache[i] == ngb[j]) + break; + if (j == num_ngb) { + if (idx == val) + break; + idx++; + } } + val = FFMIN(i, pctx->cache_size - 1); } - val = FFMIN(i, pctx->cache_size - 1); pix = pctx->cache[val]; } else { pix = acoder->get_model_sym(acoder, &pctx->full_model); @@ -305,7 +284,7 @@ static int decode_pixel_in_context(ArithCoder *acoder, PixContext *pctx, if (pix < nlen) return ref_pix[pix]; else - return decode_pixel(acoder, pctx, ref_pix, nlen); + return decode_pixel(acoder, pctx, ref_pix, nlen, 1); } static int decode_region(ArithCoder *acoder, uint8_t *dst, uint8_t *rgb_pic, @@ -320,7 +299,7 @@ static int decode_region(ArithCoder *acoder, uint8_t *dst, uint8_t *rgb_pic, for (j = 0; j < height; j++) { for (i = 0; i < width; i++) { if (!i && !j) - p = decode_top_left_pixel(acoder, pctx); + p = decode_pixel(acoder, pctx, NULL, 0, 0); else p = decode_pixel_in_context(acoder, pctx, dst + i, stride, i, j, width - i - 1); @@ -412,7 +391,7 @@ static int decode_region_masked(MSS12Context const *c, ArithCoder *acoder, return -1; } else if (mask[i] != 0x80) { if (!i && !j) - p = decode_top_left_pixel(acoder, pctx); + p = decode_pixel(acoder, pctx, NULL, 0, 0); else p = decode_pixel_in_context(acoder, pctx, dst + i, stride, i, j, width - i - 1); @@ -490,7 +469,7 @@ static int decode_region_intra(SliceContext *sc, ArithCoder *acoder, uint8_t *dst = c->pal_pic + x + y * stride; uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * rgb_stride; - pix = decode_top_left_pixel(acoder, &sc->intra_pix_ctx); + pix = decode_pixel(acoder, &sc->intra_pix_ctx, NULL, 0, 0); rgb_pix = c->pal[pix]; for (i = 0; i < height; i++, dst += stride, rgb_dst += rgb_stride) { memset(dst, pix, width); @@ -516,7 +495,7 @@ static int decode_region_inter(SliceContext *sc, ArithCoder *acoder, mode = acoder->get_model_sym(acoder, &sc->inter_region); if (!mode) { - mode = decode_top_left_pixel(acoder, &sc->inter_pix_ctx); + mode = decode_pixel(acoder, &sc->inter_pix_ctx, NULL, 0, 0); if (c->avctx->err_recognition & AV_EF_EXPLODE && ( c->rgb_pic && mode != 0x01 && mode != 0x02 && mode != 0x04 || From a84ac7a86055144d21a6ecea188ae3596ec0a283 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 2 Sep 2012 17:03:41 +0200 Subject: [PATCH 04/11] x86: h264dsp: drop some unnecessary ifdefs around prototype declarations --- libavcodec/x86/h264dsp_init.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index 7be78a8207..a44cf9ddef 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -39,11 +39,9 @@ IDCT_ADD_FUNC(8_dc, 10, sse2) IDCT_ADD_FUNC(8, 8, mmx) IDCT_ADD_FUNC(8, 8, sse2) IDCT_ADD_FUNC(8, 10, sse2) -#if HAVE_AVX_EXTERNAL IDCT_ADD_FUNC(, 10, avx) IDCT_ADD_FUNC(8_dc, 10, avx) IDCT_ADD_FUNC(8, 10, avx) -#endif #define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \ @@ -64,10 +62,8 @@ IDCT_ADD_REP_FUNC(, 16intra, 8, mmx) IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2) IDCT_ADD_REP_FUNC(, 16intra, 8, sse2) IDCT_ADD_REP_FUNC(, 16intra, 10, sse2) -#if HAVE_AVX_EXTERNAL IDCT_ADD_REP_FUNC(, 16, 10, avx) IDCT_ADD_REP_FUNC(, 16intra, 10, avx) -#endif #define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \ @@ -79,9 +75,7 @@ IDCT_ADD_REP_FUNC2(, 8, 8, mmx) IDCT_ADD_REP_FUNC2(, 8, 8, mmx2) IDCT_ADD_REP_FUNC2(, 8, 8, sse2) IDCT_ADD_REP_FUNC2(, 8, 10, sse2) -#if HAVE_AVX_EXTERNAL IDCT_ADD_REP_FUNC2(, 8, 10, avx) -#endif void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul); void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul); From f82c4fb27fa7f8afbe8411c9d37e85facbbc87ae Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 29 Aug 2012 18:53:14 +0200 Subject: [PATCH 05/11] x86: Add convenience macros to check for CPU extensions and flags --- libavutil/x86/cpu.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 libavutil/x86/cpu.h diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h new file mode 100644 index 0000000000..e14cb57416 --- /dev/null +++ b/libavutil/x86/cpu.h @@ -0,0 +1,57 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_X86_CPU_H +#define AVUTIL_X86_CPU_H + +#include "config.h" +#include "libavutil/cpu.h" + +#define CPUEXT(flags, suffix, cpuext) \ + (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext)) + +#define AV_CPU_FLAG_AMD3DNOW AV_CPU_FLAG_3DNOW +#define AV_CPU_FLAG_AMD3DNOWEXT AV_CPU_FLAG_3DNOWEXT + +#define EXTERNAL_AMD3DNOW(flags) CPUEXT(flags, _EXTERNAL, AMD3DNOW) +#define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT(flags, _EXTERNAL, AMD3DNOWEXT) +#define EXTERNAL_MMX(flags) CPUEXT(flags, _EXTERNAL, MMX) +#define EXTERNAL_MMXEXT(flags) CPUEXT(flags, _EXTERNAL, MMXEXT) +#define EXTERNAL_SSE(flags) CPUEXT(flags, _EXTERNAL, SSE) +#define EXTERNAL_SSE2(flags) CPUEXT(flags, _EXTERNAL, SSE2) +#define EXTERNAL_SSE3(flags) CPUEXT(flags, _EXTERNAL, SSE3) +#define EXTERNAL_SSSE3(flags) CPUEXT(flags, _EXTERNAL, SSSE3) +#define EXTERNAL_SSE4(flags) CPUEXT(flags, _EXTERNAL, SSE4) +#define EXTERNAL_SSE42(flags) CPUEXT(flags, _EXTERNAL, SSE42) +#define EXTERNAL_AVX(flags) CPUEXT(flags, _EXTERNAL, AVX) +#define EXTERNAL_FMA4(flags) CPUEXT(flags, _EXTERNAL, FMA4) + +#define INLINE_AMD3DNOW(flags) CPUEXT(flags, _INLINE, AMD3DNOW) +#define INLINE_AMD3DNOWEXT(flags) CPUEXT(flags, _INLINE, AMD3DNOWEXT) +#define INLINE_MMX(flags) CPUEXT(flags, _INLINE, MMX) +#define INLINE_MMXEXT(flags) CPUEXT(flags, _INLINE, MMXEXT) +#define INLINE_SSE(flags) CPUEXT(flags, _INLINE, SSE) +#define INLINE_SSE2(flags) CPUEXT(flags, _INLINE, SSE2) +#define INLINE_SSE3(flags) CPUEXT(flags, _INLINE, SSE3) +#define INLINE_SSSE3(flags) CPUEXT(flags, _INLINE, SSSE3) +#define INLINE_SSE4(flags) CPUEXT(flags, _INLINE, SSE4) +#define INLINE_SSE42(flags) CPUEXT(flags, _INLINE, SSE42) +#define INLINE_AVX(flags) CPUEXT(flags, _INLINE, AVX) +#define INLINE_FMA4(flags) CPUEXT(flags, _INLINE, FMA4) + +#endif /* AVUTIL_X86_CPU_H */ From aa264da5bf6a3d82a47abba4cfcfa629dd1f3daa Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Tue, 4 Sep 2012 14:02:30 +0300 Subject: [PATCH 06/11] adpcmenc: Calculate the IMA_QT predictor without overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the value given to put_bits was 10 bits long for positive predictors, even though 9 bits were to be written. The extra bit could in some cases overwrite existing bits in the bitstream writer cache. This fixes a failed assert in put_bits.h, when running a version built with -DDEBUG. The fate test result gets slightly improved, thanks to getting rid of the overwritten bits in the bitstream writer cache. Signed-off-by: Martin Storsjö --- libavcodec/adpcmenc.c | 2 +- tests/ref/fate/acodec-adpcm-ima_qt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libavcodec/adpcmenc.c b/libavcodec/adpcmenc.c index 843b32fae5..f8ecd589dc 100644 --- a/libavcodec/adpcmenc.c +++ b/libavcodec/adpcmenc.c @@ -570,7 +570,7 @@ static int adpcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, init_put_bits(&pb, dst, pkt_size * 8); for (ch = 0; ch < avctx->channels; ch++) { - put_bits(&pb, 9, (c->status[ch].prev_sample + 0x10000) >> 7); + put_bits(&pb, 9, (c->status[ch].prev_sample & 0xFFFF) >> 7); put_bits(&pb, 7, c->status[ch].step_index); if (avctx->trellis > 0) { uint8_t buf[64]; diff --git a/tests/ref/fate/acodec-adpcm-ima_qt b/tests/ref/fate/acodec-adpcm-ima_qt index 79b8c60ccc..80015275fc 100644 --- a/tests/ref/fate/acodec-adpcm-ima_qt +++ b/tests/ref/fate/acodec-adpcm-ima_qt @@ -1,4 +1,4 @@ -057d27978b35888776512e4e9669a63b *tests/data/fate/acodec-adpcm-ima_qt.aiff +23cbae1182e150ebf28e0abfb9cba127 *tests/data/fate/acodec-adpcm-ima_qt.aiff 281252 tests/data/fate/acodec-adpcm-ima_qt.aiff -169c40435c68d50112c9c61fc67e446d *tests/data/fate/acodec-adpcm-ima_qt.out.wav -stddev: 918.61 PSNR: 37.07 MAXDIFF:34029 bytes: 1058400/ 1058560 +b0fafd002c38fb70acaddfda1a31ed61 *tests/data/fate/acodec-adpcm-ima_qt.out.wav +stddev: 904.76 PSNR: 37.20 MAXDIFF:34029 bytes: 1058400/ 1058560 From 6d9e74cd4179f42a8fa860f2e08d370c7c36325f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 4 Sep 2012 14:31:52 +0300 Subject: [PATCH 07/11] proresenc: Write the full value in one put_bits call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the put_bits call writing the value wrote a value larger than the number of bits specified, failing asserts in debug mode. There was no actual bitstream writer corruption, since the overwritten bit already always was set to 1. Signed-off-by: Martin Storsjö --- libavcodec/proresenc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libavcodec/proresenc.c b/libavcodec/proresenc.c index a24b7118c2..f4feed5ee4 100644 --- a/libavcodec/proresenc.c +++ b/libavcodec/proresenc.c @@ -299,8 +299,7 @@ static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int exponent = av_log2(val); put_bits(pb, exponent - exp_order + switch_bits, 0); - put_bits(pb, 1, 1); - put_bits(pb, exponent, val); + put_bits(pb, exponent + 1, val); } else { exponent = val >> rice_order; From cc86bd4ccc19d79747c76925b36d01dc7cad07d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 4 Sep 2012 14:57:45 +0300 Subject: [PATCH 08/11] proresenc: Don't free a buffer not owned by the codec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data in coded_frame isn't allocated using get_buffer, but is copied from the input frame to the encoder, so we should not try to free it ourselves. This fixes an assert failure when running in debug mode. Signed-off-by: Martin Storsjö --- libavcodec/proresenc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/libavcodec/proresenc.c b/libavcodec/proresenc.c index f4feed5ee4..86def00819 100644 --- a/libavcodec/proresenc.c +++ b/libavcodec/proresenc.c @@ -867,9 +867,6 @@ static av_cold int encode_close(AVCodecContext *avctx) ProresContext *ctx = avctx->priv_data; int i; - if (avctx->coded_frame->data[0]) - avctx->release_buffer(avctx, avctx->coded_frame); - av_freep(&avctx->coded_frame); if (ctx->tdata) { From a684267076fc577aaebed9e35b566796d361a69c Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Fri, 31 Aug 2012 11:22:20 +0200 Subject: [PATCH 09/11] pixdesc: cosmetics --- libavutil/pixdesc.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 8e08b5a989..122072e21a 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -26,8 +26,10 @@ #include "intreadwrite.h" -void av_read_image_line(uint16_t *dst, const uint8_t *data[4], const int linesize[4], - const AVPixFmtDescriptor *desc, int x, int y, int c, int w, +void av_read_image_line(uint16_t *dst, + const uint8_t *data[4], const int linesize[4], + const AVPixFmtDescriptor *desc, + int x, int y, int c, int w, int read_pal_component) { AVComponentDescriptor comp = desc->comp[c]; @@ -53,7 +55,8 @@ void av_read_image_line(uint16_t *dst, const uint8_t *data[4], const int linesiz *dst++ = val; } } else { - const uint8_t *p = data[plane] + y * linesize[plane] + x * step + comp.offset_plus1 - 1; + const uint8_t *p = data[plane] + y * linesize[plane] + + x * step + comp.offset_plus1 - 1; int is_8bit = shift + depth <= 8; if (is_8bit) @@ -71,8 +74,10 @@ void av_read_image_line(uint16_t *dst, const uint8_t *data[4], const int linesiz } } -void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesize[4], - const AVPixFmtDescriptor *desc, int x, int y, int c, int w) +void av_write_image_line(const uint16_t *src, + uint8_t *data[4], const int linesize[4], + const AVPixFmtDescriptor *desc, + int x, int y, int c, int w) { AVComponentDescriptor comp = desc->comp[c]; int plane = comp.plane; @@ -93,7 +98,8 @@ void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesi } } else { int shift = comp.shift; - uint8_t *p = data[plane] + y * linesize[plane] + x * step + comp.offset_plus1 - 1; + uint8_t *p = data[plane] + y * linesize[plane] + + x * step + comp.offset_plus1 - 1; if (shift + depth <= 8) { p += !!(flags & PIX_FMT_BE); @@ -395,9 +401,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = { .log2_chroma_w = 1, .log2_chroma_h = 1, .comp = { - { 0,0,1,0,7 }, /* Y */ - { 1,1,1,0,7 }, /* U */ - { 1,1,2,0,7 }, /* V */ + { 0, 0, 1, 0, 7 }, /* Y */ + { 1, 1, 1, 0, 7 }, /* U */ + { 1, 1, 2, 0, 7 }, /* V */ }, .flags = PIX_FMT_PLANAR, }, @@ -863,9 +869,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = { .log2_chroma_w = 1, .log2_chroma_h = 0, .comp = { - {0,1,1,0,8}, /* Y */ - {1,1,1,0,8}, /* U */ - {2,1,1,0,8}, /* V */ + { 0, 1, 1, 0, 8 }, /* Y */ + { 1, 1, 1, 0, 8 }, /* U */ + { 2, 1, 1, 0, 8 }, /* V */ }, .flags = PIX_FMT_BE | PIX_FMT_PLANAR, }, From 30939390775029fa70b8491d570ac6013cd03c71 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Tue, 4 Sep 2012 15:04:46 +0200 Subject: [PATCH 10/11] avio: make avio_close NULL the freed buffer --- libavformat/aviobuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c index 0353a17379..fb01613298 100644 --- a/libavformat/aviobuf.c +++ b/libavformat/aviobuf.c @@ -762,7 +762,7 @@ int avio_close(AVIOContext *s) return 0; h = s->opaque; - av_free(s->buffer); + av_freep(&s->buffer); av_free(s); return ffurl_close(h); } From b36f87ff90d87687f574d51385f47bb98d14600a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Elio=20Petten=C3=B2?= Date: Mon, 3 Sep 2012 05:20:44 -0700 Subject: [PATCH 11/11] configure: add support for bdver1 and bdver2 CPU types. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Diego Elio Pettenò Signed-off-by: Luca Barbato --- configure | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index ff051f537a..88c5c75004 100755 --- a/configure +++ b/configure @@ -2122,7 +2122,7 @@ suncc_flags(){ prescott|nocona) echo -xarch=sse3 -xchip=pentium4 ;; *-sse3) echo -xarch=sse3 ;; core2) echo -xarch=ssse3 -xchip=core2 ;; - amdfam10|barcelona) echo -xarch=sse4_1 ;; + amdfam10|barcelona|bdver*) echo -xarch=sse4_1 ;; athlon-4|athlon-[mx]p) echo -xarch=ssea ;; k8|opteron|athlon64|athlon-fx) echo -xarch=sse2a ;; @@ -2494,7 +2494,7 @@ elif enabled x86; then disable cmov ;; # targets that do support conditional mov (cmov) - i686|pentiumpro|pentium[23]|pentium-m|athlon|athlon-tbird|athlon-4|athlon-[mx]p|athlon64*|k8*|opteron*|athlon-fx|core2|amdfam10|barcelona|atom) + i686|pentiumpro|pentium[23]|pentium-m|athlon|athlon-tbird|athlon-4|athlon-[mx]p|athlon64*|k8*|opteron*|athlon-fx|core2|amdfam10|barcelona|atom|bdver*) cpuflags="-march=$cpu" enable cmov enable fast_cmov