This reverts commit f8bed30d8b. The overlap filter, which runs after
the IDCT, should operate on unclamped values, so the IDCT and
put_pixels() cannot be merged into a single function if we want to
attempt to be bit-exact.
tags/n0.8
| @@ -130,8 +130,7 @@ do { \ | |||||
| /** Do inverse transform on 8x8 block | /** Do inverse transform on 8x8 block | ||||
| */ | */ | ||||
| static void vc1_inv_trans_8x8_altivec(DCTELEM block[64], | |||||
| int sign, int rangered) | |||||
| static void vc1_inv_trans_8x8_altivec(DCTELEM block[64]) | |||||
| { | { | ||||
| vector signed short src0, src1, src2, src3, src4, src5, src6, src7; | vector signed short src0, src1, src2, src3, src4, src5, src6, src7; | ||||
| vector signed int s0, s1, s2, s3, s4, s5, s6, s7; | vector signed int s0, s1, s2, s3, s4, s5, s6, s7; | ||||
| @@ -145,9 +144,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64], | |||||
| const vector unsigned int vec_2 = vec_splat_u32(2); | const vector unsigned int vec_2 = vec_splat_u32(2); | ||||
| const vector signed int vec_1s = vec_splat_s32(1); | const vector signed int vec_1s = vec_splat_s32(1); | ||||
| const vector unsigned int vec_1 = vec_splat_u32(1); | const vector unsigned int vec_1 = vec_splat_u32(1); | ||||
| const vector unsigned short rangered_shift = vec_splat_u16(1); | |||||
| const vector signed short signed_bias = vec_sl(vec_splat_s16(4), | |||||
| vec_splat_u16(4)); | |||||
| src0 = vec_ld( 0, block); | src0 = vec_ld( 0, block); | ||||
| src1 = vec_ld( 16, block); | src1 = vec_ld( 16, block); | ||||
| @@ -217,27 +213,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64], | |||||
| src6 = vec_pack(sE, s6); | src6 = vec_pack(sE, s6); | ||||
| src7 = vec_pack(sF, s7); | src7 = vec_pack(sF, s7); | ||||
| if (rangered) { | |||||
| if (!sign) { | |||||
| src0 = vec_sub(src0, signed_bias); | |||||
| src1 = vec_sub(src1, signed_bias); | |||||
| src2 = vec_sub(src2, signed_bias); | |||||
| src3 = vec_sub(src3, signed_bias); | |||||
| src4 = vec_sub(src4, signed_bias); | |||||
| src5 = vec_sub(src5, signed_bias); | |||||
| src6 = vec_sub(src6, signed_bias); | |||||
| src7 = vec_sub(src7, signed_bias); | |||||
| } | |||||
| src0 = vec_sl(src0, rangered_shift); | |||||
| src1 = vec_sl(src1, rangered_shift); | |||||
| src2 = vec_sl(src2, rangered_shift); | |||||
| src3 = vec_sl(src3, rangered_shift); | |||||
| src4 = vec_sl(src4, rangered_shift); | |||||
| src5 = vec_sl(src5, rangered_shift); | |||||
| src6 = vec_sl(src6, rangered_shift); | |||||
| src7 = vec_sl(src7, rangered_shift); | |||||
| } | |||||
| vec_st(src0, 0, block); | vec_st(src0, 0, block); | ||||
| vec_st(src1, 16, block); | vec_st(src1, 16, block); | ||||
| vec_st(src2, 32, block); | vec_st(src2, 32, block); | ||||
| @@ -248,36 +223,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64], | |||||
| vec_st(src7,112, block); | vec_st(src7,112, block); | ||||
| } | } | ||||
| static void vc1_inv_trans_8x8_add_altivec(uint8_t *dest, int stride, DCTELEM *b) | |||||
| { | |||||
| vc1_inv_trans_8x8_altivec(b, 0, 0); | |||||
| ff_add_pixels_clamped_c(b, dest, stride); | |||||
| } | |||||
| static void vc1_inv_trans_8x8_put_signed_altivec(uint8_t *dest, int stride, DCTELEM *b) | |||||
| { | |||||
| vc1_inv_trans_8x8_altivec(b, 1, 0); | |||||
| ff_put_signed_pixels_clamped_c(b, dest, stride); | |||||
| } | |||||
| static void vc1_inv_trans_8x8_put_signed_rangered_altivec(uint8_t *dest, int stride, DCTELEM *b) | |||||
| { | |||||
| vc1_inv_trans_8x8_altivec(b, 1, 1); | |||||
| ff_put_signed_pixels_clamped_c(b, dest, stride); | |||||
| } | |||||
| static void vc1_inv_trans_8x8_put_altivec(uint8_t *dest, int stride, DCTELEM *b) | |||||
| { | |||||
| vc1_inv_trans_8x8_altivec(b, 0, 0); | |||||
| ff_put_pixels_clamped_c(b, dest, stride); | |||||
| } | |||||
| static void vc1_inv_trans_8x8_put_rangered_altivec(uint8_t *dest, int stride, DCTELEM *b) | |||||
| { | |||||
| vc1_inv_trans_8x8_altivec(b, 0, 1); | |||||
| ff_put_pixels_clamped_c(b, dest, stride); | |||||
| } | |||||
| /** Do inverse transform on 8x4 part of block | /** Do inverse transform on 8x4 part of block | ||||
| */ | */ | ||||
| static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block) | static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block) | ||||
| @@ -396,11 +341,7 @@ void ff_vc1dsp_init_altivec(VC1DSPContext* dsp) | |||||
| if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) | if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) | ||||
| return; | return; | ||||
| dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_altivec; | |||||
| dsp->vc1_inv_trans_8x8_put_signed[0] = vc1_inv_trans_8x8_put_signed_altivec; | |||||
| dsp->vc1_inv_trans_8x8_put_signed[1] = vc1_inv_trans_8x8_put_signed_rangered_altivec; | |||||
| dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_altivec; | |||||
| dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_rangered_altivec; | |||||
| dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec; | |||||
| dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; | dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; | ||||
| dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec; | dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec; | ||||
| dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec; | dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec; | ||||
| @@ -280,28 +280,6 @@ static int vop_dquant_decoding(VC1Context *v) | |||||
| static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb); | static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb); | ||||
| static void simple_idct_put_rangered(uint8_t *dest, int line_size, DCTELEM *block) | |||||
| { | |||||
| int i; | |||||
| ff_simple_idct(block); | |||||
| for (i = 0; i < 64; i++) block[i] = (block[i] - 64) << 1; | |||||
| ff_put_pixels_clamped_c(block, dest, line_size); | |||||
| } | |||||
| static void simple_idct_put_signed(uint8_t *dest, int line_size, DCTELEM *block) | |||||
| { | |||||
| ff_simple_idct(block); | |||||
| ff_put_signed_pixels_clamped_c(block, dest, line_size); | |||||
| } | |||||
| static void simple_idct_put_signed_rangered(uint8_t *dest, int line_size, DCTELEM *block) | |||||
| { | |||||
| int i; | |||||
| ff_simple_idct(block); | |||||
| for (i = 0; i < 64; i++) block[i] <<= 1; | |||||
| ff_put_signed_pixels_clamped_c(block, dest, line_size); | |||||
| } | |||||
| /** | /** | ||||
| * Decode Simple/Main Profiles sequence header | * Decode Simple/Main Profiles sequence header | ||||
| * @see Figure 7-8, p16-17 | * @see Figure 7-8, p16-17 | ||||
| @@ -359,11 +337,7 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte | |||||
| v->res_fasttx = get_bits1(gb); | v->res_fasttx = get_bits1(gb); | ||||
| if (!v->res_fasttx) | if (!v->res_fasttx) | ||||
| { | { | ||||
| v->vc1dsp.vc1_inv_trans_8x8_add = ff_simple_idct_add; | |||||
| v->vc1dsp.vc1_inv_trans_8x8_put[0] = ff_simple_idct_put; | |||||
| v->vc1dsp.vc1_inv_trans_8x8_put[1] = simple_idct_put_rangered; | |||||
| v->vc1dsp.vc1_inv_trans_8x8_put_signed[0] = simple_idct_put_signed; | |||||
| v->vc1dsp.vc1_inv_trans_8x8_put_signed[1] = simple_idct_put_signed_rangered; | |||||
| v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct; | |||||
| v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add; | v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add; | ||||
| v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add; | v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add; | ||||
| v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add; | v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add; | ||||
| @@ -2016,7 +2016,8 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan | |||||
| if(i==1) | if(i==1) | ||||
| v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block); | v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block); | ||||
| else{ | else{ | ||||
| v->vc1dsp.vc1_inv_trans_8x8_add(dst, linesize, block); | |||||
| v->vc1dsp.vc1_inv_trans_8x8(block); | |||||
| s->dsp.add_pixels_clamped(block, dst, linesize); | |||||
| } | } | ||||
| } | } | ||||
| break; | break; | ||||
| @@ -2258,7 +2259,7 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| { | { | ||||
| MpegEncContext *s = &v->s; | MpegEncContext *s = &v->s; | ||||
| GetBitContext *gb = &s->gb; | GetBitContext *gb = &s->gb; | ||||
| int i; | |||||
| int i, j; | |||||
| int mb_pos = s->mb_x + s->mb_y * s->mb_stride; | int mb_pos = s->mb_x + s->mb_y * s->mb_stride; | ||||
| int cbp; /* cbp decoding stuff */ | int cbp; /* cbp decoding stuff */ | ||||
| int mqdiff, mquant; /* MB quantization */ | int mqdiff, mquant; /* MB quantization */ | ||||
| @@ -2288,8 +2289,6 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| { | { | ||||
| if (!skipped) | if (!skipped) | ||||
| { | { | ||||
| vc1_idct_func idct8x8_fn; | |||||
| GET_MVDATA(dmv_x, dmv_y); | GET_MVDATA(dmv_x, dmv_y); | ||||
| if (s->mb_intra) { | if (s->mb_intra) { | ||||
| @@ -2324,7 +2323,6 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| VC1_TTMB_VLC_BITS, 2); | VC1_TTMB_VLC_BITS, 2); | ||||
| if(!s->mb_intra) vc1_mc_1mv(v, 0); | if(!s->mb_intra) vc1_mc_1mv(v, 0); | ||||
| dst_idx = 0; | dst_idx = 0; | ||||
| idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm]; | |||||
| for (i=0; i<6; i++) | for (i=0; i<6; i++) | ||||
| { | { | ||||
| s->dc_val[0][s->block_index[i]] = 0; | s->dc_val[0][s->block_index[i]] = 0; | ||||
| @@ -2342,9 +2340,9 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset); | vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset); | ||||
| if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; | if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; | ||||
| idct8x8_fn(s->dest[dst_idx] + off, | |||||
| i & 4 ? s->uvlinesize : s->linesize, | |||||
| s->block[i]); | |||||
| v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); | |||||
| if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; | |||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | |||||
| if(v->pq >= 9 && v->overlap) { | if(v->pq >= 9 && v->overlap) { | ||||
| if(v->c_avail) | if(v->c_avail) | ||||
| v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | ||||
| @@ -2380,7 +2378,6 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| { | { | ||||
| int intra_count = 0, coded_inter = 0; | int intra_count = 0, coded_inter = 0; | ||||
| int is_intra[6], is_coded[6]; | int is_intra[6], is_coded[6]; | ||||
| vc1_idct_func idct8x8_fn; | |||||
| /* Get CBPCY */ | /* Get CBPCY */ | ||||
| cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2); | cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2); | ||||
| for (i=0; i<6; i++) | for (i=0; i<6; i++) | ||||
| @@ -2431,7 +2428,6 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| } | } | ||||
| if (!v->ttmbf && coded_inter) | if (!v->ttmbf && coded_inter) | ||||
| ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2); | ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2); | ||||
| idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm]; | |||||
| for (i=0; i<6; i++) | for (i=0; i<6; i++) | ||||
| { | { | ||||
| dst_idx += i >> 2; | dst_idx += i >> 2; | ||||
| @@ -2447,9 +2443,9 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset); | vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset); | ||||
| if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; | if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; | ||||
| idct8x8_fn(s->dest[dst_idx] + off, | |||||
| (i&4)?s->uvlinesize:s->linesize, | |||||
| s->block[i]); | |||||
| v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); | |||||
| if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; | |||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize); | |||||
| if(v->pq >= 9 && v->overlap) { | if(v->pq >= 9 && v->overlap) { | ||||
| if(v->c_avail) | if(v->c_avail) | ||||
| v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | ||||
| @@ -2497,7 +2493,7 @@ static void vc1_decode_b_mb(VC1Context *v) | |||||
| { | { | ||||
| MpegEncContext *s = &v->s; | MpegEncContext *s = &v->s; | ||||
| GetBitContext *gb = &s->gb; | GetBitContext *gb = &s->gb; | ||||
| int i; | |||||
| int i, j; | |||||
| int mb_pos = s->mb_x + s->mb_y * s->mb_stride; | int mb_pos = s->mb_x + s->mb_y * s->mb_stride; | ||||
| int cbp = 0; /* cbp decoding stuff */ | int cbp = 0; /* cbp decoding stuff */ | ||||
| int mqdiff, mquant; /* MB quantization */ | int mqdiff, mquant; /* MB quantization */ | ||||
| @@ -2510,7 +2506,6 @@ static void vc1_decode_b_mb(VC1Context *v) | |||||
| int skipped, direct; | int skipped, direct; | ||||
| int dmv_x[2], dmv_y[2]; | int dmv_x[2], dmv_y[2]; | ||||
| int bmvtype = BMV_TYPE_BACKWARD; | int bmvtype = BMV_TYPE_BACKWARD; | ||||
| vc1_idct_func idct8x8_fn; | |||||
| mquant = v->pq; /* Loosy initialization */ | mquant = v->pq; /* Loosy initialization */ | ||||
| s->mb_intra = 0; | s->mb_intra = 0; | ||||
| @@ -2608,7 +2603,6 @@ static void vc1_decode_b_mb(VC1Context *v) | |||||
| } | } | ||||
| } | } | ||||
| dst_idx = 0; | dst_idx = 0; | ||||
| idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm]; | |||||
| for (i=0; i<6; i++) | for (i=0; i<6; i++) | ||||
| { | { | ||||
| s->dc_val[0][s->block_index[i]] = 0; | s->dc_val[0][s->block_index[i]] = 0; | ||||
| @@ -2626,9 +2620,9 @@ static void vc1_decode_b_mb(VC1Context *v) | |||||
| vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset); | vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset); | ||||
| if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; | if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; | ||||
| idct8x8_fn(s->dest[dst_idx] + off, | |||||
| i & 4 ? s->uvlinesize : s->linesize, | |||||
| s->block[i]); | |||||
| v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); | |||||
| if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; | |||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | |||||
| } else if(val) { | } else if(val) { | ||||
| vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), NULL); | vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), NULL); | ||||
| if(!v->ttmbf && ttmb < 8) ttmb = -1; | if(!v->ttmbf && ttmb < 8) ttmb = -1; | ||||
| @@ -2641,12 +2635,11 @@ static void vc1_decode_b_mb(VC1Context *v) | |||||
| */ | */ | ||||
| static void vc1_decode_i_blocks(VC1Context *v) | static void vc1_decode_i_blocks(VC1Context *v) | ||||
| { | { | ||||
| int k; | |||||
| int k, j; | |||||
| MpegEncContext *s = &v->s; | MpegEncContext *s = &v->s; | ||||
| int cbp, val; | int cbp, val; | ||||
| uint8_t *coded_val; | uint8_t *coded_val; | ||||
| int mb_pos; | int mb_pos; | ||||
| vc1_idct_func idct8x8_fn; | |||||
| /* select codingmode used for VLC tables selection */ | /* select codingmode used for VLC tables selection */ | ||||
| switch(v->y_ac_table_index){ | switch(v->y_ac_table_index){ | ||||
| @@ -2681,10 +2674,6 @@ static void vc1_decode_i_blocks(VC1Context *v) | |||||
| s->mb_x = s->mb_y = 0; | s->mb_x = s->mb_y = 0; | ||||
| s->mb_intra = 1; | s->mb_intra = 1; | ||||
| s->first_slice_line = 1; | s->first_slice_line = 1; | ||||
| if(v->pq >= 9 && v->overlap) { | |||||
| idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm]; | |||||
| } else | |||||
| idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[!!v->rangeredfrm]; | |||||
| for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { | for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { | ||||
| s->mb_x = 0; | s->mb_x = 0; | ||||
| ff_init_block_index(s); | ff_init_block_index(s); | ||||
| @@ -2721,9 +2710,14 @@ static void vc1_decode_i_blocks(VC1Context *v) | |||||
| vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2); | vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2); | ||||
| if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue; | if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue; | ||||
| idct8x8_fn(dst[k], | |||||
| k & 4 ? s->uvlinesize : s->linesize, | |||||
| s->block[k]); | |||||
| v->vc1dsp.vc1_inv_trans_8x8(s->block[k]); | |||||
| if(v->pq >= 9 && v->overlap) { | |||||
| if (v->rangeredfrm) for(j = 0; j < 64; j++) s->block[k][j] <<= 1; | |||||
| s->dsp.put_signed_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize); | |||||
| } else { | |||||
| if (v->rangeredfrm) for(j = 0; j < 64; j++) s->block[k][j] = (s->block[k][j] - 64) << 1; | |||||
| s->dsp.put_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize); | |||||
| } | |||||
| } | } | ||||
| if(v->pq >= 9 && v->overlap) { | if(v->pq >= 9 && v->overlap) { | ||||
| @@ -2781,7 +2775,6 @@ static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end) | |||||
| int mqdiff; | int mqdiff; | ||||
| int overlap; | int overlap; | ||||
| GetBitContext *gb = &s->gb; | GetBitContext *gb = &s->gb; | ||||
| vc1_idct_func idct8x8_fn; | |||||
| /* select codingmode used for VLC tables selection */ | /* select codingmode used for VLC tables selection */ | ||||
| switch(v->y_ac_table_index){ | switch(v->y_ac_table_index){ | ||||
| @@ -2819,7 +2812,6 @@ static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end) | |||||
| memset(&s->coded_block[s->block_index[0]-s->b8_stride], 0, | memset(&s->coded_block[s->block_index[0]-s->b8_stride], 0, | ||||
| s->b8_stride * sizeof(*s->coded_block)); | s->b8_stride * sizeof(*s->coded_block)); | ||||
| } | } | ||||
| idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[0]; | |||||
| for(; s->mb_y < mby_end; s->mb_y++) { | for(; s->mb_y < mby_end; s->mb_y++) { | ||||
| s->mb_x = 0; | s->mb_x = 0; | ||||
| ff_init_block_index(s); | ff_init_block_index(s); | ||||
| @@ -2876,9 +2868,9 @@ static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end) | |||||
| vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant); | vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant); | ||||
| if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue; | if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue; | ||||
| idct8x8_fn(dst[k], | |||||
| k & 4 ? s->uvlinesize : s->linesize, | |||||
| s->block[k]); | |||||
| v->vc1dsp.vc1_inv_trans_8x8(s->block[k]); | |||||
| s->dsp.put_signed_pixels_clamped(s->block[k], dst[k], | |||||
| k & 4 ? s->uvlinesize : s->linesize); | |||||
| } | } | ||||
| if(overlap) { | if(overlap) { | ||||
| @@ -199,7 +199,7 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| } | } | ||||
| } | } | ||||
| static av_always_inline void vc1_inv_trans_8x8_c(DCTELEM block[64], int shl, int sub) | |||||
| static void vc1_inv_trans_8x8_c(DCTELEM block[64]) | |||||
| { | { | ||||
| int i; | int i; | ||||
| register int t1,t2,t3,t4,t5,t6,t7,t8; | register int t1,t2,t3,t4,t5,t6,t7,t8; | ||||
| @@ -254,50 +254,20 @@ static av_always_inline void vc1_inv_trans_8x8_c(DCTELEM block[64], int shl, int | |||||
| t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | ||||
| t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | ||||
| dst[ 0] = (((t5 + t1 ) >> 7) - sub) << shl; | |||||
| dst[ 8] = (((t6 + t2 ) >> 7) - sub) << shl; | |||||
| dst[16] = (((t7 + t3 ) >> 7) - sub) << shl; | |||||
| dst[24] = (((t8 + t4 ) >> 7) - sub) << shl; | |||||
| dst[32] = (((t8 - t4 + 1) >> 7) - sub) << shl; | |||||
| dst[40] = (((t7 - t3 + 1) >> 7) - sub) << shl; | |||||
| dst[48] = (((t6 - t2 + 1) >> 7) - sub) << shl; | |||||
| dst[56] = (((t5 - t1 + 1) >> 7) - sub) << shl; | |||||
| dst[ 0] = (t5 + t1) >> 7; | |||||
| dst[ 8] = (t6 + t2) >> 7; | |||||
| dst[16] = (t7 + t3) >> 7; | |||||
| dst[24] = (t8 + t4) >> 7; | |||||
| dst[32] = (t8 - t4 + 1) >> 7; | |||||
| dst[40] = (t7 - t3 + 1) >> 7; | |||||
| dst[48] = (t6 - t2 + 1) >> 7; | |||||
| dst[56] = (t5 - t1 + 1) >> 7; | |||||
| src++; | src++; | ||||
| dst++; | dst++; | ||||
| } | } | ||||
| } | } | ||||
| static void vc1_inv_trans_8x8_add_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | |||||
| vc1_inv_trans_8x8_c(block, 0, 0); | |||||
| ff_add_pixels_clamped_c(block, dest, linesize); | |||||
| } | |||||
| static void vc1_inv_trans_8x8_put_signed_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | |||||
| vc1_inv_trans_8x8_c(block, 0, 0); | |||||
| ff_put_signed_pixels_clamped_c(block, dest, linesize); | |||||
| } | |||||
| static void vc1_inv_trans_8x8_put_signed_rangered_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | |||||
| vc1_inv_trans_8x8_c(block, 1, 0); | |||||
| ff_put_signed_pixels_clamped_c(block, dest, linesize); | |||||
| } | |||||
| static void vc1_inv_trans_8x8_put_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | |||||
| vc1_inv_trans_8x8_c(block, 0, 0); | |||||
| ff_put_pixels_clamped_c(block, dest, linesize); | |||||
| } | |||||
| static void vc1_inv_trans_8x8_put_rangered_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
| { | |||||
| vc1_inv_trans_8x8_c(block, 1, 64); | |||||
| ff_put_pixels_clamped_c(block, dest, linesize); | |||||
| } | |||||
| /** Do inverse transform on 8x4 part of block | /** Do inverse transform on 8x4 part of block | ||||
| */ | */ | ||||
| static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | ||||
| @@ -692,11 +662,7 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a | |||||
| } | } | ||||
| av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) { | av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) { | ||||
| dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_c; | |||||
| dsp->vc1_inv_trans_8x8_put_signed[0] = vc1_inv_trans_8x8_put_signed_c; | |||||
| dsp->vc1_inv_trans_8x8_put_signed[1] = vc1_inv_trans_8x8_put_signed_rangered_c; | |||||
| dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_c; | |||||
| dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_rangered_c; | |||||
| dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; | |||||
| dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; | dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; | ||||
| dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; | dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; | ||||
| dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; | dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; | ||||
| @@ -30,13 +30,9 @@ | |||||
| #include "dsputil.h" | #include "dsputil.h" | ||||
| typedef void (*vc1_idct_func)(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| typedef struct VC1DSPContext { | typedef struct VC1DSPContext { | ||||
| /* vc1 functions */ | /* vc1 functions */ | ||||
| vc1_idct_func vc1_inv_trans_8x8_add; | |||||
| vc1_idct_func vc1_inv_trans_8x8_put_signed[2]; | |||||
| vc1_idct_func vc1_inv_trans_8x8_put[2]; | |||||
| void (*vc1_inv_trans_8x8)(DCTELEM *b); | |||||
| void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); | void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); | ||||
| void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); | void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); | ||||
| void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); | void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); | ||||