Fix SVQ3

Fixes a regression introduced in r26336-r26337. Originally committed as revision 26341 to svn://svn.ffmpeg.org/ffmpeg/trunk
14 years ago · 2e18660115
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -65,7 +65,7 @@ void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block,
 void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);

 void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);

 void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1203,8 +1203,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                }
            }else{
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
                    if(is_h264){
                if(is_h264){
                    if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
                        if(!transform_bypass)
                            h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
                        else{
@@ -1213,9 +1213,9 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                            for(i = 0; i < 16; i++)
                                h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
                        }
                    }else
                        ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
                }
                    }
                }else
                    ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
            }
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
@@ -1283,15 +1283,17 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                    }
                }
            }else{
                if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                    chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                    chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                if(is_h264){
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                        chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                }else{
                    chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -126,19 +126,21 @@ static const uint32_t svq3_dequant_coeff[32] = {
 };


 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp)
 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp)
 {
    const int qmul = svq3_dequant_coeff[qp];
 #define stride 16
    int i;
    int temp[16];
    static const int x_offset[4] = {0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4] = {0, 2*stride, 8* stride, 10*stride};

    for (i = 0; i < 4; i++){
        const int z0= 13*(input[4*i+0] +    input[4*i+1]);
        const int z1= 13*(input[4*i+0] -    input[4*i+1]);
        const int z2=  7* input[4*i+2] - 17*input[4*i+3];
        const int z3= 17* input[4*i+2] +  7*input[4*i+3];
        const int offset = y_offset[i];
        const int z0 = 13*(block[offset+stride*0] +    block[offset+stride*4]);
        const int z1 = 13*(block[offset+stride*0] -    block[offset+stride*4]);
        const int z2 =  7* block[offset+stride*1] - 17*block[offset+stride*5];
        const int z3 = 17* block[offset+stride*1] +  7*block[offset+stride*5];

        temp[4*i+0] = z0+z3;
        temp[4*i+1] = z1+z2;
@@ -153,10 +155,10 @@ void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp)
        const int z2 =  7* temp[4*1+i] - 17*temp[4*3+i];
        const int z3 = 17* temp[4*1+i] +  7*temp[4*3+i];

        output[stride*0 +offset] = ((z0 + z3)*qmul + 0x80000) >> 20;
        output[stride*2 +offset] = ((z1 + z2)*qmul + 0x80000) >> 20;
        output[stride*8 +offset] = ((z1 - z2)*qmul + 0x80000) >> 20;
        output[stride*10+offset] = ((z0 - z3)*qmul + 0x80000) >> 20;
        block[stride*0 +offset] = ((z0 + z3)*qmul + 0x80000) >> 20;
        block[stride*2 +offset] = ((z1 + z2)*qmul + 0x80000) >> 20;
        block[stride*8 +offset] = ((z1 - z2)*qmul + 0x80000) >> 20;
        block[stride*10+offset] = ((z0 - z3)*qmul + 0x80000) >> 20;
    }
 }
 #undef stride