Regression in r26336-7. Originally committed as revision 26341 to svn://svn.ffmpeg.org/ffmpeg/trunk
tags/n0.8
| @@ -65,7 +65,7 @@ void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, | |||||
| void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | ||||
| void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul); | void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul); | ||||
| void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp); | |||||
| void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); | |||||
| void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); | void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); | ||||
| void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, | void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, | ||||
| @@ -1203,8 +1203,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ | |||||
| } | } | ||||
| }else{ | }else{ | ||||
| h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); | h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); | ||||
| if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){ | |||||
| if(is_h264){ | |||||
| if(is_h264){ | |||||
| if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){ | |||||
| if(!transform_bypass) | if(!transform_bypass) | ||||
| h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]); | h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]); | ||||
| else{ | else{ | ||||
| @@ -1213,9 +1213,9 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ | |||||
| for(i = 0; i < 16; i++) | for(i = 0; i < 16; i++) | ||||
| h->mb[dc_mapping[i]] = h->mb_luma_dc[i]; | h->mb[dc_mapping[i]] = h->mb_luma_dc[i]; | ||||
| } | } | ||||
| }else | |||||
| ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale); | |||||
| } | |||||
| } | |||||
| }else | |||||
| ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); | |||||
| } | } | ||||
| if(h->deblocking_filter) | if(h->deblocking_filter) | ||||
| xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); | xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); | ||||
| @@ -1283,15 +1283,17 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ | |||||
| } | } | ||||
| } | } | ||||
| }else{ | }else{ | ||||
| if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) | |||||
| chroma_dc_dequant_idct_c(h->mb + 16*16 , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); | |||||
| if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) | |||||
| chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); | |||||
| if(is_h264){ | if(is_h264){ | ||||
| if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) | |||||
| chroma_dc_dequant_idct_c(h->mb + 16*16 , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); | |||||
| if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) | |||||
| chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); | |||||
| h->h264dsp.h264_idct_add8(dest, block_offset, | h->h264dsp.h264_idct_add8(dest, block_offset, | ||||
| h->mb, uvlinesize, | h->mb, uvlinesize, | ||||
| h->non_zero_count_cache); | h->non_zero_count_cache); | ||||
| }else{ | }else{ | ||||
| chroma_dc_dequant_idct_c(h->mb + 16*16 , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); | |||||
| chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); | |||||
| for(i=16; i<16+8; i++){ | for(i=16; i<16+8; i++){ | ||||
| if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | ||||
| uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; | uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; | ||||
| @@ -126,19 +126,21 @@ static const uint32_t svq3_dequant_coeff[32] = { | |||||
| }; | }; | ||||
| void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp) | |||||
| void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp) | |||||
| { | { | ||||
| const int qmul = svq3_dequant_coeff[qp]; | const int qmul = svq3_dequant_coeff[qp]; | ||||
| #define stride 16 | #define stride 16 | ||||
| int i; | int i; | ||||
| int temp[16]; | int temp[16]; | ||||
| static const int x_offset[4] = {0, 1*stride, 4* stride, 5*stride}; | static const int x_offset[4] = {0, 1*stride, 4* stride, 5*stride}; | ||||
| static const int y_offset[4] = {0, 2*stride, 8* stride, 10*stride}; | |||||
| for (i = 0; i < 4; i++){ | for (i = 0; i < 4; i++){ | ||||
| const int z0= 13*(input[4*i+0] + input[4*i+1]); | |||||
| const int z1= 13*(input[4*i+0] - input[4*i+1]); | |||||
| const int z2= 7* input[4*i+2] - 17*input[4*i+3]; | |||||
| const int z3= 17* input[4*i+2] + 7*input[4*i+3]; | |||||
| const int offset = y_offset[i]; | |||||
| const int z0 = 13*(block[offset+stride*0] + block[offset+stride*4]); | |||||
| const int z1 = 13*(block[offset+stride*0] - block[offset+stride*4]); | |||||
| const int z2 = 7* block[offset+stride*1] - 17*block[offset+stride*5]; | |||||
| const int z3 = 17* block[offset+stride*1] + 7*block[offset+stride*5]; | |||||
| temp[4*i+0] = z0+z3; | temp[4*i+0] = z0+z3; | ||||
| temp[4*i+1] = z1+z2; | temp[4*i+1] = z1+z2; | ||||
| @@ -153,10 +155,10 @@ void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp) | |||||
| const int z2 = 7* temp[4*1+i] - 17*temp[4*3+i]; | const int z2 = 7* temp[4*1+i] - 17*temp[4*3+i]; | ||||
| const int z3 = 17* temp[4*1+i] + 7*temp[4*3+i]; | const int z3 = 17* temp[4*1+i] + 7*temp[4*3+i]; | ||||
| output[stride*0 +offset] = ((z0 + z3)*qmul + 0x80000) >> 20; | |||||
| output[stride*2 +offset] = ((z1 + z2)*qmul + 0x80000) >> 20; | |||||
| output[stride*8 +offset] = ((z1 - z2)*qmul + 0x80000) >> 20; | |||||
| output[stride*10+offset] = ((z0 - z3)*qmul + 0x80000) >> 20; | |||||
| block[stride*0 +offset] = ((z0 + z3)*qmul + 0x80000) >> 20; | |||||
| block[stride*2 +offset] = ((z1 + z2)*qmul + 0x80000) >> 20; | |||||
| block[stride*8 +offset] = ((z1 - z2)*qmul + 0x80000) >> 20; | |||||
| block[stride*10+offset] = ((z0 - z3)*qmul + 0x80000) >> 20; | |||||
| } | } | ||||
| } | } | ||||
| #undef stride | #undef stride | ||||