Dequantizers skip trailing zeros; the msmpeg4 non-intra decoder has its dequantizer "built in" now. Originally committed as revision 260 to svn://svn.ffmpeg.org/ffmpeg/trunk tags/v0.5
| @@ -83,9 +83,24 @@ static UINT8 simple_mmx_permutation[64]={ | |||
| 0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| /* used to skip zeros at the end */ | |||
| UINT8 zigzag_end[64]; | |||
| UINT8 permutation[64]; | |||
| //UINT8 invPermutation[64]; | |||
| static void build_zigzag_end() | |||
| { | |||
| int lastIndex; | |||
| int lastIndexAfterPerm=0; | |||
| for(lastIndex=0; lastIndex<64; lastIndex++) | |||
| { | |||
| if(zigzag_direct[lastIndex] > lastIndexAfterPerm) | |||
| lastIndexAfterPerm= zigzag_direct[lastIndex]; | |||
| zigzag_end[lastIndex]= lastIndexAfterPerm + 1; | |||
| } | |||
| } | |||
| void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size) | |||
| { | |||
| DCTELEM *p; | |||
| @@ -509,4 +524,6 @@ void dsputil_init(void) | |||
| block_permute(default_intra_matrix); | |||
| block_permute(default_non_intra_matrix); | |||
| } | |||
| build_zigzag_end(); | |||
| } | |||
| @@ -17,11 +17,14 @@ | |||
| * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |||
| * | |||
| * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru> | |||
| * h263 dequantizer by Michael Niedermayer <michaelni@gmx.at> | |||
| */ | |||
| #include "../dsputil.h" | |||
| #include "../mpegvideo.h" | |||
| extern UINT8 zigzag_end[64]; | |||
| #if 0 | |||
| /* XXX: GL: I don't understand why this function needs optimization | |||
| @@ -69,8 +72,8 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x000 | |||
| static void dct_unquantize_h263_mmx(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale) | |||
| { | |||
| int i, level, qmul, qadd; | |||
| int i, level, qmul, qadd, nCoeffs; | |||
| qmul = s->qscale << 1; | |||
| qadd = (s->qscale - 1) | 1; | |||
| @@ -91,10 +94,12 @@ static void dct_unquantize_h263_mmx(MpegEncContext *s, | |||
| block[i] = level; | |||
| } | |||
| } | |||
| nCoeffs=64; | |||
| } else { | |||
| i = 0; | |||
| nCoeffs= zigzag_end[ s->block_last_index[n] ]; | |||
| } | |||
| //printf("%d %d ", qmul, qadd); | |||
| asm volatile( | |||
| "movd %1, %%mm6 \n\t" //qmul | |||
| "packssdw %%mm6, %%mm6 \n\t" | |||
| @@ -138,9 +143,8 @@ asm volatile( | |||
| "movq %%mm1, 8(%0, %3) \n\t" | |||
| "addl $16, %3 \n\t" | |||
| "cmpl $128, %3 \n\t" | |||
| "jb 1b \n\t" | |||
| ::"r" (block), "g"(qmul), "g" (qadd), "r" (2*i) | |||
| "js 1b \n\t" | |||
| ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs)) | |||
| : "memory" | |||
| ); | |||
| } | |||
| @@ -178,17 +182,22 @@ asm volatile( | |||
| static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale) | |||
| { | |||
| int i, level; | |||
| int i, level, nCoeffs; | |||
| const UINT16 *quant_matrix; | |||
| if(s->alternate_scan) nCoeffs= 64; | |||
| else nCoeffs= nCoeffs= zigzag_end[ s->block_last_index[n] ]; | |||
| if (s->mb_intra) { | |||
| if (n < 4) | |||
| block[0] = block[0] * s->y_dc_scale; | |||
| else | |||
| block[0] = block[0] * s->c_dc_scale; | |||
| if (s->out_format == FMT_H263) { | |||
| /* isn't used anymore (we have had an h263 unquantizer for some time) | |||
| if (s->out_format == FMT_H263) { | |||
| i = 1; | |||
| goto unquant_even; | |||
| } | |||
| }*/ | |||
| /* XXX: only mpeg1 */ | |||
| quant_matrix = s->intra_matrix; | |||
| i=1; | |||
| @@ -214,7 +223,7 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, | |||
| "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */ | |||
| "pxor %%mm6, %%mm6\n\t" | |||
| ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory"); | |||
| for(;i<64;i+=4) { | |||
| for(;i<nCoeffs;i+=4) { | |||
| __asm __volatile( | |||
| "movq %1, %%mm0\n\t" | |||
| "movq %%mm7, %%mm1\n\t" | |||
| @@ -258,7 +267,6 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, | |||
| } | |||
| i++; | |||
| } | |||
| asm volatile( | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $15, %%mm7 \n\t" | |||
| @@ -307,9 +315,8 @@ asm volatile( | |||
| "movq %%mm5, 8(%0, %3) \n\t" | |||
| "addl $16, %3 \n\t" | |||
| "cmpl $128, %3 \n\t" | |||
| "jb 1b \n\t" | |||
| ::"r" (block), "r"(quant_matrix), "g" (qscale), "r" (2*i) | |||
| "js 1b \n\t" | |||
| ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (2*(i-nCoeffs)) | |||
| : "memory" | |||
| ); | |||
| } | |||
| @@ -68,6 +68,8 @@ static UINT8 h263_chroma_roundtab[16] = { | |||
| /* default motion estimation */ | |||
| int motion_estimation_method = ME_LOG; | |||
| extern UINT8 zigzag_end[64]; | |||
| /* XXX: should use variable shift ? */ | |||
| #define QMAT_SHIFT_MMX 19 | |||
| #define QMAT_SHIFT 25 | |||
| @@ -674,7 +676,8 @@ static inline void add_dct(MpegEncContext *s, | |||
| { | |||
| if (s->block_last_index[i] >= 0) { | |||
| if (!s->mpeg2) | |||
| s->dct_unquantize(s, block, i, s->qscale); | |||
| if(s->encoding || s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MSMPEG4) | |||
| s->dct_unquantize(s, block, i, s->qscale); | |||
| ff_idct (block); | |||
| add_pixels_clamped(block, dest, line_size); | |||
| } | |||
| @@ -1206,9 +1209,12 @@ static int dct_quantize_mmx(MpegEncContext *s, | |||
| static void dct_unquantize_mpeg1_c(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale) | |||
| { | |||
| int i, level; | |||
| int i, level, nCoeffs; | |||
| const UINT16 *quant_matrix; | |||
| if(s->alternate_scan) nCoeffs= 64; | |||
| else nCoeffs= s->block_last_index[n]+1; | |||
| if (s->mb_intra) { | |||
| if (n < 4) | |||
| block[0] = block[0] * s->y_dc_scale; | |||
| @@ -1216,47 +1222,49 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s, | |||
| block[0] = block[0] * s->c_dc_scale; | |||
| /* XXX: only mpeg1 */ | |||
| quant_matrix = s->intra_matrix; | |||
| for(i=1;i<64;i++) { | |||
| level = block[i]; | |||
| for(i=1;i<nCoeffs;i++) { | |||
| int j= zigzag_direct[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| level = -level; | |||
| level = (int)(level * qscale * quant_matrix[i]) >> 3; | |||
| level = (int)(level * qscale * quant_matrix[j]) >> 3; | |||
| level = (level - 1) | 1; | |||
| level = -level; | |||
| } else { | |||
| level = (int)(level * qscale * quant_matrix[i]) >> 3; | |||
| level = (int)(level * qscale * quant_matrix[j]) >> 3; | |||
| level = (level - 1) | 1; | |||
| } | |||
| #ifdef PARANOID | |||
| if (level < -2048 || level > 2047) | |||
| fprintf(stderr, "unquant error %d %d\n", i, level); | |||
| #endif | |||
| block[i] = level; | |||
| block[j] = level; | |||
| } | |||
| } | |||
| } else { | |||
| i = 0; | |||
| quant_matrix = s->non_intra_matrix; | |||
| for(;i<64;i++) { | |||
| level = block[i]; | |||
| for(i=1;i<nCoeffs;i++) { | |||
| int j= zigzag_direct[i]; | |||
| level = block[j]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| level = -level; | |||
| level = (((level << 1) + 1) * qscale * | |||
| ((int) (quant_matrix[i]))) >> 4; | |||
| ((int) (quant_matrix[j]))) >> 4; | |||
| level = (level - 1) | 1; | |||
| level = -level; | |||
| } else { | |||
| level = (((level << 1) + 1) * qscale * | |||
| ((int) (quant_matrix[i]))) >> 4; | |||
| ((int) (quant_matrix[j]))) >> 4; | |||
| level = (level - 1) | 1; | |||
| } | |||
| #ifdef PARANOID | |||
| if (level < -2048 || level > 2047) | |||
| fprintf(stderr, "unquant error %d %d\n", i, level); | |||
| #endif | |||
| block[i] = level; | |||
| block[j] = level; | |||
| } | |||
| } | |||
| } | |||
| @@ -1266,6 +1274,7 @@ static void dct_unquantize_h263_c(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale) | |||
| { | |||
| int i, level, qmul, qadd; | |||
| int nCoeffs; | |||
| if (s->mb_intra) { | |||
| if (n < 4) | |||
| @@ -1273,14 +1282,16 @@ static void dct_unquantize_h263_c(MpegEncContext *s, | |||
| else | |||
| block[0] = block[0] * s->c_dc_scale; | |||
| i = 1; | |||
| nCoeffs= 64; //does not always use zigzag table | |||
| } else { | |||
| i = 0; | |||
| nCoeffs= zigzag_end[ s->block_last_index[n] ]; | |||
| } | |||
| qmul = s->qscale << 1; | |||
| qadd = (s->qscale - 1) | 1; | |||
| for(;i<64;i++) { | |||
| for(;i<nCoeffs;i++) { | |||
| level = block[i]; | |||
| if (level) { | |||
| if (level < 0) { | |||
| @@ -630,6 +630,7 @@ static int decode012(GetBitContext *gb) | |||
| int msmpeg4_decode_picture_header(MpegEncContext * s) | |||
| { | |||
| int code; | |||
| static int weirdAl=0; | |||
| s->pict_type = get_bits(&s->gb, 2) + 1; | |||
| if (s->pict_type != I_TYPE && | |||
| @@ -642,6 +643,7 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) | |||
| code = get_bits(&s->gb, 5); | |||
| /* 0x17: one slice, 0x18: three slices */ | |||
| /* XXX: implement it */ | |||
| //printf("%d %d %d\n", code, s->slice_height, s->first_slice_line); | |||
| if (code < 0x17) | |||
| return -1; | |||
| s->slice_height = s->mb_height / (code - 0x16); | |||
| @@ -650,6 +652,11 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) | |||
| s->dc_table_index = get_bits1(&s->gb); | |||
| s->no_rounding = 1; | |||
| /* printf(" %d %d %d %d \n", | |||
| s->qscale, | |||
| s->rl_chroma_table_index, | |||
| s->rl_table_index, | |||
| s->dc_table_index);*/ | |||
| } else { | |||
| s->use_skip_mb_code = get_bits1(&s->gb); | |||
| @@ -659,7 +666,16 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) | |||
| s->dc_table_index = get_bits1(&s->gb); | |||
| s->mv_table_index = get_bits1(&s->gb); | |||
| s->no_rounding ^= 1; | |||
| /* printf(" %d %d %d %d %d \n", | |||
| s->use_skip_mb_code, | |||
| s->rl_table_index, | |||
| s->rl_chroma_table_index, | |||
| s->dc_table_index, | |||
| s->mv_table_index);*/ | |||
| if(weirdAl) | |||
| s->no_rounding = 0; | |||
| else | |||
| s->no_rounding ^= 1; | |||
| } | |||
| #ifdef DEBUG | |||
| printf("*****frame %d:\n", frame_count++); | |||
| @@ -785,8 +801,12 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| int dc_pred_dir; | |||
| RLTable *rl; | |||
| const UINT8 *scan_table; | |||
| int qmul, qadd; | |||
| if (s->mb_intra) { | |||
| qmul=1; | |||
| qadd=0; | |||
| /* DC coef */ | |||
| set_stat(ST_DC); | |||
| level = msmpeg4_decode_dc(s, n, &dc_pred_dir); | |||
| @@ -798,6 +818,7 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| } else { | |||
| rl = &rl_table[3 + s->rl_chroma_table_index]; | |||
| } | |||
| run_diff = 0; | |||
| i = 1; | |||
| if (!coded) { | |||
| @@ -813,6 +834,8 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| } | |||
| set_stat(ST_INTRA_AC); | |||
| } else { | |||
| qmul = s->qscale << 1; | |||
| qadd = (s->qscale - 1) | 1; | |||
| i = 0; | |||
| rl = &rl_table[3 + s->rl_table_index]; | |||
| run_diff = 1; | |||
| @@ -837,13 +860,15 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| run = get_bits(&s->gb, 6); | |||
| level = get_bits(&s->gb, 8); | |||
| level = (level << 24) >> 24; /* sign extend */ | |||
| if(level>0) level= level * qmul + qadd; | |||
| else level= level * qmul - qadd; | |||
| } else { | |||
| /* second escape */ | |||
| code = get_vlc(&s->gb, &rl->vlc); | |||
| if (code < 0 || code >= rl->n) | |||
| return -1; | |||
| run = rl->table_run[code]; | |||
| level = rl->table_level[code]; | |||
| level = rl->table_level[code] * qmul + qadd; | |||
| last = code >= rl->last; | |||
| run += rl->max_run[last][level] + run_diff; | |||
| if (get_bits1(&s->gb)) | |||
| @@ -858,12 +883,13 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, | |||
| level = rl->table_level[code]; | |||
| last = code >= rl->last; | |||
| level += rl->max_level[last][run]; | |||
| level= level * qmul + qadd; | |||
| if (get_bits1(&s->gb)) | |||
| level = -level; | |||
| } | |||
| } else { | |||
| run = rl->table_run[code]; | |||
| level = rl->table_level[code]; | |||
| level = rl->table_level[code] * qmul + qadd; | |||
| last = code >= rl->last; | |||
| if (get_bits1(&s->gb)) | |||
| level = -level; | |||