Rewrote quantizer; fixed bias (+10% compression/quality for H.263-like codecs); qscale=1 support; MPEG-1 intra frames look far less blocky; added codec_id field. Originally committed as revision 423 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
| @@ -157,6 +157,9 @@ inline void dprintf(const char* fmt,...) {} | |||
| #endif /* HAVE_AV_CONFIG_H */ | |||
| /* ROUNDED_DIV(a,b): integer a/b rounded to the nearest integer, ties away from zero (relies on truncating division); assumes b>0. Both arguments are evaluated more than once, so do not pass expressions with side effects. */ | |||
| #define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b)) | |||
| /* bit output */ | |||
| struct PutBitContext; | |||
| @@ -904,8 +904,26 @@ void h263_encode_init(MpegEncContext *s) | |||
| s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p | |||
| // use fcodes >1 only for mpeg4 & h263 & h263p FIXME | |||
| if(s->h263_plus) s->fcode_tab= umv_fcode_tab; | |||
| else if(s->h263_pred && !s->h263_msmpeg4) s->fcode_tab= fcode_tab; | |||
| switch(s->codec_id){ | |||
| case CODEC_ID_MPEG4: | |||
| s->fcode_tab= fcode_tab; | |||
| s->min_qcoeff= -2048; | |||
| s->max_qcoeff= 2047; | |||
| break; | |||
| case CODEC_ID_H263P: | |||
| s->fcode_tab= umv_fcode_tab; | |||
| s->min_qcoeff= -128; | |||
| s->max_qcoeff= 127; | |||
| break; | |||
| default: //nothing needed default table allready set in mpegvideo.c | |||
| s->min_qcoeff= -128; | |||
| s->max_qcoeff= 127; | |||
| } | |||
| /* h263 type bias */ | |||
| //FIXME mpeg4 mpeg quantizer | |||
| s->intra_quant_bias=0; | |||
| s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x | |||
| } | |||
| static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) | |||
| @@ -2702,8 +2720,8 @@ int mpeg4_decode_picture_header(MpegEncContext * s) | |||
| s->chroma_intra_matrix[i]= v; | |||
| v= ff_mpeg4_default_non_intra_matrix[i]; | |||
| s->non_intra_matrix[i]= v; | |||
| s->chroma_non_intra_matrix[i]= v; | |||
| s->inter_matrix[i]= v; | |||
| s->chroma_inter_matrix[i]= v; | |||
| } | |||
| /* load custom intra matrix */ | |||
| @@ -2725,15 +2743,15 @@ int mpeg4_decode_picture_header(MpegEncContext * s) | |||
| if(v==0) break; | |||
| j= zigzag_direct[i]; | |||
| s->non_intra_matrix[j]= v; | |||
| s->chroma_non_intra_matrix[j]= v; | |||
| s->inter_matrix[j]= v; | |||
| s->chroma_inter_matrix[j]= v; | |||
| } | |||
| /* replicate last value */ | |||
| for(; i<64; i++){ | |||
| j= zigzag_direct[i]; | |||
| s->non_intra_matrix[j]= v; | |||
| s->chroma_non_intra_matrix[j]= v; | |||
| s->inter_matrix[j]= v; | |||
| s->chroma_inter_matrix[j]= v; | |||
| } | |||
| } | |||
| @@ -73,17 +73,13 @@ static int h263_decode_init(AVCodecContext *avctx) | |||
| default: | |||
| return -1; | |||
| } | |||
| s->codec_id= avctx->codec->id; | |||
| /* for h263, we allocate the images after having read the header */ | |||
| if (avctx->codec->id != CODEC_ID_H263 && avctx->codec->id != CODEC_ID_MPEG4) | |||
| if (MPV_common_init(s) < 0) | |||
| return -1; | |||
| /* XXX: suppress this matrix init, only needed because using mpeg1 | |||
| dequantize in mmx case */ | |||
| for(i=0;i<64;i++) | |||
| s->non_intra_matrix[i] = default_non_intra_matrix[i]; | |||
| if (s->h263_msmpeg4) | |||
| msmpeg4_decode_init_vlc(s); | |||
| else | |||
| @@ -251,7 +247,7 @@ static int h263_decode_frame(AVCodecContext *avctx, | |||
| if(msmpeg4_decode_ext_header(s, buf_size) < 0) return -1; | |||
| /* divx 5.01+ bitstream reorder stuff */ | |||
| if(s->h263_pred && s->bitstream_buffer_size==0){ | |||
| if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0){ | |||
| int current_pos= get_bits_count(&s->gb)/8; | |||
| if( buf_size - current_pos > 5 | |||
| && buf_size - current_pos < BITSTREAM_BUFFER_SIZE){ | |||
| @@ -26,8 +26,6 @@ | |||
| #include "../mangle.h" | |||
| extern UINT8 zigzag_end[64]; | |||
| extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w); | |||
| extern int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale); | |||
| extern UINT8 zigzag_direct_noperm[64]; | |||
| extern UINT16 inv_zigzag_direct16[64]; | |||
| @@ -260,7 +258,7 @@ asm volatile( | |||
| block[0]= block0; | |||
| } else { | |||
| quant_matrix = s->non_intra_matrix; | |||
| quant_matrix = s->inter_matrix; | |||
| asm volatile( | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlw $15, %%mm7 \n\t" | |||
| @@ -382,7 +380,7 @@ asm volatile( | |||
| //Note, we don't do mismatch control for intra as errors cannot accumulate | |||
| } else { | |||
| quant_matrix = s->non_intra_matrix; | |||
| quant_matrix = s->inter_matrix; | |||
| asm volatile( | |||
| "pcmpeqw %%mm7, %%mm7 \n\t" | |||
| "psrlq $48, %%mm7 \n\t" | |||
| @@ -33,149 +33,160 @@ | |||
| static int RENAME(dct_quantize)(MpegEncContext *s, | |||
| DCTELEM *block, int n, | |||
| int qscale) | |||
| int qscale, int *overflow) | |||
| { | |||
| int i, level, last_non_zero_p1, q; | |||
| const UINT16 *qmat; | |||
| int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ... | |||
| const UINT16 *qmat, *bias; | |||
| static __align8 INT16 temp_block[64]; | |||
| int minLevel, maxLevel; | |||
| if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){ | |||
| /* mpeg4 */ | |||
| minLevel= -2048; | |||
| maxLevel= 2047; | |||
| }else if(s->out_format==FMT_MPEG1){ | |||
| /* mpeg1 */ | |||
| minLevel= -255; | |||
| maxLevel= 255; | |||
| }else if(s->out_format==FMT_MJPEG){ | |||
| /* (m)jpeg */ | |||
| minLevel= -1023; | |||
| maxLevel= 1023; | |||
| }else{ | |||
| /* h263 / msmpeg4 */ | |||
| minLevel= -128; | |||
| maxLevel= 127; | |||
| } | |||
| av_fdct (block); | |||
| if (s->mb_intra) { | |||
| int dummy; | |||
| if (n < 4) | |||
| q = s->y_dc_scale; | |||
| else | |||
| q = s->c_dc_scale; | |||
| /* note: block[0] is assumed to be positive */ | |||
| #if 1 | |||
| asm volatile ( | |||
| "xorl %%edx, %%edx \n\t" | |||
| "mul %%ecx \n\t" | |||
| : "=d" (temp_block[0]), "=a"(dummy) | |||
| : "a" (block[0] + (q >> 1)), "c" (inverse[q]) | |||
| ); | |||
| asm volatile ( | |||
| "xorl %%edx, %%edx \n\t" | |||
| "mul %%ecx \n\t" | |||
| : "=d" (level), "=a"(dummy) | |||
| : "a" (block[0] + (q >> 1)), "c" (inverse[q]) | |||
| ); | |||
| #else | |||
| asm volatile ( | |||
| "xorl %%edx, %%edx \n\t" | |||
| "divw %%cx \n\t" | |||
| "movzwl %%ax, %%eax \n\t" | |||
| : "=a" (temp_block[0]) | |||
| : "a" (block[0] + (q >> 1)), "c" (q) | |||
| : "%edx" | |||
| ); | |||
| asm volatile ( | |||
| "xorl %%edx, %%edx \n\t" | |||
| "divw %%cx \n\t" | |||
| "movzwl %%ax, %%eax \n\t" | |||
| : "=a" (level) | |||
| : "a" (block[0] + (q >> 1)), "c" (q) | |||
| : "%edx" | |||
| ); | |||
| #endif | |||
| block[0]=0; //avoid fake overflow | |||
| // temp_block[0] = (block[0] + (q >> 1)) / q; | |||
| i = 1; | |||
| last_non_zero_p1 = 1; | |||
| if (s->out_format == FMT_H263) { | |||
| qmat = s->q_non_intra_matrix16; | |||
| } else { | |||
| qmat = s->q_intra_matrix16; | |||
| } | |||
| for(i=1;i<4;i++) { | |||
| level = block[i] * qmat[i]; | |||
| level = level / (1 << (QMAT_SHIFT_MMX - 3)); | |||
| /* XXX: currently, this code is not optimal. the range should be: | |||
| mpeg1: -255..255 | |||
| mpeg2: -2048..2047 | |||
| h263: -128..127 | |||
| mpeg4: -2048..2047 | |||
| */ | |||
| if (level > maxLevel) | |||
| level = maxLevel; | |||
| else if (level < minLevel) | |||
| level = minLevel; | |||
| temp_block[i] = level; | |||
| if(level) | |||
| if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i]; | |||
| block[i]=0; | |||
| } | |||
| bias = s->q_intra_matrix16_bias[qscale]; | |||
| qmat = s->q_intra_matrix16[qscale]; | |||
| } else { | |||
| i = 0; | |||
| last_non_zero_p1 = 0; | |||
| qmat = s->q_non_intra_matrix16; | |||
| bias = s->q_inter_matrix16_bias[qscale]; | |||
| qmat = s->q_inter_matrix16[qscale]; | |||
| } | |||
| asm volatile( /* XXX: small rounding bug, but it shouldnt matter */ | |||
| "movd %3, %%mm3 \n\t" | |||
| SPREADW(%%mm3) | |||
| "movd %4, %%mm4 \n\t" | |||
| SPREADW(%%mm4) | |||
| #ifndef HAVE_MMX2 | |||
| "movd %5, %%mm5 \n\t" | |||
| SPREADW(%%mm5) | |||
| #endif | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "movd %%eax, %%mm2 \n\t" | |||
| SPREADW(%%mm2) | |||
| "movl %6, %%eax \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %%eax), %%mm0 \n\t" | |||
| "movq (%2, %%eax), %%mm1 \n\t" | |||
| "movq %%mm0, %%mm6 \n\t" | |||
| "psraw $15, %%mm6 \n\t" | |||
| "pmulhw %%mm0, %%mm1 \n\t" | |||
| "psubsw %%mm6, %%mm1 \n\t" | |||
| #ifdef HAVE_MMX2 | |||
| "pminsw %%mm3, %%mm1 \n\t" | |||
| "pmaxsw %%mm4, %%mm1 \n\t" | |||
| #else | |||
| "paddsw %%mm3, %%mm1 \n\t" | |||
| "psubusw %%mm4, %%mm1 \n\t" | |||
| "paddsw %%mm5, %%mm1 \n\t" | |||
| #endif | |||
| "movq %%mm1, (%8, %%eax) \n\t" | |||
| "pcmpeqw %%mm7, %%mm1 \n\t" | |||
| "movq (%7, %%eax), %%mm0 \n\t" | |||
| "movq %%mm7, (%1, %%eax) \n\t" | |||
| "pandn %%mm0, %%mm1 \n\t" | |||
| PMAXW(%%mm1, %%mm2) | |||
| "addl $8, %%eax \n\t" | |||
| " js 1b \n\t" | |||
| "movq %%mm2, %%mm0 \n\t" | |||
| "psrlq $32, %%mm2 \n\t" | |||
| PMAXW(%%mm0, %%mm2) | |||
| "movq %%mm2, %%mm0 \n\t" | |||
| "psrlq $16, %%mm2 \n\t" | |||
| PMAXW(%%mm0, %%mm2) | |||
| "movd %%mm2, %%eax \n\t" | |||
| "movzbl %%al, %%eax \n\t" | |||
| : "+a" (last_non_zero_p1) | |||
| : "r" (block+64), "r" (qmat+64), | |||
| #ifdef HAVE_MMX2 | |||
| "m" (maxLevel), "m" (minLevel), "m" (minLevel /* dummy */), "g" (2*i - 128), | |||
| #else | |||
| "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel), "g" (2*i - 128), | |||
| #endif | |||
| "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | |||
| ); | |||
| if(s->out_format == FMT_H263){ | |||
| asm volatile( | |||
| "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 | |||
| SPREADW(%%mm3) | |||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||
| "movq (%2), %%mm5 \n\t" // qmat[0] | |||
| "pxor %%mm6, %%mm6 \n\t" | |||
| "psubw (%3), %%mm6 \n\t" // -bias[0] | |||
| "movl $-128, %%eax \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||
| "movq (%1, %%eax), %%mm0 \n\t" // block[i] | |||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||
| "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 | |||
| "por %%mm0, %%mm4 \n\t" | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||
| "movq %%mm0, (%5, %%eax) \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||
| "movq (%4, %%eax), %%mm1 \n\t" | |||
| "movq %%mm7, (%1, %%eax) \n\t" // 0 | |||
| "pandn %%mm1, %%mm0 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "addl $8, %%eax \n\t" | |||
| " js 1b \n\t" | |||
| "movq %%mm3, %%mm0 \n\t" | |||
| "psrlq $32, %%mm3 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "movq %%mm3, %%mm0 \n\t" | |||
| "psrlq $16, %%mm3 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "movd %%mm3, %%eax \n\t" | |||
| "movzbl %%al, %%eax \n\t" // last_non_zero_p1 | |||
| : "+a" (last_non_zero_p1) | |||
| : "r" (block+64), "r" (qmat), "r" (bias), | |||
| "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | |||
| ); | |||
| // note the asm is split because gcc doesn't like that many operands ... | |||
| asm volatile( | |||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||
| SPREADW(%%mm1) | |||
| "psubusw %%mm1, %%mm4 \n\t" | |||
| "packuswb %%mm4, %%mm4 \n\t" | |||
| "movd %%mm4, %0 \n\t" // *overflow | |||
| : "=g" (*overflow) | |||
| : "g" (s->max_qcoeff) | |||
| ); | |||
| }else{ // FMT_H263 | |||
| asm volatile( | |||
| "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 | |||
| SPREADW(%%mm3) | |||
| "pxor %%mm7, %%mm7 \n\t" // 0 | |||
| "pxor %%mm4, %%mm4 \n\t" // 0 | |||
| "movl $-128, %%eax \n\t" | |||
| ".balign 16 \n\t" | |||
| "1: \n\t" | |||
| "pxor %%mm1, %%mm1 \n\t" // 0 | |||
| "movq (%1, %%eax), %%mm0 \n\t" // block[i] | |||
| "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | |||
| "movq (%3, %%eax), %%mm6 \n\t" // bias[0] | |||
| "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | |||
| "movq (%2, %%eax), %%mm5 \n\t" // qmat[i] | |||
| "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 | |||
| "por %%mm0, %%mm4 \n\t" | |||
| "pxor %%mm1, %%mm0 \n\t" | |||
| "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | |||
| "movq %%mm0, (%5, %%eax) \n\t" | |||
| "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | |||
| "movq (%4, %%eax), %%mm1 \n\t" | |||
| "movq %%mm7, (%1, %%eax) \n\t" // 0 | |||
| "pandn %%mm1, %%mm0 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "addl $8, %%eax \n\t" | |||
| " js 1b \n\t" | |||
| "movq %%mm3, %%mm0 \n\t" | |||
| "psrlq $32, %%mm3 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "movq %%mm3, %%mm0 \n\t" | |||
| "psrlq $16, %%mm3 \n\t" | |||
| PMAXW(%%mm0, %%mm3) | |||
| "movd %%mm3, %%eax \n\t" | |||
| "movzbl %%al, %%eax \n\t" // last_non_zero_p1 | |||
| : "+a" (last_non_zero_p1) | |||
| : "r" (block+64), "r" (qmat+64), "r" (bias+64), | |||
| "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | |||
| ); | |||
| // note the asm is split because gcc doesn't like that many operands ... | |||
| asm volatile( | |||
| "movd %1, %%mm1 \n\t" // max_qcoeff | |||
| SPREADW(%%mm1) | |||
| "psubusw %%mm1, %%mm4 \n\t" | |||
| "packuswb %%mm4, %%mm4 \n\t" | |||
| "movd %%mm4, %0 \n\t" // *overflow | |||
| : "=g" (*overflow) | |||
| : "g" (s->max_qcoeff) | |||
| ); | |||
| } | |||
| if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute | |||
| // last_non_zero_p1=64; | |||
| /* permute for IDCT */ | |||
| asm volatile( | |||
| "movl %0, %%eax \n\t" | |||
| "movl %0, %%eax \n\t" | |||
| "pushl %%ebp \n\t" | |||
| "movl %%esp, " MANGLE(esp_temp) "\n\t" | |||
| "1: \n\t" | |||
| @@ -203,5 +214,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||
| } | |||
| */ | |||
| //block_permute(block); | |||
| return last_non_zero_p1 - 1; | |||
| } | |||
| @@ -160,6 +160,9 @@ int mjpeg_init(MpegEncContext *s) | |||
| m = malloc(sizeof(MJpegContext)); | |||
| if (!m) | |||
| return -1; | |||
| s->min_qcoeff=-1023; | |||
| s->max_qcoeff= 1023; | |||
| /* build all the huffman tables */ | |||
| build_huffman_codes(m->huff_size_dc_luminance, | |||
| @@ -399,8 +399,11 @@ void mpeg1_encode_init(MpegEncContext *s) | |||
| } | |||
| } | |||
| s->mv_penalty= mv_penalty; | |||
| s->fcode_tab= fcode_tab; | |||
| s->min_qcoeff=-255; | |||
| s->max_qcoeff= 255; | |||
| s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x | |||
| s->inter_quant_bias= 0; | |||
| } | |||
| static inline void encode_dc(MpegEncContext *s, int diff, int component) | |||
| @@ -1027,9 +1030,9 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s, | |||
| UINT8 *buf_ptr; | |||
| i = 0; | |||
| if (n < 4) | |||
| matrix = s->non_intra_matrix; | |||
| matrix = s->inter_matrix; | |||
| else | |||
| matrix = s->chroma_non_intra_matrix; | |||
| matrix = s->chroma_inter_matrix; | |||
| /* special case for the first coef. no need to add a second vlc table */ | |||
| SAVE_BITS(&s->gb); | |||
| @@ -1183,6 +1186,7 @@ static int mpeg_decode_init(AVCodecContext *avctx) | |||
| s->buf_ptr = s->buffer; | |||
| s->mpeg_enc_ctx.picture_number = 0; | |||
| s->repeat_field = 0; | |||
| s->mpeg_enc_ctx.codec_id= avctx->codec->id; | |||
| return 0; | |||
| } | |||
| @@ -1292,8 +1296,8 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| s->non_intra_matrix[j] = v; | |||
| s->chroma_non_intra_matrix[j] = v; | |||
| s->inter_matrix[j] = v; | |||
| s->chroma_inter_matrix[j] = v; | |||
| } | |||
| } | |||
| if (get_bits1(&s->gb)) { | |||
| @@ -1307,7 +1311,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| s->chroma_non_intra_matrix[j] = v; | |||
| s->chroma_inter_matrix[j] = v; | |||
| } | |||
| } | |||
| } | |||
| @@ -1386,7 +1390,6 @@ static int mpeg_decode_slice(AVCodecContext *avctx, | |||
| s->mb_x = -1; | |||
| s->mb_y = start_code; | |||
| s->mb_incr = 0; | |||
| /* start frame decoding */ | |||
| if (s->first_slice) { | |||
| s->first_slice = 0; | |||
| @@ -1526,20 +1529,20 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, | |||
| for(i=0;i<64;i++) { | |||
| v = get_bits(&s->gb, 8); | |||
| j = zigzag_direct[i]; | |||
| s->non_intra_matrix[j] = v; | |||
| s->chroma_non_intra_matrix[j] = v; | |||
| s->inter_matrix[j] = v; | |||
| s->chroma_inter_matrix[j] = v; | |||
| } | |||
| #ifdef DEBUG | |||
| dprintf("non intra matrix present\n"); | |||
| for(i=0;i<64;i++) | |||
| dprintf(" %d", s->non_intra_matrix[zigzag_direct[i]]); | |||
| dprintf(" %d", s->inter_matrix[zigzag_direct[i]]); | |||
| printf("\n"); | |||
| #endif | |||
| } else { | |||
| for(i=0;i<64;i++) { | |||
| v = default_non_intra_matrix[i]; | |||
| s->non_intra_matrix[i] = v; | |||
| s->chroma_non_intra_matrix[i] = v; | |||
| s->inter_matrix[i] = v; | |||
| s->chroma_inter_matrix[i] = v; | |||
| } | |||
| } | |||
| @@ -1566,7 +1569,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx, | |||
| dprintf("fill_buffer\n"); | |||
| *data_size = 0; | |||
| /* special case for last picture */ | |||
| if (buf_size == 0) { | |||
| if (s2->picture_number > 0) { | |||
| @@ -1591,7 +1594,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx, | |||
| *data_size = sizeof(AVPicture); | |||
| goto the_end; | |||
| } | |||
| while (buf_ptr < buf_end) { | |||
| buf_start = buf_ptr; | |||
| /* find start next code */ | |||
| @@ -38,9 +38,9 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s, | |||
| static void dct_unquantize_h263_c(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale); | |||
| static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w); | |||
| static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); | |||
| static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); | |||
| int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale)= dct_quantize_c; | |||
| int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow)= dct_quantize_c; | |||
| void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c; | |||
| #define EDGE_WIDTH 16 | |||
| @@ -78,29 +78,38 @@ extern UINT8 zigzag_end[64]; | |||
| /* default motion estimation */ | |||
| int motion_estimation_method = ME_EPZS; | |||
| static void convert_matrix(int *qmat, UINT16 *qmat16, const UINT16 *quant_matrix, int qscale) | |||
| static void convert_matrix(int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64], | |||
| const UINT16 *quant_matrix, int bias) | |||
| { | |||
| int i; | |||
| if (av_fdct == jpeg_fdct_ifast) { | |||
| for(i=0;i<64;i++) { | |||
| /* 16 <= qscale * quant_matrix[i] <= 7905 */ | |||
| /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ | |||
| /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ | |||
| /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ | |||
| qmat[block_permute_op(i)] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / | |||
| (aanscales[i] * qscale * quant_matrix[block_permute_op(i)])); | |||
| } | |||
| } else { | |||
| for(i=0;i<64;i++) { | |||
| /* We can safely suppose that 16 <= quant_matrix[i] <= 255 | |||
| So 16 <= qscale * quant_matrix[i] <= 7905 | |||
| so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 | |||
| so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 | |||
| */ | |||
| qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); | |||
| qmat16[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); | |||
| int qscale; | |||
| for(qscale=1; qscale<32; qscale++){ | |||
| int i; | |||
| if (av_fdct == jpeg_fdct_ifast) { | |||
| for(i=0;i<64;i++) { | |||
| const int j= block_permute_op(i); | |||
| /* 16 <= qscale * quant_matrix[i] <= 7905 */ | |||
| /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ | |||
| /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ | |||
| /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ | |||
| qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / | |||
| (aanscales[i] * qscale * quant_matrix[j])); | |||
| } | |||
| } else { | |||
| for(i=0;i<64;i++) { | |||
| /* We can safely suppose that 16 <= quant_matrix[i] <= 255 | |||
| So 16 <= qscale * quant_matrix[i] <= 7905 | |||
| so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 | |||
| so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 | |||
| */ | |||
| qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); | |||
| qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); | |||
| if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; | |||
| qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -388,7 +397,8 @@ int MPV_encode_init(AVCodecContext *avctx) | |||
| s->max_b_frames= avctx->max_b_frames; | |||
| s->rc_strategy= avctx->rc_strategy; | |||
| s->b_frame_strategy= avctx->b_frame_strategy; | |||
| s->codec_id= avctx->codec->id; | |||
| if (s->gop_size <= 1) { | |||
| s->intra_only = 1; | |||
| s->gop_size = 12; | |||
| @@ -523,8 +533,21 @@ int MPV_encode_init(AVCodecContext *avctx) | |||
| /* init default q matrix */ | |||
| for(i=0;i<64;i++) { | |||
| s->intra_matrix[i] = default_intra_matrix[i]; | |||
| s->non_intra_matrix[i] = default_non_intra_matrix[i]; | |||
| if(s->out_format == FMT_H263) | |||
| s->intra_matrix[i] = default_non_intra_matrix[i]; | |||
| else | |||
| s->intra_matrix[i] = default_intra_matrix[i]; | |||
| s->inter_matrix[i] = default_non_intra_matrix[i]; | |||
| } | |||
| /* precompute matrix */ | |||
| /* for mjpeg, we do include qscale in the matrix */ | |||
| if (s->out_format != FMT_MJPEG) { | |||
| convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, | |||
| s->intra_matrix, s->intra_quant_bias); | |||
| convert_matrix(s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, | |||
| s->inter_matrix, s->inter_quant_bias); | |||
| } | |||
| if(ff_rate_control_init(s) < 0) | |||
| @@ -1307,6 +1330,21 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||
| emms_c(); //FIXME remove | |||
| } | |||
| /* Clamp the quantized coefficients of one block to [s->min_qcoeff, s->max_qcoeff]. | |||
|    s          - encoder context; supplies the limits and the mb_intra flag | |||
|    block      - quantized coefficients, modified in place | |||
|    last_index - last scan position (in zigzag_direct order) to examine | |||
|    For intra macroblocks the DC coefficient (scan position 0) is left untouched: | |||
|    it is quantized separately with the dc scale and is not part of the overflow | |||
|    check, so clamping it to the AC range would corrupt a valid DC value. */ | |||
| static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index) | |||
| { | |||
|     int i; | |||
|     const int maxlevel= s->max_qcoeff; | |||
|     const int minlevel= s->min_qcoeff; | |||
|     /* skip clipping of the intra DC coefficient */ | |||
|     i= s->mb_intra ? 1 : 0; | |||
|     for(; i<=last_index; i++){ | |||
|         const int j = zigzag_direct[i]; | |||
|         int level = block[j]; | |||
|         if (level>maxlevel) level=maxlevel; | |||
|         else if(level<minlevel) level=minlevel; | |||
|         block[j]= level; | |||
|     } | |||
| } | |||
| static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| { | |||
| @@ -1407,8 +1445,19 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| s->y_dc_scale = 8; | |||
| s->c_dc_scale = 8; | |||
| } | |||
| for(i=0;i<6;i++) { | |||
| s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale); | |||
| if(s->out_format==FMT_MJPEG){ | |||
| for(i=0;i<6;i++) { | |||
| int overflow; | |||
| s->block_last_index[i] = dct_quantize(s, s->block[i], i, 8, &overflow); | |||
| if(overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]); | |||
| } | |||
| }else{ | |||
| for(i=0;i<6;i++) { | |||
| int overflow; | |||
| s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale, &overflow); | |||
| // FIXME we could decide to change to quantizer instead of clipping | |||
| if(overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]); | |||
| } | |||
| } | |||
| /* huffman encode */ | |||
| @@ -1596,17 +1645,13 @@ static void encode_picture(MpegEncContext *s, int picture_number) | |||
| else if (!s->fixed_qscale) | |||
| s->qscale = ff_rate_estimate_qscale(s); | |||
| /* precompute matrix */ | |||
| if (s->out_format == FMT_MJPEG) { | |||
| /* for mjpeg, we do include qscale in the matrix */ | |||
| s->intra_matrix[0] = default_intra_matrix[0]; | |||
| for(i=1;i<64;i++) | |||
| s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3; | |||
| convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, 8); | |||
| } else { | |||
| convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->qscale); | |||
| convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale); | |||
| convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, | |||
| s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias); | |||
| } | |||
| s->last_bits= get_bit_count(&s->pb); | |||
| @@ -1957,29 +2002,13 @@ static void encode_picture(MpegEncContext *s, int picture_number) | |||
| static int dct_quantize_c(MpegEncContext *s, | |||
| DCTELEM *block, int n, | |||
| int qscale) | |||
| int qscale, int *overflow) | |||
| { | |||
| int i, j, level, last_non_zero, q; | |||
| const int *qmat; | |||
| int minLevel, maxLevel; | |||
| if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){ | |||
| /* mpeg4 */ | |||
| minLevel= -2048; | |||
| maxLevel= 2047; | |||
| }else if(s->out_format==FMT_MPEG1){ | |||
| /* mpeg1 */ | |||
| minLevel= -255; | |||
| maxLevel= 255; | |||
| }else if(s->out_format==FMT_MJPEG){ | |||
| /* (m)jpeg */ | |||
| minLevel= -1023; | |||
| maxLevel= 1023; | |||
| }else{ | |||
| /* h263 / msmpeg4 */ | |||
| minLevel= -128; | |||
| maxLevel= 127; | |||
| } | |||
| int bias; | |||
| int max=0; | |||
| unsigned int threshold1, threshold2; | |||
| av_fdct (block); | |||
| @@ -1998,71 +2027,40 @@ static int dct_quantize_c(MpegEncContext *s, | |||
| block[0] = (block[0] + (q >> 1)) / q; | |||
| i = 1; | |||
| last_non_zero = 0; | |||
| if (s->out_format == FMT_H263) { | |||
| qmat = s->q_non_intra_matrix; | |||
| } else { | |||
| qmat = s->q_intra_matrix; | |||
| } | |||
| qmat = s->q_intra_matrix[qscale]; | |||
| bias= s->intra_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT); | |||
| } else { | |||
| i = 0; | |||
| last_non_zero = -1; | |||
| qmat = s->q_non_intra_matrix; | |||
| qmat = s->q_inter_matrix[qscale]; | |||
| bias= s->inter_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT); | |||
| } | |||
| threshold1= (1<<(QMAT_SHIFT - 3)) - bias - 1; | |||
| threshold2= threshold1<<1; | |||
| for(;i<64;i++) { | |||
| j = zigzag_direct[i]; | |||
| level = block[j]; | |||
| level = level * qmat[j]; | |||
| #ifdef PARANOID | |||
| { | |||
| static int count = 0; | |||
| int level1, level2, qmat1; | |||
| double val; | |||
| if (qmat == s->q_non_intra_matrix) { | |||
| qmat1 = default_non_intra_matrix[j] * s->qscale; | |||
| } else { | |||
| qmat1 = default_intra_matrix[j] * s->qscale; | |||
| } | |||
| if (av_fdct != jpeg_fdct_ifast) | |||
| val = ((double)block[j] * 8.0) / (double)qmat1; | |||
| else | |||
| val = ((double)block[j] * 8.0 * 2048.0) / | |||
| ((double)qmat1 * aanscales[j]); | |||
| level1 = (int)val; | |||
| level2 = level / (1 << (QMAT_SHIFT - 3)); | |||
| if (level1 != level2) { | |||
| fprintf(stderr, "%d: quant error qlevel=%d wanted=%d level=%d qmat1=%d qmat=%d wantedf=%0.6f\n", | |||
| count, level2, level1, block[j], qmat1, qmat[j], | |||
| val); | |||
| count++; | |||
| } | |||
| } | |||
| #endif | |||
| /* XXX: slight error for the low range. Test should be equivalent to | |||
| (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 << | |||
| (QMAT_SHIFT - 3))) | |||
| */ | |||
| if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) != | |||
| level) { | |||
| level = level / (1 << (QMAT_SHIFT - 3)); | |||
| /* XXX: currently, this code is not optimal. the range should be: | |||
| mpeg1: -255..255 | |||
| mpeg2: -2048..2047 | |||
| h263: -128..127 | |||
| mpeg4: -2048..2047 | |||
| */ | |||
| if (level > maxLevel) | |||
| level = maxLevel; | |||
| else if (level < minLevel) | |||
| level = minLevel; | |||
| block[j] = level; | |||
| // if( bias+level >= (1<<(QMAT_SHIFT - 3)) | |||
| // || bias-level >= (1<<(QMAT_SHIFT - 3))){ | |||
| if(((unsigned)(level+threshold1))>threshold2){ | |||
| if(level>0){ | |||
| level= (bias + level)>>(QMAT_SHIFT - 3); | |||
| block[j]= level; | |||
| }else{ | |||
| level= (bias - level)>>(QMAT_SHIFT - 3); | |||
| block[j]= -level; | |||
| } | |||
| max |=level; | |||
| last_non_zero = i; | |||
| } else { | |||
| block[j] = 0; | |||
| }else{ | |||
| block[j]=0; | |||
| } | |||
| } | |||
| *overflow= s->max_qcoeff < max; //overflow might have happend | |||
| return last_non_zero; | |||
| } | |||
| @@ -2104,7 +2102,7 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s, | |||
| } | |||
| } else { | |||
| i = 0; | |||
| quant_matrix = s->non_intra_matrix; | |||
| quant_matrix = s->inter_matrix; | |||
| for(;i<nCoeffs;i++) { | |||
| int j= zigzag_direct[i]; | |||
| level = block[j]; | |||
| @@ -2166,7 +2164,7 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s, | |||
| } else { | |||
| int sum=-1; | |||
| i = 0; | |||
| quant_matrix = s->non_intra_matrix; | |||
| quant_matrix = s->inter_matrix; | |||
| for(;i<nCoeffs;i++) { | |||
| int j= zigzag_direct[i]; | |||
| level = block[j]; | |||
| @@ -83,11 +83,15 @@ typedef struct MpegEncContext { | |||
| int bit_rate; /* wanted bit rate */ | |||
| int bit_rate_tolerance; /* amount of +- bits (>0)*/ | |||
| enum OutputFormat out_format; /* output format */ | |||
| int h263_pred; /* use mpeg4/h263 ac/dc predictions */ | |||
| /* the following codec id fields are deprecated in favor of codec_id */ | |||
| int h263_plus; /* h263 plus headers */ | |||
| int h263_rv10; /* use RV10 variation for H263 */ | |||
| int h263_pred; /* use mpeg4/h263 ac/dc predictions */ | |||
| int h263_msmpeg4; /* generate MSMPEG4 compatible stream */ | |||
| int h263_msmpeg4; /* generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead)*/ | |||
| int h263_intel; /* use I263 intel h263 header */ | |||
| int codec_id; /* see CODEC_ID_xxx */ | |||
| int fixed_qscale; /* fixed qscale if non zero */ | |||
| float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0) */ | |||
| float qblur; /* amount of qscale smoothing over time (0.0-1.0) */ | |||
| @@ -213,14 +217,21 @@ typedef struct MpegEncContext { | |||
| /* matrix transmitted in the bitstream */ | |||
| UINT16 intra_matrix[64]; | |||
| UINT16 chroma_intra_matrix[64]; | |||
| UINT16 non_intra_matrix[64]; | |||
| UINT16 chroma_non_intra_matrix[64]; | |||
| UINT16 inter_matrix[64]; | |||
| UINT16 chroma_inter_matrix[64]; | |||
| #define QUANT_BIAS_SHIFT 4 | |||
| int intra_quant_bias; /* bias for the quantizer */ | |||
| int inter_quant_bias; /* bias for the quantizer */ | |||
| int min_qcoeff; /* minimum encodable coefficient */ | |||
| int max_qcoeff; /* maximum encodable coefficient */ | |||
| /* precomputed matrix (combine qscale and DCT renorm) */ | |||
| int q_intra_matrix[64]; | |||
| int q_non_intra_matrix[64]; | |||
| int q_intra_matrix[32][64]; | |||
| int q_inter_matrix[32][64]; | |||
| /* identical to the above but for MMX & these are not permutated */ | |||
| UINT16 __align8 q_intra_matrix16[64]; | |||
| UINT16 __align8 q_non_intra_matrix16[64]; | |||
| UINT16 __align8 q_intra_matrix16[32][64]; | |||
| UINT16 __align8 q_inter_matrix16[32][64]; | |||
| UINT16 __align8 q_intra_matrix16_bias[32][64]; | |||
| UINT16 __align8 q_inter_matrix16_bias[32][64]; | |||
| int block_last_index[6]; /* last non zero coefficient in block */ | |||
| void *opaque; /* private data for the user */ | |||
| @@ -328,7 +339,7 @@ typedef struct MpegEncContext { | |||
| int first_slice_line; /* used in mpeg4 too to handle resync markers */ | |||
| int flipflop_rounding; | |||
| int bitrate; | |||
| int msmpeg4_version; /* 1=mp41, 2=mp42, 3=mp43/divx3 */ | |||
| int msmpeg4_version; /* 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 */ | |||
| /* decompression specific */ | |||
| GetBitContext gb; | |||
| @@ -386,6 +397,8 @@ void MPV_frame_end(MpegEncContext *s); | |||
| #ifdef HAVE_MMX | |||
| void MPV_common_init_mmx(MpegEncContext *s); | |||
| #endif | |||
| int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); | |||
| void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w); | |||
| /* motion_est.c */ | |||
| void ff_estimate_p_frame_motion(MpegEncContext * s, | |||
| @@ -340,6 +340,7 @@ static int rv10_decode_init(AVCodecContext *avctx) | |||
| int i; | |||
| static int done; | |||
| // s->avctx= avctx; | |||
| s->out_format = FMT_H263; | |||
| s->width = avctx->width; | |||
| @@ -351,11 +352,6 @@ static int rv10_decode_init(AVCodecContext *avctx) | |||
| if (MPV_common_init(s) < 0) | |||
| return -1; | |||
| /* XXX: suppress this matrix init, only needed because using mpeg1 | |||
| dequantize in mmx case */ | |||
| for(i=0;i<64;i++) | |||
| s->non_intra_matrix[i] = default_non_intra_matrix[i]; | |||
| h263_decode_init_vlc(s); | |||
| /* init rv vlc */ | |||