fixed msmpeg4 infinite loop if buggy stream

rewrote quantizer; fixed bias (+10% compression/quality for h263-like codecs); qscale=1 support; mpeg1 intra frames look far less blocky; added codec_id field. Originally committed as revision 423 to svn://svn.ffmpeg.org/ffmpeg/trunk
24 years ago · d7e9533aa0
--- a/libavcodec/common.h
+++ b/libavcodec/common.h
@@ -157,6 +157,9 @@ inline void dprintf(const char* fmt,...) {}

 #endif /* HAVE_AV_CONFIG_H */

 /* Integer division of a by b rounded to the nearest value, with ties going
    away from zero.  Only valid for b > 0.  Both arguments are expanded more
    than once, so they must not have side effects. */
 #define ROUNDED_DIV(a,b) \
    (((a) > 0 ? ((a) + ((b) >> 1)) \
              : ((a) - ((b) >> 1))) / (b))

 /* bit output */

 struct PutBitContext;
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -904,8 +904,26 @@ void h263_encode_init(MpegEncContext *s)
    s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
    
    // use fcodes >1 only for mpeg4 & h263 & h263p FIXME
    if(s->h263_plus) s->fcode_tab= umv_fcode_tab;
    else if(s->h263_pred && !s->h263_msmpeg4) s->fcode_tab= fcode_tab;
    switch(s->codec_id){
    case CODEC_ID_MPEG4:
        s->fcode_tab= fcode_tab;
        s->min_qcoeff= -2048;
        s->max_qcoeff=  2047;
        break;
    case CODEC_ID_H263P:
        s->fcode_tab= umv_fcode_tab;
        s->min_qcoeff= -128;
        s->max_qcoeff=  127;
        break;
    default: //nothing needed default table allready set in mpegvideo.c
        s->min_qcoeff= -128;
        s->max_qcoeff=  127;
    }

    /* h263 type bias */
    //FIXME mpeg4 mpeg quantizer    
    s->intra_quant_bias=0;
    s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
 }

 static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
@@ -2702,8 +2720,8 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
                    s->chroma_intra_matrix[i]= v;
                    
                    v= ff_mpeg4_default_non_intra_matrix[i];
                    s->non_intra_matrix[i]= v;
                    s->chroma_non_intra_matrix[i]= v;
                    s->inter_matrix[i]= v;
                    s->chroma_inter_matrix[i]= v;
                }

                /* load custom intra matrix */
@@ -2725,15 +2743,15 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
                        if(v==0) break;

                        j= zigzag_direct[i];
                        s->non_intra_matrix[j]= v;
                        s->chroma_non_intra_matrix[j]= v;
                        s->inter_matrix[j]= v;
                        s->chroma_inter_matrix[j]= v;
                    }

                    /* replicate last value */
                    for(; i<64; i++){
                        j= zigzag_direct[i];
                        s->non_intra_matrix[j]= v;
                        s->chroma_non_intra_matrix[j]= v;
                        s->inter_matrix[j]= v;
                        s->chroma_inter_matrix[j]= v;
                    }
                }

--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -73,17 +73,13 @@ static int h263_decode_init(AVCodecContext *avctx)
    default:
        return -1;
    }

    s->codec_id= avctx->codec->id;
    
    /* for h263, we allocate the images after having read the header */
    if (avctx->codec->id != CODEC_ID_H263 && avctx->codec->id != CODEC_ID_MPEG4)
        if (MPV_common_init(s) < 0)
            return -1;

    /* XXX: suppress this matrix init, only needed because using mpeg1
       dequantize in mmx case */
    for(i=0;i<64;i++)
        s->non_intra_matrix[i] = default_non_intra_matrix[i];

    if (s->h263_msmpeg4)
        msmpeg4_decode_init_vlc(s);
    else
@@ -251,7 +247,7 @@ static int h263_decode_frame(AVCodecContext *avctx,
        if(msmpeg4_decode_ext_header(s, buf_size) < 0) return -1;
    
    /* divx 5.01+ bitstream reorder stuff */
    if(s->h263_pred && s->bitstream_buffer_size==0){
    if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0){
        int current_pos= get_bits_count(&s->gb)/8;
        if(   buf_size - current_pos > 5 
           && buf_size - current_pos < BITSTREAM_BUFFER_SIZE){
--- a/libavcodec/i386/mpegvideo_mmx.c
+++ b/libavcodec/i386/mpegvideo_mmx.c
@@ -26,8 +26,6 @@
 #include "../mangle.h"

 extern UINT8 zigzag_end[64];
 extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);
 extern int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale);

 extern UINT8 zigzag_direct_noperm[64];
 extern UINT16 inv_zigzag_direct16[64];
@@ -260,7 +258,7 @@ asm volatile(
        block[0]= block0;

        } else {
        quant_matrix = s->non_intra_matrix;
        quant_matrix = s->inter_matrix;
 asm volatile(
 		"pcmpeqw %%mm7, %%mm7		\n\t"
 		"psrlw $15, %%mm7		\n\t"
@@ -382,7 +380,7 @@ asm volatile(
        //Note, we dont do mismatch control for intra as errors cannot accumulate

    } else {
        quant_matrix = s->non_intra_matrix;
        quant_matrix = s->inter_matrix;
 asm volatile(
 		"pcmpeqw %%mm7, %%mm7		\n\t"
                "psrlq $48, %%mm7		\n\t"
--- a/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -33,149 +33,160 @@

 static int RENAME(dct_quantize)(MpegEncContext *s,
                            DCTELEM *block, int n,
                            int qscale)
                            int qscale, int *overflow)
 {
    int i, level, last_non_zero_p1, q;
    const UINT16 *qmat;
    int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ...
    const UINT16 *qmat, *bias;
    static __align8 INT16 temp_block[64];
    int minLevel, maxLevel;
    
    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
 	/* mpeg4 */
        minLevel= -2048;
 	maxLevel= 2047;
    }else if(s->out_format==FMT_MPEG1){
 	/* mpeg1 */
        minLevel= -255;
 	maxLevel= 255;
    }else if(s->out_format==FMT_MJPEG){
 	/* (m)jpeg */
        minLevel= -1023;
 	maxLevel= 1023;
    }else{
 	/* h263 / msmpeg4 */
        minLevel= -128;
 	maxLevel= 127;
    }

    av_fdct (block);
    

    if (s->mb_intra) {
        int dummy;
        if (n < 4)
            q = s->y_dc_scale;
        else
            q = s->c_dc_scale;
        
        /* note: block[0] is assumed to be positive */
 #if 1
 	asm volatile (
 		"xorl %%edx, %%edx	\n\t"
 		"mul %%ecx		\n\t"
 		: "=d" (temp_block[0]), "=a"(dummy)
 		: "a" (block[0] + (q >> 1)), "c" (inverse[q])
 	);
        asm volatile (
        	"xorl %%edx, %%edx	\n\t"
        	"mul %%ecx		\n\t"
        	: "=d" (level), "=a"(dummy)
        	: "a" (block[0] + (q >> 1)), "c" (inverse[q])
        );
 #else
 	asm volatile (
 		"xorl %%edx, %%edx	\n\t"
 		"divw %%cx		\n\t"
 		"movzwl %%ax, %%eax	\n\t"
 		: "=a" (temp_block[0])
 		: "a" (block[0] + (q >> 1)), "c" (q)
 		: "%edx"
 	);
        asm volatile (
        	"xorl %%edx, %%edx	\n\t"
        	"divw %%cx		\n\t"
        	"movzwl %%ax, %%eax	\n\t"
        	: "=a" (level)
        	: "a" (block[0] + (q >> 1)), "c" (q)
        	: "%edx"
        );
 #endif
        block[0]=0; //avoid fake overflow
 //        temp_block[0] = (block[0] + (q >> 1)) / q;
        i = 1;
        last_non_zero_p1 = 1;
        if (s->out_format == FMT_H263) {
            qmat = s->q_non_intra_matrix16;
        } else {
            qmat = s->q_intra_matrix16;
        }
        for(i=1;i<4;i++) {
            level = block[i] * qmat[i];
            level = level / (1 << (QMAT_SHIFT_MMX - 3));
            /* XXX: currently, this code is not optimal. the range should be:
               mpeg1: -255..255
               mpeg2: -2048..2047
               h263:  -128..127
               mpeg4: -2048..2047
            */
            if (level > maxLevel)
                level = maxLevel;
            else if (level < minLevel)
                level = minLevel;
            temp_block[i] = level;

 	    if(level) 
 	        if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i];
 	    block[i]=0;
        }
        bias = s->q_intra_matrix16_bias[qscale];
        qmat = s->q_intra_matrix16[qscale];
    } else {
        i = 0;
        last_non_zero_p1 = 0;
        qmat = s->q_non_intra_matrix16;
        bias = s->q_inter_matrix16_bias[qscale];
        qmat = s->q_inter_matrix16[qscale];
    }

    asm volatile( /* XXX: small rounding bug, but it shouldnt matter */
 	"movd %3, %%mm3			\n\t"
 	SPREADW(%%mm3)
 	"movd %4, %%mm4			\n\t"
 	SPREADW(%%mm4)
 #ifndef HAVE_MMX2	
 	"movd %5, %%mm5			\n\t"
 	SPREADW(%%mm5)
 #endif
 	"pxor %%mm7, %%mm7		\n\t"
 	"movd %%eax, %%mm2		\n\t"
 	SPREADW(%%mm2)
 	"movl %6, %%eax			\n\t"
 	".balign 16			\n\t"
 	"1:				\n\t"
 	"movq (%1, %%eax), %%mm0	\n\t"
 	"movq (%2, %%eax), %%mm1	\n\t"
 	"movq %%mm0, %%mm6		\n\t"
 	"psraw $15, %%mm6		\n\t"
 	"pmulhw %%mm0, %%mm1		\n\t"
 	"psubsw %%mm6, %%mm1		\n\t"
 #ifdef HAVE_MMX2
 	"pminsw %%mm3, %%mm1		\n\t"
 	"pmaxsw %%mm4, %%mm1		\n\t"
 #else
 	"paddsw %%mm3, %%mm1		\n\t"
 	"psubusw %%mm4, %%mm1		\n\t"
 	"paddsw %%mm5, %%mm1		\n\t"
 #endif
 	"movq %%mm1, (%8, %%eax)	\n\t"
 	"pcmpeqw %%mm7, %%mm1		\n\t"
 	"movq (%7, %%eax), %%mm0	\n\t"
 	"movq %%mm7, (%1, %%eax)	\n\t"
 	"pandn %%mm0, %%mm1		\n\t"
 	PMAXW(%%mm1, %%mm2)
 	"addl $8, %%eax			\n\t"
 	" js 1b				\n\t"
 	"movq %%mm2, %%mm0		\n\t"
 	"psrlq $32, %%mm2		\n\t"
 	PMAXW(%%mm0, %%mm2)
 	"movq %%mm2, %%mm0		\n\t"
 	"psrlq $16, %%mm2		\n\t"
 	PMAXW(%%mm0, %%mm2)
 	"movd %%mm2, %%eax		\n\t"
 	"movzbl %%al, %%eax		\n\t"
 	: "+a" (last_non_zero_p1)
 	: "r" (block+64), "r" (qmat+64), 
 #ifdef HAVE_MMX2
 	  "m" (maxLevel),          "m" (minLevel),                    "m" (minLevel /* dummy */), "g" (2*i - 128),
 #else
 	  "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel),             "g" (2*i - 128),
 #endif
 	  "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
    );
    if(s->out_format == FMT_H263){
    
        asm volatile(
            "movd %%eax, %%mm3			\n\t" // last_non_zero_p1
            SPREADW(%%mm3)
            "pxor %%mm7, %%mm7			\n\t" // 0
            "pxor %%mm4, %%mm4			\n\t" // 0
            "movq (%2), %%mm5			\n\t" // qmat[0]
            "pxor %%mm6, %%mm6			\n\t"
            "psubw (%3), %%mm6			\n\t" // -bias[0]
            "movl $-128, %%eax			\n\t"
            ".balign 16				\n\t"
            "1:					\n\t"
            "pxor %%mm1, %%mm1			\n\t" // 0
            "movq (%1, %%eax), %%mm0		\n\t" // block[i]
            "pcmpgtw %%mm0, %%mm1		\n\t" // block[i] <= 0 ? 0xFF : 0x00
            "pxor %%mm1, %%mm0			\n\t" 
            "psubw %%mm1, %%mm0			\n\t" // ABS(block[i])
            "psubusw %%mm6, %%mm0		\n\t" // ABS(block[i]) + bias[0]
            "pmulhw %%mm5, %%mm0		\n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
            "por %%mm0, %%mm4			\n\t" 
            "pxor %%mm1, %%mm0			\n\t" 
            "psubw %%mm1, %%mm0			\n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
            "movq %%mm0, (%5, %%eax)		\n\t"
            "pcmpeqw %%mm7, %%mm0		\n\t" // out==0 ? 0xFF : 0x00
            "movq (%4, %%eax), %%mm1		\n\t" 
            "movq %%mm7, (%1, %%eax)		\n\t" // 0
            "pandn %%mm1, %%mm0			\n\t"
 	    PMAXW(%%mm0, %%mm3)
            "addl $8, %%eax			\n\t"
            " js 1b				\n\t"
            "movq %%mm3, %%mm0			\n\t"
            "psrlq $32, %%mm3			\n\t"
 	    PMAXW(%%mm0, %%mm3)
            "movq %%mm3, %%mm0			\n\t"
            "psrlq $16, %%mm3			\n\t"
 	    PMAXW(%%mm0, %%mm3)
            "movd %%mm3, %%eax			\n\t"
            "movzbl %%al, %%eax			\n\t" // last_non_zero_p1
 	    : "+a" (last_non_zero_p1)
            : "r" (block+64), "r" (qmat), "r" (bias),
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
        );
        // note the asm is split cuz gcc doesnt like that many operands ...
        asm volatile(
            "movd %1, %%mm1			\n\t" // max_qcoeff
 	    SPREADW(%%mm1)
            "psubusw %%mm1, %%mm4		\n\t" 
            "packuswb %%mm4, %%mm4		\n\t"
            "movd %%mm4, %0			\n\t" // *overflow
        : "=g" (*overflow)
        : "g" (s->max_qcoeff)
        );
    }else{ // FMT_H263
        asm volatile(
            "movd %%eax, %%mm3			\n\t" // last_non_zero_p1
            SPREADW(%%mm3)
            "pxor %%mm7, %%mm7			\n\t" // 0
            "pxor %%mm4, %%mm4			\n\t" // 0
            "movl $-128, %%eax			\n\t"
            ".balign 16				\n\t"
            "1:					\n\t"
            "pxor %%mm1, %%mm1			\n\t" // 0
            "movq (%1, %%eax), %%mm0		\n\t" // block[i]
            "pcmpgtw %%mm0, %%mm1		\n\t" // block[i] <= 0 ? 0xFF : 0x00
            "pxor %%mm1, %%mm0			\n\t" 
            "psubw %%mm1, %%mm0			\n\t" // ABS(block[i])
            "movq (%3, %%eax), %%mm6		\n\t" // bias[0]
            "paddusw %%mm6, %%mm0		\n\t" // ABS(block[i]) + bias[0]
            "movq (%2, %%eax), %%mm5		\n\t" // qmat[i]
            "pmulhw %%mm5, %%mm0		\n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
            "por %%mm0, %%mm4			\n\t" 
            "pxor %%mm1, %%mm0			\n\t" 
            "psubw %%mm1, %%mm0			\n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
            "movq %%mm0, (%5, %%eax)		\n\t"
            "pcmpeqw %%mm7, %%mm0		\n\t" // out==0 ? 0xFF : 0x00
            "movq (%4, %%eax), %%mm1		\n\t" 
            "movq %%mm7, (%1, %%eax)		\n\t" // 0
            "pandn %%mm1, %%mm0			\n\t"
 	    PMAXW(%%mm0, %%mm3)
            "addl $8, %%eax			\n\t"
            " js 1b				\n\t"
            "movq %%mm3, %%mm0			\n\t"
            "psrlq $32, %%mm3			\n\t"
 	    PMAXW(%%mm0, %%mm3)
            "movq %%mm3, %%mm0			\n\t"
            "psrlq $16, %%mm3			\n\t"
 	    PMAXW(%%mm0, %%mm3)
            "movd %%mm3, %%eax			\n\t"
            "movzbl %%al, %%eax			\n\t" // last_non_zero_p1
 	    : "+a" (last_non_zero_p1)
            : "r" (block+64), "r" (qmat+64), "r" (bias+64),
              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
        );
        // note the asm is split cuz gcc doesnt like that many operands ...
        asm volatile(
            "movd %1, %%mm1			\n\t" // max_qcoeff
 	    SPREADW(%%mm1)
            "psubusw %%mm1, %%mm4		\n\t" 
            "packuswb %%mm4, %%mm4		\n\t"
            "movd %%mm4, %0			\n\t" // *overflow
        : "=g" (*overflow)
        : "g" (s->max_qcoeff)
        );
    }

    if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute
 // last_non_zero_p1=64;       
    /* permute for IDCT */
    asm volatile(
 	"movl %0, %%eax			\n\t"
        "movl %0, %%eax			\n\t"
 	"pushl %%ebp			\n\t"
 	"movl %%esp, " MANGLE(esp_temp) "\n\t"
 	"1:				\n\t"
@@ -203,5 +214,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
    }
 */
 //block_permute(block);

    return last_non_zero_p1 - 1;
 }
--- a/libavcodec/mjpeg.c
+++ b/libavcodec/mjpeg.c
@@ -160,6 +160,9 @@ int mjpeg_init(MpegEncContext *s)
    m = malloc(sizeof(MJpegContext));
    if (!m)
        return -1;
    
    s->min_qcoeff=-1023;
    s->max_qcoeff= 1023;

    /* build all the huffman tables */
    build_huffman_codes(m->huff_size_dc_luminance,
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -399,8 +399,11 @@ void mpeg1_encode_init(MpegEncContext *s)
        }
    }
    s->mv_penalty= mv_penalty;
    
    s->fcode_tab= fcode_tab;
    s->min_qcoeff=-255;
    s->max_qcoeff= 255;
    s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
    s->inter_quant_bias= 0;
 }
 
 static inline void encode_dc(MpegEncContext *s, int diff, int component)
@@ -1027,9 +1030,9 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
        UINT8 *buf_ptr;
        i = 0;
        if (n < 4) 
            matrix = s->non_intra_matrix;
            matrix = s->inter_matrix;
        else
            matrix = s->chroma_non_intra_matrix;
            matrix = s->chroma_inter_matrix;
            
        /* special case for the first coef. no need to add a second vlc table */
        SAVE_BITS(&s->gb);
@@ -1183,6 +1186,7 @@ static int mpeg_decode_init(AVCodecContext *avctx)
    s->buf_ptr = s->buffer;
    s->mpeg_enc_ctx.picture_number = 0;
    s->repeat_field = 0;
    s->mpeg_enc_ctx.codec_id= avctx->codec->id;
    return 0;
 }

@@ -1292,8 +1296,8 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
        for(i=0;i<64;i++) {
            v = get_bits(&s->gb, 8);
            j = zigzag_direct[i];
            s->non_intra_matrix[j] = v;
            s->chroma_non_intra_matrix[j] = v;
            s->inter_matrix[j] = v;
            s->chroma_inter_matrix[j] = v;
        }
    }
    if (get_bits1(&s->gb)) {
@@ -1307,7 +1311,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
        for(i=0;i<64;i++) {
            v = get_bits(&s->gb, 8);
            j = zigzag_direct[i];
            s->chroma_non_intra_matrix[j] = v;
            s->chroma_inter_matrix[j] = v;
        }
    }
 }
@@ -1386,7 +1390,6 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
    s->mb_x = -1;
    s->mb_y = start_code;
    s->mb_incr = 0;

    /* start frame decoding */
    if (s->first_slice) {
        s->first_slice = 0;
@@ -1526,20 +1529,20 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
        for(i=0;i<64;i++) {
            v = get_bits(&s->gb, 8);
            j = zigzag_direct[i];
            s->non_intra_matrix[j] = v;
            s->chroma_non_intra_matrix[j] = v;
            s->inter_matrix[j] = v;
            s->chroma_inter_matrix[j] = v;
        }
 #ifdef DEBUG
        dprintf("non intra matrix present\n");
        for(i=0;i<64;i++)
            dprintf(" %d", s->non_intra_matrix[zigzag_direct[i]]);
            dprintf(" %d", s->inter_matrix[zigzag_direct[i]]);
        printf("\n");
 #endif
    } else {
        for(i=0;i<64;i++) {
            v = default_non_intra_matrix[i];
            s->non_intra_matrix[i] = v;
            s->chroma_non_intra_matrix[i] = v;
            s->inter_matrix[i] = v;
            s->chroma_inter_matrix[i] = v;
        }
    }

@@ -1566,7 +1569,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
    dprintf("fill_buffer\n");

    *data_size = 0;
    

    /* special case for last picture */
    if (buf_size == 0) {
        if (s2->picture_number > 0) {
@@ -1591,7 +1594,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
        *data_size = sizeof(AVPicture);
        goto the_end;
    }
        

    while (buf_ptr < buf_end) {
        buf_start = buf_ptr;
        /* find start next code */
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -38,9 +38,9 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
 static void dct_unquantize_h263_c(MpegEncContext *s, 
                                  DCTELEM *block, int n, int qscale);
 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale);
 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);

 int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale)= dct_quantize_c;
 int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow)= dct_quantize_c;
 void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;

 #define EDGE_WIDTH 16
@@ -78,29 +78,38 @@ extern UINT8 zigzag_end[64];
 /* default motion estimation */
 int motion_estimation_method = ME_EPZS;

 static void convert_matrix(int *qmat, UINT16 *qmat16, const UINT16 *quant_matrix, int qscale)
 static void convert_matrix(int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
                           const UINT16 *quant_matrix, int bias)
 {
    int i;

    if (av_fdct == jpeg_fdct_ifast) {
        for(i=0;i<64;i++) {
            /* 16 <= qscale * quant_matrix[i] <= 7905 */
            /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
            /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
            /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
            
            qmat[block_permute_op(i)] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
                            (aanscales[i] * qscale * quant_matrix[block_permute_op(i)]));
        }
    } else {
        for(i=0;i<64;i++) {
            /* We can safely suppose that 16 <= quant_matrix[i] <= 255
               So 16           <= qscale * quant_matrix[i]             <= 7905
               so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
               so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
            */
            qmat[i]   = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
            qmat16[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
    int qscale;

    for(qscale=1; qscale<32; qscale++){
        int i;
        if (av_fdct == jpeg_fdct_ifast) {
            for(i=0;i<64;i++) {
                const int j= block_permute_op(i);
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
                
                qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            for(i=0;i<64;i++) {
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);

                if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;

                qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
            }
        }
    }
 }
@@ -388,7 +397,8 @@ int MPV_encode_init(AVCodecContext *avctx)
    s->max_b_frames= avctx->max_b_frames;
    s->rc_strategy= avctx->rc_strategy;
    s->b_frame_strategy= avctx->b_frame_strategy;
    
    s->codec_id= avctx->codec->id;

    if (s->gop_size <= 1) {
        s->intra_only = 1;
        s->gop_size = 12;
@@ -523,8 +533,21 @@ int MPV_encode_init(AVCodecContext *avctx)
    
    /* init default q matrix */
    for(i=0;i<64;i++) {
        s->intra_matrix[i] = default_intra_matrix[i];
        s->non_intra_matrix[i] = default_non_intra_matrix[i];
        if(s->out_format == FMT_H263)
            s->intra_matrix[i] = default_non_intra_matrix[i];
        else
            s->intra_matrix[i] = default_intra_matrix[i];

        s->inter_matrix[i] = default_non_intra_matrix[i];
    }

    /* precompute matrix */
        /* for mjpeg, we do include qscale in the matrix */
    if (s->out_format != FMT_MJPEG) {
        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
                       s->intra_matrix, s->intra_quant_bias);
        convert_matrix(s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
                       s->inter_matrix, s->inter_quant_bias);
    }

    if(ff_rate_control_init(s) < 0)
@@ -1307,6 +1330,21 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
    emms_c(); //FIXME remove
 }

 /*
  * Clamp the quantized coefficients of one block to the encodable range
  * [s->min_qcoeff, s->max_qcoeff].
  *
  * s          encoder context; supplies min_qcoeff, max_qcoeff and mb_intra
  * block      quantized coefficients, modified in place
  * last_index zigzag index of the last non-zero coefficient (inclusive)
  *
  * The coefficients are visited in zigzag order up to last_index.
  */
 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
 {
    const int maxlevel= s->max_qcoeff;
    const int minlevel= s->min_qcoeff;
    /* The intra DC coefficient is quantized with the DC scale rather than
       the AC matrix and is not part of the overflow detection done by
       dct_quantize, so the AC limits must not be applied to it. */
    int i= s->mb_intra ? 1 : 0;

    for(; i<=last_index; i++){
        const int j = zigzag_direct[i];
        int level = block[j];

        if     (level>maxlevel) level=maxlevel;
        else if(level<minlevel) level=minlevel;
        block[j]= level;
    }
 }

 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
 {
@@ -1407,8 +1445,19 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
        s->y_dc_scale = 8;
        s->c_dc_scale = 8;
    }
    for(i=0;i<6;i++) {
        s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale);
    if(s->out_format==FMT_MJPEG){
        for(i=0;i<6;i++) {
            int overflow;
            s->block_last_index[i] = dct_quantize(s, s->block[i], i, 8, &overflow);
            if(overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
        }
    }else{
        for(i=0;i<6;i++) {
            int overflow;
            s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change the quantizer instead of clipping
            if(overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
        }
    }

    /* huffman encode */
@@ -1596,17 +1645,13 @@ static void encode_picture(MpegEncContext *s, int picture_number)
    else if (!s->fixed_qscale) 
        s->qscale = ff_rate_estimate_qscale(s);


    /* precompute matrix */
    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = default_intra_matrix[0];
        for(i=1;i<64;i++)
            s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3;
        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, 8);
    } else {
        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->qscale);
        convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale);
        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, 
                       s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias);
    }

    s->last_bits= get_bit_count(&s->pb);
@@ -1957,29 +2002,13 @@ static void encode_picture(MpegEncContext *s, int picture_number)

 static int dct_quantize_c(MpegEncContext *s, 
                        DCTELEM *block, int n,
                        int qscale)
                        int qscale, int *overflow)
 {
    int i, j, level, last_non_zero, q;
    const int *qmat;
    int minLevel, maxLevel;

    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
 	/* mpeg4 */
        minLevel= -2048;
 	maxLevel= 2047;
    }else if(s->out_format==FMT_MPEG1){
 	/* mpeg1 */
        minLevel= -255;
 	maxLevel= 255;
    }else if(s->out_format==FMT_MJPEG){
 	/* (m)jpeg */
        minLevel= -1023;
 	maxLevel= 1023;
    }else{
 	/* h263 / msmpeg4 */
        minLevel= -128;
 	maxLevel= 127;
    }
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;

    av_fdct (block);

@@ -1998,71 +2027,40 @@ static int dct_quantize_c(MpegEncContext *s,
        block[0] = (block[0] + (q >> 1)) / q;
        i = 1;
        last_non_zero = 0;
        if (s->out_format == FMT_H263) {
            qmat = s->q_non_intra_matrix;
        } else {
            qmat = s->q_intra_matrix;
        }
        qmat = s->q_intra_matrix[qscale];
        bias= s->intra_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT);
    } else {
        i = 0;
        last_non_zero = -1;
        qmat = s->q_non_intra_matrix;
        qmat = s->q_inter_matrix[qscale];
        bias= s->inter_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT);
    }
    threshold1= (1<<(QMAT_SHIFT - 3)) - bias - 1;
    threshold2= threshold1<<1;

    for(;i<64;i++) {
        j = zigzag_direct[i];
        level = block[j];
        level = level * qmat[j];
 #ifdef PARANOID
        {
            static int count = 0;
            int level1, level2, qmat1;
            double val;
            if (qmat == s->q_non_intra_matrix) {
                qmat1 = default_non_intra_matrix[j] * s->qscale;
            } else {
                qmat1 = default_intra_matrix[j] * s->qscale;
            }
            if (av_fdct != jpeg_fdct_ifast)
                val = ((double)block[j] * 8.0) / (double)qmat1;
            else
                val = ((double)block[j] * 8.0 * 2048.0) / 
                    ((double)qmat1 * aanscales[j]);
            level1 = (int)val;
            level2 = level / (1 << (QMAT_SHIFT - 3));
            if (level1 != level2) {
                fprintf(stderr, "%d: quant error qlevel=%d wanted=%d level=%d qmat1=%d qmat=%d wantedf=%0.6f\n", 
                        count, level2, level1, block[j], qmat1, qmat[j],
                        val);
                count++;
            }

        }
 #endif
        /* XXX: slight error for the low range. Test should be equivalent to
           (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 <<
           (QMAT_SHIFT - 3)))
        */
        if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) != 
            level) {
            level = level / (1 << (QMAT_SHIFT - 3));
            /* XXX: currently, this code is not optimal. the range should be:
               mpeg1: -255..255
               mpeg2: -2048..2047
               h263:  -128..127
               mpeg4: -2048..2047
            */
            if (level > maxLevel)
                level = maxLevel;
            else if (level < minLevel)
                level = minLevel;

            block[j] = level;
 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>(QMAT_SHIFT - 3);
                block[j]= level;
            }else{
                level= (bias - level)>>(QMAT_SHIFT - 3);
                block[j]= -level;
            }
            max |=level;
            last_non_zero = i;
        } else {
            block[j] = 0;
        }else{
            block[j]=0;
        }
    }
    *overflow= s->max_qcoeff < max; //overflow might have happend
    
    return last_non_zero;
 }

@@ -2104,7 +2102,7 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
        }
    } else {
        i = 0;
        quant_matrix = s->non_intra_matrix;
        quant_matrix = s->inter_matrix;
        for(;i<nCoeffs;i++) {
            int j= zigzag_direct[i];
            level = block[j];
@@ -2166,7 +2164,7 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
    } else {
        int sum=-1;
        i = 0;
        quant_matrix = s->non_intra_matrix;
        quant_matrix = s->inter_matrix;
        for(;i<nCoeffs;i++) {
            int j= zigzag_direct[i];
            level = block[j];
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -83,11 +83,15 @@ typedef struct MpegEncContext {
    int bit_rate;        /* wanted bit rate */
    int bit_rate_tolerance; /* amount of +- bits (>0)*/
    enum OutputFormat out_format; /* output format */
    int h263_pred;    /* use mpeg4/h263 ac/dc predictions */

 /* the following codec id fields are deprecated in favor of codec_id */
    int h263_plus; /* h263 plus headers */
    int h263_rv10; /* use RV10 variation for H263 */
    int h263_pred; /* use mpeg4/h263 ac/dc predictions */
    int h263_msmpeg4; /* generate MSMPEG4 compatible stream */
    int h263_msmpeg4; /* generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead)*/
    int h263_intel; /* use I263 intel h263 header */
    
    int codec_id;     /* see CODEC_ID_xxx */
    int fixed_qscale; /* fixed qscale if non zero */
    float qcompress;  /* amount of qscale change between easy & hard scenes (0.0-1.0) */
    float qblur;      /* amount of qscale smoothing over time (0.0-1.0) */
@@ -213,14 +217,21 @@ typedef struct MpegEncContext {
    /* matrix transmitted in the bitstream */
    UINT16 intra_matrix[64];
    UINT16 chroma_intra_matrix[64];
    UINT16 non_intra_matrix[64];
    UINT16 chroma_non_intra_matrix[64];
    UINT16 inter_matrix[64];
    UINT16 chroma_inter_matrix[64];
 #define QUANT_BIAS_SHIFT 4
    int intra_quant_bias;    /* bias for the quantizer */
    int inter_quant_bias;    /* bias for the quantizer */
    int min_qcoeff;          /* minimum encodable coefficient */
    int max_qcoeff;          /* maximum encodable coefficient */
    /* precomputed matrix (combine qscale and DCT renorm) */
    int q_intra_matrix[64];
    int q_non_intra_matrix[64];
    int q_intra_matrix[32][64];
    int q_inter_matrix[32][64];
    /* identical to the above but for MMX & these are not permutated */
    UINT16 __align8 q_intra_matrix16[64];
    UINT16 __align8 q_non_intra_matrix16[64];
    UINT16 __align8 q_intra_matrix16[32][64];
    UINT16 __align8 q_inter_matrix16[32][64];
    UINT16 __align8 q_intra_matrix16_bias[32][64];
    UINT16 __align8 q_inter_matrix16_bias[32][64];
    int block_last_index[6];  /* last non zero coefficient in block */

    void *opaque; /* private data for the user */
@@ -328,7 +339,7 @@ typedef struct MpegEncContext {
    int first_slice_line;  /* used in mpeg4 too to handle resync markers */
    int flipflop_rounding;
    int bitrate;
    int msmpeg4_version;   /* 1=mp41, 2=mp42, 3=mp43/divx3 */
    int msmpeg4_version;   /* 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 */
    /* decompression specific */
    GetBitContext gb;

@@ -386,6 +397,8 @@ void MPV_frame_end(MpegEncContext *s);
 #ifdef HAVE_MMX
 void MPV_common_init_mmx(MpegEncContext *s);
 #endif
 int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
 void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);

 /* motion_est.c */
 void ff_estimate_p_frame_motion(MpegEncContext * s,
--- a/libavcodec/rv10.c
+++ b/libavcodec/rv10.c
@@ -340,6 +340,7 @@ static int rv10_decode_init(AVCodecContext *avctx)
    int i;
    static int done;

 //    s->avctx= avctx;
    s->out_format = FMT_H263;

    s->width = avctx->width;
@@ -351,11 +352,6 @@ static int rv10_decode_init(AVCodecContext *avctx)
    if (MPV_common_init(s) < 0)
        return -1;

    /* XXX: suppress this matrix init, only needed because using mpeg1
       dequantize in mmx case */
    for(i=0;i<64;i++)
        s->non_intra_matrix[i] = default_non_intra_matrix[i];

    h263_decode_init_vlc(s);

    /* init rv vlc */