(commit by michael)

mmx & mmx2 quantizer; C DCT permutation bugfix; don't copy input on intra-only encodings if it can be avoided; don't draw edges on intra-only stuff. Originally committed as revision 281 to svn://svn.ffmpeg.org/ffmpeg/trunk
24 years ago · 2f349de286
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -49,6 +49,12 @@ UINT8 zigzag_direct[64] = {
    53, 60, 61, 54, 47, 55, 62, 63
 };

 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
 UINT16 __align8 inv_zigzag_direct16[64];

 /* not permutated zigzag_direct for MMX quantizer */
 UINT8 zigzag_direct_noperm[64];

 UINT8 ff_alternate_horizontal_scan[64] = {
    0,  1,  2,  3,  8,  9, 16, 17, 
    10, 11,  4,  5,  6,  7, 15, 14,
@@ -83,6 +89,42 @@ static UINT8 simple_mmx_permutation[64]={
 	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 };

 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
 UINT32 inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
 };

 /* used to skip zeros at the end */
 UINT8 zigzag_end[64];

@@ -515,6 +557,9 @@ void dsputil_init(void)
    else
        for(i=0; i<64; i++) permutation[i]=i;

    for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
    for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
    
    if (use_permuted_idct) {
        /* permute for IDCT */
        for(i=0;i<64;i++) {
--- a/libavcodec/i386/mpegvideo_mmx.c
+++ b/libavcodec/i386/mpegvideo_mmx.c
@@ -22,9 +22,16 @@

 #include "../dsputil.h"
 #include "../mpegvideo.h"
 #include "../avcodec.h"
 #include "../mangle.h"

 extern UINT8 zigzag_end[64];
 extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);
 extern int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale);

 extern UINT8 zigzag_direct_noperm[64];
 extern UINT16 inv_zigzag_direct16[64];
 extern UINT32 inverse[256];

 #if 0

@@ -252,7 +259,7 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
        }
    } else {
        i = 0;
    unquant_even:
 //    unquant_even:
        quant_matrix = s->non_intra_matrix;
 	/* Align on 4 elements boundary */
 	while(i&7)
@@ -411,6 +418,20 @@ static void draw_edges_mmx(UINT8 *buf, int wrap, int width, int height, int w)
    }
 }

 /*
  * Scratch slot used by the inline assembly in mpegvideo_mmx_template.c,
  * which stores %esp here (by symbol name, through MANGLE()) before using
  * the stack pointer register as a temporary and reloads it afterwards.
  * The C compiler cannot see that use because it only appears inside asm
  * strings.
  */
 static volatile int esp_temp;

 /*
  * Touches esp_temp from C so the compiler does not report the variable as
  * unused. The increment itself has no other purpose.
  */
 void unused_var_warning_killer(void)
 {
 	esp_temp++;
 }

 /*
  * mpegvideo_mmx_template.c is compiled twice, once per instruction set.
  * RENAME() appends a suffix to every function the template defines, and
  * HAVE_MMX2 selects the instruction sequences used inside it.
  */

 /* First pass: plain MMX, functions get the _MMX suffix. */
 #undef HAVE_MMX2
 #define RENAME(a) a ## _MMX
 #include "mpegvideo_mmx_template.c"

 /* Second pass: MMX2 enabled, functions get the _MMX2 suffix. */
 #define HAVE_MMX2
 #undef RENAME
 #define RENAME(a) a ## _MMX2
 #include "mpegvideo_mmx_template.c"

 void MPV_common_init_mmx(MpegEncContext *s)
 {
@@ -421,5 +442,11 @@ void MPV_common_init_mmx(MpegEncContext *s)
        	s->dct_unquantize = dct_unquantize_mpeg1_mmx;
 	
 	draw_edges = draw_edges_mmx;

 	if(mm_flags & MM_MMXEXT){
 	        dct_quantize= dct_quantize_MMX2;
 	}else{
 		dct_quantize= dct_quantize_MMX;
 	}
    }
 }
--- a/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -0,0 +1,201 @@
 /*
    Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

 /*
  * Helper macros that expand to inline-assembly text. They are #undef'd
  * first so they can be redefined each time this file is included.
  *
  * SPREADW(a)  - copy the lowest 16-bit word of MMX register a into all
  *               four words of a.
  * PMAXW(a,b)  - b = per-word maximum of a and b.
  */
 #undef SPREADW
 #undef PMAXW
 #ifdef HAVE_MMX2
 /* MMX2: one instruction each; pmaxsw is a signed maximum. */
 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
 #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"

 #else
 /* Plain MMX: interleaving the register with itself twice replicates word 0. */
 #define SPREADW(a) \
 	"punpcklwd " #a ", " #a " \n\t"\
 	"punpcklwd " #a ", " #a " \n\t"
 /*
  * Plain MMX: b = (b -sat a) + a using an unsigned saturating subtract, i.e.
  * an unsigned maximum. It agrees with the signed MMX2 version as long as
  * both operands are non-negative.
  */
 #define PMAXW(a,b) \
 	"psubusw " #a ", " #b " \n\t"\
 	"paddw " #a ", " #b " \n\t"
 #endif

 /*
  * Transform, quantize and reorder one block of 64 coefficients using
  * MMX (or MMX2 when HAVE_MMX2 is defined) inline assembly. The real
  * function name is produced by RENAME() in the including file.
  *
  * Stages:
  *   1. av_fdct() is run on block in place.
  *   2. Intra blocks only: the DC term and coefficients 1..3 are quantized
  *      in C, so that the vector loop can start at coefficient 4.
  *   3. A vector loop quantizes the remaining coefficients four at a time
  *      into temp_block, clears them in block and tracks the largest
  *      inv_zigzag_direct16[] value seen for a non-zero result.
  *   4. A scalar asm loop copies temp_block back into block through
  *      zigzag_direct_noperm[] and permutation[].
  *
  * s       encoder context; reads avctx, out_format, mb_intra, y_dc_scale,
  *         c_dc_scale, q_intra_matrix16 and q_non_intra_matrix16.
  * block   64 coefficients, input and output.
  * n       block number; n < 4 selects y_dc_scale, otherwise c_dc_scale
  *         (only used for intra blocks).
  * qscale  not referenced in this body.
  *
  * Returns last_non_zero_p1 - 1, where last_non_zero_p1 is the largest
  * inv_zigzag_direct16[] entry belonging to a non-zero quantized
  * coefficient (1 at minimum for intra blocks, 0 for an all-zero non-intra
  * block). inv_zigzag_direct16[] is declared elsewhere; presumably it holds
  * scan position + 1, which would make the result the scan index of the
  * last non-zero coefficient -- confirm against dsputil.c.
  *
  * NOTE(review): temp_block has static storage and the permutation loop
  * uses the global esp_temp, so concurrent calls would share both --
  * confirm callers are single-threaded.
  */
 static int RENAME(dct_quantize)(MpegEncContext *s,
                             DCTELEM *block, int n,
                             int qscale)
 {
    int i, level, last_non_zero_p1, q;
    const UINT16 *qmat;
    /* quantized coefficients, still in the order produced by av_fdct() */
    static __align8 INT16 temp_block[64];
    int minLevel, maxLevel;
    
    /* clipping range of the quantized levels depends on the codec */
    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
 	/* mpeg4 */
        minLevel= -2048;
 	maxLevel= 2047;
    }else if(s->out_format==FMT_MPEG1){
 	/* mpeg1 */
        minLevel= -255;
 	maxLevel= 255;
    }else{
 	/* h263 / msmpeg4 */
        minLevel= -128;
 	maxLevel= 127;
    }

    av_fdct (block);
    
    if (s->mb_intra) {
        int dummy; /* receives the low half of the product (eax), unused */
        if (n < 4)
            q = s->y_dc_scale;
        else
            q = s->c_dc_scale;
        
        /* note: block[0] is assumed to be positive */
 #if 1
 	/*
 	 * DC term: edx:eax = (block[0] + q/2) * inverse[q]; the high half
 	 * (edx) is the rounded quotient, see the commented-out C line below.
 	 */
 	asm volatile (
 		"xorl %%edx, %%edx	\n\t"
 		"mul %%ebx		\n\t"
 		: "=d" (temp_block[0]), "=a"(dummy)
 		: "a" (block[0] + (q >> 1)), "b" (inverse[q])
 	);
 #else
 	/* disabled alternative using a real 16-bit division */
 	asm volatile (
 		"xorl %%edx, %%edx	\n\t"
 		"divw %%bx		\n\t"
 		"movzwl %%ax, %%eax	\n\t"
 		: "=a" (temp_block[0])
 		: "a" (block[0] + (q >> 1)), "b" (q)
 		: "%edx"
 	);
 #endif
 //        temp_block[0] = (block[0] + (q >> 1)) / q;
        i = 1;
        last_non_zero_p1 = 1;
        if (s->out_format == FMT_H263) {
            qmat = s->q_non_intra_matrix16;
        } else {
            qmat = s->q_intra_matrix16;
        }
        /*
         * Coefficients 1..3 are done in C; the loop leaves i == 4 so the
         * vector loop below starts on a group of four 16-bit words.
         */
        for(i=1;i<4;i++) {
            level = block[i] * qmat[i];
            level = level / (1 << (QMAT_SHIFT_MMX - 3));
            /* XXX: currently, this code is not optimal. the range should be:
               mpeg1: -255..255
               mpeg2: -2048..2047
               h263:  -128..127
               mpeg4: -2048..2047
            */
            if (level > maxLevel)
                level = maxLevel;
            else if (level < minLevel)
                level = minLevel;
            temp_block[i] = level;

 	    if(level) 
 	        if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i];
 	    block[i]=0;
        }
    } else {
        i = 0;
        last_non_zero_p1 = 0;
        qmat = s->q_non_intra_matrix16;
    }

    /*
     * Vector quantizer, four coefficients per iteration.
     *
     * Operands: %0 last_non_zero_p1 (eax, in/out), %1 block+64, %2 qmat+64,
     * %3..%5 clipping constants, %6 start offset in bytes (2*i - 128),
     * %7 inv_zigzag_direct16+64, %8 temp_block+64. All arrays are addressed
     * from their end with a negative byte offset in eax that counts up to 0.
     *
     * Setup: mm3/mm4/mm5 = %3/%4/%5 replicated into four words, mm7 = 0,
     * mm2 = running maximum, seeded with last_non_zero_p1.
     *
     * Per iteration:
     *   mm1 = high 16 bits of coefficient * qmat (signed), plus 1 where the
     *         coefficient is negative (mm6 is its sign mask);
     *   mm1 is clipped: pminsw/pmaxsw against maxLevel/minLevel with MMX2,
     *         otherwise a saturating add / unsigned saturating subtract /
     *         add sequence intended to have the same effect;
     *   mm1 is stored to temp_block and the source words in block are zeroed;
     *   words of inv_zigzag_direct16 whose result is non-zero are merged
     *         into mm2 with PMAXW.
     * After the loop the four words of mm2 are reduced to one maximum and
     * its low byte becomes the new last_non_zero_p1.
     *
     * NOTE(review): the plain-MMX clip relies on the biased value being
     * non-negative before psubusw; confirm very negative products cannot
     * reach it.
     * NOTE(review): the statement writes block and temp_block through
     * pointer operands but lists no "memory" clobber -- confirm the
     * compiler cannot cache those arrays across it.
     */
    asm volatile( /* XXX: small rounding bug, but it shouldnt matter */
 	"movd %3, %%mm3			\n\t"
 	SPREADW(%%mm3)
 	"movd %4, %%mm4			\n\t"
 	SPREADW(%%mm4)
 	"movd %5, %%mm5			\n\t"
 	SPREADW(%%mm5)
 	"pxor %%mm7, %%mm7		\n\t"
 	"movd %%eax, %%mm2		\n\t"
 	SPREADW(%%mm2)
 	"movl %6, %%eax			\n\t"
 	".balign 16			\n\t"
 	"1:				\n\t"
 	"movq (%1, %%eax), %%mm0	\n\t"
 	"movq (%2, %%eax), %%mm1	\n\t"
 	"movq %%mm0, %%mm6		\n\t"
 	"psraw $15, %%mm6		\n\t"
 	"pmulhw %%mm0, %%mm1		\n\t"
 	"psubsw %%mm6, %%mm1		\n\t"
 #ifdef HAVE_MMX2
 	"pminsw %%mm3, %%mm1		\n\t"
 	"pmaxsw %%mm4, %%mm1		\n\t"
 #else
 	"paddsw %%mm3, %%mm1		\n\t"
 	"psubusw %%mm4, %%mm1		\n\t"
 	"paddsw %%mm5, %%mm1		\n\t"
 #endif
 	"movq %%mm1, (%8, %%eax)	\n\t"
 	"pcmpeqw %%mm7, %%mm1		\n\t"
 	"movq (%7, %%eax), %%mm0	\n\t"
 	"movq %%mm7, (%1, %%eax)	\n\t"
 	"pandn %%mm0, %%mm1		\n\t"
 	PMAXW(%%mm1, %%mm2)
 	"addl $8, %%eax			\n\t"
 	" js 1b				\n\t"
 	"movq %%mm2, %%mm0		\n\t"
 	"psrlq $32, %%mm2		\n\t"
 	PMAXW(%%mm0, %%mm2)
 	"movq %%mm2, %%mm0		\n\t"
 	"psrlq $16, %%mm2		\n\t"
 	PMAXW(%%mm0, %%mm2)
 	"movd %%mm2, %%eax		\n\t"
 	"movzbl %%al, %%eax		\n\t"
 	: "+a" (last_non_zero_p1)
 	: "r" (block+64), "r" (qmat+64), 
 #ifdef HAVE_MMX2
 	  "m" (maxLevel),          "m" (minLevel),                    "m" (0 /* dummy */), "g" (2*i - 128),
 #else
 	  "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel),      "g" (2*i - 128),
 #endif
 	  "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
    );
 // last_non_zero_p1=64;       
    /* permute for IDCT */
    /*
     * Copies the quantized values back into block, two per iteration:
     * for each index j taken from zigzag_direct_noperm[], starting at entry 0,
     * block[permutation[j]] = temp_block[j] (see the C version below).
     * eax runs from -last_non_zero_p1 up to 0; the loop body always runs at
     * least once and handles entries in pairs, so it can copy one or two
     * entries past last_non_zero_p1.
     *
     * ebp and the low 16 bits of esp (sp) are used as scratch registers:
     * ebp is pushed/popped and esp is parked in the global esp_temp for the
     * duration of the loop.
     * NOTE(review): the stack pointer is invalid while the loop runs and
     * block is written without a "memory" clobber -- confirm this is
     * acceptable on the targeted platforms.
     */
    asm volatile(
 	"movl %0, %%eax			\n\t"
 	"pushl %%ebp			\n\t"
 	"movl %%esp, " MANGLE(esp_temp) "\n\t"
 	"1:				\n\t"
 	"movzbl (%1, %%eax), %%ebx	\n\t"
 	"movzbl 1(%1, %%eax), %%ebp	\n\t"
 	"movw (%2, %%ebx, 2), %%cx	\n\t"
 	"movw (%2, %%ebp, 2), %%sp	\n\t"
 	"movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t"
 	"movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t"
 	"movw %%cx, (%3, %%ebx, 2)	\n\t"
 	"movw %%sp, (%3, %%ebp, 2)	\n\t"
 	"addl $2, %%eax			\n\t"
 	" js 1b				\n\t"
 	"movl " MANGLE(esp_temp) ", %%esp\n\t"
 	"popl %%ebp			\n\t"
 	: 
 	: "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block)
 	: "%eax", "%ebx", "%ecx"
 	);
 /*
    C equivalent of the permutation loop above:
    for(i=0; i<last_non_zero_p1; i++)
    {
       int j= zigzag_direct_noperm[i];
       block[block_permute_op(j)]= temp_block[j];
    }
 */
 //block_permute(block);
    return last_non_zero_p1 - 1;
 }
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -35,12 +35,10 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
 static void dct_unquantize_h263_c(MpegEncContext *s, 
                                  DCTELEM *block, int n, int qscale);
 static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
 static int dct_quantize_mmx(MpegEncContext *s, 
                            DCTELEM *block, int n,
                            int qscale);
 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale);

 int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale)= dct_quantize_c;
 void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;

 #define EDGE_WIDTH 16
@@ -74,29 +72,29 @@ int motion_estimation_method = ME_LOG;

 extern UINT8 zigzag_end[64];

 /* XXX: should use variable shift ? */
 #define QMAT_SHIFT_MMX 19
 #define QMAT_SHIFT 25

 static void convert_matrix(int *qmat, const UINT16 *quant_matrix, int qscale)
 static void convert_matrix(int *qmat, UINT16 *qmat16, const UINT16 *quant_matrix, int qscale)
 {
    int i;

    if (av_fdct == jpeg_fdct_ifast) {
        for(i=0;i<64;i++) {
            /* 16 <= qscale * quant_matrix[i] <= 7905 */
            /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
            /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
            /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
            /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
            
            qmat[i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
                            (aanscales[i] * qscale * quant_matrix[i]));
            qmat[block_permute_op(i)] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
                            (aanscales[i] * qscale * quant_matrix[block_permute_op(i)]));
        }
    } else {
        for(i=0;i<64;i++) {
            /* We can safely suppose that 16 <= quant_matrix[i] <= 255
               So 16 <= qscale * quant_matrix[i] <= 7905
               so (1 << QMAT_SHIFT) / 16 >= qmat[i] >= (1 << QMAT_SHIFT) / 7905
               So 16           <= qscale * quant_matrix[i]             <= 7905
               so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
               so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
            */
            qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
            qmat[i]   = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
            qmat16[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
        }
    }
 }
@@ -418,7 +416,7 @@ void MPV_frame_start(MpegEncContext *s)
 void MPV_frame_end(MpegEncContext *s)
 {
    /* draw edge for correct motion prediction if outside */
    if (s->pict_type != B_TYPE) {
    if (s->pict_type != B_TYPE && !s->intra_only) {
      if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
        draw_edges(s->current_picture[0], s->linesize, s->mb_width*16, s->mb_height*16, EDGE_WIDTH);
        draw_edges(s->current_picture[1], s->linesize/2, s->mb_width*8, s->mb_height*8, EDGE_WIDTH/2);
@@ -457,7 +455,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
    avctx->key_frame = (s->pict_type == I_TYPE);
    
    MPV_frame_start(s);

    
    for(i=0;i<3;i++) {
        UINT8 *src = pict->data[i];
        UINT8 *dest = s->current_picture[i];
@@ -472,11 +470,15 @@ int MPV_encode_picture(AVCodecContext *avctx,
            h >>= 1;
        }

        for(j=0;j<h;j++) {
            memcpy(dest, src, w);
            dest += dest_wrap;
            src += src_wrap;
        }
 	if(s->intra_only && dest_wrap==src_wrap){
 	    s->current_picture[i] = pict->data[i];
 	}else {
            for(j=0;j<h;j++) {
                memcpy(dest, src, w);
                dest += dest_wrap;
                src += src_wrap;
            }
 	}
        s->new_picture[i] = s->current_picture[i];
    }

@@ -873,10 +875,10 @@ static void encode_picture(MpegEncContext *s, int picture_number)
        s->intra_matrix[0] = default_intra_matrix[0];
        for(i=1;i<64;i++)
            s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3;
        convert_matrix(s->q_intra_matrix, s->intra_matrix, 8);
        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, 8);
    } else {
        convert_matrix(s->q_intra_matrix, s->intra_matrix, s->qscale);
        convert_matrix(s->q_non_intra_matrix, s->non_intra_matrix, s->qscale);
        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->qscale);
        convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale);
    }

    switch(s->out_format) {
@@ -1011,14 +1013,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                s->y_dc_scale = 8;
                s->c_dc_scale = 8;
            }

            for(i=0;i<6;i++) {
                int last_index;
                if (av_fdct == jpeg_fdct_ifast)
                    last_index = dct_quantize(s, s->block[i], i, s->qscale);
                else
                    last_index = dct_quantize_mmx(s, s->block[i], i, s->qscale);
                s->block_last_index[i] = last_index;
                s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale);
            }

            /* huffman encode */
@@ -1060,7 +1056,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
    //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
 }

 static int dct_quantize(MpegEncContext *s, 
 static int dct_quantize_c(MpegEncContext *s, 
                        DCTELEM *block, int n,
                        int qscale)
 {
@@ -1157,85 +1153,7 @@ static int dct_quantize(MpegEncContext *s,
                level = maxLevel;
            else if (level < minLevel)
                level = minLevel;
            block[j] = level;
            last_non_zero = i;
        } else {
            block[j] = 0;
        }
    }
    return last_non_zero;
 }

 static int dct_quantize_mmx(MpegEncContext *s, 
                            DCTELEM *block, int n,
                            int qscale)
 {
    int i, j, level, last_non_zero, q;
    const int *qmat;
    int minLevel, maxLevel;

    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
 	/* mpeg4 */
        minLevel= -2048;
 	maxLevel= 2047;
    }else if(s->out_format==FMT_MPEG1){
 	/* mpeg1 */
        minLevel= -255;
 	maxLevel= 255;
    }else{
 	/* h263 / msmpeg4 */
        minLevel= -128;
 	maxLevel= 127;
    }

    av_fdct (block);
    
    /* we need this permutation so that we correct the IDCT
       permutation. will be moved into DCT code */
    block_permute(block);

    if (s->mb_intra) {
        if (n < 4)
            q = s->y_dc_scale;
        else
            q = s->c_dc_scale;
        
        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        i = 1;
        last_non_zero = 0;
        if (s->out_format == FMT_H263) {
            qmat = s->q_non_intra_matrix;
        } else {
            qmat = s->q_intra_matrix;
        }
    } else {
        i = 0;
        last_non_zero = -1;
        qmat = s->q_non_intra_matrix;
    }

    for(;i<64;i++) {
        j = zigzag_direct[i];
        level = block[j];
        level = level * qmat[j];
        /* XXX: slight error for the low range. Test should be equivalent to
           (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 <<
           (QMAT_SHIFT_MMX - 3)))
        */
        if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) != 
            level) {
            level = level / (1 << (QMAT_SHIFT_MMX - 3));
            /* XXX: currently, this code is not optimal. the range should be:
               mpeg1: -255..255
               mpeg2: -2048..2047
               h263:  -128..127
               mpeg4: -2048..2047
            */
            if (level > maxLevel)
                level = maxLevel;
            else if (level < minLevel)
                level = minLevel;
            block[j] = level;
            last_non_zero = i;
        } else {
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -30,6 +30,9 @@ enum OutputFormat {

 #define MPEG_BUF_SIZE (16 * 1024)

 #define QMAT_SHIFT_MMX 19
 #define QMAT_SHIFT 25

 typedef struct MpegEncContext {
    struct AVCodecContext *avctx;
    /* the following parameters must be initialized before encoding */
@@ -120,6 +123,9 @@ typedef struct MpegEncContext {
    /* precomputed matrix (combine qscale and DCT renorm) */
    int q_intra_matrix[64];
    int q_non_intra_matrix[64];
    /* identical to the above but for MMX & these are not permutated */
    UINT16 __align8 q_intra_matrix16[64] ;
    UINT16 __align8 q_non_intra_matrix16[64];
    int block_last_index[6];  /* last non zero coefficient in block */

    void *opaque; /* private data for the user */