Function moved to DSPContext; MMX & MMX2 optimized; changed default from SSE to SAD (better quality); VBV buffer size command-line option is now in kByte. Originally committed as revision 2669 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
@@ -111,6 +111,7 @@ static int video_codec_id = CODEC_ID_NONE; | |||
static int same_quality = 0; | |||
static int b_frames = 0; | |||
static int mb_decision = FF_MB_DECISION_SIMPLE; | |||
static int ildct_cmp = FF_CMP_VSAD; | |||
static int mb_cmp = FF_CMP_SAD; | |||
static int sub_cmp = FF_CMP_SAD; | |||
static int cmp = FF_CMP_SAD; | |||
@@ -1639,7 +1640,7 @@ static void opt_video_bitrate_min(const char *arg) | |||
static void opt_video_buffer_size(const char *arg) | |||
{ | |||
video_rc_buffer_size = atoi(arg) * 1024; | |||
video_rc_buffer_size = atoi(arg) * 8*1024; | |||
} | |||
static void opt_video_rc_eq(char *arg) | |||
@@ -1841,6 +1842,11 @@ static void opt_mb_cmp(const char *arg) | |||
mb_cmp = atoi(arg); | |||
} | |||
static void opt_ildct_cmp(const char *arg) | |||
{ | |||
ildct_cmp = atoi(arg); | |||
} | |||
static void opt_sub_cmp(const char *arg) | |||
{ | |||
sub_cmp = atoi(arg); | |||
@@ -2372,6 +2378,7 @@ static void opt_output_file(const char *filename) | |||
video_enc->mb_decision = mb_decision; | |||
video_enc->mb_cmp = mb_cmp; | |||
video_enc->ildct_cmp = ildct_cmp; | |||
video_enc->me_sub_cmp = sub_cmp; | |||
video_enc->me_cmp = cmp; | |||
@@ -3000,7 +3007,7 @@ const OptionDef options[] = { | |||
{ "bt", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_tolerance}, "set video bitrate tolerance (in kbit/s)", "tolerance" }, | |||
{ "maxrate", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_max}, "set max video bitrate tolerance (in kbit/s)", "bitrate" }, | |||
{ "minrate", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_min}, "set min video bitrate tolerance (in kbit/s)", "bitrate" }, | |||
{ "bufsize", HAS_ARG | OPT_VIDEO, {(void*)opt_video_buffer_size}, "set ratecontrol buffere size (in kbit)", "size" }, | |||
{ "bufsize", HAS_ARG | OPT_VIDEO, {(void*)opt_video_buffer_size}, "set ratecontrol buffere size (in kByte)", "size" }, | |||
{ "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" }, | |||
{ "me", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_motion_estimation}, "set motion estimation method", | |||
"method" }, | |||
@@ -3012,6 +3019,7 @@ const OptionDef options[] = { | |||
{ "hq", OPT_BOOL, {(void*)&mb_decision}, "activate high quality settings" }, | |||
{ "mbd", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_decision}, "macroblock decision", "mode" }, | |||
{ "mbcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_cmp}, "macroblock compare function", "cmp function" }, | |||
{ "ildctcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_ildct_cmp}, "ildct compare function", "cmp function" }, | |||
{ "subcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_sub_cmp}, "subpel compare function", "cmp function" }, | |||
{ "cmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_cmp}, "fullpel compare function", "cmp function" }, | |||
{ "4mv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_4mv}, "use four motion vector by macroblock (MPEG4)" }, | |||
@@ -17,7 +17,7 @@ extern "C" { | |||
#define FFMPEG_VERSION_INT 0x000408 | |||
#define FFMPEG_VERSION "0.4.8" | |||
#define LIBAVCODEC_BUILD 4698 | |||
#define LIBAVCODEC_BUILD 4699 | |||
#define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT | |||
#define LIBAVCODEC_VERSION FFMPEG_VERSION | |||
@@ -1196,6 +1196,12 @@ typedef struct AVCodecContext { | |||
* - decoding: unused | |||
*/ | |||
int mb_cmp; | |||
/** | |||
* interlaced dct compare function | |||
* - encoding: set by user. | |||
* - decoding: unused | |||
*/ | |||
int ildct_cmp; | |||
#define FF_CMP_SAD 0 | |||
#define FF_CMP_SSE 1 | |||
#define FF_CMP_SATD 2 | |||
@@ -1204,6 +1210,8 @@ typedef struct AVCodecContext { | |||
#define FF_CMP_BIT 5 | |||
#define FF_CMP_RD 6 | |||
#define FF_CMP_ZERO 7 | |||
#define FF_CMP_VSAD 8 | |||
#define FF_CMP_VSSE 9 | |||
#define FF_CMP_CHROMA 256 | |||
/** | |||
@@ -2560,6 +2560,53 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant | |||
} | |||
} | |||
/**
 * Compare function that ignores its inputs and always scores 0
 * (the entry ff_set_cmp installs for FF_CMP_ZERO).
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    /* every argument is intentionally unused */
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}
/**
 * Fills a 5-entry compare-function table from a DSPContext.
 * @param c    DSPContext whose per-metric tables are the source
 * @param cmp  destination table; its 5 slots are cleared first
 * @param type FF_CMP_* selector; only the low 8 bits are examined, so
 *             higher flag bits such as FF_CMP_CHROMA are masked off here
 *
 * For an unknown selector the slots stay NULL and an error is logged once
 * per slot.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    const int selector = type & 0xFF;
    int slot;

    /* start from an all-NULL table */
    memset(cmp, 0, sizeof(void*)*5);

    for(slot = 0; slot < 5; slot++){
        if(selector == FF_CMP_SAD){
            cmp[slot] = c->sad[slot];
        }else if(selector == FF_CMP_SATD){
            cmp[slot] = c->hadamard8_diff[slot];
        }else if(selector == FF_CMP_SSE){
            cmp[slot] = c->sse[slot];
        }else if(selector == FF_CMP_DCT){
            cmp[slot] = c->dct_sad[slot];
        }else if(selector == FF_CMP_PSNR){
            cmp[slot] = c->quant_psnr[slot];
        }else if(selector == FF_CMP_BIT){
            cmp[slot] = c->bit[slot];
        }else if(selector == FF_CMP_RD){
            cmp[slot] = c->rd[slot];
        }else if(selector == FF_CMP_VSAD){
            cmp[slot] = c->vsad[slot];
        }else if(selector == FF_CMP_VSSE){
            cmp[slot] = c->vsse[slot];
        }else if(selector == FF_CMP_ZERO){
            cmp[slot] = zero_cmp;
        }else{
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
/** | |||
* memset(blocks, 0, sizeof(DCTELEM)*6*64) | |||
*/ | |||
@@ -2685,17 +2732,19 @@ if(sum>maxi){ | |||
return sum; | |||
} | |||
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){ | |||
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){ | |||
int i; | |||
int temp[64]; | |||
int sum=0; | |||
//FIXME OOOPS ignore 0 term instead of mean mess | |||
assert(h==8); | |||
for(i=0; i<8; i++){ | |||
//FIXME try pointer walks | |||
BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean); | |||
BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean); | |||
BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean); | |||
BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean); | |||
BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]); | |||
BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]); | |||
BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]); | |||
BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]); | |||
BUTTERFLY1(temp[8*i+0], temp[8*i+2]); | |||
BUTTERFLY1(temp[8*i+1], temp[8*i+3]); | |||
@@ -2726,6 +2775,8 @@ static int hadamard8_abs_c(uint8_t *src, int stride, int mean){ | |||
+BUTTERFLYA(temp[8*3+i], temp[8*7+i]); | |||
} | |||
sum -= ABS(temp[8*0] + temp[8*4]); // -mean | |||
return sum; | |||
} | |||
@@ -2911,7 +2962,69 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in | |||
return bits; | |||
} | |||
/**
 * Intra vertical SAD of a 16-pixel-wide block: sums the absolute difference
 * between each pixel and the pixel one row below it, over rows 0..h-2.
 * @param c      unused
 * @param s      top-left pixel of the block
 * @param dummy  unused
 * @param stride distance between vertically adjacent pixels
 * @param h      number of rows; h-1 row pairs are compared
 * @return the accumulated score (0 when h <= 1)
 */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int total = 0;
    int row;

    for(row = 1; row < h; row++){
        uint8_t *below = s + stride;
        int col;

        for(col = 0; col < 16; col++){
            total += ABS(s[col] - below[col]);
        }
        s = below;
    }

    return total;
}
/**
 * Vertical SAD of the difference of two 16-pixel-wide blocks: for every
 * pixel position, takes the difference s1 - s2 on one row, subtracts the
 * same difference on the row below and accumulates the absolute value.
 * @param c      unused
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride distance between vertically adjacent pixels (both blocks)
 * @param h      number of rows; h-1 row pairs are compared
 * @return the accumulated score (0 when h <= 1)
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row;

    for(row = 1; row < h; row++){
        uint8_t *below1 = s1 + stride;
        uint8_t *below2 = s2 + stride;
        int col;

        for(col = 0; col < 16; col++){
            const int diff_here  = s1[col] - s2[col];
            const int diff_below = below1[col] - below2[col];

            total += ABS(diff_here - diff_below);
        }
        s1 = below1;
        s2 = below2;
    }

    return total;
}
#define SQ(a) ((a)*(a)) | |||
/**
 * Intra vertical SSE of a 16-pixel-wide block: sums the squared difference
 * between each pixel and the pixel one row below it, over rows 0..h-2.
 * @param c      unused
 * @param s      top-left pixel of the block
 * @param dummy  unused
 * @param stride distance between vertically adjacent pixels
 * @param h      number of rows; h-1 row pairs are compared
 * @return the accumulated score (0 when h <= 1)
 */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int total = 0;
    int row;

    for(row = 1; row < h; row++){
        uint8_t *below = s + stride;
        int col;

        for(col = 0; col < 16; col++){
            const int delta = s[col] - below[col];

            total += SQ(delta);
        }
        s = below;
    }

    return total;
}
/**
 * Vertical SSE of the difference of two 16-pixel-wide blocks: for every
 * pixel position, takes the difference s1 - s2 on one row, subtracts the
 * same difference on the row below and accumulates the square.
 * @param c      unused
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride distance between vertically adjacent pixels (both blocks)
 * @param h      number of rows; h-1 row pairs are compared
 * @return the accumulated score (0 when h <= 1)
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row;

    for(row = 1; row < h; row++){
        uint8_t *below1 = s1 + stride;
        uint8_t *below2 = s2 + stride;
        int col;

        for(col = 0; col < 16; col++){
            const int diff_here  = s1[col] - s2[col];
            const int diff_below = below1[col] - below2[col];
            const int delta      = diff_here - diff_below;

            total += SQ(delta);
        }
        s1 = below1;
        s2 = below2;
    }

    return total;
}
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) | |||
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) | |||
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) | |||
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | |||
WARPER8_16_SQ(rd8x8_c, rd16_c) | |||
@@ -3095,13 +3208,12 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||
c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | |||
c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | |||
c->hadamard8_abs = hadamard8_abs_c; | |||
#define SET_CMP_FUNC(name) \ | |||
c->name[0]= name ## 16_c;\ | |||
c->name[1]= name ## 8x8_c; | |||
SET_CMP_FUNC(hadamard8_diff) | |||
c->hadamard8_diff[4]= hadamard8_intra16_c; | |||
SET_CMP_FUNC(dct_sad) | |||
c->sad[0]= pix_abs16_c; | |||
c->sad[1]= pix_abs8_c; | |||
@@ -3110,6 +3222,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||
SET_CMP_FUNC(quant_psnr) | |||
SET_CMP_FUNC(rd) | |||
SET_CMP_FUNC(bit) | |||
c->vsad[0]= vsad16_c; | |||
c->vsad[4]= vsad_intra16_c; | |||
c->vsse[0]= vsse16_c; | |||
c->vsse[4]= vsse_intra16_c; | |||
c->add_bytes= add_bytes_c; | |||
c->diff_bytes= diff_bytes_c; | |||
@@ -138,21 +138,22 @@ typedef struct DSPContext { | |||
int (*pix_norm1)(uint8_t * pix, int line_size); | |||
// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 | |||
me_cmp_func sad[4]; /* identical to pix_absAxA except additional void * */ | |||
me_cmp_func sse[4]; | |||
me_cmp_func hadamard8_diff[4]; | |||
me_cmp_func dct_sad[4]; | |||
me_cmp_func quant_psnr[4]; | |||
me_cmp_func bit[4]; | |||
me_cmp_func rd[4]; | |||
int (*hadamard8_abs )(uint8_t *src, int stride, int mean); | |||
me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */ | |||
me_cmp_func sse[5]; | |||
me_cmp_func hadamard8_diff[5]; | |||
me_cmp_func dct_sad[5]; | |||
me_cmp_func quant_psnr[5]; | |||
me_cmp_func bit[5]; | |||
me_cmp_func rd[5]; | |||
me_cmp_func vsad[5]; | |||
me_cmp_func vsse[5]; | |||
me_cmp_func me_pre_cmp[5]; | |||
me_cmp_func me_cmp[5]; | |||
me_cmp_func me_sub_cmp[5]; | |||
me_cmp_func mb_cmp[5]; | |||
me_cmp_func ildct_cmp[5]; //only width 16 used | |||
/* maybe create an array for 16/8/4/2 functions */ | |||
/** | |||
* Halfpel motion compensation with rounding (a+b+1)>>1. | |||
* this is an array[4][4] of motion compensation functions for 4 | |||
@@ -293,6 +294,8 @@ void dsputil_init(DSPContext* p, AVCodecContext *avctx); | |||
*/ | |||
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); | |||
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); | |||
#define BYTE_VEC32(c) ((c)*0x01010101UL) | |||
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) | |||
@@ -22,6 +22,9 @@ | |||
#include "../dsputil.h" | |||
#include "../simple_idct.h" | |||
//#undef NDEBUG | |||
//#include <assert.h> | |||
extern const uint8_t ff_h263_loop_filter_strength[32]; | |||
int mm_flags; /* multimedia extension flags */ | |||
@@ -747,6 +750,246 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int | |||
return tmp; | |||
} | |||
/**
 * MMX version of the intra vertical SAD for a 16-pixel-wide block: sums the
 * absolute difference between each pixel and the pixel line_size bytes
 * further on, for every pair of consecutive rows.
 * - v and dummy are not referenced.
 * - pix and line_size are asserted to be multiples of 8.
 * - The row counter is decremented in steps of 2 and only tested for zero,
 *   so h must be even and at least 4 for the loop to stop after h-1 row pairs.
 * - Partial sums live in 16-bit lanes; the result is masked to 16 bits.
 * NOTE(review): the pointer is advanced with addl and cast to int, which
 * looks x86-32 only - confirm against the build configuration.
 */
static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { | |||
int tmp; | |||
assert( (((int)pix) & 7) == 0); | |||
assert((line_size &7) ==0); | |||
/* SUM: load the next 16-byte row into out0/out1 and advance pix by line_size;
 * build the per-byte absolute difference against the previous row held in
 * in0/in1 (psubusb in both directions, then por); widen the bytes to words
 * against mm7 and add everything into the accumulator mm6.
 * in0/in1 are destroyed; out0/out1 hold the row for the next invocation. */
#define SUM(in0, in1, out0, out1) \ | |||
"movq (%0), %%mm2\n"\ | |||
"movq 8(%0), %%mm3\n"\ | |||
"addl %2,%0\n"\ | |||
"movq %%mm2, " #out0 "\n"\ | |||
"movq %%mm3, " #out1 "\n"\ | |||
"psubusb " #in0 ", %%mm2\n"\ | |||
"psubusb " #in1 ", %%mm3\n"\ | |||
"psubusb " #out0 ", " #in0 "\n"\ | |||
"psubusb " #out1 ", " #in1 "\n"\ | |||
"por %%mm2, " #in0 "\n"\ | |||
"por %%mm3, " #in1 "\n"\ | |||
"movq " #in0 ", %%mm2\n"\ | |||
"movq " #in1 ", %%mm3\n"\ | |||
"punpcklbw %%mm7, " #in0 "\n"\ | |||
"punpcklbw %%mm7, " #in1 "\n"\ | |||
"punpckhbw %%mm7, %%mm2\n"\ | |||
"punpckhbw %%mm7, %%mm3\n"\ | |||
"paddw " #in1 ", " #in0 "\n"\ | |||
"paddw %%mm3, %%mm2\n"\ | |||
"paddw %%mm2, " #in0 "\n"\ | |||
"paddw " #in0 ", %%mm6\n" | |||
/* operands: %0 = pix (updated), %1 = tmp, %2 = line_size, %3 = h */
asm volatile ( | |||
/* ecx = row counter, mm6 = accumulator, mm7 = 0 for the byte->word unpack */
"movl %3,%%ecx\n" | |||
"pxor %%mm6,%%mm6\n" | |||
"pxor %%mm7,%%mm7\n" | |||
/* mm0/mm1 = first row */
"movq (%0),%%mm0\n" | |||
"movq 8(%0),%%mm1\n" | |||
"addl %2,%0\n" | |||
"subl $2, %%ecx\n" | |||
/* first row pair, then two row pairs per loop iteration with the register roles swapped */
SUM(%%mm0, %%mm1, %%mm4, %%mm5) | |||
"1:\n" | |||
SUM(%%mm4, %%mm5, %%mm0, %%mm1) | |||
SUM(%%mm0, %%mm1, %%mm4, %%mm5) | |||
"subl $2, %%ecx\n" | |||
"jnz 1b\n" | |||
/* fold the four 16-bit lanes of mm6 into the low word */
"movq %%mm6,%%mm0\n" | |||
"psrlq $32, %%mm6\n" | |||
"paddw %%mm6,%%mm0\n" | |||
"movq %%mm0,%%mm6\n" | |||
"psrlq $16, %%mm0\n" | |||
"paddw %%mm6,%%mm0\n" | |||
"movd %%mm0,%1\n" | |||
: "+r" (pix), "=r"(tmp) | |||
: "r" (line_size) , "m" (h) | |||
: "%ecx"); | |||
/* only the low word holds the folded sum */
return tmp & 0xFFFF; | |||
} | |||
#undef SUM | |||
/**
 * MMX2 version of the intra vertical SAD for a 16-pixel-wide block; same
 * row-pair traversal as vsad_intra16_mmx, but the per-row absolute
 * differences are summed with psadbw.
 * - v and dummy are not referenced.
 * - pix and line_size are asserted to be multiples of 8.
 * - The row counter is decremented in steps of 2 and only tested for zero,
 *   so h must be even and at least 4 for the loop to stop after h-1 row pairs.
 * - The accumulated value is returned without masking.
 */
static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { | |||
int tmp; | |||
assert( (((int)pix) & 7) == 0); | |||
assert((line_size &7) ==0); | |||
/* SUM: load the next 16-byte row into out0/out1 and advance pix by line_size;
 * psadbw replaces in0/in1 (the previous row) with the sum of absolute byte
 * differences against the new row; both halves are added into mm6.
 * out0/out1 hold the row for the next invocation. */
#define SUM(in0, in1, out0, out1) \ | |||
"movq (%0), " #out0 "\n"\ | |||
"movq 8(%0), " #out1 "\n"\ | |||
"addl %2,%0\n"\ | |||
"psadbw " #out0 ", " #in0 "\n"\ | |||
"psadbw " #out1 ", " #in1 "\n"\ | |||
"paddw " #in1 ", " #in0 "\n"\ | |||
"paddw " #in0 ", %%mm6\n" | |||
/* operands: %0 = pix (updated), %1 = tmp, %2 = line_size, %3 = h */
asm volatile ( | |||
/* ecx = row counter, mm6 = accumulator; mm7 is cleared but this SUM never reads it */
"movl %3,%%ecx\n" | |||
"pxor %%mm6,%%mm6\n" | |||
"pxor %%mm7,%%mm7\n" | |||
/* mm0/mm1 = first row */
"movq (%0),%%mm0\n" | |||
"movq 8(%0),%%mm1\n" | |||
"addl %2,%0\n" | |||
"subl $2, %%ecx\n" | |||
/* first row pair, then two row pairs per loop iteration with the register roles swapped */
SUM(%%mm0, %%mm1, %%mm4, %%mm5) | |||
"1:\n" | |||
SUM(%%mm4, %%mm5, %%mm0, %%mm1) | |||
SUM(%%mm0, %%mm1, %%mm4, %%mm5) | |||
"subl $2, %%ecx\n" | |||
"jnz 1b\n" | |||
"movd %%mm6,%1\n" | |||
: "+r" (pix), "=r"(tmp) | |||
: "r" (line_size) , "m" (h) | |||
: "%ecx"); | |||
return tmp; | |||
} | |||
#undef SUM | |||
/**
 * MMX version of the inter vertical SAD for a 16-pixel-wide block: for each
 * pair of consecutive rows it accumulates the absolute change of the
 * per-pixel difference (pix1 - pix2) from one row to the next.
 * - v is not referenced.
 * - pix1, pix2 and line_size are asserted to be multiples of 8.
 * - The per-pixel difference is formed with psubb (byte subtraction, wraps
 *   modulo 256) and XORed with 0x80, so it is not computed at full precision
 *   as in vsad16_c; dsputil_init_mmx installs this function only when
 *   CODEC_FLAG_BITEXACT is not set.
 * - The row counter is decremented in steps of 2 and only tested for zero,
 *   so h must be even and at least 4 for the loop to stop after h-1 row pairs.
 * - The result is masked to 15 bits.
 */
static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { | |||
int tmp; | |||
assert( (((int)pix1) & 7) == 0); | |||
assert( (((int)pix2) & 7) == 0); | |||
assert((line_size &7) ==0); | |||
/* SUM: load the next row of pix1 (mm2/mm3) and pix2 (out0/out1), advance both
 * pointers by line_size, form (pix1 - pix2) ^ mm7 per byte and keep a copy in
 * out0/out1 for the next invocation; then take the per-byte absolute
 * difference against the previous row's value in in0/in1 (psubusb both ways,
 * then por), widen to words against mm7 and add into the accumulator mm6.
 * NOTE(review): mm7 holds 0x80 bytes here, so every unpacked word carries an
 * extra 0x8000; four such words are added per lane per SUM, which appears to
 * cancel modulo 2^16 - confirm. */
#define SUM(in0, in1, out0, out1) \ | |||
"movq (%0),%%mm2\n"\ | |||
"movq (%1)," #out0 "\n"\ | |||
"movq 8(%0),%%mm3\n"\ | |||
"movq 8(%1)," #out1 "\n"\ | |||
"addl %3,%0\n"\ | |||
"addl %3,%1\n"\ | |||
"psubb " #out0 ", %%mm2\n"\ | |||
"psubb " #out1 ", %%mm3\n"\ | |||
"pxor %%mm7, %%mm2\n"\ | |||
"pxor %%mm7, %%mm3\n"\ | |||
"movq %%mm2, " #out0 "\n"\ | |||
"movq %%mm3, " #out1 "\n"\ | |||
"psubusb " #in0 ", %%mm2\n"\ | |||
"psubusb " #in1 ", %%mm3\n"\ | |||
"psubusb " #out0 ", " #in0 "\n"\ | |||
"psubusb " #out1 ", " #in1 "\n"\ | |||
"por %%mm2, " #in0 "\n"\ | |||
"por %%mm3, " #in1 "\n"\ | |||
"movq " #in0 ", %%mm2\n"\ | |||
"movq " #in1 ", %%mm3\n"\ | |||
"punpcklbw %%mm7, " #in0 "\n"\ | |||
"punpcklbw %%mm7, " #in1 "\n"\ | |||
"punpckhbw %%mm7, %%mm2\n"\ | |||
"punpckhbw %%mm7, %%mm3\n"\ | |||
"paddw " #in1 ", " #in0 "\n"\ | |||
"paddw %%mm3, %%mm2\n"\ | |||
"paddw %%mm2, " #in0 "\n"\ | |||
"paddw " #in0 ", %%mm6\n" | |||
/* operands: %0 = pix1 (updated), %1 = pix2 (updated), %2 = tmp, %3 = line_size, %4 = h */
asm volatile ( | |||
/* ecx = row counter, mm6 = accumulator */
"movl %4,%%ecx\n" | |||
"pxor %%mm6,%%mm6\n" | |||
/* mm7 = 0x80 in every byte: all-ones words shifted to 0x8000, then packed with signed saturation */
"pcmpeqw %%mm7,%%mm7\n" | |||
"psllw $15, %%mm7\n" | |||
"packsswb %%mm7, %%mm7\n" | |||
/* mm0/mm1 = (pix1 - pix2) ^ 0x80 for the first row */
"movq (%0),%%mm0\n" | |||
"movq (%1),%%mm2\n" | |||
"movq 8(%0),%%mm1\n" | |||
"movq 8(%1),%%mm3\n" | |||
"addl %3,%0\n" | |||
"addl %3,%1\n" | |||
"subl $2, %%ecx\n" | |||
"psubb %%mm2, %%mm0\n" | |||
"psubb %%mm3, %%mm1\n" | |||
"pxor %%mm7, %%mm0\n" | |||
"pxor %%mm7, %%mm1\n" | |||
/* first row pair, then two row pairs per loop iteration with the register roles swapped */
SUM(%%mm0, %%mm1, %%mm4, %%mm5) | |||
"1:\n" | |||
SUM(%%mm4, %%mm5, %%mm0, %%mm1) | |||
SUM(%%mm0, %%mm1, %%mm4, %%mm5) | |||
"subl $2, %%ecx\n" | |||
"jnz 1b\n" | |||
/* fold the four 16-bit lanes of mm6 into the low word */
"movq %%mm6,%%mm0\n" | |||
"psrlq $32, %%mm6\n" | |||
"paddw %%mm6,%%mm0\n" | |||
"movq %%mm0,%%mm6\n" | |||
"psrlq $16, %%mm0\n" | |||
"paddw %%mm6,%%mm0\n" | |||
"movd %%mm0,%2\n" | |||
: "+r" (pix1), "+r" (pix2), "=r"(tmp) | |||
: "r" (line_size) , "m" (h) | |||
: "%ecx"); | |||
return tmp & 0x7FFF; | |||
} | |||
#undef SUM | |||
/**
 * MMX2 version of the inter vertical SAD for a 16-pixel-wide block; same
 * traversal as vsad16_mmx, but the row-to-row absolute differences are
 * summed with psadbw.
 * - v is not referenced.
 * - pix1, pix2 and line_size are asserted to be multiples of 8.
 * - The per-pixel difference is formed with psubb (byte subtraction, wraps
 *   modulo 256) and XORed with 0x80, so it is not computed at full precision
 *   as in vsad16_c.
 *   NOTE(review): dsputil_init_mmx appears to install this only in its
 *   non-BITEXACT path - confirm.
 * - The row counter is decremented in steps of 2 and only tested for zero,
 *   so h must be even and at least 4 for the loop to stop after h-1 row pairs.
 * - The accumulated value is returned without masking.
 */
static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { | |||
int tmp; | |||
assert( (((int)pix1) & 7) == 0); | |||
assert( (((int)pix2) & 7) == 0); | |||
assert((line_size &7) ==0); | |||
/* SUM: load the next row of pix1 (out0/out1) and pix2 (mm2/mm3), advance both
 * pointers by line_size and form (pix1 - pix2) ^ mm7 per byte in out0/out1;
 * psadbw then replaces in0/in1 (the previous row's value) with the sum of
 * absolute byte differences against it, and both halves are added into mm6.
 * out0/out1 hold the value for the next invocation. */
#define SUM(in0, in1, out0, out1) \ | |||
"movq (%0)," #out0 "\n"\ | |||
"movq (%1),%%mm2\n"\ | |||
"movq 8(%0)," #out1 "\n"\ | |||
"movq 8(%1),%%mm3\n"\ | |||
"addl %3,%0\n"\ | |||
"addl %3,%1\n"\ | |||
"psubb %%mm2, " #out0 "\n"\ | |||
"psubb %%mm3, " #out1 "\n"\ | |||
"pxor %%mm7, " #out0 "\n"\ | |||
"pxor %%mm7, " #out1 "\n"\ | |||
"psadbw " #out0 ", " #in0 "\n"\ | |||
"psadbw " #out1 ", " #in1 "\n"\ | |||
"paddw " #in1 ", " #in0 "\n"\ | |||
"paddw " #in0 ", %%mm6\n" | |||
/* operands: %0 = pix1 (updated), %1 = pix2 (updated), %2 = tmp, %3 = line_size, %4 = h */
asm volatile ( | |||
/* ecx = row counter, mm6 = accumulator */
"movl %4,%%ecx\n" | |||
"pxor %%mm6,%%mm6\n" | |||
/* mm7 = 0x80 in every byte: all-ones words shifted to 0x8000, then packed with signed saturation */
"pcmpeqw %%mm7,%%mm7\n" | |||
"psllw $15, %%mm7\n" | |||
"packsswb %%mm7, %%mm7\n" | |||
/* mm0/mm1 = (pix1 - pix2) ^ 0x80 for the first row */
"movq (%0),%%mm0\n" | |||
"movq (%1),%%mm2\n" | |||
"movq 8(%0),%%mm1\n" | |||
"movq 8(%1),%%mm3\n" | |||
"addl %3,%0\n" | |||
"addl %3,%1\n" | |||
"subl $2, %%ecx\n" | |||
"psubb %%mm2, %%mm0\n" | |||
"psubb %%mm3, %%mm1\n" | |||
"pxor %%mm7, %%mm0\n" | |||
"pxor %%mm7, %%mm1\n" | |||
/* first row pair, then two row pairs per loop iteration with the register roles swapped */
SUM(%%mm0, %%mm1, %%mm4, %%mm5) | |||
"1:\n" | |||
SUM(%%mm4, %%mm5, %%mm0, %%mm1) | |||
SUM(%%mm0, %%mm1, %%mm4, %%mm5) | |||
"subl $2, %%ecx\n" | |||
"jnz 1b\n" | |||
"movd %%mm6,%2\n" | |||
: "+r" (pix1), "+r" (pix2), "=r"(tmp) | |||
: "r" (line_size) , "m" (h) | |||
: "%ecx"); | |||
return tmp; | |||
} | |||
#undef SUM | |||
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | |||
int i=0; | |||
asm volatile( | |||
@@ -1874,6 +2117,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||
c->pix_norm1 = pix_norm1_mmx; | |||
c->sse[0] = sse16_mmx; | |||
c->vsad[4]= vsad_intra16_mmx; | |||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |||
c->vsad[0] = vsad16_mmx; | |||
} | |||
#endif //CONFIG_ENCODERS | |||
c->h263_v_loop_filter= h263_v_loop_filter_mmx; | |||
@@ -1897,6 +2145,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||
#ifdef CONFIG_ENCODERS | |||
c->hadamard8_diff[0]= hadamard8_diff16_mmx2; | |||
c->hadamard8_diff[1]= hadamard8_diff_mmx2; | |||
c->vsad[4]= vsad_intra16_mmx2; | |||
#endif //CONFIG_ENCODERS | |||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |||
@@ -1906,6 +2155,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; | |||
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | |||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | |||
c->vsad[0] = vsad16_mmx2; | |||
} | |||
#if 1 | |||
@@ -277,49 +277,6 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma | |||
#undef INIT | |||
#undef CMP__DIRECT | |||
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ | |||
return 0; | |||
} | |||
static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){ | |||
DSPContext* c= &s->dsp; | |||
int i; | |||
memset(cmp, 0, sizeof(void*)*5); | |||
for(i=0; i<4; i++){ | |||
switch(type&0xFF){ | |||
case FF_CMP_SAD: | |||
cmp[i]= c->sad[i]; | |||
break; | |||
case FF_CMP_SATD: | |||
cmp[i]= c->hadamard8_diff[i]; | |||
break; | |||
case FF_CMP_SSE: | |||
cmp[i]= c->sse[i]; | |||
break; | |||
case FF_CMP_DCT: | |||
cmp[i]= c->dct_sad[i]; | |||
break; | |||
case FF_CMP_PSNR: | |||
cmp[i]= c->quant_psnr[i]; | |||
break; | |||
case FF_CMP_BIT: | |||
cmp[i]= c->bit[i]; | |||
break; | |||
case FF_CMP_RD: | |||
cmp[i]= c->rd[i]; | |||
break; | |||
case FF_CMP_ZERO: | |||
cmp[i]= zero_cmp; | |||
break; | |||
default: | |||
av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n"); | |||
} | |||
} | |||
} | |||
static inline int get_penalty_factor(MpegEncContext *s, int type){ | |||
switch(type&0xFF){ | |||
default: | |||
@@ -340,10 +297,10 @@ static inline int get_penalty_factor(MpegEncContext *s, int type){ | |||
} | |||
void ff_init_me(MpegEncContext *s){ | |||
set_cmp(s, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp); | |||
set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp); | |||
set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); | |||
set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp); | |||
ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp); | |||
ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); | |||
ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); | |||
ff_set_cmp(&s->dsp, s->dsp.mb_cmp, s->avctx->mb_cmp); | |||
if(s->flags&CODEC_FLAG_QPEL){ | |||
if(s->avctx->me_sub_cmp&FF_CMP_CHROMA) | |||
@@ -1783,6 +1740,10 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, | |||
} | |||
//FIXME something smarter | |||
if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB | |||
#if 0 | |||
if(s->out_format == FMT_MPEG1) | |||
type |= CANDIDATE_MB_TYPE_INTRA; | |||
#endif | |||
} | |||
s->mb_type[mb_y*s->mb_stride + mb_x]= type; | |||
@@ -973,6 +973,8 @@ int MPV_encode_init(AVCodecContext *avctx) | |||
s->progressive_frame= | |||
s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME)); | |||
ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp); | |||
ff_init_me(s); | |||
#ifdef CONFIG_ENCODERS | |||
@@ -3168,71 +3170,6 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index | |||
av_log(s->avctx, AV_LOG_INFO, "warning, cliping %d dct coefficents to %d..%d\n", overflow, minlevel, maxlevel); | |||
} | |||
#if 0 | |||
static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize | |||
int score=0; | |||
int x,y; | |||
for(y=0; y<7; y++){ | |||
for(x=0; x<16; x+=4){ | |||
score+= ABS(s[x ] - s[x +stride]) + ABS(s[x+1] - s[x+1+stride]) | |||
+ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]); | |||
} | |||
s+= stride; | |||
} | |||
return score; | |||
} | |||
static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize | |||
int score=0; | |||
int x,y; | |||
for(y=0; y<7; y++){ | |||
for(x=0; x<16; x++){ | |||
score+= ABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); | |||
} | |||
s1+= stride; | |||
s2+= stride; | |||
} | |||
return score; | |||
} | |||
#else | |||
#define SQ(a) ((a)*(a)) | |||
static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize | |||
int score=0; | |||
int x,y; | |||
for(y=0; y<7; y++){ | |||
for(x=0; x<16; x+=4){ | |||
score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) | |||
+SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); | |||
} | |||
s+= stride; | |||
} | |||
return score; | |||
} | |||
static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize | |||
int score=0; | |||
int x,y; | |||
for(y=0; y<7; y++){ | |||
for(x=0; x<16; x++){ | |||
score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); | |||
} | |||
s1+= stride; | |||
s2+= stride; | |||
} | |||
return score; | |||
} | |||
#endif | |||
#endif //CONFIG_ENCODERS | |||
/** | |||
@@ -3352,16 +3289,20 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
if(s->flags&CODEC_FLAG_INTERLACED_DCT){ | |||
int progressive_score, interlaced_score; | |||
progressive_score= pix_vcmp16x8(ptr, wrap_y ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y ); | |||
interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y , wrap_y*2); | |||
if(progressive_score > interlaced_score + 100){ | |||
s->interlaced_dct=1; | |||
s->interlaced_dct=0; | |||
progressive_score= s->dsp.ildct_cmp[4](s, ptr , NULL, wrap_y, 8) | |||
+s->dsp.ildct_cmp[4](s, ptr + wrap_y*8, NULL, wrap_y, 8) - 400; | |||
if(progressive_score > 0){ | |||
interlaced_score = s->dsp.ildct_cmp[4](s, ptr , NULL, wrap_y*2, 8) | |||
+s->dsp.ildct_cmp[4](s, ptr + wrap_y , NULL, wrap_y*2, 8); | |||
if(progressive_score > interlaced_score){ | |||
s->interlaced_dct=1; | |||
dct_offset= wrap_y; | |||
wrap_y<<=1; | |||
}else | |||
s->interlaced_dct=0; | |||
dct_offset= wrap_y; | |||
wrap_y<<=1; | |||
} | |||
} | |||
} | |||
s->dsp.get_pixels(s->block[0], ptr , wrap_y); | |||
@@ -3430,19 +3371,24 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
if(s->flags&CODEC_FLAG_INTERLACED_DCT){ | |||
int progressive_score, interlaced_score; | |||
s->interlaced_dct=0; | |||
progressive_score= s->dsp.ildct_cmp[0](s, dest_y , ptr_y , wrap_y, 8) | |||
+s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400; | |||
progressive_score= pix_diff_vcmp16x8(ptr_y , dest_y , wrap_y ) | |||
+ pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y ); | |||
interlaced_score = pix_diff_vcmp16x8(ptr_y , dest_y , wrap_y*2) | |||
+ pix_diff_vcmp16x8(ptr_y + wrap_y , dest_y + wrap_y , wrap_y*2); | |||
if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400; | |||
if(progressive_score>0){ | |||
interlaced_score = s->dsp.ildct_cmp[0](s, dest_y , ptr_y , wrap_y*2, 8) | |||
+s->dsp.ildct_cmp[0](s, dest_y + wrap_y , ptr_y + wrap_y , wrap_y*2, 8); | |||
if(progressive_score > interlaced_score + 600){ | |||
s->interlaced_dct=1; | |||
if(progressive_score > interlaced_score){ | |||
s->interlaced_dct=1; | |||
dct_offset= wrap_y; | |||
wrap_y<<=1; | |||
}else | |||
s->interlaced_dct=0; | |||
dct_offset= wrap_y; | |||
wrap_y<<=1; | |||
} | |||
} | |||
} | |||
s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); | |||
@@ -5,9 +5,9 @@ stddev: 7.63 PSNR:30.47 bytes:7602176 | |||
b588110bebb48b5a1815ac26d0f0c9cc *./data/a-mpeg2.mpg | |||
ddfa5c618dab54df0f47976ddd55d90f *./data/out.yuv | |||
stddev: 7.65 PSNR:30.44 bytes:7602176 | |||
826f088b9b3d051642f51e05860c9738 *./data/a-mpeg2i.mpg | |||
af80cb3a57800a0870273f62697ba29f *./data/out.yuv | |||
stddev: 7.93 PSNR:30.13 bytes:7602176 | |||
13336cffcba456ff4a7607b2a7e57b33 *./data/a-mpeg2i.mpg | |||
4c9701eb83ed81dd9a328af83d7d7c8a *./data/out.yuv | |||
stddev: 7.66 PSNR:30.43 bytes:7602176 | |||
d0dc46dd831398237a690ebbeff18b64 *./data/a-msmpeg4v2.avi | |||
712aa6c959d1d90a78fe98657cbff19c *./data/out.yuv | |||
stddev: 8.11 PSNR:29.94 bytes:7602176 | |||
@@ -5,9 +5,9 @@ stddev: 4.93 PSNR:34.25 bytes:7602176 | |||
aa0f088777131d8ffb627e6ff37312ca *./data/a-mpeg2.mpg | |||
830e7d798089ea6213e0867fd7676fde *./data/out.yuv | |||
stddev: 4.95 PSNR:34.22 bytes:7602176 | |||
aff7511e16a07314cac0489d3dbc4477 *./data/a-mpeg2i.mpg | |||
6199bac131333a8dba043e69b2071dd0 *./data/out.yuv | |||
stddev: 4.97 PSNR:34.19 bytes:7602176 | |||
6da01fd0d910fbfcdc5b212ef3dd65cb *./data/a-mpeg2i.mpg | |||
1e21fd7ed53abf352f9ea8548afa80a3 *./data/out.yuv | |||
stddev: 4.96 PSNR:34.20 bytes:7602176 | |||
14db391f167b52b21a983157b410affc *./data/a-msmpeg4v2.avi | |||
fc8881e0904af9491d5fa0163183954b *./data/out.yuv | |||
stddev: 5.29 PSNR:33.64 bytes:7602176 | |||