no malloc(64) memcpy free stuff; no filter1 A->B then filter2 A->B (yes not B->A); no incorrect rounding after the 1d filter. Originally committed as revision 3177 to svn://svn.ffmpeg.org/ffmpeg/trunk tags/v0.5
@@ -2360,32 +2360,29 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ | |||||
} | } | ||||
} | } | ||||
static void h261_v_loop_filter_c(uint8_t *dest,uint8_t *src, int stride){ | |||||
int i,j,xy,yz; | |||||
int res; | |||||
for(i=0; i<8; i++){ | |||||
for(j=1; j<7; j++){ | |||||
xy = j * stride + i; | |||||
yz = j * 8 + i; | |||||
res = (int)src[yz-1*8] + ((int)(src[yz+0*8]) * 2) + (int)src[yz+1*8]; | |||||
res +=2; | |||||
res >>=2; | |||||
dest[xy] = (uint8_t)res; | |||||
static void h261_loop_filter_c(uint8_t *src, int stride){ | |||||
int x,y,xy,yz; | |||||
int temp[64]; | |||||
for(x=0; x<8; x++){ | |||||
temp[x ] = 4*src[x ]; | |||||
temp[x + 7*8] = 4*src[x + 7*stride]; | |||||
} | |||||
for(y=1; y<7; y++){ | |||||
for(x=0; x<8; x++){ | |||||
xy = y * stride + x; | |||||
yz = y * 8 + x; | |||||
temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride]; | |||||
} | } | ||||
} | } | ||||
} | |||||
static void h261_h_loop_filter_c(uint8_t *dest,uint8_t *src, int stride){ | |||||
int i,j,xy,yz; | |||||
int res; | |||||
for(i=1; i<7; i++){ | |||||
for(j=0; j<8; j++){ | |||||
xy = j * stride + i; | |||||
yz = j * 8 + i; | |||||
res = (int)src[yz-1] + ((int)(src[yz]) *2) + (int)src[yz+1]; | |||||
res+=2; | |||||
res>>=2; | |||||
dest[xy] = (uint8_t)res; | |||||
for(y=0; y<8; y++){ | |||||
src[ y*stride] = (temp[ y*8] + 2)>>2; | |||||
src[7+y*stride] = (temp[7+y*8] + 2)>>2; | |||||
for(x=1; x<7; x++){ | |||||
xy = y * stride + x; | |||||
yz = y * 8 + x; | |||||
src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4; | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -3325,8 +3322,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
c->h263_h_loop_filter= h263_h_loop_filter_c; | c->h263_h_loop_filter= h263_h_loop_filter_c; | ||||
c->h263_v_loop_filter= h263_v_loop_filter_c; | c->h263_v_loop_filter= h263_v_loop_filter_c; | ||||
c->h261_h_loop_filter= h261_h_loop_filter_c; | |||||
c->h261_v_loop_filter= h261_v_loop_filter_c; | |||||
c->h261_loop_filter= h261_loop_filter_c; | |||||
c->try_8x8basis= try_8x8basis_c; | c->try_8x8basis= try_8x8basis_c; | ||||
c->add_8x8basis= add_8x8basis_c; | c->add_8x8basis= add_8x8basis_c; | ||||
@@ -261,8 +261,7 @@ typedef struct DSPContext { | |||||
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); | void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); | ||||
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | ||||
void (*h261_v_loop_filter)(uint8_t *dest,uint8_t *src, int stride); | |||||
void (*h261_h_loop_filter)(uint8_t *dest,uint8_t *src, int stride); | |||||
void (*h261_loop_filter)(uint8_t *src, int stride); | |||||
/* (I)DCT */ | /* (I)DCT */ | ||||
void (*fdct)(DCTELEM *block/* align 16*/); | void (*fdct)(DCTELEM *block/* align 16*/); | ||||
@@ -64,44 +64,13 @@ void ff_h261_loop_filter(H261Context * h){ | |||||
uint8_t *dest_y = s->dest[0]; | uint8_t *dest_y = s->dest[0]; | ||||
uint8_t *dest_cb= s->dest[1]; | uint8_t *dest_cb= s->dest[1]; | ||||
uint8_t *dest_cr= s->dest[2]; | uint8_t *dest_cr= s->dest[2]; | ||||
uint8_t *src; | |||||
CHECKED_ALLOCZ((src),sizeof(uint8_t) * 64 ); | |||||
for(i=0; i<8;i++) | |||||
memcpy(src+i*8,dest_y+i*linesize,sizeof(uint8_t) * 8 ); | |||||
s->dsp.h261_v_loop_filter(dest_y, src, linesize); | |||||
s->dsp.h261_h_loop_filter(dest_y, src, linesize); | |||||
for(i=0; i<8;i++) | |||||
memcpy(src+i*8,dest_y+i*linesize + 8,sizeof(uint8_t) * 8 ); | |||||
s->dsp.h261_v_loop_filter(dest_y + 8, src, linesize); | |||||
s->dsp.h261_h_loop_filter(dest_y + 8, src, linesize); | |||||
for(i=0; i<8;i++) | |||||
memcpy(src+i*8,dest_y+(i+8)*linesize,sizeof(uint8_t) * 8 ); | |||||
s->dsp.h261_v_loop_filter(dest_y + 8 * linesize, src, linesize); | |||||
s->dsp.h261_h_loop_filter(dest_y + 8 * linesize, src, linesize); | |||||
for(i=0; i<8;i++) | |||||
memcpy(src+i*8,dest_y+(i+8)*linesize + 8,sizeof(uint8_t) * 8 ); | |||||
s->dsp.h261_v_loop_filter(dest_y + 8 * linesize + 8, src, linesize); | |||||
s->dsp.h261_h_loop_filter(dest_y + 8 * linesize + 8, src, linesize); | |||||
for(i=0; i<8;i++) | |||||
memcpy(src+i*8,dest_cb+i*uvlinesize,sizeof(uint8_t) * 8 ); | |||||
s->dsp.h261_v_loop_filter(dest_cb, src, uvlinesize); | |||||
s->dsp.h261_h_loop_filter(dest_cb, src, uvlinesize); | |||||
for(i=0; i<8;i++) | |||||
memcpy(src+i*8,dest_cr+i*uvlinesize,sizeof(uint8_t) * 8 ); | |||||
s->dsp.h261_v_loop_filter(dest_cr, src, uvlinesize); | |||||
s->dsp.h261_h_loop_filter(dest_cr, src, uvlinesize); | |||||
fail: | |||||
av_free(src); | |||||
return; | |||||
s->dsp.h261_loop_filter(dest_y , linesize); | |||||
s->dsp.h261_loop_filter(dest_y + 8, linesize); | |||||
s->dsp.h261_loop_filter(dest_y + 8 * linesize , linesize); | |||||
s->dsp.h261_loop_filter(dest_y + 8 * linesize + 8, linesize); | |||||
s->dsp.h261_loop_filter(dest_cb, uvlinesize); | |||||
s->dsp.h261_loop_filter(dest_cr, uvlinesize); | |||||
} | } | ||||
static int h261_decode_block(H261Context *h, DCTELEM *block, | static int h261_decode_block(H261Context *h, DCTELEM *block, | ||||