Originally committed as revision 16045 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
@@ -3420,6 +3420,11 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ | |||||
} | } | ||||
} | } | ||||
/**
 * Zero one block of 64 DCT coefficients.
 *
 * Plain C fallback; equivalent to memset(block, 0, sizeof(DCTELEM)*64).
 *
 * @param block first of the 64 DCTELEM values to clear
 */
static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, 64 * sizeof(*block));
}
/** | /** | ||||
* memset(blocks, 0, sizeof(DCTELEM)*6*64) | * memset(blocks, 0, sizeof(DCTELEM)*6*64) | ||||
*/ | */ | ||||
@@ -4288,6 +4293,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
c->sum_abs_dctelem = sum_abs_dctelem_c; | c->sum_abs_dctelem = sum_abs_dctelem_c; | ||||
c->gmc1 = gmc1_c; | c->gmc1 = gmc1_c; | ||||
c->gmc = ff_gmc_c; | c->gmc = ff_gmc_c; | ||||
c->clear_block = clear_block_c; | |||||
c->clear_blocks = clear_blocks_c; | c->clear_blocks = clear_blocks_c; | ||||
c->pix_sum = pix_sum_c; | c->pix_sum = pix_sum_c; | ||||
c->pix_norm1 = pix_norm1_c; | c->pix_norm1 = pix_norm1_c; | ||||
@@ -203,6 +203,7 @@ typedef struct DSPContext { | |||||
*/ | */ | ||||
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, | void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, | ||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | ||||
void (*clear_block)(DCTELEM *block/*align 16*/); | |||||
void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | ||||
int (*pix_sum)(uint8_t * pix, int line_size); | int (*pix_sum)(uint8_t * pix, int line_size); | ||||
int (*pix_norm1)(uint8_t * pix, int line_size); | int (*pix_norm1)(uint8_t * pix, int line_size); | ||||
@@ -810,7 +810,7 @@ static inline int get_p_cbp(MpegEncContext * s, | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ | if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ | ||||
s->block_last_index[i]= -1; | s->block_last_index[i]= -1; | ||||
memset(s->block[i], 0, sizeof(DCTELEM)*64); | |||||
s->dsp.clear_block(s->block[i]); | |||||
} | } | ||||
} | } | ||||
}else{ | }else{ | ||||
@@ -853,7 +853,7 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64], | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ | if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ | ||||
s->block_last_index[i]= -1; | s->block_last_index[i]= -1; | ||||
memset(s->block[i], 0, sizeof(DCTELEM)*64); | |||||
s->dsp.clear_block(s->block[i]); | |||||
} | } | ||||
} | } | ||||
}else{ | }else{ | ||||
@@ -4651,7 +4651,7 @@ retry: | |||||
rl = &rl_intra_aic; | rl = &rl_intra_aic; | ||||
i = 0; | i = 0; | ||||
s->gb= gb; | s->gb= gb; | ||||
memset(block, 0, sizeof(DCTELEM)*64); | |||||
s->dsp.clear_block(block); | |||||
goto retry; | goto retry; | ||||
} | } | ||||
av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); | av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); | ||||
@@ -464,21 +464,42 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si | |||||
); | ); | ||||
} | } | ||||
static void clear_blocks_mmx(DCTELEM *blocks) | |||||
/**
 * Define a static function `name(DCTELEM *blocks)` that zeroes n*128 bytes
 * starting at `blocks` using 8-byte MMX stores.
 *
 * The pointer operand is biased to the end of the region (blocks + 128*n
 * bytes) and REG_a runs from -128*n up to 0 in steps of 32, so the sign
 * flag left by the `add` is enough to terminate the loop (`js`).
 *
 * The "memory" clobber is required because the asm writes through `blocks`
 * without naming that memory as an output operand; without it the compiler
 * is free to keep values of the cleared region cached in registers or to
 * move loads/stores across the statement.
 *
 * NOTE(review): mm7 is modified but not listed as clobbered.
 */
#define CLEAR_BLOCKS(name,n) \
static void name(DCTELEM *blocks)\
{\
    __asm__ volatile(\
        "pxor %%mm7, %%mm7              \n\t"\
        "mov     %1, %%"REG_a"          \n\t"\
        "1:                             \n\t"\
        "movq %%mm7,   (%0, %%"REG_a")  \n\t"\
        "movq %%mm7,  8(%0, %%"REG_a")  \n\t"\
        "movq %%mm7, 16(%0, %%"REG_a")  \n\t"\
        "movq %%mm7, 24(%0, %%"REG_a")  \n\t"\
        "add    $32, %%"REG_a"          \n\t"\
        " js     1b                     \n\t"\
        : : "r" (((uint8_t *)blocks)+128*n),\
            "i" (-128*n)\
        : "%"REG_a, "memory"\
    );\
}
CLEAR_BLOCKS(clear_blocks_mmx, 6)
CLEAR_BLOCKS(clear_block_mmx, 1)
/*
 * clear_block_sse: zero 128 bytes starting at block.
 *
 * The xorps/movaps lines clear xmm0 and store it at byte offsets 0, 16, ...,
 * 112 from block; the asm declares a "memory" clobber. movaps faults on
 * operands that are not 16-byte aligned, so block must be 16-byte aligned.
 *
 * NOTE(review): this span comes from a side-by-side diff. The pxor/movq loop
 * lines that use REG_a and `blocks` look like the removed body of the old
 * clear_blocks_mmx rather than part of this function -- confirm against the
 * committed file.
 * NOTE(review): xmm0 is modified but not listed as clobbered.
 */
static void clear_block_sse(DCTELEM *block) | |||||
{ | { | ||||
__asm__ volatile( | __asm__ volatile( | ||||
"pxor %%mm7, %%mm7 \n\t" | |||||
"mov $-128*6, %%"REG_a" \n\t" | |||||
"1: \n\t" | |||||
"movq %%mm7, (%0, %%"REG_a") \n\t" | |||||
"movq %%mm7, 8(%0, %%"REG_a") \n\t" | |||||
"movq %%mm7, 16(%0, %%"REG_a") \n\t" | |||||
"movq %%mm7, 24(%0, %%"REG_a") \n\t" | |||||
"add $32, %%"REG_a" \n\t" | |||||
" js 1b \n\t" | |||||
: : "r" (((uint8_t *)blocks)+128*6) | |||||
: "%"REG_a | |||||
); | |||||
"xorps %%xmm0, %%xmm0 \n" | |||||
"movaps %%xmm0, (%0) \n" | |||||
"movaps %%xmm0, 16(%0) \n" | |||||
"movaps %%xmm0, 32(%0) \n" | |||||
"movaps %%xmm0, 48(%0) \n" | |||||
"movaps %%xmm0, 64(%0) \n" | |||||
"movaps %%xmm0, 80(%0) \n" | |||||
"movaps %%xmm0, 96(%0) \n" | |||||
"movaps %%xmm0, 112(%0) \n" | |||||
:: "r"(block) | |||||
: "memory" | |||||
); | |||||
} | } | ||||
static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ | static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ | ||||
@@ -2569,7 +2590,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||||
c->put_pixels_clamped = put_pixels_clamped_mmx; | c->put_pixels_clamped = put_pixels_clamped_mmx; | ||||
c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; | c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; | ||||
c->add_pixels_clamped = add_pixels_clamped_mmx; | c->add_pixels_clamped = add_pixels_clamped_mmx; | ||||
c->clear_block = clear_block_mmx; | |||||
c->clear_blocks = clear_blocks_mmx; | c->clear_blocks = clear_blocks_mmx; | ||||
if (mm_flags & FF_MM_SSE) | |||||
c->clear_block = clear_block_sse; | |||||
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ | #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ | ||||
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ | c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ | ||||
@@ -511,7 +511,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){ | |||||
int sign; | int sign; | ||||
assert(w->orient<12); | assert(w->orient<12); | ||||
memset(s->block[0],0x00,64*sizeof(DCTELEM)); | |||||
s->dsp.clear_block(s->block[0]); | |||||
if(chroma){ | if(chroma){ | ||||
dc_mode=2; | dc_mode=2; | ||||
@@ -163,7 +163,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale) | |||||
DCTELEM *block = ctx->dct_block; | DCTELEM *block = ctx->dct_block; | ||||
unsigned int pos; | unsigned int pos; | ||||
memset(block, 0, 64 * sizeof(DCTELEM)); | |||||
ctx->dsp.clear_block(block); | |||||
block[0] = get_bits(&ctx->gb, 8) << 3; | block[0] = get_bits(&ctx->gb, 8) << 3; | ||||
@@ -444,7 +444,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, DCTELEM *block, int comp | |||||
int dc_index, int16_t *quant_matrix, int Al) | int dc_index, int16_t *quant_matrix, int Al) | ||||
{ | { | ||||
int val; | int val; | ||||
memset(block, 0, 64*sizeof(DCTELEM)); | |||||
s->dsp.clear_block(block); | |||||
val = mjpeg_decode_dc(s, dc_index); | val = mjpeg_decode_dc(s, dc_index); | ||||
if (val == 0xffff) { | if (val == 0xffff) { | ||||
av_log(s->avctx, AV_LOG_ERROR, "error dc\n"); | av_log(s->avctx, AV_LOG_ERROR, "error dc\n"); | ||||
@@ -800,7 +800,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, i | |||||
if(s->interlaced && s->bottom_field) | if(s->interlaced && s->bottom_field) | ||||
ptr += linesize[c] >> 1; | ptr += linesize[c] >> 1; | ||||
if(!s->progressive) { | if(!s->progressive) { | ||||
memset(s->block, 0, sizeof(s->block)); | |||||
s->dsp.clear_block(s->block); | |||||
if(decode_block(s, s->block, i, | if(decode_block(s, s->block, i, | ||||
s->dc_index[i], s->ac_index[i], | s->dc_index[i], s->ac_index[i], | ||||
s->quant_matrixes[ s->quant_index[c] ]) < 0) { | s->quant_matrixes[ s->quant_index[c] ]) < 0) { | ||||
@@ -1402,14 +1402,14 @@ static void render_slice(Vp3DecodeContext *s, int slice) | |||||
/* dequantize the DCT coefficients */ | /* dequantize the DCT coefficients */ | ||||
if(s->avctx->idct_algo==FF_IDCT_VP3){ | if(s->avctx->idct_algo==FF_IDCT_VP3){ | ||||
Coeff *coeff= s->coeffs + i; | Coeff *coeff= s->coeffs + i; | ||||
memset(block, 0, sizeof(block)); | |||||
s->dsp.clear_block(block); | |||||
while(coeff->next){ | while(coeff->next){ | ||||
block[coeff->index]= coeff->coeff * dequantizer[coeff->index]; | block[coeff->index]= coeff->coeff * dequantizer[coeff->index]; | ||||
coeff= coeff->next; | coeff= coeff->next; | ||||
} | } | ||||
}else{ | }else{ | ||||
Coeff *coeff= s->coeffs + i; | Coeff *coeff= s->coeffs + i; | ||||
memset(block, 0, sizeof(block)); | |||||
s->dsp.clear_block(block); | |||||
while(coeff->next){ | while(coeff->next){ | ||||
block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2; | block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2; | ||||
coeff= coeff->next; | coeff= coeff->next; | ||||
@@ -405,7 +405,7 @@ static void vp56_decode_mb(vp56_context_t *s, int row, int col, int is_alpha) | |||||
mb_type = vp56_decode_mv(s, row, col); | mb_type = vp56_decode_mv(s, row, col); | ||||
ref_frame = vp56_reference_frame[mb_type]; | ref_frame = vp56_reference_frame[mb_type]; | ||||
memset(s->block_coeff, 0, sizeof(s->block_coeff)); | |||||
s->dsp.clear_blocks(*s->block_coeff); | |||||
s->parse_coeff(s); | s->parse_coeff(s); | ||||
@@ -43,12 +43,12 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st | |||||
case 1: | case 1: | ||||
ff_simple_idct84_add(dst , stride, block1); | ff_simple_idct84_add(dst , stride, block1); | ||||
ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]); | ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]); | ||||
memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM)); | |||||
s->dsp.clear_block(w->abt_block2[n]); | |||||
break; | break; | ||||
case 2: | case 2: | ||||
ff_simple_idct48_add(dst , stride, block1); | ff_simple_idct48_add(dst , stride, block1); | ||||
ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]); | ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]); | ||||
memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM)); | |||||
s->dsp.clear_block(w->abt_block2[n]); | |||||
break; | break; | ||||
default: | default: | ||||
av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n"); | av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n"); | ||||