mpeg12 decoding optimization

Originally committed as revision 364 to svn://svn.ffmpeg.org/ffmpeg/trunk

tags/v0.5
| @@ -30,6 +30,7 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); | |||||
| void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | ||||
| void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | ||||
| void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); | void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); | ||||
| void (*clear_blocks)(DCTELEM *blocks); | |||||
| op_pixels_abs_func pix_abs16x16; | op_pixels_abs_func pix_abs16x16; | ||||
| op_pixels_abs_func pix_abs16x16_x2; | op_pixels_abs_func pix_abs16x16_x2; | ||||
| @@ -866,6 +867,11 @@ void block_permute(INT16 *block) | |||||
| } | } | ||||
| #endif | #endif | ||||
| void clear_blocks_c(DCTELEM *blocks) | |||||
| { | |||||
| memset(blocks, 0, sizeof(DCTELEM)*6*64); | |||||
| } | |||||
| void dsputil_init(void) | void dsputil_init(void) | ||||
| { | { | ||||
| int i, j; | int i, j; | ||||
| @@ -890,6 +896,7 @@ void dsputil_init(void) | |||||
| put_pixels_clamped = put_pixels_clamped_c; | put_pixels_clamped = put_pixels_clamped_c; | ||||
| add_pixels_clamped = add_pixels_clamped_c; | add_pixels_clamped = add_pixels_clamped_c; | ||||
| gmc1= gmc1_c; | gmc1= gmc1_c; | ||||
| clear_blocks= clear_blocks_c; | |||||
| pix_abs16x16 = pix_abs16x16_c; | pix_abs16x16 = pix_abs16x16_c; | ||||
| pix_abs16x16_x2 = pix_abs16x16_x2_c; | pix_abs16x16_x2 = pix_abs16x16_x2_c; | ||||
| @@ -40,11 +40,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); | |||||
| extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | ||||
| extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | ||||
| extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); | extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); | ||||
| extern void (*clear_blocks)(DCTELEM *blocks); | |||||
| void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); | void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); | ||||
| void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | ||||
| void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | ||||
| void clear_blocks_c(DCTELEM *blocks); | |||||
| /* add and put pixel (decoding) */ | /* add and put pixel (decoding) */ | ||||
| typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h); | typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h); | ||||
| @@ -156,6 +156,7 @@ static int h263_decode_frame(AVCodecContext *avctx, | |||||
| if (s->mb_y && !s->h263_pred) { | if (s->mb_y && !s->h263_pred) { | ||||
| s->first_gob_line = h263_decode_gob_header(s); | s->first_gob_line = h263_decode_gob_header(s); | ||||
| } | } | ||||
| s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1; | s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1; | ||||
| s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1); | s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1); | ||||
| s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1; | s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1; | ||||
| @@ -183,28 +184,8 @@ static int h263_decode_frame(AVCodecContext *avctx, | |||||
| s->y_dc_scale = 8; | s->y_dc_scale = 8; | ||||
| s->c_dc_scale = 8; | s->c_dc_scale = 8; | ||||
| } | } | ||||
| #ifdef HAVE_MMX | |||||
| if (mm_flags & MM_MMX) { | |||||
| asm volatile( | |||||
| "pxor %%mm7, %%mm7 \n\t" | |||||
| "movl $-128*6, %%eax \n\t" | |||||
| "1: \n\t" | |||||
| "movq %%mm7, (%0, %%eax) \n\t" | |||||
| "movq %%mm7, 8(%0, %%eax) \n\t" | |||||
| "movq %%mm7, 16(%0, %%eax) \n\t" | |||||
| "movq %%mm7, 24(%0, %%eax) \n\t" | |||||
| "addl $32, %%eax \n\t" | |||||
| " js 1b \n\t" | |||||
| : : "r" (((int)s->block)+128*6) | |||||
| : "%eax" | |||||
| ); | |||||
| }else{ | |||||
| memset(s->block, 0, sizeof(s->block)); | |||||
| } | |||||
| #else | |||||
| memset(s->block, 0, sizeof(s->block)); | |||||
| #endif | |||||
| clear_blocks(s->block[0]); | |||||
| s->mv_dir = MV_DIR_FORWARD; | s->mv_dir = MV_DIR_FORWARD; | ||||
| s->mv_type = MV_TYPE_16X16; | s->mv_type = MV_TYPE_16X16; | ||||
| if (s->h263_msmpeg4) { | if (s->h263_msmpeg4) { | ||||
| @@ -1025,6 +1025,23 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line | |||||
| } while(--h); | } while(--h); | ||||
| } | } | ||||
| static void clear_blocks_mmx(DCTELEM *blocks) | |||||
| { | |||||
| asm volatile( | |||||
| "pxor %%mm7, %%mm7 \n\t" | |||||
| "movl $-128*6, %%eax \n\t" | |||||
| "1: \n\t" | |||||
| "movq %%mm7, (%0, %%eax) \n\t" | |||||
| "movq %%mm7, 8(%0, %%eax) \n\t" | |||||
| "movq %%mm7, 16(%0, %%eax) \n\t" | |||||
| "movq %%mm7, 24(%0, %%eax) \n\t" | |||||
| "addl $32, %%eax \n\t" | |||||
| " js 1b \n\t" | |||||
| : : "r" (((int)blocks)+128*6) | |||||
| : "%eax" | |||||
| ); | |||||
| } | |||||
| static void just_return() { return; } | static void just_return() { return; } | ||||
| void dsputil_init_mmx(void) | void dsputil_init_mmx(void) | ||||
| @@ -1049,7 +1066,8 @@ void dsputil_init_mmx(void) | |||||
| get_pixels = get_pixels_mmx; | get_pixels = get_pixels_mmx; | ||||
| put_pixels_clamped = put_pixels_clamped_mmx; | put_pixels_clamped = put_pixels_clamped_mmx; | ||||
| add_pixels_clamped = add_pixels_clamped_mmx; | add_pixels_clamped = add_pixels_clamped_mmx; | ||||
| clear_blocks= clear_blocks_mmx; | |||||
| pix_abs16x16 = pix_abs16x16_mmx; | pix_abs16x16 = pix_abs16x16_mmx; | ||||
| pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | ||||
| pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | ||||
| @@ -1402,7 +1402,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, | |||||
| } | } | ||||
| for(;;) { | for(;;) { | ||||
| memset(s->block, 0, sizeof(s->block)); | |||||
| clear_blocks(s->block[0]); | |||||
| ret = mpeg_decode_mb(s, s->block); | ret = mpeg_decode_mb(s, s->block); | ||||
| dprintf("ret=%d\n", ret); | dprintf("ret=%d\n", ret); | ||||
| if (ret < 0) | if (ret < 0) | ||||