Originally committed as revision 1194 to svn://svn.ffmpeg.org/ffmpeg/trunktags/v0.5
| @@ -20,7 +20,7 @@ | |||
| */ | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| /* | |||
| void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); | |||
| void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); | |||
| void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | |||
| @@ -41,7 +41,7 @@ op_pixels_abs_func pix_abs8x8; | |||
| op_pixels_abs_func pix_abs8x8_x2; | |||
| op_pixels_abs_func pix_abs8x8_y2; | |||
| op_pixels_abs_func pix_abs8x8_xy2; | |||
| */ | |||
| int ff_bit_exact=0; | |||
| UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; | |||
| @@ -84,7 +84,7 @@ const UINT8 ff_alternate_vertical_scan[64] = { | |||
| }; | |||
| /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ | |||
| UINT32 inverse[256]={ | |||
| const UINT32 inverse[256]={ | |||
| 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, | |||
| 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | |||
| 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | |||
| @@ -119,7 +119,7 @@ UINT32 inverse[256]={ | |||
| 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | |||
| }; | |||
| int pix_sum_c(UINT8 * pix, int line_size) | |||
| static int pix_sum_c(UINT8 * pix, int line_size) | |||
| { | |||
| int s, i, j; | |||
| @@ -141,7 +141,7 @@ int pix_sum_c(UINT8 * pix, int line_size) | |||
| return s; | |||
| } | |||
| int pix_norm1_c(UINT8 * pix, int line_size) | |||
| static int pix_norm1_c(UINT8 * pix, int line_size) | |||
| { | |||
| int s, i, j; | |||
| UINT32 *sq = squareTbl + 256; | |||
| @@ -165,7 +165,7 @@ int pix_norm1_c(UINT8 * pix, int line_size) | |||
| } | |||
| void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) | |||
| static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) | |||
| { | |||
| int i; | |||
| @@ -184,8 +184,8 @@ void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) | |||
| } | |||
| } | |||
| void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, | |||
| int stride){ | |||
| static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, | |||
| const UINT8 *s2, int stride){ | |||
| int i; | |||
| /* read the pixels */ | |||
| @@ -205,8 +205,8 @@ void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, | |||
| } | |||
| void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, | |||
| int line_size) | |||
| static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, | |||
| int line_size) | |||
| { | |||
| int i; | |||
| UINT8 *cm = cropTbl + MAX_NEG_CROP; | |||
| @@ -227,7 +227,7 @@ void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, | |||
| } | |||
| } | |||
| void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, | |||
| static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, | |||
| int line_size) | |||
| { | |||
| int i; | |||
| @@ -1353,7 +1353,7 @@ QPEL_MC(0, avg_ , _ , op_avg) | |||
| #undef op_put | |||
| #undef op_put_no_rnd | |||
| int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| { | |||
| int s, i; | |||
| @@ -1381,7 +1381,7 @@ int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| return s; | |||
| } | |||
| int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| { | |||
| int s, i; | |||
| @@ -1409,7 +1409,7 @@ int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| return s; | |||
| } | |||
| int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| { | |||
| int s, i; | |||
| UINT8 *pix3 = pix2 + line_size; | |||
| @@ -1439,7 +1439,7 @@ int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| return s; | |||
| } | |||
| int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| { | |||
| int s, i; | |||
| UINT8 *pix3 = pix2 + line_size; | |||
| @@ -1469,7 +1469,7 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| return s; | |||
| } | |||
| int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| { | |||
| int s, i; | |||
| @@ -1489,7 +1489,7 @@ int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| return s; | |||
| } | |||
| int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| { | |||
| int s, i; | |||
| @@ -1509,7 +1509,7 @@ int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| return s; | |||
| } | |||
| int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| { | |||
| int s, i; | |||
| UINT8 *pix3 = pix2 + line_size; | |||
| @@ -1531,7 +1531,7 @@ int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| return s; | |||
| } | |||
| int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |||
| { | |||
| int s, i; | |||
| UINT8 *pix3 = pix2 + line_size; | |||
| @@ -1574,12 +1574,12 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, | |||
| } | |||
| } | |||
| void clear_blocks_c(DCTELEM *blocks) | |||
| static void clear_blocks_c(DCTELEM *blocks) /* plain C implementation; dsputil_init() stores it in c->clear_blocks */ | |||
| { | |||
| memset(blocks, 0, sizeof(DCTELEM)*6*64); /* zero 6*64 DCTELEM entries starting at blocks; the count is hard-coded, so the caller must supply at least that many */ | |||
| } | |||
| void dsputil_init(void) | |||
| void dsputil_init(DSPContext* c, unsigned mask) | |||
| { | |||
| int i; | |||
| @@ -1593,42 +1593,82 @@ void dsputil_init(void) | |||
| squareTbl[i] = (i - 256) * (i - 256); | |||
| } | |||
| get_pixels = get_pixels_c; | |||
| diff_pixels = diff_pixels_c; | |||
| put_pixels_clamped = put_pixels_clamped_c; | |||
| add_pixels_clamped = add_pixels_clamped_c; | |||
| ff_gmc1= gmc1_c; | |||
| ff_gmc= gmc_c; | |||
| clear_blocks= clear_blocks_c; | |||
| pix_sum= pix_sum_c; | |||
| pix_norm1= pix_norm1_c; | |||
| pix_abs16x16 = pix_abs16x16_c; | |||
| pix_abs16x16_x2 = pix_abs16x16_x2_c; | |||
| pix_abs16x16_y2 = pix_abs16x16_y2_c; | |||
| pix_abs16x16_xy2 = pix_abs16x16_xy2_c; | |||
| pix_abs8x8 = pix_abs8x8_c; | |||
| pix_abs8x8_x2 = pix_abs8x8_x2_c; | |||
| pix_abs8x8_y2 = pix_abs8x8_y2_c; | |||
| pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | |||
| c->get_pixels = get_pixels_c; | |||
| c->diff_pixels = diff_pixels_c; | |||
| c->put_pixels_clamped = put_pixels_clamped_c; | |||
| c->add_pixels_clamped = add_pixels_clamped_c; | |||
| c->gmc1 = gmc1_c; | |||
| c->gmc = gmc_c; | |||
| c->clear_blocks = clear_blocks_c; | |||
| c->pix_sum = pix_sum_c; | |||
| c->pix_norm1 = pix_norm1_c; | |||
| c->pix_abs16x16 = pix_abs16x16_c; | |||
| c->pix_abs16x16_x2 = pix_abs16x16_x2_c; | |||
| c->pix_abs16x16_y2 = pix_abs16x16_y2_c; | |||
| c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c; | |||
| c->pix_abs8x8 = pix_abs8x8_c; | |||
| c->pix_abs8x8_x2 = pix_abs8x8_x2_c; | |||
| c->pix_abs8x8_y2 = pix_abs8x8_y2_c; | |||
| c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | |||
| c->put_pixels_tab[0][0] = put_pixels16; | |||
| c->put_pixels_tab[0][1] = put_pixels16_x2; | |||
| c->put_pixels_tab[0][2] = put_pixels16_y2; | |||
| c->put_pixels_tab[0][3] = put_pixels16_xy2; | |||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16; | |||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2; | |||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2; | |||
| c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2; | |||
| c->avg_pixels_tab[0][0] = avg_pixels16; | |||
| c->avg_pixels_tab[0][1] = avg_pixels16_x2; | |||
| c->avg_pixels_tab[0][2] = avg_pixels16_y2; | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2; | |||
| c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16; | |||
| c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2; | |||
| c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2; | |||
| c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2; | |||
| c->put_pixels_tab[1][0] = put_pixels8; | |||
| c->put_pixels_tab[1][1] = put_pixels8_x2; | |||
| c->put_pixels_tab[1][2] = put_pixels8_y2; | |||
| c->put_pixels_tab[1][3] = put_pixels8_xy2; | |||
| c->put_no_rnd_pixels_tab[1][0] = put_pixels8; | |||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2; | |||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2; | |||
| c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2; | |||
| c->avg_pixels_tab[1][0] = avg_pixels8; | |||
| c->avg_pixels_tab[1][1] = avg_pixels8_x2; | |||
| c->avg_pixels_tab[1][2] = avg_pixels8_y2; | |||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2; | |||
| c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8; | |||
| c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2; | |||
| c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2; | |||
| c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2; | |||
| #ifdef HAVE_MMX | |||
| dsputil_init_mmx(); | |||
| dsputil_init_mmx(c, mask); | |||
| #endif | |||
| #ifdef ARCH_ARMV4L | |||
| dsputil_init_armv4l(); | |||
| dsputil_init_armv4l(c, mask); | |||
| #endif | |||
| #ifdef HAVE_MLIB | |||
| dsputil_init_mlib(); | |||
| dsputil_init_mlib(c, mask); | |||
| #endif | |||
| #ifdef ARCH_ALPHA | |||
| dsputil_init_alpha(); | |||
| dsputil_init_alpha(c, mask); | |||
| #endif | |||
| #ifdef ARCH_POWERPC | |||
| dsputil_init_ppc(); | |||
| dsputil_init_ppc(c, mask); | |||
| #endif | |||
| #ifdef HAVE_MMI | |||
| dsputil_init_mmi(); | |||
| dsputil_init_mmi(c, mask); | |||
| #endif | |||
| for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | |||
| @@ -1639,7 +1679,8 @@ void avcodec_set_bit_exact(void) | |||
| { | |||
| ff_bit_exact=1; | |||
| #ifdef HAVE_MMX | |||
| dsputil_set_bit_exact_mmx(); | |||
| #warning FIXME - set_bit_exact | |||
| // dsputil_set_bit_exact_mmx(); | |||
| #endif | |||
| } | |||
| @@ -45,10 +45,9 @@ extern const UINT8 ff_zigzag_direct[64]; | |||
| extern UINT32 squareTbl[512]; | |||
| extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; | |||
| void dsputil_init(void); | |||
| /* minimum alignment rules ;) | |||
| if u notice errors in the align stuff, need more alignment for some asm code for some cpu | |||
| if u notice errors in the align stuff, need more alignment for some asm code for some cpu | |||
| or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ... | |||
| !warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible) | |||
| @@ -57,39 +56,20 @@ i (michael) didnt check them, these are just the alignents which i think could b | |||
| !future video codecs might need functions with less strict alignment | |||
| */ | |||
| /* pixel ops : interface with DCT */ | |||
| extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); | |||
| extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); | |||
| extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |||
| extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |||
| extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); | |||
| extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, | |||
| int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |||
| extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | |||
| extern int (*pix_sum)(UINT8 * pix, int line_size); | |||
| extern int (*pix_norm1)(UINT8 * pix, int line_size); | |||
| /* | |||
| void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); | |||
| void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); | |||
| void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | |||
| void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | |||
| void clear_blocks_c(DCTELEM *blocks); | |||
| */ | |||
| /* add and put pixel (decoding) */ | |||
| // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 | |||
| typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); | |||
| typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); | |||
| extern op_pixels_func put_pixels_tab[2][4]; | |||
| extern op_pixels_func avg_pixels_tab[2][4]; | |||
| extern op_pixels_func put_no_rnd_pixels_tab[2][4]; | |||
| extern op_pixels_func avg_no_rnd_pixels_tab[2][4]; | |||
| extern qpel_mc_func put_qpel_pixels_tab[2][16]; | |||
| extern qpel_mc_func avg_qpel_pixels_tab[2][16]; | |||
| extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | |||
| extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | |||
| #define CALL_2X_PIXELS(a, b, n)\ | |||
| static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||
| @@ -100,20 +80,46 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||
| /* motion estimation */ | |||
| typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); | |||
| extern op_pixels_abs_func pix_abs16x16; | |||
| extern op_pixels_abs_func pix_abs16x16_x2; | |||
| extern op_pixels_abs_func pix_abs16x16_y2; | |||
| extern op_pixels_abs_func pix_abs16x16_xy2; | |||
| extern op_pixels_abs_func pix_abs8x8; | |||
| extern op_pixels_abs_func pix_abs8x8_x2; | |||
| extern op_pixels_abs_func pix_abs8x8_y2; | |||
| extern op_pixels_abs_func pix_abs8x8_xy2; | |||
| /* | |||
| int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| */ | |||
| typedef struct DSPContext { /* table of DSP function pointers; dsputil_init() fills it with the C versions and then lets the per-architecture init routines overwrite entries */ | |||
| /* pixel ops : interface with DCT (alignment notes on the parameters are the author's expectations, see the "minimum alignment rules" comment) */ | |||
| void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); | |||
| void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); | |||
| void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |||
| void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |||
| void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); | |||
| void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, | |||
| int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |||
| void (*clear_blocks)(DCTELEM *blocks/*align 16*/); /* the C version zeroes 6*64 DCTELEM entries */ | |||
| int (*pix_sum)(UINT8 * pix, int line_size); | |||
| int (*pix_norm1)(UINT8 * pix, int line_size); | |||
| /* maybe create an array for 16/8 functions */ | |||
| op_pixels_func put_pixels_tab[2][4]; /* first index: 0 = the *16 functions, 1 = the *8 functions; second index: 0 = plain, 1 = _x2, 2 = _y2, 3 = _xy2 (as assigned in dsputil_init) */ | |||
| op_pixels_func avg_pixels_tab[2][4]; /* same indexing as put_pixels_tab */ | |||
| op_pixels_func put_no_rnd_pixels_tab[2][4]; /* same indexing; entry [n][0] is set to the ordinary put_pixels function in dsputil_init */ | |||
| op_pixels_func avg_no_rnd_pixels_tab[2][4]; /* same indexing as put_pixels_tab */ | |||
| qpel_mc_func put_qpel_pixels_tab[2][16]; /* NOTE(review): the qpel tables are not assigned in the visible part of dsputil_init - confirm where they are filled */ | |||
| qpel_mc_func avg_qpel_pixels_tab[2][16]; | |||
| qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | |||
| qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | |||
| op_pixels_abs_func pix_abs16x16; /* motion estimation comparison functions (op_pixels_abs_func); _x2/_y2/_xy2 mirror the suffixes of the C implementations */ | |||
| op_pixels_abs_func pix_abs16x16_x2; | |||
| op_pixels_abs_func pix_abs16x16_y2; | |||
| op_pixels_abs_func pix_abs16x16_xy2; | |||
| op_pixels_abs_func pix_abs8x8; | |||
| op_pixels_abs_func pix_abs8x8_x2; | |||
| op_pixels_abs_func pix_abs8x8_y2; | |||
| op_pixels_abs_func pix_abs8x8_xy2; | |||
| } DSPContext; | |||
| void dsputil_init(DSPContext* p, unsigned mask); | |||
| /** | |||
| * permute block according to permutation. | |||
| @@ -121,8 +127,12 @@ int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| */ | |||
| void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); | |||
| #define emms_c() | |||
| #if defined(HAVE_MMX) | |||
| #undef emms_c() | |||
| #define MM_MMX 0x0001 /* standard MMX */ | |||
| #define MM_3DNOW 0x0004 /* AMD 3DNOW */ | |||
| #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |||
| @@ -132,6 +142,8 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, | |||
| extern int mm_flags; | |||
| int mm_support(void); | |||
| void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size); | |||
| void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size); | |||
| static inline void emms(void) | |||
| { | |||
| @@ -146,54 +158,44 @@ static inline void emms(void) | |||
| #define __align8 __attribute__ ((aligned (8))) | |||
| void dsputil_init_mmx(void); | |||
| void dsputil_set_bit_exact_mmx(void); | |||
| void dsputil_init_mmx(DSPContext* c, unsigned mask); | |||
| void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask); | |||
| #elif defined(ARCH_ARMV4L) | |||
| #define emms_c() | |||
| /* This is to use 4 bytes read to the IDCT pointers for some 'zero' | |||
| line optimizations */ | |||
| #define __align8 __attribute__ ((aligned (4))) | |||
| void dsputil_init_armv4l(void); | |||
| void dsputil_init_armv4l(DSPContext* c, unsigned mask); | |||
| #elif defined(HAVE_MLIB) | |||
| #define emms_c() | |||
| /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | |||
| #define __align8 __attribute__ ((aligned (8))) | |||
| void dsputil_init_mlib(void); | |||
| void dsputil_init_mlib(DSPContext* c, unsigned mask); | |||
| #elif defined(ARCH_ALPHA) | |||
| #define emms_c() | |||
| #define __align8 __attribute__ ((aligned (8))) | |||
| void dsputil_init_alpha(void); | |||
| void dsputil_init_alpha(DSPContext* c, unsigned mask); | |||
| #elif defined(ARCH_POWERPC) | |||
| #define emms_c() | |||
| #define __align8 __attribute__ ((aligned (16))) | |||
| void dsputil_init_ppc(void); | |||
| void dsputil_init_ppc(DSPContext* c, unsigned mask); | |||
| #elif defined(HAVE_MMI) | |||
| #define emms_c() | |||
| #define __align8 __attribute__ ((aligned (16))) | |||
| void dsputil_init_mmi(void); | |||
| void dsputil_init_mmi(DSPContext* c, unsigned mask); | |||
| #else | |||
| #define emms_c() | |||
| #define __align8 | |||
| #endif | |||
| @@ -263,9 +265,9 @@ typedef struct MDCTContext { | |||
| } MDCTContext; | |||
| int ff_mdct_init(MDCTContext *s, int nbits, int inverse); | |||
| void ff_imdct_calc(MDCTContext *s, FFTSample *output, | |||
| void ff_imdct_calc(MDCTContext *s, FFTSample *output, | |||
| const FFTSample *input, FFTSample *tmp); | |||
| void ff_mdct_calc(MDCTContext *s, FFTSample *out, | |||
| void ff_mdct_calc(MDCTContext *s, FFTSample *out, | |||
| const FFTSample *input, FFTSample *tmp); | |||
| void ff_mdct_end(MDCTContext *s); | |||
| @@ -114,6 +114,7 @@ static int dvvideo_decode_init(AVCodecContext *avctx) | |||
| /* XXX: fix it */ | |||
| memset(&s2, 0, sizeof(MpegEncContext)); | |||
| s2.avctx = avctx; | |||
| dsputil_init(&s2.dsp, avctx->dsp_mask); | |||
| if (DCT_common_init(&s2) < 0) | |||
| return -1; | |||
| @@ -331,7 +331,7 @@ static void guess_mv(MpegEncContext *s){ | |||
| s->mv_type = MV_TYPE_16X16; | |||
| s->mb_skiped=0; | |||
| clear_blocks(s->block[0]); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mb_x= mb_x; | |||
| s->mb_y= mb_y; | |||
| @@ -458,7 +458,7 @@ int score_sum=0; | |||
| s->mv_type = MV_TYPE_16X16; | |||
| s->mb_skiped=0; | |||
| clear_blocks(s->block[0]); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mb_x= mb_x; | |||
| s->mb_y= mb_y; | |||
| @@ -559,8 +559,8 @@ static int is_intra_more_likely(MpegEncContext *s){ | |||
| UINT8 *mb_ptr = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize; | |||
| UINT8 *last_mb_ptr= s->last_picture [0] + mb_x*16 + mb_y*16*s->linesize; | |||
| is_intra_likely += pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize); | |||
| is_intra_likely -= pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize); | |||
| is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize); | |||
| is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize); | |||
| }else{ | |||
| if(s->mbintra_table[i]) //HACK (this is allways inited but we should use mb_type[]) | |||
| is_intra_likely++; | |||
| @@ -738,7 +738,7 @@ void ff_error_resilience(MpegEncContext *s){ | |||
| s->mv[0][0][1] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][1]; | |||
| } | |||
| clear_blocks(s->block[0]); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mb_x= mb_x; | |||
| s->mb_y= mb_y; | |||
| @@ -778,8 +778,8 @@ void ff_error_resilience(MpegEncContext *s){ | |||
| s->mv[1][0][0]= 0; | |||
| s->mv[1][0][1]= 0; | |||
| } | |||
| clear_blocks(s->block[0]); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mb_x= mb_x; | |||
| s->mb_y= mb_y; | |||
| MPV_decode_mb(s, s->block); | |||
| @@ -538,7 +538,7 @@ void mpeg4_encode_mb(MpegEncContext * s, | |||
| if(s->coded_order[i+1].pict_type!=B_TYPE) break; | |||
| b_pic= s->coded_order[i+1].picture[0] + offset; | |||
| diff= pix_abs16x16(p_pic, b_pic, s->linesize); | |||
| diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize); | |||
| if(diff>s->qscale*70){ //FIXME check that 70 is optimal | |||
| s->mb_skiped=0; | |||
| break; | |||
| @@ -195,7 +195,7 @@ static int decode_slice(MpegEncContext *s){ | |||
| } | |||
| /* DCT & quantize */ | |||
| clear_blocks(s->block[0]); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mv_dir = MV_DIR_FORWARD; | |||
| s->mv_type = MV_TYPE_16X16; | |||
| @@ -22,7 +22,7 @@ | |||
| #include "../dsputil.h" | |||
| int mm_flags; /* multimedia extension flags */ | |||
| /* FIXME use them in static form */ | |||
| int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |||
| @@ -242,7 +242,7 @@ static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, in | |||
| ); | |||
| } | |||
| static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |||
| void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |||
| { | |||
| const DCTELEM *p; | |||
| UINT8 *pix; | |||
| @@ -297,7 +297,7 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line | |||
| :"memory"); | |||
| } | |||
| static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |||
| void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |||
| { | |||
| const DCTELEM *p; | |||
| UINT8 *pix; | |||
| @@ -457,7 +457,7 @@ static int pix_sum16_mmx(UINT8 * pix, int line_size){ | |||
| static void just_return() { return; } | |||
| #endif | |||
| void dsputil_init_mmx(void) | |||
| void dsputil_init_mmx(DSPContext* c, unsigned mask) | |||
| { | |||
| mm_flags = mm_support(); | |||
| #if 0 | |||
| @@ -476,112 +476,112 @@ void dsputil_init_mmx(void) | |||
| #endif | |||
| if (mm_flags & MM_MMX) { | |||
| get_pixels = get_pixels_mmx; | |||
| diff_pixels = diff_pixels_mmx; | |||
| put_pixels_clamped = put_pixels_clamped_mmx; | |||
| add_pixels_clamped = add_pixels_clamped_mmx; | |||
| clear_blocks= clear_blocks_mmx; | |||
| pix_sum= pix_sum16_mmx; | |||
| pix_abs16x16 = pix_abs16x16_mmx; | |||
| pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |||
| pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |||
| pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | |||
| pix_abs8x8 = pix_abs8x8_mmx; | |||
| pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |||
| pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |||
| pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |||
| put_pixels_tab[0][0] = put_pixels16_mmx; | |||
| put_pixels_tab[0][1] = put_pixels16_x2_mmx; | |||
| put_pixels_tab[0][2] = put_pixels16_y2_mmx; | |||
| put_pixels_tab[0][3] = put_pixels16_xy2_mmx; | |||
| put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; | |||
| put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; | |||
| put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; | |||
| put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; | |||
| avg_pixels_tab[0][0] = avg_pixels16_mmx; | |||
| avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; | |||
| avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; | |||
| avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; | |||
| avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; | |||
| avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; | |||
| avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; | |||
| avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; | |||
| put_pixels_tab[1][0] = put_pixels8_mmx; | |||
| put_pixels_tab[1][1] = put_pixels8_x2_mmx; | |||
| put_pixels_tab[1][2] = put_pixels8_y2_mmx; | |||
| put_pixels_tab[1][3] = put_pixels8_xy2_mmx; | |||
| put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; | |||
| put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; | |||
| put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; | |||
| put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; | |||
| avg_pixels_tab[1][0] = avg_pixels8_mmx; | |||
| avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; | |||
| avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; | |||
| avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; | |||
| avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; | |||
| avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; | |||
| avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; | |||
| avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; | |||
| c->get_pixels = get_pixels_mmx; | |||
| c->diff_pixels = diff_pixels_mmx; | |||
| c->put_pixels_clamped = put_pixels_clamped_mmx; | |||
| c->add_pixels_clamped = add_pixels_clamped_mmx; | |||
| c->clear_blocks = clear_blocks_mmx; | |||
| c->pix_sum = pix_sum16_mmx; | |||
| c->pix_abs16x16 = pix_abs16x16_mmx; | |||
| c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |||
| c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |||
| c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | |||
| c->pix_abs8x8 = pix_abs8x8_mmx; | |||
| c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |||
| c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |||
| c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx; | |||
| c->put_pixels_tab[0][0] = put_pixels16_mmx; | |||
| c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; | |||
| c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; | |||
| c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; | |||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; | |||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; | |||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; | |||
| c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; | |||
| c->avg_pixels_tab[0][0] = avg_pixels16_mmx; | |||
| c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; | |||
| c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; | |||
| c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; | |||
| c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; | |||
| c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; | |||
| c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; | |||
| c->put_pixels_tab[1][0] = put_pixels8_mmx; | |||
| c->put_pixels_tab[1][1] = put_pixels8_x2_mmx; | |||
| c->put_pixels_tab[1][2] = put_pixels8_y2_mmx; | |||
| c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx; | |||
| c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; | |||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; | |||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; | |||
| c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; | |||
| c->avg_pixels_tab[1][0] = avg_pixels8_mmx; | |||
| c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; | |||
| c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; | |||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; | |||
| c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; | |||
| c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; | |||
| c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; | |||
| c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; | |||
| if (mm_flags & MM_MMXEXT) { | |||
| pix_abs16x16 = pix_abs16x16_mmx2; | |||
| pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; | |||
| pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; | |||
| pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; | |||
| pix_abs8x8 = pix_abs8x8_mmx2; | |||
| pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; | |||
| pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; | |||
| pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; | |||
| put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | |||
| put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | |||
| put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | |||
| put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | |||
| avg_pixels_tab[0][0] = avg_pixels16_mmx2; | |||
| avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; | |||
| avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; | |||
| avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | |||
| put_pixels_tab[1][1] = put_pixels8_x2_mmx2; | |||
| put_pixels_tab[1][2] = put_pixels8_y2_mmx2; | |||
| put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | |||
| put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; | |||
| avg_pixels_tab[1][0] = avg_pixels8_mmx2; | |||
| avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; | |||
| avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; | |||
| avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | |||
| c->pix_abs16x16 = pix_abs16x16_mmx2; | |||
| c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; | |||
| c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; | |||
| c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; | |||
| c->pix_abs8x8 = pix_abs8x8_mmx2; | |||
| c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; | |||
| c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; | |||
| c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; | |||
| c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | |||
| c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | |||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | |||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | |||
| c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; | |||
| c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; | |||
| c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | |||
| c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; | |||
| c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; | |||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | |||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; | |||
| c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; | |||
| c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; | |||
| c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; | |||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | |||
| } else if (mm_flags & MM_3DNOW) { | |||
| put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | |||
| put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | |||
| put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; | |||
| put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; | |||
| avg_pixels_tab[0][0] = avg_pixels16_3dnow; | |||
| avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; | |||
| avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; | |||
| avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; | |||
| put_pixels_tab[1][1] = put_pixels8_x2_3dnow; | |||
| put_pixels_tab[1][2] = put_pixels8_y2_3dnow; | |||
| put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; | |||
| put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; | |||
| avg_pixels_tab[1][0] = avg_pixels8_3dnow; | |||
| avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; | |||
| avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; | |||
| avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; | |||
| c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | |||
| c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | |||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; | |||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; | |||
| c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; | |||
| c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; | |||
| c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; | |||
| c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; | |||
| c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; | |||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; | |||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; | |||
| c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; | |||
| c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; | |||
| c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; | |||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; | |||
| } | |||
| } | |||
| @@ -624,25 +624,24 @@ void dsputil_init_mmx(void) | |||
| /* Remove any non-bit-exact operation (for testing purposes). NOTE: this | |||
| function should be kept as small as possible, because non-bit-exact | |||
| cases are always difficult to test automatically. */ | |||
| void dsputil_set_bit_exact_mmx(void) | |||
| void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask) | |||
| { | |||
| if (mm_flags & MM_MMX) { | |||
| /* MMX2 & 3DNOW */ | |||
| put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; | |||
| put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; | |||
| avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; | |||
| put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; | |||
| put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; | |||
| avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; | |||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; | |||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; | |||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; | |||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; | |||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; | |||
| if (mm_flags & MM_MMXEXT) { | |||
| pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |||
| pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |||
| pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | |||
| pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |||
| pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |||
| pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |||
| c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |||
| c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |||
| c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | |||
| c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |||
| c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |||
| c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |||
| } | |||
| } | |||
| } | |||
| @@ -88,8 +88,8 @@ static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size) | |||
| return s; | |||
| } | |||
| static void no_motion_search(MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr) | |||
| static inline void no_motion_search(MpegEncContext * s, | |||
| int *mx_ptr, int *my_ptr) | |||
| { | |||
| *mx_ptr = 16 * s->mb_x; | |||
| *my_ptr = 16 * s->mb_y; | |||
| @@ -123,7 +123,7 @@ static int full_motion_search(MpegEncContext * s, | |||
| my = 0; | |||
| for (y = y1; y <= y2; y++) { | |||
| for (x = x1; x <= x2; x++) { | |||
| d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, | |||
| d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, | |||
| s->linesize); | |||
| if (d < dmin || | |||
| (d == dmin && | |||
| @@ -188,7 +188,7 @@ static int log_motion_search(MpegEncContext * s, | |||
| do { | |||
| for (y = y1; y <= y2; y += range) { | |||
| for (x = x1; x <= x2; x += range) { | |||
| d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); | |||
| d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); | |||
| if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dmin = d; | |||
| mx = x; | |||
| @@ -268,7 +268,7 @@ static int phods_motion_search(MpegEncContext * s, | |||
| lastx = x; | |||
| for (x = x1; x <= x2; x += range) { | |||
| d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); | |||
| d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); | |||
| if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dminx = d; | |||
| mx = x; | |||
| @@ -277,7 +277,7 @@ static int phods_motion_search(MpegEncContext * s, | |||
| x = lastx; | |||
| for (y = y1; y <= y2; y += range) { | |||
| d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); | |||
| d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); | |||
| if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { | |||
| dminy = d; | |||
| my = y; | |||
| @@ -324,7 +324,7 @@ static int phods_motion_search(MpegEncContext * s, | |||
| const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ | |||
| const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ | |||
| if(map[index]!=key){\ | |||
| d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ | |||
| d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ | |||
| d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ | |||
| COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ | |||
| map[index]= key;\ | |||
| @@ -355,7 +355,7 @@ static int phods_motion_search(MpegEncContext * s, | |||
| const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ | |||
| const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ | |||
| if(map[index]!=key){\ | |||
| d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ | |||
| d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ | |||
| d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ | |||
| COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ | |||
| map[index]= key;\ | |||
| @@ -590,7 +590,7 @@ static int epzs_motion_search(MpegEncContext * s, | |||
| map_generation= update_map_generation(s); | |||
| dmin = pix_abs16x16(new_pic, old_pic, pic_stride); | |||
| dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride); | |||
| map[0]= map_generation; | |||
| score_map[0]= dmin; | |||
| @@ -644,11 +644,11 @@ static int epzs_motion_search(MpegEncContext * s, | |||
| if(s->me_method==ME_EPZS) | |||
| dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, | |||
| pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, | |||
| shift, map, score_map, map_generation, pix_abs16x16); | |||
| shift, map, score_map, map_generation, s->dsp.pix_abs16x16); | |||
| else | |||
| dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride, | |||
| pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, | |||
| shift, map, score_map, map_generation, pix_abs16x16); | |||
| shift, map, score_map, map_generation, s->dsp.pix_abs16x16); | |||
| //check(best[0],best[1],0, b1) | |||
| *mx_ptr= best[0]; | |||
| *my_ptr= best[1]; | |||
| @@ -683,7 +683,7 @@ static int epzs_motion_search4(MpegEncContext * s, int block, | |||
| //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | |||
| /* first line */ | |||
| if ((s->mb_y == 0 || s->first_slice_line) && block<2) { | |||
| CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||
| CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |||
| CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift) | |||
| CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift) | |||
| }else{ | |||
| @@ -705,11 +705,11 @@ static int epzs_motion_search4(MpegEncContext * s, int block, | |||
| if(s->me_method==ME_EPZS) | |||
| dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, | |||
| pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, | |||
| shift, map, score_map, map_generation, pix_abs8x8); | |||
| shift, map, score_map, map_generation, s->dsp.pix_abs8x8); | |||
| else | |||
| dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride, | |||
| pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, | |||
| shift, map, score_map, map_generation, pix_abs8x8); | |||
| shift, map, score_map, map_generation, s->dsp.pix_abs8x8); | |||
| *mx_ptr= best[0]; | |||
| *my_ptr= best[1]; | |||
| @@ -1023,8 +1023,8 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in | |||
| dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture); | |||
| dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, | |||
| pred_x4, pred_y4, ref_picture, pix_abs8x8_x2, | |||
| pix_abs8x8_y2, pix_abs8x8_xy2, block); | |||
| pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2, | |||
| s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block); | |||
| s->motion_val[ s->block_index[block] ][0]= mx4; | |||
| s->motion_val[ s->block_index[block] ][1]= my4; | |||
| @@ -1133,9 +1133,10 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, | |||
| /* At this point (mx,my) are full-pel and the relative displacement */ | |||
| ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx); | |||
| sum = pix_sum(pix, s->linesize); | |||
| sum = s->dsp.pix_sum(pix, s->linesize); | |||
| varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; | |||
| varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; | |||
| // FIXME: MMX OPTIMIZE | |||
| vard = (pix_norm(pix, ppix, s->linesize)+128)>>8; | |||
| //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); | |||
| @@ -1161,13 +1162,13 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, | |||
| if (varc*2 + 200 > vard){ | |||
| mb_type|= MB_TYPE_INTER; | |||
| if(s->me_method >= ME_EPZS) | |||
| fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, | |||
| pix_abs16x16_xy2, 0); | |||
| fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, | |||
| s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0); | |||
| else | |||
| halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, | |||
| pix_abs16x16_xy2, 0); | |||
| halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, | |||
| s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0); | |||
| }else{ | |||
| mx <<=1; | |||
| my <<=1; | |||
| @@ -1186,13 +1187,13 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, | |||
| mb_type|= MB_TYPE_INTER; | |||
| if (s->me_method != ME_ZERO) { | |||
| if(s->me_method >= ME_EPZS) | |||
| dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, | |||
| pix_abs16x16_xy2, 0); | |||
| dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, | |||
| s->dsp.pix_abs16x16_xy2, 0); | |||
| else | |||
| dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, | |||
| pix_abs16x16_xy2, 0); | |||
| dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, | |||
| s->dsp.pix_abs16x16_xy2, 0); | |||
| if((s->flags&CODEC_FLAG_4MV) | |||
| && !s->skip_me && varc>50 && vard>10){ | |||
| int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); | |||
| @@ -1303,9 +1304,9 @@ int ff_estimate_motion_b(MpegEncContext * s, | |||
| break; | |||
| } | |||
| dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, | |||
| pix_abs16x16_xy2, 0); | |||
| dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, | |||
| pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, | |||
| s->dsp.pix_abs16x16_xy2, 0); | |||
| //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); | |||
| // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; | |||
| mv_table[mot_xy][0]= mx; | |||
| @@ -1343,8 +1344,8 @@ static inline int check_bidir_mv(MpegEncContext * s, | |||
| dxy&= 1; | |||
| ptr = s->last_picture[0] + (src_y * s->linesize) + src_x; | |||
| put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); | |||
| s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); | |||
| fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale; | |||
| dxy = ((motion_by & 1) << 1) | (motion_bx & 1); | |||
| @@ -1356,11 +1357,11 @@ static inline int check_bidir_mv(MpegEncContext * s, | |||
| src_y = clip(src_y, -16, s->height); | |||
| if (src_y == s->height) | |||
| dxy&= 1; | |||
| ptr = s->next_picture[0] + (src_y * s->linesize) + src_x; | |||
| avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); | |||
| fbmin += pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); | |||
| s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); | |||
| fbmin += s->dsp.pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); | |||
| return fbmin; | |||
| } | |||
| @@ -1443,7 +1444,7 @@ static inline int direct_search(MpegEncContext * s, | |||
| if (src_y == height) dxy &= ~2; | |||
| ptr = s->last_picture[0] + (src_y * s->linesize) + src_x; | |||
| put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); | |||
| s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); | |||
| dxy = ((motion_by & 1) << 1) | (motion_bx & 1); | |||
| src_x = (mb_x + bx) * 16 + (motion_bx >> 1); | |||
| @@ -1453,7 +1454,7 @@ static inline int direct_search(MpegEncContext * s, | |||
| src_y = clip(src_y, -16, height); | |||
| if (src_y == height) dxy &= ~2; | |||
| avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); | |||
| s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); | |||
| } | |||
| } | |||
| @@ -1623,7 +1623,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, | |||
| s->mb_incr= 1; | |||
| for(;;) { | |||
| clear_blocks(s->block[0]); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| ret = mpeg_decode_mb(s, s->block); | |||
| dprintf("ret=%d\n", ret); | |||
| @@ -57,7 +57,7 @@ static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int bl | |||
| /* for jpeg fast DCT */ | |||
| #define CONST_BITS 14 | |||
| static const unsigned short aanscales[64] = { | |||
| static const uint16_t aanscales[64] = { | |||
| /* precomputed values scaled up by 14 bits */ | |||
| 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, | |||
| 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, | |||
| @@ -70,7 +70,7 @@ static const unsigned short aanscales[64] = { | |||
| }; | |||
| /* Input permutation for the simple_idct_mmx */ | |||
| static const UINT8 simple_mmx_permutation[64]={ | |||
| static const uint8_t simple_mmx_permutation[64]={ | |||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||
| @@ -81,7 +81,7 @@ static const UINT8 simple_mmx_permutation[64]={ | |||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||
| }; | |||
| static UINT8 h263_chroma_roundtab[16] = { | |||
| static const uint8_t h263_chroma_roundtab[16] = { | |||
| 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, | |||
| }; | |||
| @@ -172,16 +172,19 @@ void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scanta | |||
| } | |||
| /* XXX: those functions should be suppressed ASAP when all IDCTs are | |||
| converted */ | |||
| converted */ | |||
| // *FIXME* this is an ugly hack using local statics | |||
| static void (*ff_put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | |||
| static void (*ff_add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | |||
| static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |||
| { | |||
| j_rev_dct (block); | |||
| put_pixels_clamped(block, dest, line_size); | |||
| ff_put_pixels_clamped(block, dest, line_size); | |||
| } | |||
| static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |||
| { | |||
| j_rev_dct (block); | |||
| add_pixels_clamped(block, dest, line_size); | |||
| ff_add_pixels_clamped(block, dest, line_size); | |||
| } | |||
| /* init common dct for both encoder and decoder */ | |||
| @@ -189,6 +192,9 @@ int DCT_common_init(MpegEncContext *s) | |||
| { | |||
| int i; | |||
| ff_put_pixels_clamped = s->dsp.put_pixels_clamped; | |||
| ff_add_pixels_clamped = s->dsp.add_pixels_clamped; | |||
| s->dct_unquantize_h263 = dct_unquantize_h263_c; | |||
| s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c; | |||
| s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c; | |||
| @@ -268,29 +274,30 @@ int MPV_common_init(MpegEncContext *s) | |||
| UINT8 *pict; | |||
| int y_size, c_size, yc_size, i; | |||
| dsputil_init(&s->dsp, s->avctx->dsp_mask); | |||
| DCT_common_init(s); | |||
| s->flags= s->avctx->flags; | |||
| s->mb_width = (s->width + 15) / 16; | |||
| s->mb_height = (s->height + 15) / 16; | |||
| y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); | |||
| c_size = (s->mb_width + 2) * (s->mb_height + 2); | |||
| yc_size = y_size + 2 * c_size; | |||
| /* set default edge pos, will be overridden in decode_header if needed */ | |||
| s->h_edge_pos= s->mb_width*16; | |||
| s->v_edge_pos= s->mb_height*16; | |||
| s->mb_num = s->mb_width * s->mb_height; | |||
| y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); | |||
| c_size = (s->mb_width + 2) * (s->mb_height + 2); | |||
| yc_size = y_size + 2 * c_size; | |||
| /* convert fourcc to upper case */ | |||
| s->avctx->fourcc= toupper( s->avctx->fourcc &0xFF) | |||
| + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 ) | |||
| + (toupper((s->avctx->fourcc>>16)&0xFF)<<16) | |||
| + (toupper((s->avctx->fourcc>>24)&0xFF)<<24); | |||
| s->mb_num = s->mb_width * s->mb_height; | |||
| if(!(s->flags&CODEC_FLAG_DR1)){ | |||
| s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH; | |||
| s->uvlinesize = s->mb_width * 8 + EDGE_WIDTH; | |||
| @@ -1133,17 +1140,17 @@ static inline void gmc1_motion(MpegEncContext *s, | |||
| } | |||
| if((motion_x|motion_y)&7){ | |||
| ff_gmc1(dest_y , ptr , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); | |||
| ff_gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); | |||
| s->dsp.gmc1(dest_y , ptr , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); | |||
| s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding); | |||
| }else{ | |||
| int dxy; | |||
| dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2); | |||
| if (s->no_rounding){ | |||
| put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); | |||
| s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16); | |||
| }else{ | |||
| put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16); | |||
| } | |||
| s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16); | |||
| } | |||
| } | |||
| if(s->flags&CODEC_FLAG_GRAY) return; | |||
| @@ -1167,14 +1174,14 @@ static inline void gmc1_motion(MpegEncContext *s, | |||
| emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); | |||
| ptr= s->edge_emu_buffer; | |||
| } | |||
| ff_gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); | |||
| s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); | |||
| ptr = ref_picture[2] + offset; | |||
| if(emu){ | |||
| emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); | |||
| ptr= s->edge_emu_buffer; | |||
| } | |||
| ff_gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); | |||
| s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); | |||
| return; | |||
| } | |||
| @@ -1199,14 +1206,14 @@ static inline void gmc_motion(MpegEncContext *s, | |||
| ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16; | |||
| oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16; | |||
| ff_gmc(dest_y, ptr, linesize, 16, | |||
| s->dsp.gmc(dest_y, ptr, linesize, 16, | |||
| ox, | |||
| oy, | |||
| s->sprite_delta[0][0], s->sprite_delta[0][1], | |||
| s->sprite_delta[1][0], s->sprite_delta[1][1], | |||
| a+1, (1<<(2*a+1)) - s->no_rounding, | |||
| s->h_edge_pos, s->v_edge_pos); | |||
| ff_gmc(dest_y+8, ptr, linesize, 16, | |||
| s->dsp.gmc(dest_y+8, ptr, linesize, 16, | |||
| ox + s->sprite_delta[0][0]*8, | |||
| oy + s->sprite_delta[1][0]*8, | |||
| s->sprite_delta[0][0], s->sprite_delta[0][1], | |||
| @@ -1224,7 +1231,7 @@ static inline void gmc_motion(MpegEncContext *s, | |||
| oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8; | |||
| ptr = ref_picture[1] + (src_offset>>1); | |||
| ff_gmc(dest_cb, ptr, uvlinesize, 8, | |||
| s->dsp.gmc(dest_cb, ptr, uvlinesize, 8, | |||
| ox, | |||
| oy, | |||
| s->sprite_delta[0][0], s->sprite_delta[0][1], | |||
| @@ -1233,7 +1240,7 @@ static inline void gmc_motion(MpegEncContext *s, | |||
| s->h_edge_pos>>1, s->v_edge_pos>>1); | |||
| ptr = ref_picture[2] + (src_offset>>1); | |||
| ff_gmc(dest_cr, ptr, uvlinesize, 8, | |||
| s->dsp.gmc(dest_cr, ptr, uvlinesize, 8, | |||
| ox, | |||
| oy, | |||
| s->sprite_delta[0][0], s->sprite_delta[0][1], | |||
| @@ -1248,7 +1255,7 @@ static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int bl | |||
| int x, y; | |||
| int start_y, start_x, end_y, end_x; | |||
| UINT8 *buf= s->edge_emu_buffer; | |||
| if(src_y>= h){ | |||
| src+= (h-1-src_y)*linesize; | |||
| src_y=h-1; | |||
| @@ -1860,17 +1867,17 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) | |||
| /* decoding or more than one mb_type (MC was already done otherwise) */ | |||
| if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){ | |||
| if ((!s->no_rounding) || s->pict_type==B_TYPE){ | |||
| op_pix = put_pixels_tab; | |||
| op_qpix= put_qpel_pixels_tab; | |||
| op_pix = s->dsp.put_pixels_tab; | |||
| op_qpix= s->dsp.put_qpel_pixels_tab; | |||
| }else{ | |||
| op_pix = put_no_rnd_pixels_tab; | |||
| op_qpix= put_no_rnd_qpel_pixels_tab; | |||
| op_pix = s->dsp.put_no_rnd_pixels_tab; | |||
| op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; | |||
| } | |||
| if (s->mv_dir & MV_DIR_FORWARD) { | |||
| MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix); | |||
| op_pix = avg_pixels_tab; | |||
| op_qpix= avg_qpel_pixels_tab; | |||
| op_pix = s->dsp.avg_pixels_tab; | |||
| op_qpix= s->dsp.avg_qpel_pixels_tab; | |||
| } | |||
| if (s->mv_dir & MV_DIR_BACKWARD) { | |||
| MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix); | |||
| @@ -2224,10 +2231,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| s->interlaced_dct=0; | |||
| } | |||
| get_pixels(s->block[0], ptr , wrap_y); | |||
| get_pixels(s->block[1], ptr + 8, wrap_y); | |||
| get_pixels(s->block[2], ptr + dct_offset , wrap_y); | |||
| get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y); | |||
| s->dsp.get_pixels(s->block[0], ptr , wrap_y); | |||
| s->dsp.get_pixels(s->block[1], ptr + 8, wrap_y); | |||
| s->dsp.get_pixels(s->block[2], ptr + dct_offset , wrap_y); | |||
| s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y); | |||
| if(s->flags&CODEC_FLAG_GRAY){ | |||
| skip_dct[4]= 1; | |||
| @@ -2239,14 +2246,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); | |||
| ptr= s->edge_emu_buffer; | |||
| } | |||
| get_pixels(s->block[4], ptr, wrap_c); | |||
| s->dsp.get_pixels(s->block[4], ptr, wrap_c); | |||
| ptr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8; | |||
| if(emu){ | |||
| emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); | |||
| ptr= s->edge_emu_buffer; | |||
| } | |||
| get_pixels(s->block[5], ptr, wrap_c); | |||
| s->dsp.get_pixels(s->block[5], ptr, wrap_c); | |||
| } | |||
| }else{ | |||
| op_pixels_func (*op_pix)[4]; | |||
| @@ -2266,17 +2273,17 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8; | |||
| if ((!s->no_rounding) || s->pict_type==B_TYPE){ | |||
| op_pix = put_pixels_tab; | |||
| op_qpix= put_qpel_pixels_tab; | |||
| op_pix = s->dsp.put_pixels_tab; | |||
| op_qpix= s->dsp.put_qpel_pixels_tab; | |||
| }else{ | |||
| op_pix = put_no_rnd_pixels_tab; | |||
| op_qpix= put_no_rnd_qpel_pixels_tab; | |||
| op_pix = s->dsp.put_no_rnd_pixels_tab; | |||
| op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; | |||
| } | |||
| if (s->mv_dir & MV_DIR_FORWARD) { | |||
| MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix); | |||
| op_pix = avg_pixels_tab; | |||
| op_qpix= avg_qpel_pixels_tab; | |||
| op_pix = s->dsp.avg_pixels_tab; | |||
| op_qpix= s->dsp.avg_qpel_pixels_tab; | |||
| } | |||
| if (s->mv_dir & MV_DIR_BACKWARD) { | |||
| MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix); | |||
| @@ -2305,10 +2312,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| s->interlaced_dct=0; | |||
| } | |||
| diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); | |||
| diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | |||
| diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y); | |||
| diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y); | |||
| s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y); | |||
| s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | |||
| s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y); | |||
| s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y); | |||
| if(s->flags&CODEC_FLAG_GRAY){ | |||
| skip_dct[4]= 1; | |||
| @@ -2318,23 +2325,23 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) | |||
| emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); | |||
| ptr_cb= s->edge_emu_buffer; | |||
| } | |||
| diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); | |||
| s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); | |||
| if(emu){ | |||
| emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); | |||
| ptr_cr= s->edge_emu_buffer; | |||
| } | |||
| diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); | |||
| s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); | |||
| } | |||
| /* pre quantization */ | |||
| if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){ | |||
| //FIXME optimize | |||
| if(pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1; | |||
| if(pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1; | |||
| if(pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1; | |||
| if(pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1; | |||
| if(pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1; | |||
| if(pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1; | |||
| if(s->dsp.pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1; | |||
| if(s->dsp.pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1; | |||
| if(s->dsp.pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1; | |||
| if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1; | |||
| if(s->dsp.pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1; | |||
| if(s->dsp.pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1; | |||
| #if 0 | |||
| { | |||
| static int stat[7]; | |||
| @@ -2601,9 +2608,9 @@ static void encode_picture(MpegEncContext *s, int picture_number) | |||
| int yy = mb_y * 16; | |||
| uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx; | |||
| int varc; | |||
| int sum = pix_sum(pix, s->linesize); | |||
| int sum = s->dsp.pix_sum(pix, s->linesize); | |||
| varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; | |||
| varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; | |||
| s->mb_var [s->mb_width * mb_y + mb_x] = varc; | |||
| s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8; | |||
| @@ -221,6 +221,7 @@ typedef struct MpegEncContext { | |||
| int unrestricted_mv; | |||
| int h263_long_vectors; /* use horrible h263v1 long vector mode */ | |||
| DSPContext dsp; /* pointers for accelerated dsp functions */ | |||
| int f_code; /* forward MV resolution */ | |||
| int b_code; /* backward MV resolution for B Frames (mpeg4) */ | |||
| INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB) */ | |||
| @@ -447,7 +447,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, | |||
| printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); | |||
| #endif | |||
| clear_blocks(s->block[0]); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->mv_dir = MV_DIR_FORWARD; | |||
| s->mv_type = MV_TYPE_16X16; | |||
| if (ff_h263_decode_mb(s, s->block) == SLICE_ERROR) { | |||
| @@ -804,7 +804,7 @@ static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int | |||
| } | |||
| } | |||
| static int svq1_motion_inter_block (bit_buffer_t *bitbuf, | |||
| static int svq1_motion_inter_block (MpegEncContext *s, bit_buffer_t *bitbuf, | |||
| uint8_t *current, uint8_t *previous, int pitch, | |||
| svq1_pmv_t *motion, int x, int y) { | |||
| uint8_t *src; | |||
| @@ -839,12 +839,12 @@ static int svq1_motion_inter_block (bit_buffer_t *bitbuf, | |||
| src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch]; | |||
| dst = current; | |||
| put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16); | |||
| s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16); | |||
| return 0; | |||
| } | |||
| static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, | |||
| static int svq1_motion_inter_4v_block (MpegEncContext *s, bit_buffer_t *bitbuf, | |||
| uint8_t *current, uint8_t *previous, int pitch, | |||
| svq1_pmv_t *motion,int x, int y) { | |||
| uint8_t *src; | |||
| @@ -906,7 +906,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, | |||
| src = &previous[(x + (pmv[i]->x >> 1)) + (y + (pmv[i]->y >> 1))*pitch]; | |||
| dst = current; | |||
| put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8); | |||
| s->dsp.put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8); | |||
| /* select next block */ | |||
| if (i & 1) { | |||
| @@ -921,7 +921,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf, | |||
| return 0; | |||
| } | |||
| static int svq1_decode_delta_block (bit_buffer_t *bitbuf, | |||
| static int svq1_decode_delta_block (MpegEncContext *s, bit_buffer_t *bitbuf, | |||
| uint8_t *current, uint8_t *previous, int pitch, | |||
| svq1_pmv_t *motion, int x, int y) { | |||
| uint32_t bit_cache; | |||
| @@ -951,7 +951,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf, | |||
| break; | |||
| case SVQ1_BLOCK_INTER: | |||
| result = svq1_motion_inter_block (bitbuf, current, previous, pitch, motion, x, y); | |||
| result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y); | |||
| if (result != 0) | |||
| { | |||
| @@ -964,7 +964,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf, | |||
| break; | |||
| case SVQ1_BLOCK_INTER_4V: | |||
| result = svq1_motion_inter_4v_block (bitbuf, current, previous, pitch, motion, x, y); | |||
| result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y); | |||
| if (result != 0) | |||
| { | |||
| @@ -1142,8 +1142,8 @@ static int svq1_decode_frame(AVCodecContext *avctx, | |||
| for (y=0; y < height; y+=16) { | |||
| for (x=0; x < width; x+=16) { | |||
| result = svq1_decode_delta_block (&s->gb, &current[x], previous, | |||
| linesize, pmv, x, y); | |||
| result = svq1_decode_delta_block (s, &s->gb, &current[x], previous, | |||
| linesize, pmv, x, y); | |||
| if (result != 0) | |||
| { | |||
| #ifdef DEBUG_SVQ1 | |||