This avoids SIMD-optimized functions having to sign-extend their line size argument manually to be able to do pointer arithmetic.
tags/n1.2
| @@ -32,7 +32,7 @@ void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, | |||||
| /* These functions were the base for the optimized assembler routines, | /* These functions were the base for the optimized assembler routines, | ||||
| and remain here for documentation purposes. */ | and remain here for documentation purposes. */ | ||||
| static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, | static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, | ||||
| int line_size) | |||||
| ptrdiff_t line_size) | |||||
| { | { | ||||
| int i = 8; | int i = 8; | ||||
| uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ | uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ | ||||
| @@ -56,7 +56,7 @@ static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, | |||||
| } | } | ||||
| void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, | void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, | ||||
| int line_size) | |||||
| ptrdiff_t line_size) | |||||
| { | { | ||||
| int h = 8; | int h = 8; | ||||
| /* Keep this function a leaf function by generating the constants | /* Keep this function a leaf function by generating the constants | ||||
| @@ -212,7 +212,7 @@ static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) | |||||
| #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ | #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ | ||||
| static void OPNAME ## _pixels ## SUFF ## _axp \ | static void OPNAME ## _pixels ## SUFF ## _axp \ | ||||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | (uint8_t *restrict block, const uint8_t *restrict pixels, \ | ||||
| int line_size, int h) \ | |||||
| ptrdiff_t line_size, int h) \ | |||||
| { \ | { \ | ||||
| if ((size_t) pixels & 0x7) { \ | if ((size_t) pixels & 0x7) { \ | ||||
| OPKIND(uldq, STORE); \ | OPKIND(uldq, STORE); \ | ||||
| @@ -223,7 +223,7 @@ static void OPNAME ## _pixels ## SUFF ## _axp \ | |||||
| \ | \ | ||||
| static void OPNAME ## _pixels16 ## SUFF ## _axp \ | static void OPNAME ## _pixels16 ## SUFF ## _axp \ | ||||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | (uint8_t *restrict block, const uint8_t *restrict pixels, \ | ||||
| int line_size, int h) \ | |||||
| ptrdiff_t line_size, int h) \ | |||||
| { \ | { \ | ||||
| OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ | OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ | ||||
| OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ | OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ | ||||
| @@ -262,7 +262,7 @@ PIXOP(put_no_rnd, STORE); | |||||
| PIXOP(avg_no_rnd, STORE); | PIXOP(avg_no_rnd, STORE); | ||||
| static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, | static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h) | |||||
| ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| put_pixels_axp_asm(block, pixels, line_size, h); | put_pixels_axp_asm(block, pixels, line_size, h); | ||||
| put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); | put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); | ||||
| @@ -26,7 +26,7 @@ void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block); | |||||
| void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block); | void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block); | ||||
| void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
| int line_size); | int line_size); | ||||
| void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
| @@ -30,16 +30,16 @@ void ff_simple_idct_arm(int16_t *data); | |||||
| static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); | static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); | ||||
| static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); | static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); | ||||
| void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| CALL_2X_PIXELS(ff_put_pixels16_x2_arm, ff_put_pixels8_x2_arm, 8) | CALL_2X_PIXELS(ff_put_pixels16_x2_arm, ff_put_pixels8_x2_arm, 8) | ||||
| CALL_2X_PIXELS(ff_put_pixels16_y2_arm, ff_put_pixels8_y2_arm, 8) | CALL_2X_PIXELS(ff_put_pixels16_y2_arm, ff_put_pixels8_y2_arm, 8) | ||||
| @@ -28,23 +28,23 @@ void ff_simple_idct_armv6(int16_t *data); | |||||
| void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); | void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); | ||||
| void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); | void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); | ||||
| void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_add_pixels_clamped_armv6(const int16_t *block, | void ff_add_pixels_clamped_armv6(const int16_t *block, | ||||
| uint8_t *restrict pixels, | uint8_t *restrict pixels, | ||||
| @@ -32,32 +32,32 @@ void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); | |||||
| void ff_clear_block_neon(int16_t *block); | void ff_clear_block_neon(int16_t *block); | ||||
| void ff_clear_blocks_neon(int16_t *blocks); | void ff_clear_blocks_neon(int16_t *blocks); | ||||
| void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); | |||||
| void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int); | |||||
| void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
| void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
| @@ -55,73 +55,73 @@ static void bfin_clear_blocks (int16_t *blocks) | |||||
| static void bfin_put_pixels8 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels8 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels8uc (block, pixels, pixels, line_size, line_size, h); | ff_bfin_put_pixels8uc (block, pixels, pixels, line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels8uc (block, pixels, pixels+1, line_size, line_size, h); | ff_bfin_put_pixels8uc (block, pixels, pixels+1, line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels8_y2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels8_y2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels8uc (block, pixels, pixels+line_size, line_size, line_size, h); | ff_bfin_put_pixels8uc (block, pixels, pixels+line_size, line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int line_size, int h) | |||||
| static void bfin_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_z_put_pixels8_xy2 (block,s0,line_size, line_size, h); | ff_bfin_z_put_pixels8_xy2 (block,s0,line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels16 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels16 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels16uc (block, pixels, pixels, line_size, line_size, h); | ff_bfin_put_pixels16uc (block, pixels, pixels, line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels16_x2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels16_x2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels16uc (block, pixels, pixels+1, line_size, line_size, h); | ff_bfin_put_pixels16uc (block, pixels, pixels+1, line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels16_y2 (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels16_y2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels16uc (block, pixels, pixels+line_size, line_size, line_size, h); | ff_bfin_put_pixels16uc (block, pixels, pixels+line_size, line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int line_size, int h) | |||||
| static void bfin_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_z_put_pixels16_xy2 (block,s0,line_size, line_size, h); | ff_bfin_z_put_pixels16_xy2 (block,s0,line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels8_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels8_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels8uc_nornd (block, pixels, pixels, line_size, h); | ff_bfin_put_pixels8uc_nornd (block, pixels, pixels, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels8_x2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels8_x2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+1, line_size, h); | ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+1, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels8_y2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels8_y2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+line_size, line_size, h); | ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+line_size, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels16_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels16_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels16uc_nornd (block, pixels, pixels, line_size, h); | ff_bfin_put_pixels16uc_nornd (block, pixels, pixels, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels16_x2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels16_x2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+1, line_size, h); | ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+1, line_size, h); | ||||
| } | } | ||||
| static void bfin_put_pixels16_y2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void bfin_put_pixels16_y2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+line_size, line_size, h); | ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+line_size, line_size, h); | ||||
| } | } | ||||
| @@ -137,7 +137,7 @@ void clear_blocks_c(int16_t *blocks); | |||||
| /* add and put pixel (decoding) */ | /* add and put pixel (decoding) */ | ||||
| // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 | // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 | ||||
| //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4 | //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4 | ||||
| typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); | |||||
| typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, ptrdiff_t line_size, int h); | |||||
| typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); | typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); | ||||
| typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); | ||||
| typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); | typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); | ||||
| @@ -172,19 +172,19 @@ static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8 | |||||
| FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\ | FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||||
| static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\ | |||||
| FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ | FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||||
| static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\ | |||||
| FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ | FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||||
| static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\ | |||||
| FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||||
| static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\ | |||||
| FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| @@ -225,19 +225,19 @@ static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1 | |||||
| }\ | }\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||||
| static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\ | |||||
| FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ | FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||||
| static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\ | |||||
| FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||||
| static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\ | |||||
| FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ | FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |||||
| static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\ | |||||
| FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| @@ -288,7 +288,7 @@ static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8 | |||||
| FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\ | |||||
| static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, ptrdiff_t line_size, int h)\ | |||||
| {\ | {\ | ||||
| int i, a0, b0, a1, b1;\ | int i, a0, b0, a1, b1;\ | ||||
| pixel *block = (pixel*)_block;\ | pixel *block = (pixel*)_block;\ | ||||
| @@ -324,7 +324,7 @@ static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t | |||||
| }\ | }\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |||||
| static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\ | |||||
| {\ | {\ | ||||
| /* FIXME HIGH BIT DEPTH */\ | /* FIXME HIGH BIT DEPTH */\ | ||||
| int i;\ | int i;\ | ||||
| @@ -403,7 +403,7 @@ static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t * | |||||
| }\ | }\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |||||
| static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\ | |||||
| {\ | {\ | ||||
| /* FIXME HIGH BIT DEPTH */\ | /* FIXME HIGH BIT DEPTH */\ | ||||
| int j;\ | int j;\ | ||||
| @@ -290,7 +290,7 @@ static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){ | |||||
| static void zero_hpel(uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h){ | |||||
| } | } | ||||
| int ff_init_me(MpegEncContext *s){ | int ff_init_me(MpegEncContext *s){ | ||||
| @@ -607,7 +607,7 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { | |||||
| } | } | ||||
| /* next one assumes that ((line_size % 16) == 0) */ | /* next one assumes that ((line_size % 16) == 0) */ | ||||
| void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| register vector unsigned char pixelsv1, pixelsv2; | register vector unsigned char pixelsv1, pixelsv2; | ||||
| register vector unsigned char pixelsv1B, pixelsv2B; | register vector unsigned char pixelsv1B, pixelsv2B; | ||||
| @@ -616,9 +616,9 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz | |||||
| register vector unsigned char perm = vec_lvsl(0, pixels); | register vector unsigned char perm = vec_lvsl(0, pixels); | ||||
| int i; | int i; | ||||
| register int line_size_2 = line_size << 1; | |||||
| register int line_size_3 = line_size + line_size_2; | |||||
| register int line_size_4 = line_size << 2; | |||||
| register ptrdiff_t line_size_2 = line_size << 1; | |||||
| register ptrdiff_t line_size_3 = line_size + line_size_2; | |||||
| register ptrdiff_t line_size_4 = line_size << 2; | |||||
| // hand-unrolling the loop by 4 gains about 15% | // hand-unrolling the loop by 4 gains about 15% | ||||
| // mininum execution time goes from 74 to 60 cycles | // mininum execution time goes from 74 to 60 cycles | ||||
| @@ -649,7 +649,7 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz | |||||
| /* next one assumes that ((line_size % 16) == 0) */ | /* next one assumes that ((line_size % 16) == 0) */ | ||||
| #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) | #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) | ||||
| void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | ||||
| register vector unsigned char perm = vec_lvsl(0, pixels); | register vector unsigned char perm = vec_lvsl(0, pixels); | ||||
| @@ -668,7 +668,7 @@ void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_siz | |||||
| } | } | ||||
| /* next one assumes that ((line_size % 8) == 0) */ | /* next one assumes that ((line_size % 8) == 0) */ | ||||
| static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||||
| static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | ||||
| int i; | int i; | ||||
| @@ -699,7 +699,7 @@ static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int lin | |||||
| } | } | ||||
| /* next one assumes that ((line_size % 8) == 0) */ | /* next one assumes that ((line_size % 8) == 0) */ | ||||
| static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| register int i; | register int i; | ||||
| register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | ||||
| @@ -758,7 +758,7 @@ static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l | |||||
| } | } | ||||
| /* next one assumes that ((line_size % 8) == 0) */ | /* next one assumes that ((line_size % 8) == 0) */ | ||||
| static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| register int i; | register int i; | ||||
| register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | ||||
| @@ -818,7 +818,7 @@ static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels | |||||
| } | } | ||||
| /* next one assumes that ((line_size % 16) == 0) */ | /* next one assumes that ((line_size % 16) == 0) */ | ||||
| static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||||
| static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| register int i; | register int i; | ||||
| register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; | register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; | ||||
| @@ -886,7 +886,7 @@ static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, in | |||||
| } | } | ||||
| /* next one assumes that ((line_size % 16) == 0) */ | /* next one assumes that ((line_size % 16) == 0) */ | ||||
| static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||||
| static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| register int i; | register int i; | ||||
| register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; | register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4; | ||||
| @@ -1284,7 +1284,7 @@ static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ui | |||||
| } | } | ||||
| /* next one assumes that ((line_size % 8) == 0) */ | /* next one assumes that ((line_size % 8) == 0) */ | ||||
| static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| register int i; | register int i; | ||||
| register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | ||||
| @@ -26,9 +26,9 @@ | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
| void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| void ff_fdct_altivec(int16_t *block); | void ff_fdct_altivec(int16_t *block); | ||||
| void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, | void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, | ||||
| @@ -262,7 +262,7 @@ if (sz==16) { \ | |||||
| #define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \ | #define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \ | ||||
| static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \ | static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \ | ||||
| const int stride, int height) \ | |||||
| const ptrdiff_t stride, int height) \ | |||||
| { \ | { \ | ||||
| switch((int)ref&3) { \ | switch((int)ref&3) { \ | ||||
| case 0:OP_N##0(sz,rnd##_##avgfunc); return; \ | case 0:OP_N##0(sz,rnd##_##avgfunc); return; \ | ||||
| @@ -120,7 +120,7 @@ DECLARE_ALIGNED(8, static const int16_t, constants256_1024)[] = | |||||
| #define TMP32 58 | #define TMP32 58 | ||||
| static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| do { /* 5 cycles */ | do { /* 5 cycles */ | ||||
| @@ -141,7 +141,7 @@ static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| do { /* 4 cycles */ | do { /* 4 cycles */ | ||||
| @@ -160,7 +160,7 @@ static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t stride, int height) | |||||
| { | { | ||||
| int stride_8 = stride + 8; | int stride_8 = stride + 8; | ||||
| @@ -320,7 +320,7 @@ static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| @@ -412,7 +412,7 @@ static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -604,7 +604,7 @@ static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -727,7 +727,7 @@ static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -817,7 +817,7 @@ static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -982,7 +982,7 @@ static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| vis_ld64(ref[0], TMP0); | vis_ld64(ref[0], TMP0); | ||||
| @@ -1136,7 +1136,7 @@ static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| vis_ld64(ref[0], TMP0); | vis_ld64(ref[0], TMP0); | ||||
| @@ -1226,7 +1226,7 @@ static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t stride, int height) | |||||
| { | { | ||||
| int stride_8 = stride + 8; | int stride_8 = stride + 8; | ||||
| int stride_16 = stride + 16; | int stride_16 = stride + 16; | ||||
| @@ -1354,7 +1354,7 @@ static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t stride, int height) | |||||
| { | { | ||||
| int stride_8 = stride + 8; | int stride_8 = stride + 8; | ||||
| @@ -1433,7 +1433,7 @@ static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -1597,7 +1597,7 @@ static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -1701,7 +1701,7 @@ static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -1897,7 +1897,7 @@ static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -2040,7 +2040,7 @@ static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| */ | */ | ||||
| static void MC_put_no_round_o_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_no_round_o_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| do { /* 5 cycles */ | do { /* 5 cycles */ | ||||
| @@ -2061,7 +2061,7 @@ static void MC_put_no_round_o_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_no_round_o_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_no_round_o_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| do { /* 4 cycles */ | do { /* 4 cycles */ | ||||
| @@ -2080,7 +2080,7 @@ static void MC_put_no_round_o_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| static void MC_avg_no_round_o_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_no_round_o_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t stride, int height) | |||||
| { | { | ||||
| int stride_8 = stride + 8; | int stride_8 = stride + 8; | ||||
| @@ -2240,7 +2240,7 @@ static void MC_avg_no_round_o_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_no_round_x_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_no_round_x_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -2432,7 +2432,7 @@ static void MC_put_no_round_x_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_no_round_x_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_no_round_x_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -2555,7 +2555,7 @@ static void MC_put_no_round_x_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_no_round_x_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_no_round_x_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -2645,7 +2645,7 @@ static void MC_avg_no_round_x_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_no_round_y_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_no_round_y_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| vis_ld64(ref[0], TMP0); | vis_ld64(ref[0], TMP0); | ||||
| @@ -2799,7 +2799,7 @@ static void MC_put_no_round_y_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_no_round_y_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_no_round_y_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| ref = vis_alignaddr(ref); | ref = vis_alignaddr(ref); | ||||
| vis_ld64(ref[0], TMP0); | vis_ld64(ref[0], TMP0); | ||||
| @@ -2889,7 +2889,7 @@ static void MC_put_no_round_y_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_no_round_y_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_no_round_y_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t stride, int height) | |||||
| { | { | ||||
| int stride_8 = stride + 8; | int stride_8 = stride + 8; | ||||
| int stride_16 = stride + 16; | int stride_16 = stride + 16; | ||||
| @@ -3017,7 +3017,7 @@ static void MC_avg_no_round_y_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_no_round_xy_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_no_round_xy_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -3181,7 +3181,7 @@ static void MC_put_no_round_xy_16_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_put_no_round_xy_8_vis (uint8_t * dest, const uint8_t * ref, | static void MC_put_no_round_xy_8_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -3285,7 +3285,7 @@ static void MC_put_no_round_xy_8_vis (uint8_t * dest, const uint8_t * ref, | |||||
| } | } | ||||
| static void MC_avg_no_round_xy_16_vis (uint8_t * dest, const uint8_t * ref, | static void MC_avg_no_round_xy_16_vis (uint8_t * dest, const uint8_t * ref, | ||||
| const int stride, int height) | |||||
| const ptrdiff_t line_size, int height) | |||||
| { | { | ||||
| unsigned long off = (unsigned long) ref & 0x7; | unsigned long off = (unsigned long) ref & 0x7; | ||||
| unsigned long off_plus_1 = off + 1; | unsigned long off_plus_1 = off + 1; | ||||
| @@ -27,14 +27,14 @@ | |||||
| //FIXME the following could be optimized too ... | //FIXME the following could be optimized too ... | ||||
| static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, | static void DEF(ff_put_no_rnd_pixels16_x2)(uint8_t *block, | ||||
| const uint8_t *pixels, | const uint8_t *pixels, | ||||
| int line_size, int h) | |||||
| ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| DEF(ff_put_no_rnd_pixels8_x2)(block, pixels, line_size, h); | DEF(ff_put_no_rnd_pixels8_x2)(block, pixels, line_size, h); | ||||
| DEF(ff_put_no_rnd_pixels8_x2)(block + 8, pixels + 8, line_size, h); | DEF(ff_put_no_rnd_pixels8_x2)(block + 8, pixels + 8, line_size, h); | ||||
| } | } | ||||
| static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, | static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h) | |||||
| ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| DEF(ff_put_pixels8_y2)(block, pixels, line_size, h); | DEF(ff_put_pixels8_y2)(block, pixels, line_size, h); | ||||
| DEF(ff_put_pixels8_y2)(block + 8, pixels + 8, line_size, h); | DEF(ff_put_pixels8_y2)(block + 8, pixels + 8, line_size, h); | ||||
| @@ -42,35 +42,35 @@ static void DEF(ff_put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, | |||||
| static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, | static void DEF(ff_put_no_rnd_pixels16_y2)(uint8_t *block, | ||||
| const uint8_t *pixels, | const uint8_t *pixels, | ||||
| int line_size, int h) | |||||
| ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| DEF(ff_put_no_rnd_pixels8_y2)(block, pixels, line_size, h); | DEF(ff_put_no_rnd_pixels8_y2)(block, pixels, line_size, h); | ||||
| DEF(ff_put_no_rnd_pixels8_y2)(block + 8, pixels + 8, line_size, h); | DEF(ff_put_no_rnd_pixels8_y2)(block + 8, pixels + 8, line_size, h); | ||||
| } | } | ||||
| static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, | static void DEF(ff_avg_pixels16)(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h) | |||||
| ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| DEF(ff_avg_pixels8)(block, pixels, line_size, h); | DEF(ff_avg_pixels8)(block, pixels, line_size, h); | ||||
| DEF(ff_avg_pixels8)(block + 8, pixels + 8, line_size, h); | DEF(ff_avg_pixels8)(block + 8, pixels + 8, line_size, h); | ||||
| } | } | ||||
| static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, | static void DEF(ff_avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h) | |||||
| ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| DEF(ff_avg_pixels8_x2)(block, pixels, line_size, h); | DEF(ff_avg_pixels8_x2)(block, pixels, line_size, h); | ||||
| DEF(ff_avg_pixels8_x2)(block + 8, pixels + 8, line_size, h); | DEF(ff_avg_pixels8_x2)(block + 8, pixels + 8, line_size, h); | ||||
| } | } | ||||
| static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, | static void DEF(ff_avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h) | |||||
| ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| DEF(ff_avg_pixels8_y2)(block, pixels, line_size, h); | DEF(ff_avg_pixels8_y2)(block, pixels, line_size, h); | ||||
| DEF(ff_avg_pixels8_y2)(block + 8, pixels + 8, line_size, h); | DEF(ff_avg_pixels8_y2)(block + 8, pixels + 8, line_size, h); | ||||
| } | } | ||||
| static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, | static void DEF(ff_avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h) | |||||
| ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| DEF(ff_avg_pixels8_xy2)(block, pixels, line_size, h); | DEF(ff_avg_pixels8_xy2)(block, pixels, line_size, h); | ||||
| DEF(ff_avg_pixels8_xy2)(block + 8, pixels + 8, line_size, h); | DEF(ff_avg_pixels8_xy2)(block + 8, pixels + 8, line_size, h); | ||||
| @@ -83,9 +83,9 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; | |||||
| #if HAVE_YASM | #if HAVE_YASM | ||||
| void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | ||||
| int dstStride, int src1Stride, int h); | int dstStride, int src1Stride, int h); | ||||
| void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, | void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, | ||||
| @@ -94,9 +94,9 @@ void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, | |||||
| void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | ||||
| int dstStride, int src1Stride, int h); | int dstStride, int src1Stride, int h); | ||||
| void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | ||||
| int dstStride, int src1Stride, int h); | int dstStride, int src1Stride, int h); | ||||
| void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | ||||
| @@ -104,47 +104,47 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | |||||
| void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, | ||||
| int dstStride, int src1Stride, int h); | int dstStride, int src1Stride, int h); | ||||
| void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, | void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, | ||||
| const uint8_t *pixels, | const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, | void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, | ||||
| const uint8_t *pixels, | const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, | void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, | ||||
| const uint8_t *pixels, | const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, | void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, | ||||
| const uint8_t *pixels, | const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||||
| void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); | |||||
| static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, | static void ff_put_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h) | int line_size, int h) | ||||
| { | { | ||||
| @@ -1455,9 +1455,9 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, | |||||
| #endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
| void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, | void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, | void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, | ||||
| int line_size, int h); | |||||
| ptrdiff_t line_size, int h); | |||||
| void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, | void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | int stride, int h, int x, int y); | ||||
| @@ -25,7 +25,7 @@ | |||||
| */ | */ | ||||
| // put_pixels | // put_pixels | ||||
| static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm__ volatile( | __asm__ volatile( | ||||
| @@ -107,7 +107,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t | |||||
| :"memory"); | :"memory"); | ||||
| } | } | ||||
| static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm__ volatile( | __asm__ volatile( | ||||
| @@ -202,7 +202,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t | |||||
| :"memory"); | :"memory"); | ||||
| } | } | ||||
| static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm__ volatile( | __asm__ volatile( | ||||
| @@ -231,7 +231,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line | |||||
| :REG_a, "memory"); | :REG_a, "memory"); | ||||
| } | } | ||||
| static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_ZERO(mm7); | MOVQ_ZERO(mm7); | ||||
| SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | ||||
| @@ -298,7 +298,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | |||||
| } | } | ||||
| // avg_pixels | // avg_pixels | ||||
| static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| @@ -319,7 +319,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i | |||||
| #ifndef NO_RND | #ifndef NO_RND | ||||
| // in case more speed is needed - unroling would certainly help | // in case more speed is needed - unroling would certainly help | ||||
| static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| @@ -339,7 +339,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si | |||||
| } | } | ||||
| #endif // NO_RND | #endif // NO_RND | ||||
| static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| @@ -363,7 +363,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s | |||||
| } | } | ||||
| #ifndef NO_RND | #ifndef NO_RND | ||||
| static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| @@ -405,7 +405,7 @@ static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t | |||||
| } while (--h); | } while (--h); | ||||
| } | } | ||||
| static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| JUMPALIGN(); | JUMPALIGN(); | ||||
| @@ -458,7 +458,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t | |||||
| } while (--h); | } while (--h); | ||||
| } | } | ||||
| static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_BFE(mm6); | MOVQ_BFE(mm6); | ||||
| __asm__ volatile( | __asm__ volatile( | ||||
| @@ -498,7 +498,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line | |||||
| } | } | ||||
| // this routine is 'slightly' suboptimal but mostly unused | // this routine is 'slightly' suboptimal but mostly unused | ||||
| static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| { | { | ||||
| MOVQ_ZERO(mm7); | MOVQ_ZERO(mm7); | ||||
| SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | ||||
| @@ -573,22 +573,22 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin | |||||
| } | } | ||||
| //FIXME optimize | //FIXME optimize | ||||
| static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ | |||||
| static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){ | |||||
| DEF(put, pixels8_y2)(block , pixels , line_size, h); | DEF(put, pixels8_y2)(block , pixels , line_size, h); | ||||
| DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); | DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); | ||||
| } | } | ||||
| static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ | |||||
| static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){ | |||||
| DEF(put, pixels8_xy2)(block , pixels , line_size, h); | DEF(put, pixels8_xy2)(block , pixels , line_size, h); | ||||
| DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); | DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); | ||||
| } | } | ||||
| static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ | |||||
| static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){ | |||||
| DEF(avg, pixels8_y2)(block , pixels , line_size, h); | DEF(avg, pixels8_y2)(block , pixels , line_size, h); | ||||
| DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); | DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); | ||||
| } | } | ||||
| static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ | |||||
| static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){ | |||||
| DEF(avg, pixels8_xy2)(block , pixels , line_size, h); | DEF(avg, pixels8_xy2)(block , pixels , line_size, h); | ||||
| DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); | DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); | ||||
| } | } | ||||
| @@ -25,10 +25,9 @@ cextern pb_1 | |||||
| SECTION_TEXT | SECTION_TEXT | ||||
| ; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro PUT_PIXELS8_X2 0 | %macro PUT_PIXELS8_X2 0 | ||||
| cglobal put_pixels8_x2, 4,5 | cglobal put_pixels8_x2, 4,5 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| .loop: | .loop: | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| @@ -58,10 +57,9 @@ INIT_MMX 3dnow | |||||
| PUT_PIXELS8_X2 | PUT_PIXELS8_X2 | ||||
| ; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro PUT_PIXELS_16 0 | %macro PUT_PIXELS_16 0 | ||||
| cglobal put_pixels16_x2, 4,5 | cglobal put_pixels16_x2, 4,5 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| .loop: | .loop: | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| @@ -103,11 +101,10 @@ INIT_MMX 3dnow | |||||
| PUT_PIXELS_16 | PUT_PIXELS_16 | ||||
| ; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro PUT_NO_RND_PIXELS8_X2 0 | %macro PUT_NO_RND_PIXELS8_X2 0 | ||||
| cglobal put_no_rnd_pixels8_x2, 4,5 | cglobal put_no_rnd_pixels8_x2, 4,5 | ||||
| mova m6, [pb_1] | mova m6, [pb_1] | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| .loop: | .loop: | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| @@ -145,10 +142,9 @@ INIT_MMX 3dnow | |||||
| PUT_NO_RND_PIXELS8_X2 | PUT_NO_RND_PIXELS8_X2 | ||||
| ; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro PUT_NO_RND_PIXELS8_X2_EXACT 0 | %macro PUT_NO_RND_PIXELS8_X2_EXACT 0 | ||||
| cglobal put_no_rnd_pixels8_x2_exact, 4,5 | cglobal put_no_rnd_pixels8_x2_exact, 4,5 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*3] | lea r4, [r2*3] | ||||
| pcmpeqb m6, m6 | pcmpeqb m6, m6 | ||||
| .loop: | .loop: | ||||
| @@ -193,10 +189,9 @@ INIT_MMX 3dnow | |||||
| PUT_NO_RND_PIXELS8_X2_EXACT | PUT_NO_RND_PIXELS8_X2_EXACT | ||||
| ; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro PUT_PIXELS8_Y2 0 | %macro PUT_PIXELS8_Y2 0 | ||||
| cglobal put_pixels8_y2, 4,5 | cglobal put_pixels8_y2, 4,5 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| sub r0, r2 | sub r0, r2 | ||||
| @@ -228,11 +223,10 @@ INIT_MMX 3dnow | |||||
| PUT_PIXELS8_Y2 | PUT_PIXELS8_Y2 | ||||
| ; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro PUT_NO_RND_PIXELS8_Y2 0 | %macro PUT_NO_RND_PIXELS8_Y2 0 | ||||
| cglobal put_no_rnd_pixels8_y2, 4,5 | cglobal put_no_rnd_pixels8_y2, 4,5 | ||||
| mova m6, [pb_1] | mova m6, [pb_1] | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2+r2] | lea r4, [r2+r2] | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| sub r0, r2 | sub r0, r2 | ||||
| @@ -266,10 +260,9 @@ INIT_MMX 3dnow | |||||
| PUT_NO_RND_PIXELS8_Y2 | PUT_NO_RND_PIXELS8_Y2 | ||||
| ; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 | %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 | ||||
| cglobal put_no_rnd_pixels8_y2_exact, 4,5 | cglobal put_no_rnd_pixels8_y2_exact, 4,5 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*3] | lea r4, [r2*3] | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| pcmpeqb m6, m6 | pcmpeqb m6, m6 | ||||
| @@ -309,10 +302,9 @@ INIT_MMX 3dnow | |||||
| PUT_NO_RND_PIXELS8_Y2_EXACT | PUT_NO_RND_PIXELS8_Y2_EXACT | ||||
| ; avg_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro AVG_PIXELS8 0 | %macro AVG_PIXELS8 0 | ||||
| cglobal avg_pixels8, 4,5 | cglobal avg_pixels8, 4,5 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| .loop: | .loop: | ||||
| mova m0, [r0] | mova m0, [r0] | ||||
| @@ -340,10 +332,9 @@ INIT_MMX 3dnow | |||||
| AVG_PIXELS8 | AVG_PIXELS8 | ||||
| ; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro AVG_PIXELS8_X2 0 | %macro AVG_PIXELS8_X2 0 | ||||
| cglobal avg_pixels8_x2, 4,5 | cglobal avg_pixels8_x2, 4,5 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| .loop: | .loop: | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| @@ -377,10 +368,9 @@ INIT_MMX 3dnow | |||||
| AVG_PIXELS8_X2 | AVG_PIXELS8_X2 | ||||
| ; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro AVG_PIXELS8_Y2 0 | %macro AVG_PIXELS8_Y2 0 | ||||
| cglobal avg_pixels8_y2, 4,5 | cglobal avg_pixels8_y2, 4,5 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| sub r0, r2 | sub r0, r2 | ||||
| @@ -420,11 +410,10 @@ INIT_MMX 3dnow | |||||
| AVG_PIXELS8_Y2 | AVG_PIXELS8_Y2 | ||||
| ; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| %macro AVG_PIXELS8_XY2 0 | %macro AVG_PIXELS8_XY2 0 | ||||
| cglobal avg_pixels8_xy2, 4,5 | cglobal avg_pixels8_xy2, 4,5 | ||||
| mova m6, [pb_1] | mova m6, [pb_1] | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*2] | lea r4, [r2*2] | ||||
| mova m0, [r1] | mova m0, [r1] | ||||
| pavgb m0, [r1+1] | pavgb m0, [r1+1] | ||||
| @@ -465,9 +454,8 @@ INIT_MMX 3dnow | |||||
| AVG_PIXELS8_XY2 | AVG_PIXELS8_XY2 | ||||
| INIT_XMM sse2 | INIT_XMM sse2 | ||||
| ; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| cglobal put_pixels16, 4,5,4 | cglobal put_pixels16, 4,5,4 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*3] | lea r4, [r2*3] | ||||
| .loop: | .loop: | ||||
| movu m0, [r1] | movu m0, [r1] | ||||
| @@ -484,9 +472,8 @@ cglobal put_pixels16, 4,5,4 | |||||
| jnz .loop | jnz .loop | ||||
| REP_RET | REP_RET | ||||
| ; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||||
| ; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||||
| cglobal avg_pixels16, 4,5,4 | cglobal avg_pixels16, 4,5,4 | ||||
| movsxdifnidn r2, r2d | |||||
| lea r4, [r2*3] | lea r4, [r2*3] | ||||
| .loop: | .loop: | ||||
| movu m0, [r1] | movu m0, [r1] | ||||