Change the type of the stride parameters to ptrdiff_t. This spares SIMD-optimized functions from manually sign-extending their stride argument before using it in pointer arithmetic. Also rename the parameter to "stride" everywhere.
Tag: n3.3
| @@ -23,9 +23,9 @@ | |||
| #include "libavutil/arm/cpu.h" | |||
| #include "libavcodec/vp3dsp.h" | |||
| void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); | |||
| void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); | |||
| void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, int16_t *data); | |||
| void ff_vp3_idct_put_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data); | |||
| void ff_vp3_idct_add_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data); | |||
| void ff_vp3_idct_dc_add_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data); | |||
| void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); | |||
| void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); | |||
| @@ -114,7 +114,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C) | |||
| #define ADD8(a) vec_add(a, eight) | |||
| #define SHIFT4(a) vec_sra(a, four) | |||
| static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64]) | |||
| static void vp3_idct_put_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64]) | |||
| { | |||
| vec_u8 t; | |||
| IDCT_START | |||
| @@ -143,7 +143,7 @@ static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64]) | |||
| memset(block, 0, sizeof(*block) * 64); | |||
| } | |||
| static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64]) | |||
| static void vp3_idct_add_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64]) | |||
| { | |||
| LOAD_ZERO; | |||
| vec_u8 t, vdst; | |||
| @@ -44,7 +44,7 @@ | |||
| #define M(a, b) (((a) * (b)) >> 16) | |||
| static av_always_inline void idct(uint8_t *dst, int stride, | |||
| static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride, | |||
| int16_t *input, int type) | |||
| { | |||
| int16_t *ip = input; | |||
| @@ -195,21 +195,21 @@ static av_always_inline void idct(uint8_t *dst, int stride, | |||
| } | |||
| } | |||
| static void vp3_idct_put_c(uint8_t *dest /* align 8 */, int line_size, | |||
| static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, | |||
| int16_t *block /* align 16 */) | |||
| { | |||
| idct(dest, line_size, block, 1); | |||
| idct(dest, stride, block, 1); | |||
| memset(block, 0, sizeof(*block) * 64); | |||
| } | |||
| static void vp3_idct_add_c(uint8_t *dest /* align 8 */, int line_size, | |||
| static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, | |||
| int16_t *block /* align 16 */) | |||
| { | |||
| idct(dest, line_size, block, 2); | |||
| idct(dest, stride, block, 2); | |||
| memset(block, 0, sizeof(*block) * 64); | |||
| } | |||
| static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size, | |||
| static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, | |||
| int16_t *block /* align 16 */) | |||
| { | |||
| int i, dc = (block[0] + 15) >> 5; | |||
| @@ -223,17 +223,17 @@ static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size, | |||
| dest[5] = av_clip_uint8(dest[5] + dc); | |||
| dest[6] = av_clip_uint8(dest[6] + dc); | |||
| dest[7] = av_clip_uint8(dest[7] + dc); | |||
| dest += line_size; | |||
| dest += stride; | |||
| } | |||
| block[0] = 0; | |||
| } | |||
| static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, | |||
| static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, | |||
| int *bounding_values) | |||
| { | |||
| unsigned char *end; | |||
| int filter_value; | |||
| const int nstride = -stride; | |||
| const ptrdiff_t nstride = -stride; | |||
| for (end = first_pixel + 8; first_pixel < end; first_pixel++) { | |||
| filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) + | |||
| @@ -245,7 +245,7 @@ static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, | |||
| } | |||
| } | |||
| static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, | |||
| static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, | |||
| int *bounding_values) | |||
| { | |||
| unsigned char *end; | |||
| @@ -38,11 +38,11 @@ typedef struct VP3DSPContext { | |||
| const uint8_t *b, | |||
| ptrdiff_t stride, int h); | |||
| void (*idct_put)(uint8_t *dest, int line_size, int16_t *block); | |||
| void (*idct_add)(uint8_t *dest, int line_size, int16_t *block); | |||
| void (*idct_dc_add)(uint8_t *dest, int line_size, int16_t *block); | |||
| void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values); | |||
| void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values); | |||
| void (*idct_put)(uint8_t *dest, ptrdiff_t stride, int16_t *block); | |||
| void (*idct_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block); | |||
| void (*idct_dc_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block); | |||
| void (*v_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values); | |||
| void (*h_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values); | |||
| } VP3DSPContext; | |||
| void ff_vp3dsp_init(VP3DSPContext *c, int flags); | |||
| @@ -104,9 +104,6 @@ SECTION .text | |||
| INIT_MMX mmxext | |||
| cglobal vp3_v_loop_filter, 3, 4 | |||
| %if ARCH_X86_64 | |||
| movsxd r1, r1d | |||
| %endif | |||
| mov r3, r1 | |||
| neg r1 | |||
| movq m6, [r0+r1*2] | |||
| @@ -121,9 +118,6 @@ cglobal vp3_v_loop_filter, 3, 4 | |||
| RET | |||
| cglobal vp3_h_loop_filter, 3, 4 | |||
| %if ARCH_X86_64 | |||
| movsxd r1, r1d | |||
| %endif | |||
| lea r3, [r1*3] | |||
| movd m6, [r0 -2] | |||
| @@ -525,7 +519,6 @@ cglobal vp3_h_loop_filter, 3, 4 | |||
| cglobal vp3_idct_put, 3, 4, 9 | |||
| VP3_IDCT r2 | |||
| movsxdifnidn r1, r1d | |||
| mova m4, [pb_80] | |||
| lea r3, [r1*3] | |||
| %assign %%i 0 | |||
| @@ -582,7 +575,6 @@ cglobal vp3_idct_put, 3, 4, 9 | |||
| cglobal vp3_idct_add, 3, 4, 9 | |||
| VP3_IDCT r2 | |||
| movsxdifnidn r1, r1d | |||
| lea r3, [r1*3] | |||
| pxor m4, m4 | |||
| %if mmsize == 16 | |||
| @@ -689,9 +681,6 @@ vp3_idct_funcs | |||
| INIT_MMX mmxext | |||
| cglobal vp3_idct_dc_add, 3, 4 | |||
| %if ARCH_X86_64 | |||
| movsxd r1, r1d | |||
| %endif | |||
| movsx r3, word [r2] | |||
| mov word [r2], 0 | |||
| lea r2, [r1*3] | |||
| @@ -25,18 +25,17 @@ | |||
| #include "libavcodec/vp3dsp.h" | |||
| #include "config.h" | |||
| void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block); | |||
| void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block); | |||
| void ff_vp3_idct_put_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block); | |||
| void ff_vp3_idct_add_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block); | |||
| void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, int16_t *block); | |||
| void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, int16_t *block); | |||
| void ff_vp3_idct_put_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block); | |||
| void ff_vp3_idct_add_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block); | |||
| void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size, | |||
| int16_t *block); | |||
| void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, ptrdiff_t stride, int16_t *block); | |||
| void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride, | |||
| void ff_vp3_v_loop_filter_mmxext(uint8_t *src, ptrdiff_t stride, | |||
| int *bounding_values); | |||
| void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride, | |||
| void ff_vp3_h_loop_filter_mmxext(uint8_t *src, ptrdiff_t stride, | |||
| int *bounding_values); | |||
| av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) | |||