This avoids SIMD-optimized functions having to manually sign-extend their stride argument before they can use it in pointer arithmetic.
tags/n3.3
| @@ -28,18 +28,18 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | int h, int x, int y); | ||||
| void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | int h, int x, int y); | ||||
| void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | int h, int x, int y); | ||||
| void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | int h, int x, int y); | ||||
| void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | int h, int x, int y); | ||||
| void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride, | |||||
| void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | int h, int x, int y); | ||||
| av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth) | av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth) | ||||
| @@ -21,10 +21,9 @@ | |||||
| #include "libavutil/aarch64/asm.S" | #include "libavutil/aarch64/asm.S" | ||||
| /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ | |||||
| /* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */ | |||||
| .macro h264_chroma_mc8 type, codec=h264 | .macro h264_chroma_mc8 type, codec=h264 | ||||
| function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 | function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 | ||||
| sxtw x2, w2 | |||||
| .ifc \type,avg | .ifc \type,avg | ||||
| mov x8, x0 | mov x8, x0 | ||||
| .endif | .endif | ||||
| @@ -192,10 +191,9 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 | |||||
| endfunc | endfunc | ||||
| .endm | .endm | ||||
| /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ | |||||
| /* chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */ | |||||
| .macro h264_chroma_mc4 type, codec=h264 | .macro h264_chroma_mc4 type, codec=h264 | ||||
| function ff_\type\()_\codec\()_chroma_mc4_neon, export=1 | function ff_\type\()_\codec\()_chroma_mc4_neon, export=1 | ||||
| sxtw x2, w2 | |||||
| .ifc \type,avg | .ifc \type,avg | ||||
| mov x8, x0 | mov x8, x0 | ||||
| .endif | .endif | ||||
| @@ -359,7 +357,6 @@ endfunc | |||||
| .macro h264_chroma_mc2 type | .macro h264_chroma_mc2 type | ||||
| function ff_\type\()_h264_chroma_mc2_neon, export=1 | function ff_\type\()_h264_chroma_mc2_neon, export=1 | ||||
| sxtw x2, w2 | |||||
| prfm pldl1strm, [x1] | prfm pldl1strm, [x1] | ||||
| prfm pldl1strm, [x1, x2] | prfm pldl1strm, [x1, x2] | ||||
| orr w7, w4, w5 | orr w7, w4, w5 | ||||
| @@ -25,15 +25,15 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| void ff_put_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_put_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_put_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_put_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_avg_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_avg_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c) | av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c) | ||||
| { | { | ||||
| @@ -25,14 +25,14 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp) | av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp) | ||||
| { | { | ||||
| @@ -26,13 +26,19 @@ | |||||
| #include "libavutil/arm/cpu.h" | #include "libavutil/arm/cpu.h" | ||||
| #include "libavcodec/h264chroma.h" | #include "libavcodec/h264chroma.h" | ||||
| void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
| void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
| void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
| void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
| void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
| void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
| void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| av_cold void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth) | av_cold void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth) | ||||
| { | { | ||||
| @@ -20,7 +20,7 @@ | |||||
| #include "libavutil/arm/asm.S" | #include "libavutil/arm/asm.S" | ||||
| /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ | |||||
| /* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */ | |||||
| .macro h264_chroma_mc8 type, codec=h264 | .macro h264_chroma_mc8 type, codec=h264 | ||||
| function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 | function ff_\type\()_\codec\()_chroma_mc8_neon, export=1 | ||||
| push {r4-r7, lr} | push {r4-r7, lr} | ||||
| @@ -195,7 +195,7 @@ T cmp r7, #0 | |||||
| endfunc | endfunc | ||||
| .endm | .endm | ||||
| /* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ | |||||
| /* chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */ | |||||
| .macro h264_chroma_mc4 type, codec=h264 | .macro h264_chroma_mc4 type, codec=h264 | ||||
| function ff_\type\()_\codec\()_chroma_mc4_neon, export=1 | function ff_\type\()_\codec\()_chroma_mc4_neon, export=1 | ||||
| push {r4-r7, lr} | push {r4-r7, lr} | ||||
| @@ -72,14 +72,14 @@ void ff_put_vc1_mspel_mc32_neon(uint8_t *dst, const uint8_t *src, | |||||
| void ff_put_vc1_mspel_mc33_neon(uint8_t *dst, const uint8_t *src, | void ff_put_vc1_mspel_mc33_neon(uint8_t *dst, const uint8_t *src, | ||||
| ptrdiff_t stride, int rnd); | ptrdiff_t stride, int rnd); | ||||
| void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| int x, int y); | |||||
| void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
| int h, int x, int y); | |||||
| av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp) | av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp) | ||||
| { | { | ||||
| @@ -19,9 +19,10 @@ | |||||
| #ifndef AVCODEC_H264CHROMA_H | #ifndef AVCODEC_H264CHROMA_H | ||||
| #define AVCODEC_H264CHROMA_H | #define AVCODEC_H264CHROMA_H | ||||
| #include <stddef.h> | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
| typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); | |||||
| typedef void (*h264_chroma_mc_func)(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, ptrdiff_t srcStride, int h, int x, int y); | |||||
| typedef struct H264ChromaContext { | typedef struct H264ChromaContext { | ||||
| h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | ||||
| @@ -20,11 +20,13 @@ | |||||
| */ | */ | ||||
| #include <assert.h> | #include <assert.h> | ||||
| #include <stddef.h> | |||||
| #include "bit_depth_template.c" | #include "bit_depth_template.c" | ||||
| #define H264_CHROMA_MC(OPNAME, OP)\ | #define H264_CHROMA_MC(OPNAME, OP)\ | ||||
| static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ | |||||
| static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst /*align 8*/, uint8_t *_src /*align 1*/, ptrdiff_t stride, int h, int x, int y)\ | |||||
| {\ | |||||
| pixel *dst = (pixel*)_dst;\ | pixel *dst = (pixel*)_dst;\ | ||||
| pixel *src = (pixel*)_src;\ | pixel *src = (pixel*)_src;\ | ||||
| const int A=(8-x)*(8-y);\ | const int A=(8-x)*(8-y);\ | ||||
| @@ -45,7 +47,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t * | |||||
| }\ | }\ | ||||
| } else if (B + C) {\ | } else if (B + C) {\ | ||||
| const int E= B+C;\ | const int E= B+C;\ | ||||
| const int step= C ? stride : 1;\ | |||||
| const ptrdiff_t step = C ? stride : 1;\ | |||||
| for(i=0; i<h; i++){\ | for(i=0; i<h; i++){\ | ||||
| OP(dst[0], (A*src[0] + E*src[step+0]));\ | OP(dst[0], (A*src[0] + E*src[step+0]));\ | ||||
| OP(dst[1], (A*src[1] + E*src[step+1]));\ | OP(dst[1], (A*src[1] + E*src[step+1]));\ | ||||
| @@ -62,7 +64,8 @@ static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t * | |||||
| }\ | }\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ | |||||
| static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst /*align 8*/, uint8_t *_src /*align 1*/, ptrdiff_t stride, int h, int x, int y)\ | |||||
| {\ | |||||
| pixel *dst = (pixel*)_dst;\ | pixel *dst = (pixel*)_dst;\ | ||||
| pixel *src = (pixel*)_src;\ | pixel *src = (pixel*)_src;\ | ||||
| const int A=(8-x)*(8-y);\ | const int A=(8-x)*(8-y);\ | ||||
| @@ -85,7 +88,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t * | |||||
| }\ | }\ | ||||
| } else if (B + C) {\ | } else if (B + C) {\ | ||||
| const int E= B+C;\ | const int E= B+C;\ | ||||
| const int step= C ? stride : 1;\ | |||||
| const ptrdiff_t step = C ? stride : 1;\ | |||||
| for(i=0; i<h; i++){\ | for(i=0; i<h; i++){\ | ||||
| OP(dst[0], (A*src[0] + E*src[step+0]));\ | OP(dst[0], (A*src[0] + E*src[step+0]));\ | ||||
| OP(dst[1], (A*src[1] + E*src[step+1]));\ | OP(dst[1], (A*src[1] + E*src[step+1]));\ | ||||
| @@ -106,7 +109,8 @@ static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t * | |||||
| }\ | }\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ | |||||
| static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst /*align 8*/, uint8_t *_src /*align 1*/, ptrdiff_t stride, int h, int x, int y)\ | |||||
| {\ | |||||
| pixel *dst = (pixel*)_dst;\ | pixel *dst = (pixel*)_dst;\ | ||||
| pixel *src = (pixel*)_src;\ | pixel *src = (pixel*)_src;\ | ||||
| const int A=(8-x)*(8-y);\ | const int A=(8-x)*(8-y);\ | ||||
| @@ -133,7 +137,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t * | |||||
| }\ | }\ | ||||
| } else if (B + C) {\ | } else if (B + C) {\ | ||||
| const int E= B+C;\ | const int E= B+C;\ | ||||
| const int step= C ? stride : 1;\ | |||||
| const ptrdiff_t step = C ? stride : 1;\ | |||||
| for(i=0; i<h; i++){\ | for(i=0; i<h; i++){\ | ||||
| OP(dst[0], (A*src[0] + E*src[step+0]));\ | OP(dst[0], (A*src[0] + E*src[step+0]));\ | ||||
| OP(dst[1], (A*src[1] + E*src[step+1]));\ | OP(dst[1], (A*src[1] + E*src[step+1]));\ | ||||
| @@ -72,7 +72,9 @@ | |||||
| #ifdef PREFIX_h264_chroma_mc8_altivec | #ifdef PREFIX_h264_chroma_mc8_altivec | ||||
| static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | ||||
| int stride, int h, int x, int y) { | |||||
| ptrdiff_t stride, int h, | |||||
| int x, int y) | |||||
| { | |||||
| DECLARE_ALIGNED(16, signed int, ABCD)[4] = | DECLARE_ALIGNED(16, signed int, ABCD)[4] = | ||||
| {((8 - x) * (8 - y)), | {((8 - x) * (8 - y)), | ||||
| (( x) * (8 - y)), | (( x) * (8 - y)), | ||||
| @@ -201,7 +203,10 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | |||||
| /* this code assume that stride % 16 == 0 */ | /* this code assume that stride % 16 == 0 */ | ||||
| #ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec | #ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec | ||||
| static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { | |||||
| static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, uint8_t *src, | |||||
| ptrdiff_t stride, int h, | |||||
| int x, int y) | |||||
| { | |||||
| DECLARE_ALIGNED(16, signed int, ABCD)[4] = | DECLARE_ALIGNED(16, signed int, ABCD)[4] = | ||||
| {((8 - x) * (8 - y)), | {((8 - x) * (8 - y)), | ||||
| (( x) * (8 - y)), | (( x) * (8 - y)), | ||||
| @@ -291,7 +291,10 @@ static const int rv40_bias[4][4] = { | |||||
| }; | }; | ||||
| #define RV40_CHROMA_MC(OPNAME, OP)\ | #define RV40_CHROMA_MC(OPNAME, OP)\ | ||||
| static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | |||||
| static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst /*align 8*/,\ | |||||
| uint8_t *src /*align 1*/,\ | |||||
| ptrdiff_t stride, int h, int x, int y)\ | |||||
| {\ | |||||
| const int A = (8-x) * (8-y);\ | const int A = (8-x) * (8-y);\ | ||||
| const int B = ( x) * (8-y);\ | const int B = ( x) * (8-y);\ | ||||
| const int C = (8-x) * ( y);\ | const int C = (8-x) * ( y);\ | ||||
| @@ -312,7 +315,7 @@ static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a | |||||
| }\ | }\ | ||||
| }else{\ | }else{\ | ||||
| const int E = B + C;\ | const int E = B + C;\ | ||||
| const int step = C ? stride : 1;\ | |||||
| const ptrdiff_t step = C ? stride : 1;\ | |||||
| for(i = 0; i < h; i++){\ | for(i = 0; i < h; i++){\ | ||||
| OP(dst[0], (A*src[0] + E*src[step+0] + bias));\ | OP(dst[0], (A*src[0] + E*src[step+0] + bias));\ | ||||
| OP(dst[1], (A*src[1] + E*src[step+1] + bias));\ | OP(dst[1], (A*src[1] + E*src[step+1] + bias));\ | ||||
| @@ -324,7 +327,10 @@ static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a | |||||
| }\ | }\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | |||||
| static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/,\ | |||||
| uint8_t *src/*align 1*/,\ | |||||
| ptrdiff_t stride, int h, int x, int y)\ | |||||
| {\ | |||||
| const int A = (8-x) * (8-y);\ | const int A = (8-x) * (8-y);\ | ||||
| const int B = ( x) * (8-y);\ | const int B = ( x) * (8-y);\ | ||||
| const int C = (8-x) * ( y);\ | const int C = (8-x) * ( y);\ | ||||
| @@ -349,7 +355,7 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a | |||||
| }\ | }\ | ||||
| }else{\ | }else{\ | ||||
| const int E = B + C;\ | const int E = B + C;\ | ||||
| const int step = C ? stride : 1;\ | |||||
| const ptrdiff_t step = C ? stride : 1;\ | |||||
| for(i = 0; i < h; i++){\ | for(i = 0; i < h; i++){\ | ||||
| OP(dst[0], (A*src[0] + E*src[step+0] + bias));\ | OP(dst[0], (A*src[0] + E*src[step+0] + bias));\ | ||||
| OP(dst[1], (A*src[1] + E*src[step+1] + bias));\ | OP(dst[1], (A*src[1] + E*src[step+1] + bias));\ | ||||
| @@ -700,7 +700,7 @@ static void avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, | |||||
| C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6) | C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6) | ||||
| static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */, | static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */, | ||||
| uint8_t *src /* align 1 */, | uint8_t *src /* align 1 */, | ||||
| int stride, int h, int x, int y) | |||||
| ptrdiff_t stride, int h, int x, int y) | |||||
| { | { | ||||
| const int A = (8 - x) * (8 - y); | const int A = (8 - x) * (8 - y); | ||||
| const int B = (x) * (8 - y); | const int B = (x) * (8 - y); | ||||
| @@ -725,7 +725,7 @@ static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */, | |||||
| } | } | ||||
| static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src, | static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y) | |||||
| ptrdiff_t stride, int h, int x, int y) | |||||
| { | { | ||||
| const int A = (8 - x) * (8 - y); | const int A = (8 - x) * (8 - y); | ||||
| const int B = (x) * (8 - y); | const int B = (x) * (8 - y); | ||||
| @@ -748,7 +748,7 @@ static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src, | |||||
| #define avg2(a, b) (((a) + (b) + 1) >> 1) | #define avg2(a, b) (((a) + (b) + 1) >> 1) | ||||
| static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */, | static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */, | ||||
| uint8_t *src /* align 1 */, | uint8_t *src /* align 1 */, | ||||
| int stride, int h, int x, int y) | |||||
| ptrdiff_t stride, int h, int x, int y) | |||||
| { | { | ||||
| const int A = (8 - x) * (8 - y); | const int A = (8 - x) * (8 - y); | ||||
| const int B = (x) * (8 - y); | const int B = (x) * (8 - y); | ||||
| @@ -774,7 +774,7 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */, | |||||
| static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */, | static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */, | ||||
| uint8_t *src /* align 1 */, | uint8_t *src /* align 1 */, | ||||
| int stride, int h, int x, int y) | |||||
| ptrdiff_t stride, int h, int x, int y) | |||||
| { | { | ||||
| const int A = (8 - x) * (8 - y); | const int A = (8 - x) * (8 - y); | ||||
| const int B = ( x) * (8 - y); | const int B = ( x) * (8 - y); | ||||
| @@ -105,11 +105,8 @@ SECTION .text | |||||
| %endif ; rv40 | %endif ; rv40 | ||||
| ; void ff_put/avg_h264_chroma_mc8_*(uint8_t *dst /* align 8 */, | ; void ff_put/avg_h264_chroma_mc8_*(uint8_t *dst /* align 8 */, | ||||
| ; uint8_t *src /* align 1 */, | ; uint8_t *src /* align 1 */, | ||||
| ; int stride, int h, int mx, int my) | |||||
| ; ptrdiff_t stride, int h, int mx, int my) | |||||
| cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0 | cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0 | ||||
| %if ARCH_X86_64 | |||||
| movsxd r2, r2d | |||||
| %endif | |||||
| mov r6d, r5d | mov r6d, r5d | ||||
| or r6d, r4d | or r6d, r4d | ||||
| jne .at_least_one_non_zero | jne .at_least_one_non_zero | ||||
| @@ -291,9 +288,6 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0 | |||||
| %endif ; PIC | %endif ; PIC | ||||
| %endif ; rv40 | %endif ; rv40 | ||||
| cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0 | cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0 | ||||
| %if ARCH_X86_64 | |||||
| movsxd r2, r2d | |||||
| %endif | |||||
| pxor m7, m7 | pxor m7, m7 | ||||
| movd m2, r4d ; x | movd m2, r4d ; x | ||||
| movd m3, r5d ; y | movd m3, r5d ; y | ||||
| @@ -376,10 +370,6 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0 | |||||
| %macro chroma_mc2_mmx_func 2 | %macro chroma_mc2_mmx_func 2 | ||||
| cglobal %1_%2_chroma_mc2, 6, 7, 0 | cglobal %1_%2_chroma_mc2, 6, 7, 0 | ||||
| %if ARCH_X86_64 | |||||
| movsxd r2, r2d | |||||
| %endif | |||||
| mov r6d, r4d | mov r6d, r4d | ||||
| shl r4d, 16 | shl r4d, 16 | ||||
| sub r4d, r6d | sub r4d, r6d | ||||
| @@ -465,9 +455,6 @@ chroma_mc4_mmx_func avg, rv40 | |||||
| %macro chroma_mc8_ssse3_func 2-3 | %macro chroma_mc8_ssse3_func 2-3 | ||||
| cglobal %1_%2_chroma_mc8%3, 6, 7, 8 | cglobal %1_%2_chroma_mc8%3, 6, 7, 8 | ||||
| %if ARCH_X86_64 | |||||
| movsxd r2, r2d | |||||
| %endif | |||||
| mov r6d, r5d | mov r6d, r5d | ||||
| or r6d, r4d | or r6d, r4d | ||||
| jne .at_least_one_non_zero | jne .at_least_one_non_zero | ||||
| @@ -613,9 +600,6 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8 | |||||
| %macro chroma_mc4_ssse3_func 2 | %macro chroma_mc4_ssse3_func 2 | ||||
| cglobal %1_%2_chroma_mc4, 6, 7, 0 | cglobal %1_%2_chroma_mc4, 6, 7, 0 | ||||
| %if ARCH_X86_64 | |||||
| movsxd r2, r2d | |||||
| %endif | |||||
| mov r6, r4 | mov r6, r4 | ||||
| shl r4d, 8 | shl r4d, 8 | ||||
| sub r4d, r6d | sub r4d, r6d | ||||
| @@ -57,12 +57,11 @@ SECTION .text | |||||
| %endmacro | %endmacro | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void ff_put/avg_h264_chroma_mc8(pixel *dst, pixel *src, int stride, int h, | |||||
| ; int mx, int my) | |||||
| ; void ff_put/avg_h264_chroma_mc8(pixel *dst, pixel *src, ptrdiff_t stride, | |||||
| ; int h, int mx, int my) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %macro CHROMA_MC8 1 | %macro CHROMA_MC8 1 | ||||
| cglobal %1_h264_chroma_mc8_10, 6,7,8 | cglobal %1_h264_chroma_mc8_10, 6,7,8 | ||||
| movsxdifnidn r2, r2d | |||||
| mov r6d, r5d | mov r6d, r5d | ||||
| or r6d, r4d | or r6d, r4d | ||||
| jne .at_least_one_non_zero | jne .at_least_one_non_zero | ||||
| @@ -149,8 +148,8 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8 | |||||
| %endmacro | %endmacro | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void ff_put/avg_h264_chroma_mc4(pixel *dst, pixel *src, int stride, int h, | |||||
| ; int mx, int my) | |||||
| ; void ff_put/avg_h264_chroma_mc4(pixel *dst, pixel *src, ptrdiff_t stride, | |||||
| ; int h, int mx, int my) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ;TODO: xmm mc4 | ;TODO: xmm mc4 | ||||
| %macro MC4_OP 2 | %macro MC4_OP 2 | ||||
| @@ -174,7 +173,6 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8 | |||||
| %macro CHROMA_MC4 1 | %macro CHROMA_MC4 1 | ||||
| cglobal %1_h264_chroma_mc4_10, 6,6,7 | cglobal %1_h264_chroma_mc4_10, 6,6,7 | ||||
| movsxdifnidn r2, r2d | |||||
| movd m2, r4m ; x | movd m2, r4m ; x | ||||
| movd m3, r5m ; y | movd m3, r5m ; y | ||||
| mova m4, [pw_8] | mova m4, [pw_8] | ||||
| @@ -200,12 +198,11 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7 | |||||
| %endmacro | %endmacro | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void ff_put/avg_h264_chroma_mc2(pixel *dst, pixel *src, int stride, int h, | |||||
| ; int mx, int my) | |||||
| ; void ff_put/avg_h264_chroma_mc2(pixel *dst, pixel *src, ptrdiff_t stride, | |||||
| ; int h, int mx, int my) | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %macro CHROMA_MC2 1 | %macro CHROMA_MC2 1 | ||||
| cglobal %1_h264_chroma_mc2_10, 6,7 | cglobal %1_h264_chroma_mc2_10, 6,7 | ||||
| movsxdifnidn r2, r2d | |||||
| mov r6d, r4d | mov r6d, r4d | ||||
| shl r4d, 16 | shl r4d, 16 | ||||
| sub r4d, r6d | sub r4d, r6d | ||||
| @@ -25,38 +25,38 @@ | |||||
| #include "libavcodec/h264chroma.h" | #include "libavcodec/h264chroma.h" | ||||
| void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, | void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src, | void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src, | void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, | void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src, | void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, | void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, | void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, | void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, | void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, | void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, | void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, | void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| #define CHROMA_MC(OP, NUM, DEPTH, OPT) \ | #define CHROMA_MC(OP, NUM, DEPTH, OPT) \ | ||||
| void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \ | void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \ | ||||
| (uint8_t *dst, uint8_t *src, \ | (uint8_t *dst, uint8_t *src, \ | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| CHROMA_MC(put, 2, 10, mmxext) | CHROMA_MC(put, 2, 10, mmxext) | ||||
| CHROMA_MC(avg, 2, 10, mmxext) | CHROMA_MC(avg, 2, 10, mmxext) | ||||
| @@ -34,18 +34,18 @@ | |||||
| #if HAVE_YASM | #if HAVE_YASM | ||||
| void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, | void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_rv40_chroma_mc8_mmxext(uint8_t *dst, uint8_t *src, | void ff_avg_rv40_chroma_mc8_mmxext(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src, | void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, | void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_rv40_chroma_mc4_mmxext(uint8_t *dst, uint8_t *src, | void ff_avg_rv40_chroma_mc4_mmxext(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src, | void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| #define DECLARE_WEIGHT(opt) \ | #define DECLARE_WEIGHT(opt) \ | ||||
| void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \ | void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \ | ||||
| @@ -71,15 +71,15 @@ static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, | |||||
| #endif /* HAVE_YASM */ | #endif /* HAVE_YASM */ | ||||
| void ff_put_vc1_chroma_mc8_nornd_mmx (uint8_t *dst, uint8_t *src, | void ff_put_vc1_chroma_mc8_nornd_mmx (uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src, | void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_vc1_chroma_mc8_nornd_3dnow(uint8_t *dst, uint8_t *src, | void ff_avg_vc1_chroma_mc8_nornd_3dnow(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src, | void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src, | void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src, | ||||
| int stride, int h, int x, int y); | |||||
| ptrdiff_t stride, int h, int x, int y); | |||||
| av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) | av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) | ||||