On 64-bit platforms with 32-bit int, this means we won't have to sign-extend the integer anymore. tags/n0.11
@@ -22,38 +22,38 @@ | |||||
void ff_vp8_luma_dc_wht_neon(DCTELEM block[4][4][16], DCTELEM dc[16]); | void ff_vp8_luma_dc_wht_neon(DCTELEM block[4][4][16], DCTELEM dc[16]); | ||||
void ff_vp8_luma_dc_wht_dc_neon(DCTELEM block[4][4][16], DCTELEM dc[16]); | void ff_vp8_luma_dc_wht_dc_neon(DCTELEM block[4][4][16], DCTELEM dc[16]); | ||||
void ff_vp8_idct_add_neon(uint8_t *dst, DCTELEM block[16], int stride); | |||||
void ff_vp8_idct_dc_add_neon(uint8_t *dst, DCTELEM block[16], int stride); | |||||
void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, DCTELEM block[4][16], int stride); | |||||
void ff_vp8_idct_dc_add4uv_neon(uint8_t *dst, DCTELEM block[4][16], int stride); | |||||
void ff_vp8_idct_add_neon(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); | |||||
void ff_vp8_idct_dc_add_neon(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); | |||||
void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride); | |||||
void ff_vp8_idct_dc_add4uv_neon(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride); | |||||
void ff_vp8_v_loop_filter16_neon(uint8_t *dst, int stride, | |||||
void ff_vp8_v_loop_filter16_neon(uint8_t *dst, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void ff_vp8_h_loop_filter16_neon(uint8_t *dst, int stride, | |||||
void ff_vp8_h_loop_filter16_neon(uint8_t *dst, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void ff_vp8_v_loop_filter8uv_neon(uint8_t *dstU, uint8_t *dstV, int stride, | |||||
void ff_vp8_v_loop_filter8uv_neon(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void ff_vp8_h_loop_filter8uv_neon(uint8_t *dstU, uint8_t *dstV, int stride, | |||||
void ff_vp8_h_loop_filter8uv_neon(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void ff_vp8_v_loop_filter16_inner_neon(uint8_t *dst, int stride, | |||||
void ff_vp8_v_loop_filter16_inner_neon(uint8_t *dst, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void ff_vp8_h_loop_filter16_inner_neon(uint8_t *dst, int stride, | |||||
void ff_vp8_h_loop_filter16_inner_neon(uint8_t *dst, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void ff_vp8_v_loop_filter8uv_inner_neon(uint8_t *dstU, uint8_t *dstV, | void ff_vp8_v_loop_filter8uv_inner_neon(uint8_t *dstU, uint8_t *dstV, | ||||
int stride, int flim_E, int flim_I, | |||||
ptrdiff_t stride, int flim_E, int flim_I, | |||||
int hev_thresh); | int hev_thresh); | ||||
void ff_vp8_h_loop_filter8uv_inner_neon(uint8_t *dstU, uint8_t *dstV, | void ff_vp8_h_loop_filter8uv_inner_neon(uint8_t *dstU, uint8_t *dstV, | ||||
int stride, int flim_E, int flim_I, | |||||
ptrdiff_t stride, int flim_E, int flim_I, | |||||
int hev_thresh); | int hev_thresh); | ||||
void ff_vp8_v_loop_filter16_simple_neon(uint8_t *dst, int stride, int flim); | |||||
void ff_vp8_h_loop_filter16_simple_neon(uint8_t *dst, int stride, int flim); | |||||
void ff_vp8_v_loop_filter16_simple_neon(uint8_t *dst, ptrdiff_t stride, int flim); | |||||
void ff_vp8_h_loop_filter16_simple_neon(uint8_t *dst, ptrdiff_t stride, int flim); | |||||
#define VP8_MC(n) \ | #define VP8_MC(n) \ | ||||
void ff_put_vp8_##n##_neon(uint8_t *dst, int dststride, \ | |||||
uint8_t *src, int srcstride, \ | |||||
void ff_put_vp8_##n##_neon(uint8_t *dst, ptrdiff_t dststride, \ | |||||
uint8_t *src, ptrdiff_t srcstride, \ | |||||
int h, int x, int y) | int h, int x, int y) | ||||
#define VP8_EPEL(w) \ | #define VP8_EPEL(w) \ | ||||
@@ -75,8 +75,8 @@ static const vec_s8 h_subpel_filters_outer[3] = | |||||
dstv = vec_sra(dstv, c7) | dstv = vec_sra(dstv, c7) | ||||
static av_always_inline | static av_always_inline | ||||
void put_vp8_epel_h_altivec_core(uint8_t *dst, int dst_stride, | |||||
uint8_t *src, int src_stride, | |||||
void put_vp8_epel_h_altivec_core(uint8_t *dst, ptrdiff_t dst_stride, | |||||
uint8_t *src, ptrdiff_t src_stride, | |||||
int h, int mx, int w, int is6tap) | int h, int mx, int w, int is6tap) | ||||
{ | { | ||||
LOAD_H_SUBPEL_FILTER(mx-1); | LOAD_H_SUBPEL_FILTER(mx-1); | ||||
@@ -161,8 +161,8 @@ static const vec_u8 v_subpel_filters[7] = | |||||
dstv = vec_sra(dstv, c7) | dstv = vec_sra(dstv, c7) | ||||
static av_always_inline | static av_always_inline | ||||
void put_vp8_epel_v_altivec_core(uint8_t *dst, int dst_stride, | |||||
uint8_t *src, int src_stride, | |||||
void put_vp8_epel_v_altivec_core(uint8_t *dst, ptrdiff_t dst_stride, | |||||
uint8_t *src, ptrdiff_t src_stride, | |||||
int h, int my, int w, int is6tap) | int h, int my, int w, int is6tap) | ||||
{ | { | ||||
LOAD_V_SUBPEL_FILTER(my-1); | LOAD_V_SUBPEL_FILTER(my-1); | ||||
@@ -226,19 +226,19 @@ void put_vp8_epel_v_altivec_core(uint8_t *dst, int dst_stride, | |||||
#define EPEL_FUNCS(WIDTH, TAPS) \ | #define EPEL_FUNCS(WIDTH, TAPS) \ | ||||
static av_noinline \ | static av_noinline \ | ||||
void put_vp8_epel ## WIDTH ## _h ## TAPS ## _altivec(uint8_t *dst, int dst_stride, uint8_t *src, int src_stride, int h, int mx, int my) \ | |||||
void put_vp8_epel ## WIDTH ## _h ## TAPS ## _altivec(uint8_t *dst, ptrdiff_t dst_stride, uint8_t *src, ptrdiff_t src_stride, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
put_vp8_epel_h_altivec_core(dst, dst_stride, src, src_stride, h, mx, WIDTH, TAPS == 6); \ | put_vp8_epel_h_altivec_core(dst, dst_stride, src, src_stride, h, mx, WIDTH, TAPS == 6); \ | ||||
} \ | } \ | ||||
\ | \ | ||||
static av_noinline \ | static av_noinline \ | ||||
void put_vp8_epel ## WIDTH ## _v ## TAPS ## _altivec(uint8_t *dst, int dst_stride, uint8_t *src, int src_stride, int h, int mx, int my) \ | |||||
void put_vp8_epel ## WIDTH ## _v ## TAPS ## _altivec(uint8_t *dst, ptrdiff_t dst_stride, uint8_t *src, ptrdiff_t src_stride, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
put_vp8_epel_v_altivec_core(dst, dst_stride, src, src_stride, h, my, WIDTH, TAPS == 6); \ | put_vp8_epel_v_altivec_core(dst, dst_stride, src, src_stride, h, my, WIDTH, TAPS == 6); \ | ||||
} | } | ||||
#define EPEL_HV(WIDTH, HTAPS, VTAPS) \ | #define EPEL_HV(WIDTH, HTAPS, VTAPS) \ | ||||
static void put_vp8_epel ## WIDTH ## _h ## HTAPS ## v ## VTAPS ## _altivec(uint8_t *dst, int stride, uint8_t *src, int s, int h, int mx, int my) \ | |||||
static void put_vp8_epel ## WIDTH ## _h ## HTAPS ## v ## VTAPS ## _altivec(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
DECLARE_ALIGNED(16, uint8_t, tmp)[(2*WIDTH+5)*16]; \ | DECLARE_ALIGNED(16, uint8_t, tmp)[(2*WIDTH+5)*16]; \ | ||||
if (VTAPS == 6) { \ | if (VTAPS == 6) { \ | ||||
@@ -266,7 +266,7 @@ EPEL_HV(4, 4,6) | |||||
EPEL_HV(4, 6,4) | EPEL_HV(4, 6,4) | ||||
EPEL_HV(4, 4,4) | EPEL_HV(4, 4,4) | ||||
static void put_vp8_pixels16_altivec(uint8_t *dst, int stride, uint8_t *src, int s, int h, int mx, int my) | |||||
static void put_vp8_pixels16_altivec(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s, int h, int mx, int my) | |||||
{ | { | ||||
ff_put_pixels16_altivec(dst, src, stride, h); | ff_put_pixels16_altivec(dst, src, stride, h); | ||||
} | } | ||||
@@ -77,7 +77,7 @@ static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16]) | |||||
#define MUL_20091(a) ((((a)*20091) >> 16) + (a)) | #define MUL_20091(a) ((((a)*20091) >> 16) + (a)) | ||||
#define MUL_35468(a) (((a)*35468) >> 16) | #define MUL_35468(a) (((a)*35468) >> 16) | ||||
static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |||||
static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) | |||||
{ | { | ||||
int i, t0, t1, t2, t3; | int i, t0, t1, t2, t3; | ||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||||
@@ -113,7 +113,7 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |||||
} | } | ||||
} | } | ||||
static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |||||
static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) | |||||
{ | { | ||||
int i, dc = (block[0] + 4) >> 3; | int i, dc = (block[0] + 4) >> 3; | ||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; | ||||
@@ -128,7 +128,7 @@ static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |||||
} | } | ||||
} | } | ||||
static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], int stride) | |||||
static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride) | |||||
{ | { | ||||
vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride); | vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride); | ||||
vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride); | vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride); | ||||
@@ -136,7 +136,7 @@ static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], int stride) | |||||
vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride); | vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride); | ||||
} | } | ||||
static void vp8_idct_dc_add4y_c(uint8_t *dst, DCTELEM block[4][16], int stride) | |||||
static void vp8_idct_dc_add4y_c(uint8_t *dst, DCTELEM block[4][16], ptrdiff_t stride) | |||||
{ | { | ||||
vp8_idct_dc_add_c(dst+ 0, block[0], stride); | vp8_idct_dc_add_c(dst+ 0, block[0], stride); | ||||
vp8_idct_dc_add_c(dst+ 4, block[1], stride); | vp8_idct_dc_add_c(dst+ 4, block[1], stride); | ||||
@@ -157,7 +157,7 @@ static void vp8_idct_dc_add4y_c(uint8_t *dst, DCTELEM block[4][16], int stride) | |||||
#define clip_int8(n) (cm[n+0x80]-0x80) | #define clip_int8(n) (cm[n+0x80]-0x80) | ||||
static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap) | |||||
static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4tap) | |||||
{ | { | ||||
LOAD_PIXELS | LOAD_PIXELS | ||||
int a, f1, f2; | int a, f1, f2; | ||||
@@ -188,7 +188,7 @@ static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap) | |||||
} | } | ||||
} | } | ||||
static av_always_inline int simple_limit(uint8_t *p, int stride, int flim) | |||||
static av_always_inline int simple_limit(uint8_t *p, ptrdiff_t stride, int flim) | |||||
{ | { | ||||
LOAD_PIXELS | LOAD_PIXELS | ||||
return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; | return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; | ||||
@@ -198,7 +198,7 @@ static av_always_inline int simple_limit(uint8_t *p, int stride, int flim) | |||||
* E - limit at the macroblock edge | * E - limit at the macroblock edge | ||||
* I - limit for interior difference | * I - limit for interior difference | ||||
*/ | */ | ||||
static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I) | |||||
static av_always_inline int normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I) | |||||
{ | { | ||||
LOAD_PIXELS | LOAD_PIXELS | ||||
return simple_limit(p, stride, E) | return simple_limit(p, stride, E) | ||||
@@ -207,13 +207,13 @@ static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I) | |||||
} | } | ||||
// high edge variance | // high edge variance | ||||
static av_always_inline int hev(uint8_t *p, int stride, int thresh) | |||||
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh) | |||||
{ | { | ||||
LOAD_PIXELS | LOAD_PIXELS | ||||
return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; | return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; | ||||
} | } | ||||
static av_always_inline void filter_mbedge(uint8_t *p, int stride) | |||||
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) | |||||
{ | { | ||||
int a0, a1, a2, w; | int a0, a1, a2, w; | ||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | ||||
@@ -236,7 +236,7 @@ static av_always_inline void filter_mbedge(uint8_t *p, int stride) | |||||
} | } | ||||
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \ | #define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \ | ||||
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\ | |||||
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, ptrdiff_t stride,\ | |||||
int flim_E, int flim_I, int hev_thresh)\ | int flim_E, int flim_I, int hev_thresh)\ | ||||
{\ | {\ | ||||
int i;\ | int i;\ | ||||
@@ -250,7 +250,7 @@ static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, | |||||
}\ | }\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\ | |||||
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, ptrdiff_t stride,\ | |||||
int flim_E, int flim_I, int hev_thresh)\ | int flim_E, int flim_I, int hev_thresh)\ | ||||
{\ | {\ | ||||
int i;\ | int i;\ | ||||
@@ -270,13 +270,13 @@ LOOP_FILTER(h, 16, stride, 1,) | |||||
#define UV_LOOP_FILTER(dir, stridea, strideb) \ | #define UV_LOOP_FILTER(dir, stridea, strideb) \ | ||||
LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ | LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ | ||||
static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\ | |||||
static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\ | |||||
int fE, int fI, int hev_thresh)\ | int fE, int fI, int hev_thresh)\ | ||||
{\ | {\ | ||||
vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ | vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ | ||||
vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ | vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ | ||||
}\ | }\ | ||||
static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\ | |||||
static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\ | |||||
int fE, int fI, int hev_thresh)\ | int fE, int fI, int hev_thresh)\ | ||||
{\ | {\ | ||||
vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ | vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ | ||||
@@ -286,7 +286,7 @@ static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, | |||||
UV_LOOP_FILTER(v, 1, stride) | UV_LOOP_FILTER(v, 1, stride) | ||||
UV_LOOP_FILTER(h, stride, 1) | UV_LOOP_FILTER(h, stride, 1) | ||||
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim) | |||||
static void vp8_v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim) | |||||
{ | { | ||||
int i; | int i; | ||||
@@ -295,7 +295,7 @@ static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim) | |||||
filter_common(dst+i, stride, 1); | filter_common(dst+i, stride, 1); | ||||
} | } | ||||
static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim) | |||||
static void vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim) | |||||
{ | { | ||||
int i; | int i; | ||||
@@ -315,7 +315,7 @@ static const uint8_t subpel_filters[7][6] = { | |||||
}; | }; | ||||
#define PUT_PIXELS(WIDTH) \ | #define PUT_PIXELS(WIDTH) \ | ||||
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int x, int y) { \ | |||||
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y) { \ | |||||
int i; \ | int i; \ | ||||
for (i = 0; i < h; i++, dst+= dststride, src+= srcstride) { \ | for (i = 0; i < h; i++, dst+= dststride, src+= srcstride) { \ | ||||
memcpy(dst, src, WIDTH); \ | memcpy(dst, src, WIDTH); \ | ||||
@@ -335,7 +335,7 @@ PUT_PIXELS(4) | |||||
F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7] | F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7] | ||||
#define VP8_EPEL_H(SIZE, TAPS) \ | #define VP8_EPEL_H(SIZE, TAPS) \ | ||||
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \ | |||||
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
const uint8_t *filter = subpel_filters[mx-1]; \ | const uint8_t *filter = subpel_filters[mx-1]; \ | ||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ | ||||
@@ -349,7 +349,7 @@ static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, int dststride | |||||
} \ | } \ | ||||
} | } | ||||
#define VP8_EPEL_V(SIZE, TAPS) \ | #define VP8_EPEL_V(SIZE, TAPS) \ | ||||
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \ | |||||
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
const uint8_t *filter = subpel_filters[my-1]; \ | const uint8_t *filter = subpel_filters[my-1]; \ | ||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ | ||||
@@ -363,7 +363,7 @@ static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, int dststride | |||||
} \ | } \ | ||||
} | } | ||||
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \ | #define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \ | ||||
static void put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \ | |||||
static void put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
const uint8_t *filter = subpel_filters[mx-1]; \ | const uint8_t *filter = subpel_filters[mx-1]; \ | ||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ | ||||
@@ -416,7 +416,7 @@ VP8_EPEL_HV(8, 6, 6) | |||||
VP8_EPEL_HV(4, 6, 6) | VP8_EPEL_HV(4, 6, 6) | ||||
#define VP8_BILINEAR(SIZE) \ | #define VP8_BILINEAR(SIZE) \ | ||||
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \ | |||||
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s2, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
int a = 8-mx, b = mx; \ | int a = 8-mx, b = mx; \ | ||||
int x, y; \ | int x, y; \ | ||||
@@ -428,7 +428,7 @@ static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, int stride, uint8_t * | |||||
src += stride; \ | src += stride; \ | ||||
} \ | } \ | ||||
} \ | } \ | ||||
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \ | |||||
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s2, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
int c = 8-my, d = my; \ | int c = 8-my, d = my; \ | ||||
int x, y; \ | int x, y; \ | ||||
@@ -441,7 +441,7 @@ static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, int stride, uint8_t * | |||||
} \ | } \ | ||||
} \ | } \ | ||||
\ | \ | ||||
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \ | |||||
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s2, int h, int mx, int my) \ | |||||
{ \ | { \ | ||||
int a = 8-mx, b = mx; \ | int a = 8-mx, b = mx; \ | ||||
int c = 8-my, d = my; \ | int c = 8-my, d = my; \ | ||||
@@ -29,40 +29,44 @@ | |||||
#include "dsputil.h" | #include "dsputil.h" | ||||
typedef void (*vp8_mc_func)(uint8_t *dst/*align 8*/, int dstStride, | |||||
uint8_t *src/*align 1*/, int srcStride, | |||||
typedef void (*vp8_mc_func)(uint8_t *dst/*align 8*/, ptrdiff_t dstStride, | |||||
uint8_t *src/*align 1*/, ptrdiff_t srcStride, | |||||
int h, int x, int y); | int h, int x, int y); | ||||
typedef struct VP8DSPContext { | typedef struct VP8DSPContext { | ||||
void (*vp8_luma_dc_wht)(DCTELEM block[4][4][16], DCTELEM dc[16]); | void (*vp8_luma_dc_wht)(DCTELEM block[4][4][16], DCTELEM dc[16]); | ||||
void (*vp8_luma_dc_wht_dc)(DCTELEM block[4][4][16], DCTELEM dc[16]); | void (*vp8_luma_dc_wht_dc)(DCTELEM block[4][4][16], DCTELEM dc[16]); | ||||
void (*vp8_idct_add)(uint8_t *dst, DCTELEM block[16], int stride); | |||||
void (*vp8_idct_dc_add)(uint8_t *dst, DCTELEM block[16], int stride); | |||||
void (*vp8_idct_dc_add4y)(uint8_t *dst, DCTELEM block[4][16], int stride); | |||||
void (*vp8_idct_dc_add4uv)(uint8_t *dst, DCTELEM block[4][16], int stride); | |||||
void (*vp8_idct_add)(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); | |||||
void (*vp8_idct_dc_add)(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride); | |||||
void (*vp8_idct_dc_add4y)(uint8_t *dst, DCTELEM block[4][16], | |||||
ptrdiff_t stride); | |||||
void (*vp8_idct_dc_add4uv)(uint8_t *dst, DCTELEM block[4][16], | |||||
ptrdiff_t stride); | |||||
// loop filter applied to edges between macroblocks | // loop filter applied to edges between macroblocks | ||||
void (*vp8_v_loop_filter16y)(uint8_t *dst, int stride, | |||||
void (*vp8_v_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void (*vp8_h_loop_filter16y)(uint8_t *dst, int stride, | |||||
void (*vp8_h_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void (*vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, int stride, | |||||
void (*vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void (*vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, int stride, | |||||
void (*vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
// loop filter applied to inner macroblock edges | // loop filter applied to inner macroblock edges | ||||
void (*vp8_v_loop_filter16y_inner)(uint8_t *dst, int stride, | |||||
void (*vp8_v_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void (*vp8_h_loop_filter16y_inner)(uint8_t *dst, int stride, | |||||
void (*vp8_h_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void (*vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, int stride, | |||||
void (*vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, | |||||
ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void (*vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, int stride, | |||||
void (*vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, | |||||
ptrdiff_t stride, | |||||
int flim_E, int flim_I, int hev_thresh); | int flim_E, int flim_I, int hev_thresh); | ||||
void (*vp8_v_loop_filter_simple)(uint8_t *dst, int stride, int flim); | |||||
void (*vp8_h_loop_filter_simple)(uint8_t *dst, int stride, int flim); | |||||
void (*vp8_v_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim); | |||||
void (*vp8_h_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim); | |||||
/** | /** | ||||
* first dimension: width>>3, height is assumed equal to width | * first dimension: width>>3, height is assumed equal to width | ||||
@@ -76,9 +80,12 @@ typedef struct VP8DSPContext { | |||||
vp8_mc_func put_vp8_bilinear_pixels_tab[3][3][3]; | vp8_mc_func put_vp8_bilinear_pixels_tab[3][3][3]; | ||||
} VP8DSPContext; | } VP8DSPContext; | ||||
void ff_put_vp8_pixels16_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); | |||||
void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); | |||||
void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); | |||||
void ff_put_vp8_pixels16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
int h, int x, int y); | |||||
void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
int h, int x, int y); | |||||
void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, | |||||
int h, int x, int y); | |||||
void ff_vp8dsp_init(VP8DSPContext *c); | void ff_vp8dsp_init(VP8DSPContext *c); | ||||
void ff_vp8dsp_init_x86(VP8DSPContext *c); | void ff_vp8dsp_init_x86(VP8DSPContext *c); | ||||
@@ -29,98 +29,98 @@ | |||||
/* | /* | ||||
* MC functions | * MC functions | ||||
*/ | */ | ||||
extern void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel8_h4_sse2 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel8_h4_sse2 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel8_h6_sse2 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel8_h6_sse2 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel8_v4_sse2 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel8_v4_sse2 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel8_v6_sse2 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel8_v6_sse2 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_pixels16_mmx(uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
extern void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride, | |||||
uint8_t *src, int srcstride, | |||||
extern void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride, | |||||
uint8_t *src, ptrdiff_t srcstride, | |||||
int height, int mx, int my); | int height, int mx, int my); | ||||
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \ | #define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \ | ||||
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \ | static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \ | ||||
uint8_t *dst, int dststride, uint8_t *src, \ | |||||
int srcstride, int height, int mx, int my) \ | |||||
uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ | |||||
ptrdiff_t srcstride, int height, int mx, int my) \ | |||||
{ \ | { \ | ||||
ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ | ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ | ||||
dst, dststride, src, srcstride, height, mx, my); \ | dst, dststride, src, srcstride, height, mx, my); \ | ||||
@@ -129,8 +129,8 @@ static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \ | |||||
} | } | ||||
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \ | #define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \ | ||||
static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ | static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ | ||||
uint8_t *dst, int dststride, uint8_t *src, \ | |||||
int srcstride, int height, int mx, int my) \ | |||||
uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ | |||||
ptrdiff_t srcstride, int height, int mx, int my) \ | |||||
{ \ | { \ | ||||
ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \ | ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \ | ||||
dst, dststride, src, srcstride, height, mx, my); \ | dst, dststride, src, srcstride, height, mx, my); \ | ||||
@@ -161,8 +161,8 @@ TAP_W16(ssse3, bilinear, v) | |||||
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \ | #define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \ | ||||
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \ | static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \ | ||||
uint8_t *dst, int dststride, uint8_t *src, \ | |||||
int srcstride, int height, int mx, int my) \ | |||||
uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ | |||||
ptrdiff_t srcstride, int height, int mx, int my) \ | |||||
{ \ | { \ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \ | DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \ | ||||
uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \ | uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \ | ||||
@@ -200,8 +200,8 @@ HVTAP(ssse3, 16, 6, 6, 4, 8) | |||||
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \ | #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \ | ||||
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \ | static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \ | ||||
uint8_t *dst, int dststride, uint8_t *src, \ | |||||
int srcstride, int height, int mx, int my) \ | |||||
uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ | |||||
ptrdiff_t srcstride, int height, int mx, int my) \ | |||||
{ \ | { \ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + 2)]; \ | DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + 2)]; \ | ||||
ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \ | ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \ | ||||
@@ -219,35 +219,58 @@ HVBILIN(ssse3, 8, 4, 8) | |||||
HVBILIN(ssse3, 8, 8, 16) | HVBILIN(ssse3, 8, 8, 16) | ||||
HVBILIN(ssse3, 8, 16, 16) | HVBILIN(ssse3, 8, 16, 16) | ||||
extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride); | |||||
extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride); | |||||
extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], int stride); | |||||
extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride); | |||||
extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride); | |||||
extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], | |||||
ptrdiff_t stride); | |||||
extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], | |||||
ptrdiff_t stride); | |||||
extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], | |||||
ptrdiff_t stride); | |||||
extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], | |||||
ptrdiff_t stride); | |||||
extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], | |||||
ptrdiff_t stride); | |||||
extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]); | extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]); | ||||
extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]); | extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]); | ||||
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride); | |||||
extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride); | |||||
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], | |||||
ptrdiff_t stride); | |||||
extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], | |||||
ptrdiff_t stride); | |||||
#define DECLARE_LOOP_FILTER(NAME)\ | #define DECLARE_LOOP_FILTER(NAME)\ | ||||
extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\ | |||||
extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\ | |||||
extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\ | |||||
extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, \ | |||||
ptrdiff_t stride, \ | |||||
int flim);\ | |||||
extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, \ | |||||
ptrdiff_t stride, \ | |||||
int flim);\ | |||||
extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, \ | |||||
ptrdiff_t stride,\ | |||||
int e, int i, int hvt);\ | int e, int i, int hvt);\ | ||||
extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\ | |||||
extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, \ | |||||
ptrdiff_t stride,\ | |||||
int e, int i, int hvt);\ | int e, int i, int hvt);\ | ||||
extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\ | |||||
int s, int e, int i, int hvt);\ | |||||
extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\ | |||||
int s, int e, int i, int hvt);\ | |||||
extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\ | |||||
extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, \ | |||||
uint8_t *dstV,\ | |||||
ptrdiff_t s, \ | |||||
int e, int i, int hvt);\ | int e, int i, int hvt);\ | ||||
extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\ | |||||
extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, \ | |||||
uint8_t *dstV,\ | |||||
ptrdiff_t s, \ | |||||
int e, int i, int hvt);\ | int e, int i, int hvt);\ | ||||
extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\ | |||||
int s, int e, int i, int hvt);\ | |||||
extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\ | |||||
int s, int e, int i, int hvt); | |||||
extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, \ | |||||
ptrdiff_t stride,\ | |||||
int e, int i, int hvt);\ | |||||
extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, \ | |||||
ptrdiff_t stride,\ | |||||
int e, int i, int hvt);\ | |||||
extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \ | |||||
uint8_t *dstV,\ | |||||
ptrdiff_t s, \ | |||||
int e, int i, int hvt);\ | |||||
extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \ | |||||
uint8_t *dstV,\ | |||||
ptrdiff_t s, \ | |||||
int e, int i, int hvt); | |||||
DECLARE_LOOP_FILTER(mmx) | DECLARE_LOOP_FILTER(mmx) | ||||
DECLARE_LOOP_FILTER(mmxext) | DECLARE_LOOP_FILTER(mmxext) | ||||