|
|
@@ -30,15 +30,8 @@ |
|
|
|
#undef MOVNTQ |
|
|
|
#undef EMMS |
|
|
|
#undef SFENCE |
|
|
|
#undef MMREG_SIZE |
|
|
|
#undef PAVGB |
|
|
|
|
|
|
|
#if COMPILE_TEMPLATE_SSE2 |
|
|
|
#define MMREG_SIZE 16 |
|
|
|
#else |
|
|
|
#define MMREG_SIZE 8 |
|
|
|
#endif |
|
|
|
|
|
|
|
#if COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
#define PREFETCH "prefetch" |
|
|
|
#define PAVGB "pavgusb" |
|
|
@@ -64,6 +57,10 @@ |
|
|
|
#define SFENCE " # nop" |
|
|
|
#endif |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_SSE2 |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
|
|
|
|
static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) |
|
|
|
{ |
|
|
|
uint8_t *dest = dst; |
|
|
@@ -1513,7 +1510,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t |
|
|
|
SFENCE" \n\t" |
|
|
|
:::"memory"); |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) |
|
|
|
{ |
|
|
|
long x,y; |
|
|
@@ -1530,7 +1529,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi |
|
|
|
dst+= dstStride; |
|
|
|
|
|
|
|
for (y=1; y<srcHeight; y++) { |
|
|
|
#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
const x86_reg mmxSize= srcWidth&~15; |
|
|
|
__asm__ volatile( |
|
|
|
"mov %4, %%"REG_a" \n\t" |
|
|
@@ -1564,17 +1562,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi |
|
|
|
"punpckhbw %%mm3, %%mm7 \n\t" |
|
|
|
"punpcklbw %%mm2, %%mm4 \n\t" |
|
|
|
"punpckhbw %%mm2, %%mm6 \n\t" |
|
|
|
#if 1 |
|
|
|
MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" |
|
|
|
MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" |
|
|
|
MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" |
|
|
|
MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" |
|
|
|
#else |
|
|
|
"movq %%mm5, (%2, %%"REG_a", 2) \n\t" |
|
|
|
"movq %%mm7, 8(%2, %%"REG_a", 2) \n\t" |
|
|
|
"movq %%mm4, (%3, %%"REG_a", 2) \n\t" |
|
|
|
"movq %%mm6, 8(%3, %%"REG_a", 2) \n\t" |
|
|
|
#endif |
|
|
|
"add $8, %%"REG_a" \n\t" |
|
|
|
"movq -1(%0, %%"REG_a"), %%mm4 \n\t" |
|
|
|
"movq -1(%1, %%"REG_a"), %%mm5 \n\t" |
|
|
@@ -1584,12 +1575,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi |
|
|
|
"g" (-mmxSize) |
|
|
|
: "%"REG_a |
|
|
|
); |
|
|
|
#else |
|
|
|
const x86_reg mmxSize=1; |
|
|
|
|
|
|
|
dst[0 ]= (3*src[0] + src[srcStride])>>2; |
|
|
|
dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; |
|
|
|
#endif |
|
|
|
|
|
|
|
for (x=mmxSize-1; x<srcWidth-1; x++) { |
|
|
|
dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; |
|
|
@@ -1605,7 +1590,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi |
|
|
|
} |
|
|
|
|
|
|
|
// last line |
|
|
|
#if 1 |
|
|
|
dst[0]= src[0]; |
|
|
|
|
|
|
|
for (x=0; x<srcWidth-1; x++) { |
|
|
@@ -1613,18 +1597,14 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi |
|
|
|
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
|
|
|
} |
|
|
|
dst[2*srcWidth-1]= src[srcWidth-1]; |
|
|
|
#else |
|
|
|
for (x=0; x<srcWidth; x++) { |
|
|
|
dst[2*x+0]= |
|
|
|
dst[2*x+1]= src[x]; |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
__asm__ volatile(EMMS" \n\t" |
|
|
|
SFENCE" \n\t" |
|
|
|
:::"memory"); |
|
|
|
} |
|
|
|
#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
/** |
|
|
|
* Height should be a multiple of 2 and width should be a multiple of 16. |
|
|
|
* (If this is a problem for anyone then tell me, and I will fix it.) |
|
|
@@ -1728,6 +1708,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t |
|
|
|
SFENCE" \n\t" |
|
|
|
:::"memory"); |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
/** |
|
|
|
* Height should be a multiple of 2 and width should be a multiple of 2. |
|
|
@@ -1978,7 +1959,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ |
|
|
|
|
|
|
|
rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride); |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_SSE2 */ |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, |
|
|
|
long width, long height, long src1Stride, |
|
|
|
long src2Stride, long dstStride) |
|
|
@@ -2048,7 +2031,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui |
|
|
|
::: "memory" |
|
|
|
); |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_SSE2 |
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
|
|
|
uint8_t *dst1, uint8_t *dst2, |
|
|
|
long width, long height, |
|
|
@@ -2228,6 +2214,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 |
|
|
|
::: "memory" |
|
|
|
); |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) |
|
|
|
{ |
|
|
@@ -2266,6 +2253,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
|
|
|
{ |
|
|
|
dst0+= count; |
|
|
@@ -2311,6 +2299,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds |
|
|
|
count++; |
|
|
|
} |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
|
|
|
{ |
|
|
@@ -2365,6 +2354,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
|
|
|
{ |
|
|
|
dst0+= count; |
|
|
@@ -2411,6 +2401,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst |
|
|
|
count++; |
|
|
|
} |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
|
|
|
{ |
|
|
@@ -2492,6 +2483,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co |
|
|
|
); |
|
|
|
} |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
|
|
|
long width, long height, |
|
|
|
long lumStride, long chromStride, long srcStride) |
|
|
@@ -2514,6 +2506,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co |
|
|
|
::: "memory" |
|
|
|
); |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
|
|
|
long width, long height, |
|
|
@@ -2540,6 +2533,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co |
|
|
|
); |
|
|
|
} |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
|
|
|
long width, long height, |
|
|
|
long lumStride, long chromStride, long srcStride) |
|
|
@@ -2562,9 +2556,13 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co |
|
|
|
::: "memory" |
|
|
|
); |
|
|
|
} |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
#endif /* !COMPILE_TEMPLATE_SSE2 */ |
|
|
|
|
|
|
|
static inline void RENAME(rgb2rgb_init)(void) |
|
|
|
{ |
|
|
|
#if !COMPILE_TEMPLATE_SSE2 |
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
rgb15to16 = RENAME(rgb15to16); |
|
|
|
rgb15tobgr24 = RENAME(rgb15tobgr24); |
|
|
|
rgb15to32 = RENAME(rgb15to32); |
|
|
@@ -2588,14 +2586,22 @@ static inline void RENAME(rgb2rgb_init)(void) |
|
|
|
yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); |
|
|
|
yuv422ptouyvy = RENAME(yuv422ptouyvy); |
|
|
|
yuy2toyv12 = RENAME(yuy2toyv12); |
|
|
|
planar2x = RENAME(planar2x); |
|
|
|
rgb24toyv12 = RENAME(rgb24toyv12); |
|
|
|
interleaveBytes = RENAME(interleaveBytes); |
|
|
|
vu9_to_vu12 = RENAME(vu9_to_vu12); |
|
|
|
yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); |
|
|
|
|
|
|
|
uyvytoyuv420 = RENAME(uyvytoyuv420); |
|
|
|
uyvytoyuv422 = RENAME(uyvytoyuv422); |
|
|
|
yuyvtoyuv420 = RENAME(yuyvtoyuv420); |
|
|
|
yuyvtoyuv422 = RENAME(yuyvtoyuv422); |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
|
|
|
|
#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
planar2x = RENAME(planar2x); |
|
|
|
#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
rgb24toyv12 = RENAME(rgb24toyv12); |
|
|
|
|
|
|
|
yuyvtoyuv420 = RENAME(yuyvtoyuv420); |
|
|
|
uyvytoyuv420 = RENAME(uyvytoyuv420); |
|
|
|
#endif /* !COMPILE_TEMPLATE_SSE2 */ |
|
|
|
|
|
|
|
#if !COMPILE_TEMPLATE_AMD3DNOW |
|
|
|
interleaveBytes = RENAME(interleaveBytes); |
|
|
|
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
|
|
|
} |