- partial yvu9 support (copy only) - rgb 15/16 -> 24/32 converters - int->unsigned changes Originally committed as revision 6493 to svn://svn.mplayerhq.hu/mplayer/trunk/postproctags/v0.5
| @@ -20,6 +20,8 @@ | |||
| #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit | |||
| #ifdef CAN_COMPILE_X86_ASM | |||
| static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL; | |||
| static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; | |||
| static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL; | |||
| static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; | |||
| static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; | |||
| @@ -35,6 +37,11 @@ static const uint64_t mask24hhhh __attribute__((aligned(8))) = 0xffffffffffff00 | |||
| static const uint64_t mask15b __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ | |||
| static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ | |||
| static const uint64_t mask15s __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL; | |||
| static const uint64_t mask15g __attribute__((aligned(8))) = 0x03E003E003E003E0ULL; | |||
| static const uint64_t mask15r __attribute__((aligned(8))) = 0x7C007C007C007C00ULL; | |||
| #define mask16b mask15b | |||
| static const uint64_t mask16g __attribute__((aligned(8))) = 0x07E007E007E007E0ULL; | |||
| static const uint64_t mask16r __attribute__((aligned(8))) = 0xF800F800F800F800ULL; | |||
| static const uint64_t red_16mask __attribute__((aligned(8))) = 0x0000f8000000f800ULL; | |||
| static const uint64_t green_16mask __attribute__((aligned(8)))= 0x000007e0000007e0ULL; | |||
| static const uint64_t blue_16mask __attribute__((aligned(8))) = 0x0000001f0000001fULL; | |||
| @@ -137,10 +144,68 @@ void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb24to32_MMX(src, dst, src_size); | |||
| else | |||
| #endif | |||
| rgb24to32_C(src, dst, src_size); | |||
| #else | |||
| rgb24to32_C(src, dst, src_size); | |||
| } | |||
/**
 * Public entry point for the 15 bpp -> 24 bpp converter.
 *
 * Forwards its arguments unchanged to one implementation of the converter.
 * When x86 assembly can be compiled, the CPU capability flags in gCpuCaps
 * select the MMX2, 3DNow or MMX variant (probed in that order, which the
 * original author ranked fastest first); otherwise, or when none of the
 * flags is set, the plain C variant runs.
 *
 * @param src      source pixel buffer
 * @param dst      destination pixel buffer
 * @param src_size size of the source buffer, passed through as-is
 */
void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	/* Take the first implementation the running CPU supports. */
	if(gCpuCaps.hasMMX2)
	{
		rgb15to24_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb15to24_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb15to24_MMX(src, dst, src_size);
		return;
	}
#endif
	/* Portable fallback. */
	rgb15to24_C(src, dst, src_size);
}
/**
 * Public entry point for the 16 bpp -> 24 bpp converter.
 *
 * Forwards its arguments unchanged to one implementation of the converter.
 * When x86 assembly can be compiled, the CPU capability flags in gCpuCaps
 * select the MMX2, 3DNow or MMX variant (probed in that order, which the
 * original author ranked fastest first); otherwise, or when none of the
 * flags is set, the plain C variant runs.
 *
 * @param src      source pixel buffer
 * @param dst      destination pixel buffer
 * @param src_size size of the source buffer, passed through as-is
 */
void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	/* Take the first implementation the running CPU supports. */
	if(gCpuCaps.hasMMX2)
	{
		rgb16to24_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb16to24_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb16to24_MMX(src, dst, src_size);
		return;
	}
#endif
	/* Portable fallback. */
	rgb16to24_C(src, dst, src_size);
}
/**
 * Public entry point for the 15 bpp -> 32 bpp converter.
 *
 * Forwards its arguments unchanged to one implementation of the converter.
 * When x86 assembly can be compiled, the CPU capability flags in gCpuCaps
 * select the MMX2, 3DNow or MMX variant (probed in that order, which the
 * original author ranked fastest first); otherwise, or when none of the
 * flags is set, the plain C variant runs.
 *
 * @param src      source pixel buffer
 * @param dst      destination pixel buffer
 * @param src_size size of the source buffer, passed through as-is
 */
void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	/* Take the first implementation the running CPU supports. */
	if(gCpuCaps.hasMMX2)
	{
		rgb15to32_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb15to32_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb15to32_MMX(src, dst, src_size);
		return;
	}
#endif
	/* Portable fallback. */
	rgb15to32_C(src, dst, src_size);
}
/**
 * Public entry point for the 16 bpp -> 32 bpp converter.
 *
 * Forwards its arguments unchanged to one implementation of the converter.
 * When x86 assembly can be compiled, the CPU capability flags in gCpuCaps
 * select the MMX2, 3DNow or MMX variant (probed in that order, which the
 * original author ranked fastest first); otherwise, or when none of the
 * flags is set, the plain C variant runs.
 *
 * @param src      source pixel buffer
 * @param dst      destination pixel buffer
 * @param src_size size of the source buffer, passed through as-is
 */
void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	/* Take the first implementation the running CPU supports. */
	if(gCpuCaps.hasMMX2)
	{
		rgb16to32_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb16to32_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb16to32_MMX(src, dst, src_size);
		return;
	}
#endif
	/* Portable fallback. */
	rgb16to32_C(src, dst, src_size);
}
| void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size) | |||
| @@ -154,10 +219,8 @@ void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb32to24_MMX(src, dst, src_size); | |||
| else | |||
| rgb32to24_C(src, dst, src_size); | |||
| #else | |||
| rgb32to24_C(src, dst, src_size); | |||
| #endif | |||
| rgb32to24_C(src, dst, src_size); | |||
| } | |||
| /* | |||
| @@ -177,10 +240,8 @@ void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb15to16_MMX(src, dst, src_size); | |||
| else | |||
| rgb15to16_C(src, dst, src_size); | |||
| #else | |||
| rgb15to16_C(src, dst, src_size); | |||
| #endif | |||
| rgb15to16_C(src, dst, src_size); | |||
| } | |||
| /** | |||
| @@ -242,10 +303,8 @@ void rgb32to16(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb32to16_MMX(src, dst, src_size); | |||
| else | |||
| rgb32to16_C(src, dst, src_size); | |||
| #else | |||
| rgb32to16_C(src, dst, src_size); | |||
| #endif | |||
| rgb32to16_C(src, dst, src_size); | |||
| } | |||
| void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| @@ -259,10 +318,8 @@ void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb32to15_MMX(src, dst, src_size); | |||
| else | |||
| rgb32to15_C(src, dst, src_size); | |||
| #else | |||
| rgb32to15_C(src, dst, src_size); | |||
| #endif | |||
| rgb32to15_C(src, dst, src_size); | |||
| } | |||
| void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| @@ -276,10 +333,8 @@ void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb24to16_MMX(src, dst, src_size); | |||
| else | |||
| rgb24to16_C(src, dst, src_size); | |||
| #else | |||
| rgb24to16_C(src, dst, src_size); | |||
| #endif | |||
| rgb24to16_C(src, dst, src_size); | |||
| } | |||
| void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| @@ -293,10 +348,8 @@ void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb24to15_MMX(src, dst, src_size); | |||
| else | |||
| rgb24to15_C(src, dst, src_size); | |||
| #else | |||
| rgb24to15_C(src, dst, src_size); | |||
| #endif | |||
| rgb24to15_C(src, dst, src_size); | |||
| } | |||
| /** | |||
| @@ -330,10 +383,8 @@ void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb32tobgr32_MMX(src, dst, src_size); | |||
| else | |||
| rgb32tobgr32_C(src, dst, src_size); | |||
| #else | |||
| rgb32tobgr32_C(src, dst, src_size); | |||
| #endif | |||
| rgb32tobgr32_C(src, dst, src_size); | |||
| } | |||
| void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |||
| @@ -347,10 +398,8 @@ void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb24tobgr24_MMX(src, dst, src_size); | |||
| else | |||
| rgb24tobgr24_C(src, dst, src_size); | |||
| #else | |||
| rgb24tobgr24_C(src, dst, src_size); | |||
| #endif | |||
| rgb24tobgr24_C(src, dst, src_size); | |||
| } | |||
| /** | |||
| @@ -371,10 +420,8 @@ void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, u | |||
| else if(gCpuCaps.hasMMX) | |||
| yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |||
| else | |||
| yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |||
| #else | |||
| yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |||
| #endif | |||
| yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |||
| } | |||
| /** | |||
| @@ -394,10 +441,8 @@ void yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc | |||
| else if(gCpuCaps.hasMMX) | |||
| yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |||
| else | |||
| yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |||
| #else | |||
| yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |||
| #endif | |||
| yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |||
| } | |||
| /** | |||
| @@ -418,10 +463,8 @@ void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||
| else if(gCpuCaps.hasMMX) | |||
| yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |||
| else | |||
| yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |||
| #else | |||
| yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |||
| #endif | |||
| yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |||
| } | |||
| /** | |||
| @@ -488,14 +531,13 @@ void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst | |||
| else if(gCpuCaps.hasMMX) | |||
| rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |||
| else | |||
| rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |||
| #else | |||
| rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |||
| #endif | |||
| rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |||
| } | |||
| void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, | |||
| int width, int height, int src1Stride, int src2Stride, int dstStride) | |||
| unsigned width, unsigned height, unsigned src1Stride, | |||
| unsigned src2Stride, unsigned dstStride) | |||
| { | |||
| #ifdef CAN_COMPILE_X86_ASM | |||
| // ordered per speed fasterst first | |||
| @@ -506,8 +548,6 @@ void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, | |||
| else if(gCpuCaps.hasMMX) | |||
| interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); | |||
| else | |||
| interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); | |||
| #else | |||
| interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); | |||
| #endif | |||
| interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); | |||
| } | |||
| @@ -10,12 +10,16 @@ | |||
| #define RGB2RGB_INCLUDED | |||
| extern void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb32to16(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb32to15(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||
| extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); | |||
| extern void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); | |||
| @@ -39,7 +43,8 @@ extern void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_ | |||
| unsigned int lumStride, unsigned int chromStride, unsigned int srcStride); | |||
| extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, | |||
| int width, int height, int src1Stride, int src2Stride, int dstStride); | |||
| unsigned width, unsigned height, unsigned src1Stride, | |||
| unsigned src2Stride, unsigned dstStride); | |||
| #define MODE_RGB 0x1 | |||
| @@ -47,11 +52,11 @@ extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, | |||
| typedef void (* yuv2rgb_fun) (uint8_t * image, uint8_t * py, | |||
| uint8_t * pu, uint8_t * pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride); | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride); | |||
| extern yuv2rgb_fun yuv2rgb; | |||
| void yuv2rgb_init (int bpp, int mode); | |||
| void yuv2rgb_init (unsigned bpp, int mode); | |||
| #endif | |||
| @@ -8,6 +8,13 @@ | |||
| * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) | |||
| */ | |||
| #include <stddef.h> | |||
| #include <inttypes.h> /* for __WORDSIZE */ | |||
| #ifndef __WORDSIZE | |||
| #warning You have misconfigured system and probably will lose performance! | |||
| #endif | |||
| #undef PREFETCH | |||
| #undef MOVNTQ | |||
| #undef EMMS | |||
| @@ -56,13 +63,13 @@ static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,unsigned sr | |||
| const uint8_t *s = src; | |||
| const uint8_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *mm_end; | |||
| uint8_t *mm_end; | |||
| #endif | |||
| end = s + src_size; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |||
| mm_end = end - 23; | |||
| __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); | |||
| mm_end = (uint8_t*)((((unsigned long)end)/24)*24); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| @@ -107,12 +114,12 @@ static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned sr | |||
| const uint8_t *s = src; | |||
| const uint8_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *mm_end; | |||
| uint8_t *mm_end; | |||
| #endif | |||
| end = s + src_size; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |||
| mm_end = end - 31; | |||
| mm_end = (uint8_t*)((((unsigned long)end)/32)*32); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| @@ -186,15 +193,16 @@ static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned sr | |||
| */ | |||
| static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) | |||
| { | |||
| register const uint8_t* s=src; | |||
| register uint8_t* d=dst; | |||
| register const uint8_t *end; | |||
| uint8_t *mm_end; | |||
| end = s + src_size; | |||
| #ifdef HAVE_MMX | |||
| register int offs=15-src_size; | |||
| register const char* s=src-offs; | |||
| register char* d=dst-offs; | |||
| __asm __volatile(PREFETCH" %0"::"m"(*(s+offs))); | |||
| __asm __volatile( | |||
| "movq %0, %%mm4\n\t" | |||
| ::"m"(mask15s)); | |||
| while(offs<0) | |||
| __asm __volatile(PREFETCH" %0"::"m"(*s)); | |||
| __asm __volatile("movq %0, %%mm4"::"m"(mask15s)); | |||
| mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |||
| while(s<mm_end) | |||
| { | |||
| __asm __volatile( | |||
| PREFETCH" 32%1\n\t" | |||
| @@ -208,40 +216,28 @@ static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned sr | |||
| "paddw %%mm3, %%mm2\n\t" | |||
| MOVNTQ" %%mm0, %0\n\t" | |||
| MOVNTQ" %%mm2, 8%0" | |||
| :"=m"(*(d+offs)) | |||
| :"m"(*(s+offs)) | |||
| :"=m"(*d) | |||
| :"m"(*s) | |||
| ); | |||
| offs+=16; | |||
| d+=16; | |||
| s+=16; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #else | |||
| #if 0 | |||
| const uint16_t *s1=( uint16_t * )src; | |||
| uint16_t *d1=( uint16_t * )dst; | |||
| uint16_t *e=((uint8_t *)s1)+src_size; | |||
| while( s1<e ){ | |||
| register int x=*( s1++ ); | |||
| /* rrrrrggggggbbbbb | |||
| 0rrrrrgggggbbbbb | |||
| 0111 1111 1110 0000=0x7FE0 | |||
| 00000000000001 1111=0x001F */ | |||
| *( d1++ )=( x&0x001F )|( ( x&0x7FE0 )<<1 ); | |||
| } | |||
| #else | |||
| const unsigned *s1=( unsigned * )src; | |||
| unsigned *d1=( unsigned * )dst; | |||
| int i; | |||
| int size= src_size>>2; | |||
| for(i=0; i<size; i++) | |||
| { | |||
| register int x= s1[i]; | |||
| // d1[i] = x + (x&0x7FE07FE0); //faster but need msbit =0 which might not allways be true | |||
| d1[i] = (x&0x7FFF7FFF) + (x&0x7FE07FE0); | |||
| } | |||
| #endif | |||
| #endif | |||
| mm_end = (uint8_t*)((((unsigned long)end)/4)*4); | |||
| while(s < mm_end) | |||
| { | |||
| register unsigned x= *((uint32_t *)s); | |||
| *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); | |||
| d+=4; | |||
| s+=4; | |||
| } | |||
| if(s < end) | |||
| { | |||
| register unsigned short x= *((uint16_t *)s); | |||
| *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); | |||
| } | |||
| } | |||
| static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| @@ -257,17 +253,20 @@ static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsign | |||
| static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| { | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *s = src; | |||
| const uint8_t *end,*mm_end; | |||
| const uint8_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *mm_end; | |||
| #endif | |||
| uint16_t *d = (uint16_t *)dst; | |||
| end = s + src_size; | |||
| mm_end = end - 15; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |||
| __asm __volatile( | |||
| "movq %0, %%mm7\n\t" | |||
| "movq %1, %%mm6\n\t" | |||
| ::"m"(red_16mask),"m"(green_16mask)); | |||
| mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| @@ -303,43 +302,35 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned | |||
| d += 4; | |||
| s += 16; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #endif | |||
| while(s < end) | |||
| { | |||
| const int b= *s++; | |||
| const int g= *s++; | |||
| const int r= *s++; | |||
| s++; | |||
| *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |||
| s++; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #else | |||
| unsigned j,i,num_pixels=src_size/4; | |||
| uint16_t *d = (uint16_t *)dst; | |||
| for(i=0,j=0; j<num_pixels; i+=4,j++) | |||
| { | |||
| const int b= src[i+0]; | |||
| const int g= src[i+1]; | |||
| const int r= src[i+2]; | |||
| d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |||
| } | |||
| #endif | |||
| } | |||
| static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| { | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *s = src; | |||
| const uint8_t *end,*mm_end; | |||
| const uint8_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *mm_end; | |||
| #endif | |||
| uint16_t *d = (uint16_t *)dst; | |||
| end = s + src_size; | |||
| mm_end = end - 15; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |||
| __asm __volatile( | |||
| "movq %0, %%mm7\n\t" | |||
| "movq %1, %%mm6\n\t" | |||
| ::"m"(red_15mask),"m"(green_15mask)); | |||
| mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| @@ -375,43 +366,35 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned | |||
| d += 4; | |||
| s += 16; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #endif | |||
| while(s < end) | |||
| { | |||
| const int b= *s++; | |||
| const int g= *s++; | |||
| const int r= *s++; | |||
| s++; | |||
| *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |||
| s++; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #else | |||
| unsigned j,i,num_pixels=src_size/4; | |||
| uint16_t *d = (uint16_t *)dst; | |||
| for(i=0,j=0; j<num_pixels; i+=4,j++) | |||
| { | |||
| const int b= src[i+0]; | |||
| const int g= src[i+1]; | |||
| const int r= src[i+2]; | |||
| d[j]= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |||
| } | |||
| #endif | |||
| } | |||
| static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| { | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *s = src; | |||
| const uint8_t *end,*mm_end; | |||
| const uint8_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *mm_end; | |||
| #endif | |||
| uint16_t *d = (uint16_t *)dst; | |||
| end = s + src_size; | |||
| mm_end = end - 11; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |||
| __asm __volatile( | |||
| "movq %0, %%mm7\n\t" | |||
| "movq %1, %%mm6\n\t" | |||
| ::"m"(red_16mask),"m"(green_16mask)); | |||
| mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| @@ -447,6 +430,9 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned | |||
| d += 4; | |||
| s += 12; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #endif | |||
| while(s < end) | |||
| { | |||
| const int b= *s++; | |||
| @@ -454,35 +440,24 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned | |||
| const int r= *s++; | |||
| *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #else | |||
| unsigned j,i,num_pixels=src_size/3; | |||
| uint16_t *d = (uint16_t *)dst; | |||
| for(i=0,j=0; j<num_pixels; i+=3,j++) | |||
| { | |||
| const int b= src[i+0]; | |||
| const int g= src[i+1]; | |||
| const int r= src[i+2]; | |||
| d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |||
| } | |||
| #endif | |||
| } | |||
| static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| { | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *s = src; | |||
| const uint8_t *end,*mm_end; | |||
| const uint8_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint8_t *mm_end; | |||
| #endif | |||
| uint16_t *d = (uint16_t *)dst; | |||
| end = s + src_size; | |||
| mm_end = end -11; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |||
| __asm __volatile( | |||
| "movq %0, %%mm7\n\t" | |||
| "movq %1, %%mm6\n\t" | |||
| ::"m"(red_15mask),"m"(green_15mask)); | |||
| mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| @@ -518,6 +493,9 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned | |||
| d += 4; | |||
| s += 12; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #endif | |||
| while(s < end) | |||
| { | |||
| const int b= *s++; | |||
| @@ -525,25 +503,448 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned | |||
| const int r= *s++; | |||
| *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |||
| } | |||
| } | |||
/*
  The converters below use the less accurate approximation of simply
  left-shifting the input value and filling the low-order bits with zeroes.
  This helps PNG compression, but it cannot reproduce white exactly, since
  it never generates an all-ones maximum value; the net effect is to darken
  the image slightly.

  The better method would be "left bit replication":

   4 3 2 1 0
   ---------
   1 1 0 1 1

   7 6 5 4 3  2 1 0
   ----------------
   1 1 0 1 1  1 1 0
   |=======|  |===|
       |      Leftmost Bits Repeated to Fill Open Bits
       |
   Original Bits
*/
| static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| { | |||
| const uint16_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint16_t *mm_end; | |||
| #endif | |||
| uint8_t *d = (uint8_t *)dst; | |||
| const uint16_t *s = (uint16_t *)src; | |||
| end = s + src_size/2; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |||
| mm_end = (uint16_t*)((((unsigned long)end)/8)*8); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| PREFETCH" 32%1\n\t" | |||
| "movq %1, %%mm0\n\t" | |||
| "movq %1, %%mm1\n\t" | |||
| "movq %1, %%mm2\n\t" | |||
| "pand %2, %%mm0\n\t" | |||
| "pand %3, %%mm1\n\t" | |||
| "pand %4, %%mm2\n\t" | |||
| "psllq $3, %%mm0\n\t" | |||
| "psrlq $2, %%mm1\n\t" | |||
| "psrlq $7, %%mm2\n\t" | |||
| "movq %%mm0, %%mm3\n\t" | |||
| "movq %%mm1, %%mm4\n\t" | |||
| "movq %%mm2, %%mm5\n\t" | |||
| "punpcklwd %5, %%mm0\n\t" | |||
| "punpcklwd %5, %%mm1\n\t" | |||
| "punpcklwd %5, %%mm2\n\t" | |||
| "punpckhwd %5, %%mm3\n\t" | |||
| "punpckhwd %5, %%mm4\n\t" | |||
| "punpckhwd %5, %%mm5\n\t" | |||
| "psllq $8, %%mm1\n\t" | |||
| "psllq $16, %%mm2\n\t" | |||
| "por %%mm1, %%mm0\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "psllq $8, %%mm4\n\t" | |||
| "psllq $16, %%mm5\n\t" | |||
| "por %%mm4, %%mm3\n\t" | |||
| "por %%mm5, %%mm3\n\t" | |||
| "movq %%mm0, %%mm6\n\t" | |||
| "movq %%mm3, %%mm7\n\t" | |||
| "movq 8%1, %%mm0\n\t" | |||
| "movq 8%1, %%mm1\n\t" | |||
| "movq 8%1, %%mm2\n\t" | |||
| "pand %2, %%mm0\n\t" | |||
| "pand %3, %%mm1\n\t" | |||
| "pand %4, %%mm2\n\t" | |||
| "psllq $3, %%mm0\n\t" | |||
| "psrlq $2, %%mm1\n\t" | |||
| "psrlq $7, %%mm2\n\t" | |||
| "movq %%mm0, %%mm3\n\t" | |||
| "movq %%mm1, %%mm4\n\t" | |||
| "movq %%mm2, %%mm5\n\t" | |||
| "punpcklwd %5, %%mm0\n\t" | |||
| "punpcklwd %5, %%mm1\n\t" | |||
| "punpcklwd %5, %%mm2\n\t" | |||
| "punpckhwd %5, %%mm3\n\t" | |||
| "punpckhwd %5, %%mm4\n\t" | |||
| "punpckhwd %5, %%mm5\n\t" | |||
| "psllq $8, %%mm1\n\t" | |||
| "psllq $16, %%mm2\n\t" | |||
| "por %%mm1, %%mm0\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "psllq $8, %%mm4\n\t" | |||
| "psllq $16, %%mm5\n\t" | |||
| "por %%mm4, %%mm3\n\t" | |||
| "por %%mm5, %%mm3\n\t" | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |||
| :"memory"); | |||
| /* Borrowed 32 to 24 */ | |||
| __asm __volatile( | |||
| "movq %%mm0, %%mm4\n\t" | |||
| "movq %%mm3, %%mm5\n\t" | |||
| "movq %%mm6, %%mm0\n\t" | |||
| "movq %%mm7, %%mm1\n\t" | |||
| "movq %%mm4, %%mm6\n\t" | |||
| "movq %%mm5, %%mm7\n\t" | |||
| "movq %%mm0, %%mm2\n\t" | |||
| "movq %%mm1, %%mm3\n\t" | |||
| "psrlq $8, %%mm2\n\t" | |||
| "psrlq $8, %%mm3\n\t" | |||
| "psrlq $8, %%mm6\n\t" | |||
| "psrlq $8, %%mm7\n\t" | |||
| "pand %2, %%mm0\n\t" | |||
| "pand %2, %%mm1\n\t" | |||
| "pand %2, %%mm4\n\t" | |||
| "pand %2, %%mm5\n\t" | |||
| "pand %3, %%mm2\n\t" | |||
| "pand %3, %%mm3\n\t" | |||
| "pand %3, %%mm6\n\t" | |||
| "pand %3, %%mm7\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "por %%mm3, %%mm1\n\t" | |||
| "por %%mm6, %%mm4\n\t" | |||
| "por %%mm7, %%mm5\n\t" | |||
| "movq %%mm1, %%mm2\n\t" | |||
| "movq %%mm4, %%mm3\n\t" | |||
| "psllq $48, %%mm2\n\t" | |||
| "psllq $32, %%mm3\n\t" | |||
| "pand %4, %%mm2\n\t" | |||
| "pand %5, %%mm3\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "psrlq $16, %%mm1\n\t" | |||
| "psrlq $32, %%mm4\n\t" | |||
| "psllq $16, %%mm5\n\t" | |||
| "por %%mm3, %%mm1\n\t" | |||
| "pand %6, %%mm5\n\t" | |||
| "por %%mm5, %%mm4\n\t" | |||
| MOVNTQ" %%mm0, %0\n\t" | |||
| MOVNTQ" %%mm1, 8%0\n\t" | |||
| MOVNTQ" %%mm4, 16%0" | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |||
| :"memory"); | |||
| d += 24; | |||
| s += 8; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #else | |||
| unsigned j,i,num_pixels=src_size/3; | |||
| uint16_t *d = (uint16_t *)dst; | |||
| for(i=0,j=0; j<num_pixels; i+=3,j++) | |||
| #endif | |||
| while(s < end) | |||
| { | |||
| register uint16_t bgr; | |||
| bgr = *s++; | |||
| *d++ = (bgr&0x1F)<<3; | |||
| *d++ = (bgr&0x3E0)>>2; | |||
| *d++ = (bgr&0x7C00)>>7; | |||
| } | |||
| } | |||
| static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| { | |||
| const uint16_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint16_t *mm_end; | |||
| #endif | |||
| uint8_t *d = (uint8_t *)dst; | |||
| const uint16_t *s = (const uint16_t *)src; | |||
| end = s + src_size/2; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |||
| mm_end = (uint16_t*)((((unsigned long)end)/8)*8); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| PREFETCH" 32%1\n\t" | |||
| "movq %1, %%mm0\n\t" | |||
| "movq %1, %%mm1\n\t" | |||
| "movq %1, %%mm2\n\t" | |||
| "pand %2, %%mm0\n\t" | |||
| "pand %3, %%mm1\n\t" | |||
| "pand %4, %%mm2\n\t" | |||
| "psllq $3, %%mm0\n\t" | |||
| "psrlq $3, %%mm1\n\t" | |||
| "psrlq $8, %%mm2\n\t" | |||
| "movq %%mm0, %%mm3\n\t" | |||
| "movq %%mm1, %%mm4\n\t" | |||
| "movq %%mm2, %%mm5\n\t" | |||
| "punpcklwd %5, %%mm0\n\t" | |||
| "punpcklwd %5, %%mm1\n\t" | |||
| "punpcklwd %5, %%mm2\n\t" | |||
| "punpckhwd %5, %%mm3\n\t" | |||
| "punpckhwd %5, %%mm4\n\t" | |||
| "punpckhwd %5, %%mm5\n\t" | |||
| "psllq $8, %%mm1\n\t" | |||
| "psllq $16, %%mm2\n\t" | |||
| "por %%mm1, %%mm0\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "psllq $8, %%mm4\n\t" | |||
| "psllq $16, %%mm5\n\t" | |||
| "por %%mm4, %%mm3\n\t" | |||
| "por %%mm5, %%mm3\n\t" | |||
| "movq %%mm0, %%mm6\n\t" | |||
| "movq %%mm3, %%mm7\n\t" | |||
| "movq 8%1, %%mm0\n\t" | |||
| "movq 8%1, %%mm1\n\t" | |||
| "movq 8%1, %%mm2\n\t" | |||
| "pand %2, %%mm0\n\t" | |||
| "pand %3, %%mm1\n\t" | |||
| "pand %4, %%mm2\n\t" | |||
| "psllq $3, %%mm0\n\t" | |||
| "psrlq $3, %%mm1\n\t" | |||
| "psrlq $8, %%mm2\n\t" | |||
| "movq %%mm0, %%mm3\n\t" | |||
| "movq %%mm1, %%mm4\n\t" | |||
| "movq %%mm2, %%mm5\n\t" | |||
| "punpcklwd %5, %%mm0\n\t" | |||
| "punpcklwd %5, %%mm1\n\t" | |||
| "punpcklwd %5, %%mm2\n\t" | |||
| "punpckhwd %5, %%mm3\n\t" | |||
| "punpckhwd %5, %%mm4\n\t" | |||
| "punpckhwd %5, %%mm5\n\t" | |||
| "psllq $8, %%mm1\n\t" | |||
| "psllq $16, %%mm2\n\t" | |||
| "por %%mm1, %%mm0\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "psllq $8, %%mm4\n\t" | |||
| "psllq $16, %%mm5\n\t" | |||
| "por %%mm4, %%mm3\n\t" | |||
| "por %%mm5, %%mm3\n\t" | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |||
| :"memory"); | |||
| /* Borrowed 32 to 24 */ | |||
| __asm __volatile( | |||
| "movq %%mm0, %%mm4\n\t" | |||
| "movq %%mm3, %%mm5\n\t" | |||
| "movq %%mm6, %%mm0\n\t" | |||
| "movq %%mm7, %%mm1\n\t" | |||
| "movq %%mm4, %%mm6\n\t" | |||
| "movq %%mm5, %%mm7\n\t" | |||
| "movq %%mm0, %%mm2\n\t" | |||
| "movq %%mm1, %%mm3\n\t" | |||
| "psrlq $8, %%mm2\n\t" | |||
| "psrlq $8, %%mm3\n\t" | |||
| "psrlq $8, %%mm6\n\t" | |||
| "psrlq $8, %%mm7\n\t" | |||
| "pand %2, %%mm0\n\t" | |||
| "pand %2, %%mm1\n\t" | |||
| "pand %2, %%mm4\n\t" | |||
| "pand %2, %%mm5\n\t" | |||
| "pand %3, %%mm2\n\t" | |||
| "pand %3, %%mm3\n\t" | |||
| "pand %3, %%mm6\n\t" | |||
| "pand %3, %%mm7\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "por %%mm3, %%mm1\n\t" | |||
| "por %%mm6, %%mm4\n\t" | |||
| "por %%mm7, %%mm5\n\t" | |||
| "movq %%mm1, %%mm2\n\t" | |||
| "movq %%mm4, %%mm3\n\t" | |||
| "psllq $48, %%mm2\n\t" | |||
| "psllq $32, %%mm3\n\t" | |||
| "pand %4, %%mm2\n\t" | |||
| "pand %5, %%mm3\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "psrlq $16, %%mm1\n\t" | |||
| "psrlq $32, %%mm4\n\t" | |||
| "psllq $16, %%mm5\n\t" | |||
| "por %%mm3, %%mm1\n\t" | |||
| "pand %6, %%mm5\n\t" | |||
| "por %%mm5, %%mm4\n\t" | |||
| MOVNTQ" %%mm0, %0\n\t" | |||
| MOVNTQ" %%mm1, 8%0\n\t" | |||
| MOVNTQ" %%mm4, 16%0" | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |||
| :"memory"); | |||
| d += 24; | |||
| s += 8; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #endif | |||
| while(s < end) | |||
| { | |||
| register uint16_t bgr; | |||
| bgr = *s++; | |||
| *d++ = (bgr&0x1F)<<3; | |||
| *d++ = (bgr&0x7E0)>>3; | |||
| *d++ = (bgr&0xF800)>>8; | |||
| } | |||
| } | |||
| static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| { | |||
| const uint16_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint16_t *mm_end; | |||
| #endif | |||
| uint8_t *d = (uint8_t *)dst; | |||
| const uint16_t *s = (const uint16_t *)src; | |||
| end = s + src_size/2; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |||
| __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |||
| mm_end = (uint16_t*)((((unsigned long)end)/4)*4); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| PREFETCH" 32%1\n\t" | |||
| "movq %1, %%mm0\n\t" | |||
| "movq %1, %%mm1\n\t" | |||
| "movq %1, %%mm2\n\t" | |||
| "pand %2, %%mm0\n\t" | |||
| "pand %3, %%mm1\n\t" | |||
| "pand %4, %%mm2\n\t" | |||
| "psllq $3, %%mm0\n\t" | |||
| "psrlq $2, %%mm1\n\t" | |||
| "psrlq $7, %%mm2\n\t" | |||
| "movq %%mm0, %%mm3\n\t" | |||
| "movq %%mm1, %%mm4\n\t" | |||
| "movq %%mm2, %%mm5\n\t" | |||
| "punpcklwd %%mm7, %%mm0\n\t" | |||
| "punpcklwd %%mm7, %%mm1\n\t" | |||
| "punpcklwd %%mm7, %%mm2\n\t" | |||
| "punpckhwd %%mm7, %%mm3\n\t" | |||
| "punpckhwd %%mm7, %%mm4\n\t" | |||
| "punpckhwd %%mm7, %%mm5\n\t" | |||
| "psllq $8, %%mm1\n\t" | |||
| "psllq $16, %%mm2\n\t" | |||
| "por %%mm1, %%mm0\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "psllq $8, %%mm4\n\t" | |||
| "psllq $16, %%mm5\n\t" | |||
| "por %%mm4, %%mm3\n\t" | |||
| "por %%mm5, %%mm3\n\t" | |||
| MOVNTQ" %%mm0, %0\n\t" | |||
| MOVNTQ" %%mm3, 8%0\n\t" | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) | |||
| :"memory"); | |||
| d += 16; | |||
| s += 4; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #endif | |||
| while(s < end) | |||
| { | |||
| const int b= src[i+0]; | |||
| const int g= src[i+1]; | |||
| const int r= src[i+2]; | |||
| register uint16_t bgr; | |||
| bgr = *s++; | |||
| *d++ = (bgr&0x1F)<<3; | |||
| *d++ = (bgr&0x3E0)>>2; | |||
| *d++ = (bgr&0x7C00)>>7; | |||
| *d++ = 0; | |||
| } | |||
| } | |||
| d[j]= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |||
| static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |||
| { | |||
| const uint16_t *end; | |||
| #ifdef HAVE_MMX | |||
| const uint16_t *mm_end; | |||
| #endif | |||
| uint8_t *d = (uint8_t *)dst; | |||
| const uint16_t *s = (uint16_t *)src; | |||
| end = s + src_size/2; | |||
| #ifdef HAVE_MMX | |||
| __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |||
| __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |||
| mm_end = (uint16_t*)((((unsigned long)end)/4)*4); | |||
| while(s < mm_end) | |||
| { | |||
| __asm __volatile( | |||
| PREFETCH" 32%1\n\t" | |||
| "movq %1, %%mm0\n\t" | |||
| "movq %1, %%mm1\n\t" | |||
| "movq %1, %%mm2\n\t" | |||
| "pand %2, %%mm0\n\t" | |||
| "pand %3, %%mm1\n\t" | |||
| "pand %4, %%mm2\n\t" | |||
| "psllq $3, %%mm0\n\t" | |||
| "psrlq $3, %%mm1\n\t" | |||
| "psrlq $8, %%mm2\n\t" | |||
| "movq %%mm0, %%mm3\n\t" | |||
| "movq %%mm1, %%mm4\n\t" | |||
| "movq %%mm2, %%mm5\n\t" | |||
| "punpcklwd %%mm7, %%mm0\n\t" | |||
| "punpcklwd %%mm7, %%mm1\n\t" | |||
| "punpcklwd %%mm7, %%mm2\n\t" | |||
| "punpckhwd %%mm7, %%mm3\n\t" | |||
| "punpckhwd %%mm7, %%mm4\n\t" | |||
| "punpckhwd %%mm7, %%mm5\n\t" | |||
| "psllq $8, %%mm1\n\t" | |||
| "psllq $16, %%mm2\n\t" | |||
| "por %%mm1, %%mm0\n\t" | |||
| "por %%mm2, %%mm0\n\t" | |||
| "psllq $8, %%mm4\n\t" | |||
| "psllq $16, %%mm5\n\t" | |||
| "por %%mm4, %%mm3\n\t" | |||
| "por %%mm5, %%mm3\n\t" | |||
| MOVNTQ" %%mm0, %0\n\t" | |||
| MOVNTQ" %%mm3, 8%0\n\t" | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) | |||
| :"memory"); | |||
| d += 16; | |||
| s += 4; | |||
| } | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #endif | |||
| while(s < end) | |||
| { | |||
| register uint16_t bgr; | |||
| bgr = *s++; | |||
| *d++ = (bgr&0x1F)<<3; | |||
| *d++ = (bgr&0x7E0)>>3; | |||
| *d++ = (bgr&0xF800)>>8; | |||
| *d++ = 0; | |||
| } | |||
| } | |||
| static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |||
| { | |||
| #ifdef HAVE_MMX | |||
| /* TODO: unroll this loop */ | |||
| asm volatile ( | |||
| "xorl %%eax, %%eax \n\t" | |||
| ".balign 16 \n\t" | |||
| @@ -554,9 +955,9 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsign | |||
| "movq %%mm0, %%mm2 \n\t" | |||
| "pslld $16, %%mm0 \n\t" | |||
| "psrld $16, %%mm1 \n\t" | |||
| "pand "MANGLE(mask32r)", %%mm0 \n\t" | |||
| "pand "MANGLE(mask32g)", %%mm2 \n\t" | |||
| "pand "MANGLE(mask32b)", %%mm1 \n\t" | |||
| "pand "MANGLE(mask32r)", %%mm0 \n\t" | |||
| "pand "MANGLE(mask32g)", %%mm2 \n\t" | |||
| "pand "MANGLE(mask32b)", %%mm1 \n\t" | |||
| "por %%mm0, %%mm2 \n\t" | |||
| "por %%mm1, %%mm2 \n\t" | |||
| MOVNTQ" %%mm2, (%1, %%eax) \n\t" | |||
| @@ -570,8 +971,8 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsign | |||
| __asm __volatile(SFENCE:::"memory"); | |||
| __asm __volatile(EMMS:::"memory"); | |||
| #else | |||
| int i; | |||
| int num_pixels= src_size >> 2; | |||
| unsigned i; | |||
| unsigned num_pixels = src_size >> 2; | |||
| for(i=0; i<num_pixels; i++) | |||
| { | |||
| dst[4*i + 0] = src[4*i + 2]; | |||
| @@ -583,7 +984,7 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsign | |||
| static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |||
| { | |||
| int i; | |||
| unsigned i; | |||
| #ifdef HAVE_MMX | |||
| int mmx_size= 23 - src_size; | |||
| asm volatile ( | |||
| @@ -631,15 +1032,16 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsign | |||
| __asm __volatile(EMMS:::"memory"); | |||
| if(mmx_size==23) return; //finihsed, was multiple of 8 | |||
| src+= src_size; | |||
| dst+= src_size; | |||
| src_size= 23 - mmx_size; | |||
| src_size= 23-mmx_size; | |||
| src-= src_size; | |||
| dst-= src_size; | |||
| #endif | |||
| for(i=0; i<src_size; i+=3) | |||
| { | |||
| register int x; | |||
| register uint8_t x; | |||
| x = src[i + 2]; | |||
| dst[i + 1] = src[i + 1]; | |||
| dst[i + 2] = src[i + 0]; | |||
| @@ -651,8 +1053,8 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u | |||
| unsigned int width, unsigned int height, | |||
| unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma) | |||
| { | |||
| int y; | |||
| const int chromWidth= width>>1; | |||
| unsigned y; | |||
| const unsigned chromWidth= width>>1; | |||
| for(y=0; y<height; y++) | |||
| { | |||
| #ifdef HAVE_MMX | |||
| @@ -691,14 +1093,33 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u | |||
| : "%eax" | |||
| ); | |||
| #else | |||
| #if __WORDSIZE >= 64 | |||
| int i; | |||
| for(i=0; i<chromWidth; i++) | |||
| { | |||
| dst[4*i+0] = ysrc[2*i+0]; | |||
| dst[4*i+1] = usrc[i]; | |||
| dst[4*i+2] = ysrc[2*i+1]; | |||
| dst[4*i+3] = vsrc[i]; | |||
| uint64_t *ldst = (uint64_t *) dst; | |||
| const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |||
| for(i = 0; i < chromWidth; i += 2){ | |||
| uint64_t k, l; | |||
| k = yc[0] + (uc[0] << 8) + | |||
| (yc[1] << 16) + (vc[0] << 24); | |||
| l = yc[2] + (uc[1] << 8) + | |||
| (yc[3] << 16) + (vc[1] << 24); | |||
| *ldst++ = k + (l << 32); | |||
| yc += 4; | |||
| uc += 2; | |||
| vc += 2; | |||
| } | |||
| #else | |||
| int i, *idst = (int32_t *) dst; | |||
| const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |||
| for(i = 0; i < chromWidth; i++){ | |||
| *idst++ = yc[0] + (uc[0] << 8) + | |||
| (yc[1] << 16) + (vc[0] << 24); | |||
| yc += 2; | |||
| uc++; | |||
| vc++; | |||
| } | |||
| #endif | |||
| #endif | |||
| if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) | |||
| { | |||
| @@ -748,8 +1169,8 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||
| unsigned int width, unsigned int height, | |||
| unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | |||
| { | |||
| int y; | |||
| const int chromWidth= width>>1; | |||
| unsigned y; | |||
| const unsigned chromWidth= width>>1; | |||
| for(y=0; y<height; y+=2) | |||
| { | |||
| #ifdef HAVE_MMX | |||
| @@ -835,7 +1256,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||
| : "memory", "%eax" | |||
| ); | |||
| #else | |||
| int i; | |||
| unsigned i; | |||
| for(i=0; i<chromWidth; i++) | |||
| { | |||
| ydst[2*i+0] = src[4*i+0]; | |||
| @@ -884,8 +1305,8 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||
| unsigned int width, unsigned int height, | |||
| unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | |||
| { | |||
| int y; | |||
| const int chromWidth= width>>1; | |||
| unsigned y; | |||
| const unsigned chromWidth= width>>1; | |||
| for(y=0; y<height; y+=2) | |||
| { | |||
| #ifdef HAVE_MMX | |||
| @@ -971,7 +1392,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||
| : "memory", "%eax" | |||
| ); | |||
| #else | |||
| int i; | |||
| unsigned i; | |||
| for(i=0; i<chromWidth; i++) | |||
| { | |||
| udst[i] = src[4*i+0]; | |||
| @@ -1010,12 +1431,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| unsigned int width, unsigned int height, | |||
| unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | |||
| { | |||
| int y; | |||
| const int chromWidth= width>>1; | |||
| unsigned y; | |||
| const unsigned chromWidth= width>>1; | |||
| #ifdef HAVE_MMX | |||
| for(y=0; y<height-2; y+=2) | |||
| { | |||
| int i; | |||
| unsigned i; | |||
| for(i=0; i<2; i++) | |||
| { | |||
| asm volatile( | |||
| @@ -1254,7 +1675,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| #endif | |||
| for(; y<height; y+=2) | |||
| { | |||
| int i; | |||
| unsigned i; | |||
| for(i=0; i<chromWidth; i++) | |||
| { | |||
| unsigned int b= src[6*i+0]; | |||
| @@ -1304,12 +1725,13 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| } | |||
| void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | |||
| int width, int height, int src1Stride, int src2Stride, int dstStride){ | |||
| int h; | |||
| unsigned width, unsigned height, unsigned src1Stride, | |||
| unsigned src2Stride, unsigned dstStride){ | |||
| unsigned h; | |||
| for(h=0; h < height; h++) | |||
| { | |||
| int w; | |||
| unsigned w; | |||
| #ifdef HAVE_MMX | |||
| #ifdef HAVE_SSE2 | |||
| @@ -65,6 +65,14 @@ untested special converters | |||
| #include "rgb2rgb.h" | |||
| #include "../libvo/fastmemcpy.h" | |||
| #include "../mp_msg.h" | |||
| #define MSG_WARN(args...) mp_msg(MSGT_SWS,MSGL_WARN, ##args ) | |||
| #define MSG_FATAL(args...) mp_msg(MSGT_SWS,MSGL_FATAL, ##args ) | |||
| #define MSG_ERR(args...) mp_msg(MSGT_SWS,MSGL_ERR, ##args ) | |||
| #define MSG_V(args...) mp_msg(MSGT_SWS,MSGL_V, ##args ) | |||
| #define MSG_DBG2(args...) mp_msg(MSGT_SWS,MSGL_DBG2, ##args ) | |||
| #define MSG_INFO(args...) mp_msg(MSGT_SWS,MSGL_INFO, ##args ) | |||
| #undef MOVNTQ | |||
| #undef PAVGB | |||
| @@ -92,19 +100,26 @@ untested special converters | |||
| #endif | |||
| //FIXME replace this with something faster | |||
| #define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420) | |||
| #define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x)) | |||
| #define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420) | |||
| #define isBGR(x) ((x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) | |||
| #define isRGB(x) ((x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24|| (x)==IMGFMT_RGB16|| (x)==IMGFMT_RGB15) | |||
| #define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV|| (x)==IMGFMT_YVU9 || (x)==IMGFMT_IF09) | |||
| #define isYUV(x) (!(isBGR(x) || isRGB(x))) | |||
| #define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) | |||
| #define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420) | |||
| #define isPacked(x) ((x)==IMGFMT_YUY2 || ((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR || ((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB) | |||
| #define isGray(x) ((x)==IMGFMT_Y800) | |||
| #define isPacked(x) (isYUV(x) && !isPlanarYUV(x)) | |||
| #define isGray(x) ((x)==IMGFMT_Y800) /* Behaviour the same as PACKED but it's PLANAR */ | |||
| #define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \ | |||
| || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\ | |||
| || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\ | |||
| || (x)==IMGFMT_Y800) | |||
| #define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \ | |||
| || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) | |||
| #define isBGR(x) ((x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) | |||
| #define isSupportedUnscaledIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_NV12 \ | |||
| || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\ | |||
| || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\ | |||
| || (x)==IMGFMT_Y800) | |||
| #define isSupportedUnscaledOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x) == IMGFMT_YUY2 \ | |||
| || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) | |||
| #define RGB2YUV_SHIFT 16 | |||
| #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) | |||
| @@ -751,7 +766,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out | |||
| if (flags&SWS_BICUBIC) filterSize= 4; | |||
| else if(flags&SWS_X ) filterSize= 4; | |||
| else filterSize= 2; // SWS_BILINEAR / SWS_AREA | |||
| // printf("%d %d %d\n", filterSize, srcW, dstW); | |||
| filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); | |||
| xDstInSrc= xInc/2 - 0x8000; | |||
| @@ -780,12 +794,10 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out | |||
| y4 = ( -1.0*d + 1.0*d*d*d)/6.0; | |||
| } | |||
| // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); | |||
| filter[i*filterSize + 0]= y1; | |||
| filter[i*filterSize + 1]= y2; | |||
| filter[i*filterSize + 2]= y3; | |||
| filter[i*filterSize + 3]= y4; | |||
| // printf("%1.3f %1.3f %1.3f %1.3f %1.3f\n",d , y1, y2, y3, y4); | |||
| } | |||
| else | |||
| { | |||
| @@ -795,7 +807,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out | |||
| double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16); | |||
| double coeff= 1.0 - d; | |||
| if(coeff<0) coeff=0; | |||
| // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); | |||
| filter[i*filterSize + j]= coeff; | |||
| xx++; | |||
| } | |||
| @@ -812,7 +823,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out | |||
| else if(flags&SWS_X) filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW); | |||
| else if(flags&SWS_AREA) filterSize= (int)ceil(1 + 1.0*srcW / (double)dstW); | |||
| else /* BILINEAR */ filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW); | |||
| // printf("%d %d %d\n", *filterSize, srcW, dstW); | |||
| filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); | |||
| xDstInSrc= xInc/2 - 0x8000; | |||
| @@ -849,7 +859,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out | |||
| coeff= 1.0 - d; | |||
| if(coeff<0) coeff=0; | |||
| } | |||
| // printf("%1.3f %2.3f %d \n", coeff, d, xDstInSrc); | |||
| filter[i*filterSize + j]= coeff; | |||
| xx++; | |||
| } | |||
| @@ -940,7 +949,7 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out | |||
| *outFilterSize= filterSize; | |||
| if(flags&SWS_PRINT_INFO) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); | |||
| MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); | |||
| /* try to reduce the filter-size (step2 reduce it) */ | |||
| for(i=0; i<dstW; i++) | |||
| { | |||
| @@ -1254,6 +1263,32 @@ cpuCaps= gCpuCaps; | |||
| #endif //!RUNTIME_CPUDETECT | |||
| } | |||
| static void PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dstParam[], int dstStride[]){ | |||
| uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; | |||
| /* Copy Y plane */ | |||
| if(dstStride[0]==srcStride[0]) | |||
| memcpy(dst, src[0], srcSliceH*dstStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| memcpy(dstPtr, srcPtr, srcStride[0]); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| dst = dstParam[1] + dstStride[1]*srcSliceY; | |||
| if(c->srcFormat==IMGFMT_YV12) | |||
| interleaveBytes( src[1],src[2],dst,c->srcW,srcSliceH,srcStride[1],srcStride[2],dstStride[0] ); | |||
| else /* I420 & IYUV */ | |||
| interleaveBytes( src[2],src[1],dst,c->srcW,srcSliceH,srcStride[2],srcStride[1],dstStride[0] ); | |||
| } | |||
| /* Wrapper functions for yuv2bgr */ | |||
| static void planarYuvToBgr(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dstParam[], int dstStride[]){ | |||
| @@ -1265,6 +1300,16 @@ static void planarYuvToBgr(SwsContext *c, uint8_t* src[], int srcStride[], int s | |||
| yuv2rgb( dst,src[0],src[2],src[1],c->srcW,srcSliceH,dstStride[0],srcStride[0],srcStride[1] ); | |||
| } | |||
| static void Planar2PackedWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dstParam[], int dstStride[]){ | |||
| uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; | |||
| if(c->srcFormat==IMGFMT_YV12) | |||
| yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] ); | |||
| else /* I420 & IYUV */ | |||
| yv12toyuy2( src[0],src[2],src[1],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] ); | |||
| } | |||
| static void bgr24to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| @@ -1285,6 +1330,46 @@ static void bgr24to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int | |||
| } | |||
| } | |||
| static void bgr24to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| if(dstStride[0]*3==srcStride[0]*2) | |||
| rgb24to16(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| rgb24to16(srcPtr, dstPtr, c->srcW*3); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| } | |||
| static void bgr24to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| if(dstStride[0]*3==srcStride[0]*2) | |||
| rgb24to15(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| rgb24to15(srcPtr, dstPtr, c->srcW*3); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| } | |||
| static void bgr32to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| @@ -1305,6 +1390,46 @@ static void bgr32to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int | |||
| } | |||
| } | |||
| static void bgr32to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| if(dstStride[0]*4==srcStride[0]*2) | |||
| rgb32to16(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| rgb32to16(srcPtr, dstPtr, c->srcW<<2); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| } | |||
| static void bgr32to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| if(dstStride[0]*4==srcStride[0]*2) | |||
| rgb32to15(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| rgb32to15(srcPtr, dstPtr, c->srcW<<2); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| } | |||
| static void bgr15to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| @@ -1325,6 +1450,86 @@ static void bgr15to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int | |||
| } | |||
| } | |||
| static void bgr15to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| if(dstStride[0]*2==srcStride[0]*3) | |||
| rgb15to24(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| rgb15to24(srcPtr, dstPtr, c->srcW<<1); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| } | |||
| static void bgr15to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| if(dstStride[0]*2==srcStride[0]*4) | |||
| rgb15to32(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| rgb15to32(srcPtr, dstPtr, c->srcW<<1); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| } | |||
| static void bgr16to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| if(dstStride[0]*2==srcStride[0]*3) | |||
| rgb16to24(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| rgb16to24(srcPtr, dstPtr, c->srcW<<1); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| } | |||
| static void bgr16to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| if(dstStride[0]*2==srcStride[0]*4) | |||
| rgb16to32(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); | |||
| else | |||
| { | |||
| int i; | |||
| uint8_t *srcPtr= src[0]; | |||
| uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; | |||
| for(i=0; i<srcSliceH; i++) | |||
| { | |||
| rgb16to32(srcPtr, dstPtr, c->srcW<<1); | |||
| srcPtr+= srcStride[0]; | |||
| dstPtr+= dstStride[0]; | |||
| } | |||
| } | |||
| } | |||
| static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]){ | |||
| @@ -1346,21 +1551,25 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], | |||
| uint8_t *src[3]; | |||
| uint8_t *dst[3]; | |||
| if(c->srcFormat == IMGFMT_I420){ | |||
| if(isPlanarYUV(c->srcFormat)) | |||
| { | |||
| if(c->srcFormat == IMGFMT_I420 || c->srcFormat == IMGFMT_IYUV){ | |||
| src[0]= srcParam[0]; | |||
| src[1]= srcParam[2]; | |||
| src[2]= srcParam[1]; | |||
| srcStride[0]= srcStrideParam[0]; | |||
| srcStride[1]= srcStrideParam[2]; | |||
| srcStride[2]= srcStrideParam[1]; | |||
| } | |||
| else if(c->srcFormat==IMGFMT_YV12){ | |||
| } | |||
| else | |||
| { | |||
| src[0]= srcParam[0]; | |||
| src[1]= srcParam[1]; | |||
| src[2]= srcParam[2]; | |||
| srcStride[0]= srcStrideParam[0]; | |||
| srcStride[1]= srcStrideParam[1]; | |||
| srcStride[2]= srcStrideParam[2]; | |||
| } | |||
| } | |||
| else if(isPacked(c->srcFormat) || isGray(c->srcFormat)){ | |||
| src[0]= srcParam[0]; | |||
| @@ -1371,7 +1580,7 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], | |||
| srcStride[2]= 0; | |||
| } | |||
| if(c->dstFormat == IMGFMT_I420){ | |||
| if(c->dstFormat == IMGFMT_I420 || c->dstFormat == IMGFMT_IYUV){ | |||
| dst[0]= dstParam[0]; | |||
| dst[1]= dstParam[2]; | |||
| dst[2]= dstParam[1]; | |||
| @@ -1411,9 +1620,21 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], | |||
| int plane; | |||
| for(plane=0; plane<3; plane++) | |||
| { | |||
| int length= plane==0 ? c->srcW : ((c->srcW+1)>>1); | |||
| int y= plane==0 ? srcSliceY: ((srcSliceY+1)>>1); | |||
| int height= plane==0 ? srcSliceH: ((srcSliceH+1)>>1); | |||
| int length; | |||
| int y; | |||
| int height; | |||
| if(c->srcFormat == IMGFMT_YVU9 || c->srcFormat == IMGFMT_IF09) | |||
| { | |||
| length= plane==0 ? c->srcW : ((c->srcW+1)>>2); | |||
| y= plane==0 ? srcSliceY: ((srcSliceY+1)>>2); | |||
| height= plane==0 ? srcSliceH: ((srcSliceH+1)>>2); | |||
| } | |||
| else | |||
| { | |||
| length= plane==0 ? c->srcW : ((c->srcW+1)>>1); | |||
| y= plane==0 ? srcSliceY: ((srcSliceY+1)>>1); | |||
| height= plane==0 ? srcSliceH: ((srcSliceH+1)>>1); | |||
| } | |||
| if(dstStride[plane]==srcStride[plane]) | |||
| memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]); | |||
| @@ -1433,12 +1654,23 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], | |||
| } | |||
| } | |||
| static uint32_t remove_dup_fourcc(uint32_t fourcc) | |||
| { | |||
| switch(fourcc) | |||
| { | |||
| case IMGFMT_IYUV: return IMGFMT_I420; | |||
| case IMGFMT_Y8 : return IMGFMT_Y800; | |||
| default: return fourcc; | |||
| } | |||
| } | |||
| SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, | |||
| SwsFilter *srcFilter, SwsFilter *dstFilter){ | |||
| SwsContext *c; | |||
| int i; | |||
| int usesFilter; | |||
| int simple_copy, unscaled_copy; | |||
| SwsFilter dummyFilter= {NULL, NULL, NULL, NULL}; | |||
| #ifdef ARCH_X86 | |||
| @@ -1449,25 +1681,44 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| if(swScale==NULL) globalInit(); | |||
| /* avoid duplicate formats, so we don't need to check too much */ | |||
| if(srcFormat==IMGFMT_IYUV) srcFormat=IMGFMT_I420; | |||
| if(srcFormat==IMGFMT_Y8) srcFormat=IMGFMT_Y800; | |||
| if(dstFormat==IMGFMT_Y8) dstFormat=IMGFMT_Y800; | |||
| if(!isSupportedIn(srcFormat)) | |||
| { | |||
| mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %s is not supported as input format\n", vo_format_name(srcFormat)); | |||
| return NULL; | |||
| } | |||
| if(!isSupportedOut(dstFormat)) | |||
| srcFormat = remove_dup_fourcc(srcFormat); | |||
| dstFormat = remove_dup_fourcc(dstFormat); | |||
| /* don't refuse this beauty */ | |||
| unscaled_copy = (srcW == dstW && srcH == dstH); | |||
| simple_copy = (srcW == dstW && srcH == dstH && srcFormat == dstFormat); | |||
| if(!simple_copy) | |||
| { | |||
| mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %s is not supported as output format\n", vo_format_name(dstFormat)); | |||
| return NULL; | |||
| if(unscaled_copy) | |||
| { | |||
| if(!isSupportedUnscaledIn(srcFormat)) | |||
| { | |||
| MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat)); | |||
| return NULL; | |||
| } | |||
| if(!isSupportedUnscaledOut(dstFormat)) | |||
| { | |||
| MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat)); | |||
| return NULL; | |||
| } | |||
| } | |||
| else | |||
| { | |||
| if(!isSupportedIn(srcFormat)) | |||
| { | |||
| MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat)); | |||
| return NULL; | |||
| } | |||
| if(!isSupportedOut(dstFormat)) | |||
| { | |||
| MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat)); | |||
| return NULL; | |||
| } | |||
| } | |||
| } | |||
| /* sanity check */ | |||
| if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code | |||
| { | |||
| mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", | |||
| MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", | |||
| srcW, srcH, dstW, dstH); | |||
| return NULL; | |||
| } | |||
| @@ -1501,6 +1752,26 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| /* unscaled special Cases */ | |||
| if(srcW==dstW && srcH==dstH && !usesFilter) | |||
| { | |||
| /* yv12_to_nv12 */ | |||
| if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_NV12) | |||
| { | |||
| c->swScale= PlanarToNV12Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* yv12_to_yuy2 */ | |||
| if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_YUY2) | |||
| { | |||
| c->swScale= Planar2PackedWrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* yuv2bgr */ | |||
| if(isPlanarYUV(srcFormat) && isBGR(dstFormat)) | |||
| { | |||
| @@ -1516,7 +1787,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| c->swScale= planarYuvToBgr; | |||
| if(flags&SWS_PRINT_INFO) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| @@ -1527,7 +1798,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| c->swScale= simpleCopy; | |||
| if(flags&SWS_PRINT_INFO) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| @@ -1539,7 +1810,31 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| c->swScale= bgr32to24Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* bgr32to16 & rgb32to16*/ | |||
| if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR16) | |||
| ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB16)) | |||
| { | |||
| c->swScale= bgr32to16Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* bgr32to15 & rgb32to15*/ | |||
| if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR15) | |||
| ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB15)) | |||
| { | |||
| c->swScale= bgr32to15Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| @@ -1551,7 +1846,31 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| c->swScale= bgr24to32Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* bgr24to16 & rgb24to16*/ | |||
| if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR16) | |||
| ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB16)) | |||
| { | |||
| c->swScale= bgr24to16Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* bgr24to15 & rgb24to15*/ | |||
| if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR15) | |||
| ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB15)) | |||
| { | |||
| c->swScale= bgr24to15Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| @@ -1562,7 +1881,55 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| c->swScale= bgr15to16Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* bgr15to24 */ | |||
| if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR24) | |||
| ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB24)) | |||
| { | |||
| c->swScale= bgr15to24Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* bgr15to32 */ | |||
| if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR32) | |||
| ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB32)) | |||
| { | |||
| c->swScale= bgr15to32Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* bgr16to24 */ | |||
| if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR24) | |||
| ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB24)) | |||
| { | |||
| c->swScale= bgr16to24Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| /* bgr16to32 */ | |||
| if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR32) | |||
| ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB32)) | |||
| { | |||
| c->swScale= bgr16to32Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| @@ -1573,7 +1940,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| c->swScale= bgr24toyv12Wrapper; | |||
| if(flags&SWS_PRINT_INFO) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", | |||
| MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| return c; | |||
| } | |||
| @@ -1585,7 +1952,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) | |||
| { | |||
| if(flags&SWS_PRINT_INFO) | |||
| mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); | |||
| MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); | |||
| } | |||
| } | |||
| else | |||
| @@ -1723,33 +2090,35 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| char *dither= ""; | |||
| #endif | |||
| if(flags&SWS_FAST_BILINEAR) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: FAST_BILINEAR scaler, "); | |||
| MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, "); | |||
| else if(flags&SWS_BILINEAR) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: BILINEAR scaler, "); | |||
| MSG_INFO("\nSwScaler: BILINEAR scaler, "); | |||
| else if(flags&SWS_BICUBIC) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: BICUBIC scaler, "); | |||
| MSG_INFO("\nSwScaler: BICUBIC scaler, "); | |||
| else if(flags&SWS_X) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Experimental scaler, "); | |||
| MSG_INFO("\nSwScaler: Experimental scaler, "); | |||
| else if(flags&SWS_POINT) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Nearest Neighbor / POINT scaler, "); | |||
| MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, "); | |||
| else if(flags&SWS_AREA) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Area Averageing scaler, "); | |||
| MSG_INFO("\nSwScaler: Area Averageing scaler, "); | |||
| else | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: ehh flags invalid?! "); | |||
| MSG_INFO("\nSwScaler: ehh flags invalid?! "); | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"%dx%d %s -> %dx%d%s %s ", | |||
| srcW,srcH, vo_format_name(srcFormat), dstW,dstH, | |||
| (dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16) ? | |||
| dither : "", vo_format_name(dstFormat)); | |||
| if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16) | |||
| MSG_INFO("from %s to%s %s ", | |||
| vo_format_name(srcFormat), dither, vo_format_name(dstFormat)); | |||
| else | |||
| MSG_INFO("from %s to %s ", | |||
| vo_format_name(srcFormat), vo_format_name(dstFormat)); | |||
| if(cpuCaps.hasMMX2) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"using MMX2\n"); | |||
| MSG_INFO("using MMX2\n"); | |||
| else if(cpuCaps.has3DNow) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"using 3DNOW\n"); | |||
| MSG_INFO("using 3DNOW\n"); | |||
| else if(cpuCaps.hasMMX) | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"using MMX\n"); | |||
| MSG_INFO("using MMX\n"); | |||
| else | |||
| mp_msg(MSGT_SWS,MSGL_INFO,"using C\n"); | |||
| MSG_INFO("using C\n"); | |||
| } | |||
| if((flags & SWS_PRINT_INFO) && verbose) | |||
| @@ -1757,70 +2126,70 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, | |||
| if(cpuCaps.hasMMX) | |||
| { | |||
| if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); | |||
| MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); | |||
| else | |||
| { | |||
| if(c->hLumFilterSize==4) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); | |||
| MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); | |||
| else if(c->hLumFilterSize==8) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); | |||
| MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); | |||
| else | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); | |||
| MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); | |||
| if(c->hChrFilterSize==4) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); | |||
| MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); | |||
| else if(c->hChrFilterSize==8) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); | |||
| MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); | |||
| else | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); | |||
| MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); | |||
| } | |||
| } | |||
| else | |||
| { | |||
| #ifdef ARCH_X86 | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using X86-Asm scaler for horizontal scaling\n"); | |||
| MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n"); | |||
| #else | |||
| if(flags & SWS_FAST_BILINEAR) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); | |||
| MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); | |||
| else | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using C scaler for horizontal scaling\n"); | |||
| MSG_V("SwScaler: using C scaler for horizontal scaling\n"); | |||
| #endif | |||
| } | |||
| if(isPlanarYUV(dstFormat)) | |||
| { | |||
| if(c->vLumFilterSize==1) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| else | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| } | |||
| else | |||
| { | |||
| if(c->vLumFilterSize==1 && c->vChrFilterSize==2) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" | |||
| MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" | |||
| "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C"); | |||
| else if(c->vLumFilterSize==2 && c->vChrFilterSize==2) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| else | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| } | |||
| if(dstFormat==IMGFMT_BGR24) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR24 Converter\n", | |||
| MSG_V("SwScaler: using %s YV12->BGR24 Converter\n", | |||
| cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C")); | |||
| else if(dstFormat==IMGFMT_BGR32) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| else if(dstFormat==IMGFMT_BGR16) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| else if(dstFormat==IMGFMT_BGR15) | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); | |||
| mp_msg(MSGT_SWS,MSGL_V,"SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); | |||
| MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); | |||
| } | |||
| if((flags & SWS_PRINT_INFO) && verbose>1) | |||
| { | |||
| mp_msg(MSGT_SWS,MSGL_DBG2,"SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", | |||
| MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", | |||
| c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); | |||
| mp_msg(MSGT_SWS,MSGL_DBG2,"SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", | |||
| MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", | |||
| c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc); | |||
| } | |||
| @@ -2039,9 +2408,9 @@ void printVec(SwsVector *a){ | |||
| for(i=0; i<a->length; i++) | |||
| { | |||
| int x= (int)((a->coeff[i]-min)*60.0/range +0.5); | |||
| printf("%1.3f ", a->coeff[i]); | |||
| for(;x>0; x--) printf(" "); | |||
| printf("|\n"); | |||
| MSG_DBG2("%1.3f ", a->coeff[i]); | |||
| for(;x>0; x--) MSG_DBG2(" "); | |||
| MSG_DBG2("|\n"); | |||
| } | |||
| } | |||
| @@ -2626,7 +2626,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar | |||
| srcStride[1]= srcStrideParam[1]; | |||
| srcStride[2]= srcStrideParam[2]; | |||
| } | |||
| else if(isPacked(c->srcFormat)){ | |||
| else if(isPacked(c->srcFormat) || isBGR(c->srcFormat) || isRGB(c->srcFormat)){ | |||
| src[0]= | |||
| src[1]= | |||
| src[2]= srcParam[0]; | |||
| @@ -156,7 +156,7 @@ const int32_t Inverse_Table_6_9[8][4] = { | |||
| {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ | |||
| }; | |||
| static void yuv2rgb_c_init (int bpp, int mode); | |||
| static void yuv2rgb_c_init (unsigned bpp, int mode); | |||
| yuv2rgb_fun yuv2rgb; | |||
| @@ -166,11 +166,11 @@ static void (* yuv2rgb_c_internal) (uint8_t *, uint8_t *, | |||
| static void yuv2rgb_c (void * dst, uint8_t * py, | |||
| uint8_t * pu, uint8_t * pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride) | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride) | |||
| { | |||
| v_size >>= 1; | |||
| while (v_size--) { | |||
| yuv2rgb_c_internal (py, py + y_stride, pu, pv, dst, dst + rgb_stride, | |||
| h_size, v_size<<1); | |||
| @@ -182,7 +182,7 @@ static void yuv2rgb_c (void * dst, uint8_t * py, | |||
| } | |||
| } | |||
| void yuv2rgb_init (int bpp, int mode) | |||
| void yuv2rgb_init (unsigned bpp, int mode) | |||
| { | |||
| yuv2rgb = NULL; | |||
| #ifdef CAN_COMPILE_X86_ASM | |||
| @@ -676,7 +676,7 @@ static int div_round (int dividend, int divisor) | |||
| return -((-dividend + (divisor>>1)) / divisor); | |||
| } | |||
| static void yuv2rgb_c_init (int bpp, int mode) | |||
| static void yuv2rgb_c_init (unsigned bpp, int mode) | |||
| { | |||
| int i; | |||
| uint8_t table_Y[1024]; | |||
| @@ -29,8 +29,8 @@ | |||
| static void mlib_YUV2ARGB420_32(uint8_t* image, uint8_t* py, | |||
| uint8_t* pu, uint8_t* pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride) | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride) | |||
| { | |||
| mlib_VideoColorYUV2ARGB420(image, py, pu, pv, h_size, | |||
| v_size, rgb_stride, y_stride, uv_stride); | |||
| @@ -38,8 +38,8 @@ static void mlib_YUV2ARGB420_32(uint8_t* image, uint8_t* py, | |||
| static void mlib_YUV2ABGR420_32(uint8_t* image, uint8_t* py, | |||
| uint8_t* pu, uint8_t* pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride) | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride) | |||
| { | |||
| mlib_VideoColorYUV2ABGR420(image, py, pu, pv, h_size, | |||
| v_size, rgb_stride, y_stride, uv_stride); | |||
| @@ -47,15 +47,15 @@ static void mlib_YUV2ABGR420_32(uint8_t* image, uint8_t* py, | |||
| static void mlib_YUV2RGB420_24(uint8_t* image, uint8_t* py, | |||
| uint8_t* pu, uint8_t* pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride) | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride) | |||
| { | |||
| mlib_VideoColorYUV2RGB420(image, py, pu, pv, h_size, | |||
| v_size, rgb_stride, y_stride, uv_stride); | |||
| } | |||
| yuv2rgb_fun yuv2rgb_init_mlib(int bpp, int mode) | |||
| yuv2rgb_fun yuv2rgb_init_mlib(unsigned bpp, int mode) | |||
| { | |||
| if( bpp == 24 ) | |||
| @@ -123,8 +123,8 @@ | |||
| static inline void RENAME(yuv420_rgb16) (uint8_t * image, uint8_t * py, | |||
| uint8_t * pu, uint8_t * pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride) | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride) | |||
| { | |||
| int even = 1; | |||
| int x, y; | |||
| @@ -228,8 +228,8 @@ YUV2RGB | |||
| static inline void RENAME(yuv420_rgb15) (uint8_t * image, uint8_t * py, | |||
| uint8_t * pu, uint8_t * pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride) | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride) | |||
| { | |||
| int even = 1; | |||
| int x, y; | |||
| @@ -329,8 +329,8 @@ YUV2RGB | |||
| static inline void RENAME(yuv420_rgb24) (uint8_t * image, uint8_t * py, | |||
| uint8_t * pu, uint8_t * pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride) | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride) | |||
| { | |||
| int even = 1; | |||
| int x, y; | |||
| @@ -488,8 +488,8 @@ YUV2RGB | |||
| static inline void RENAME(yuv420_argb32) (uint8_t * image, uint8_t * py, | |||
| uint8_t * pu, uint8_t * pv, | |||
| int h_size, int v_size, | |||
| int rgb_stride, int y_stride, int uv_stride) | |||
| unsigned h_size, unsigned v_size, | |||
| unsigned rgb_stride, unsigned y_stride, unsigned uv_stride) | |||
| { | |||
| int even = 1; | |||
| int x, y; | |||
| @@ -584,7 +584,7 @@ YUV2RGB | |||
| __asm__ __volatile__ (EMMS); | |||
| } | |||
| yuv2rgb_fun RENAME(yuv2rgb_init) (int bpp, int mode) | |||
| yuv2rgb_fun RENAME(yuv2rgb_init) (unsigned bpp, int mode) | |||
| { | |||
| if (bpp == 15 && mode == MODE_RGB) return RENAME(yuv420_rgb15); | |||
| if (bpp == 16 && mode == MODE_RGB) return RENAME(yuv420_rgb16); | |||