Originally committed as revision 5583 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc (tags: v0.5)
| @@ -24,6 +24,9 @@ static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFU | |||||
| static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; | static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; | ||||
| static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; | static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; | ||||
| static const uint64_t mask32 __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL; | static const uint64_t mask32 __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL; | ||||
/* Byte-select masks for packed 24-bit pixels held in a 64-bit word (byte 0 is
 * the least significant byte): mask24b keeps bytes 0, 3 and 6, mask24g keeps
 * bytes 1, 4 and 7, and mask24r keeps bytes 2 and 5. */
| static const uint64_t mask24b __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL; | |||||
| static const uint64_t mask24g __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL; | |||||
| static const uint64_t mask24r __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL; | |||||
| static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL; | static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL; | ||||
| static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL; | static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL; | ||||
| static const uint64_t mask24hh __attribute__((aligned(8))) = 0xffff000000000000ULL; | static const uint64_t mask24hh __attribute__((aligned(8))) = 0xffff000000000000ULL; | ||||
| @@ -316,6 +319,23 @@ void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |||||
| #endif | #endif | ||||
| } | } | ||||
/**
 * Run-time dispatcher for the rgb24tobgr24 conversion.
 *
 * When x86 assembly can be compiled, the gCpuCaps flags are checked in the
 * order MMX2, 3DNow, MMX and the first variant whose flag is set handles the
 * call. In every other case the plain C variant is used.
 *
 * @param src      source buffer, passed through unchanged
 * @param dst      destination buffer, passed through unchanged
 * @param src_size size argument, passed through unchanged
 */
void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	/* ordered by speed, fastest first */
	if(gCpuCaps.hasMMX2)
	{
		rgb24tobgr24_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb24tobgr24_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb24tobgr24_MMX(src, dst, src_size);
		return;
	}
#endif
	/* no usable SIMD variant (or none compiled in): portable fallback */
	rgb24tobgr24_C(src, dst, src_size);
}
| /** | /** | ||||
| * | * | ||||
| * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | ||||
| @@ -17,6 +17,7 @@ extern void rgb32to15(const uint8_t *src,uint8_t *dst,unsigned src_size); | |||||
| extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size); | extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size); | ||||
| extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size); | extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size); | ||||
| extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); | extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); | ||||
| extern void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); | |||||
| extern void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); | extern void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); | ||||
| @@ -571,6 +571,73 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsign | |||||
| #endif | #endif | ||||
| } | } | ||||
| static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |||||
| { | |||||
| int i; | |||||
| #ifdef HAVE_MMX | |||||
| int mmx_size= 23 - src_size; | |||||
| asm volatile ( | |||||
| "movq "MANGLE(mask24r)", %%mm5 \n\t" | |||||
| "movq "MANGLE(mask24g)", %%mm6 \n\t" | |||||
| "movq "MANGLE(mask24b)", %%mm7 \n\t" | |||||
| ".balign 16 \n\t" | |||||
| "1: \n\t" | |||||
| PREFETCH" 32(%1, %%eax) \n\t" | |||||
| "movq (%1, %%eax), %%mm0 \n\t" // BGR BGR BG | |||||
| "movq (%1, %%eax), %%mm1 \n\t" // BGR BGR BG | |||||
| "movq 2(%1, %%eax), %%mm2 \n\t" // R BGR BGR B | |||||
| "psllq $16, %%mm0 \n\t" // 00 BGR BGR | |||||
| "pand %%mm5, %%mm0 \n\t" | |||||
| "pand %%mm6, %%mm1 \n\t" | |||||
| "pand %%mm7, %%mm2 \n\t" | |||||
| "por %%mm0, %%mm1 \n\t" | |||||
| "por %%mm2, %%mm1 \n\t" | |||||
| "movq 6(%1, %%eax), %%mm0 \n\t" // BGR BGR BG | |||||
| MOVNTQ" %%mm1, (%2, %%eax) \n\t" // RGB RGB RG | |||||
| "movq 8(%1, %%eax), %%mm1 \n\t" // R BGR BGR B | |||||
| "movq 10(%1, %%eax), %%mm2 \n\t" // GR BGR BGR | |||||
| "pand %%mm7, %%mm0 \n\t" | |||||
| "pand %%mm5, %%mm1 \n\t" | |||||
| "pand %%mm6, %%mm2 \n\t" | |||||
| "por %%mm0, %%mm1 \n\t" | |||||
| "por %%mm2, %%mm1 \n\t" | |||||
| "movq 14(%1, %%eax), %%mm0 \n\t" // R BGR BGR B | |||||
| MOVNTQ" %%mm1, 8(%2, %%eax) \n\t" // B RGB RGB R | |||||
| "movq 16(%1, %%eax), %%mm1 \n\t" // GR BGR BGR | |||||
| "movq 18(%1, %%eax), %%mm2 \n\t" // BGR BGR BG | |||||
| "pand %%mm6, %%mm0 \n\t" | |||||
| "pand %%mm7, %%mm1 \n\t" | |||||
| "pand %%mm5, %%mm2 \n\t" | |||||
| "por %%mm0, %%mm1 \n\t" | |||||
| "por %%mm2, %%mm1 \n\t" | |||||
| MOVNTQ" %%mm1, 16(%2, %%eax) \n\t" | |||||
| "addl $24, %%eax \n\t" | |||||
| " js 1b \n\t" | |||||
| : "+a" (mmx_size) | |||||
| : "r" (src-mmx_size), "r"(dst-mmx_size) | |||||
| ); | |||||
| __asm __volatile(SFENCE:::"memory"); | |||||
| __asm __volatile(EMMS:::"memory"); | |||||
| if(!mmx_size) return; //finihsed, was multiple of 8 | |||||
| src+= src_size; | |||||
| dst+= src_size; | |||||
| src_size= 24-mmx_size; | |||||
| src-= src_size; | |||||
| dst-= src_size; | |||||
| #endif | |||||
| for(i=0; i<src_size; i+=3) | |||||
| { | |||||
| register int x; | |||||
| x = src[i + 2]; | |||||
| dst[i + 1] = src[i + 1]; | |||||
| dst[i + 2] = src[i + 0]; | |||||
| dst[i + 0] = x; | |||||
| } | |||||
| } | |||||
| /** | /** | ||||
| * | * | ||||
| * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | ||||