Originally committed as revision 25910 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
tags/v0.5
| @@ -122,19 +122,6 @@ DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; | DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; | ||||
| DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; | DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; | ||||
| #ifdef FAST_BGR2YV12 | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff) = 0x000000210041000DULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff) = 0x00000038FFD2FFF8ULL; | |||||
| #else | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff) = 0x000020E540830C8BULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff) = 0x0000ED0FDAC23831ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff) = 0x00003831D0E6F6EAULL; | |||||
| #endif | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YOffset) = 0x1010101010101010ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UVOffset) = 0x8080808080808080ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, w1111) = 0x0001000100010001ULL; | |||||
| #if 0 | #if 0 | ||||
| static volatile uint64_t __attribute__((aligned(8))) b5Dither; | static volatile uint64_t __attribute__((aligned(8))) b5Dither; | ||||
| static volatile uint64_t __attribute__((aligned(8))) g5Dither; | static volatile uint64_t __attribute__((aligned(8))) g5Dither; | ||||
| @@ -2147,8 +2147,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||
| { | { | ||||
| asm volatile( | asm volatile( | ||||
| "mov %2, %%"REG_a" \n\t" | "mov %2, %%"REG_a" \n\t" | ||||
| "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | |||||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||||
| "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" | |||||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||||
| "pxor %%mm7, %%mm7 \n\t" | "pxor %%mm7, %%mm7 \n\t" | ||||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | ||||
| ASMALIGN(4) | ASMALIGN(4) | ||||
| @@ -2206,7 +2206,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||
| "psraw $7, %%mm4 \n\t" | "psraw $7, %%mm4 \n\t" | ||||
| "packuswb %%mm4, %%mm0 \n\t" | "packuswb %%mm4, %%mm0 \n\t" | ||||
| "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | |||||
| "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" | |||||
| MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | ||||
| "add $8, %%"REG_a" \n\t" | "add $8, %%"REG_a" \n\t" | ||||
| @@ -2220,8 +2220,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||
| src -= srcStride*2; | src -= srcStride*2; | ||||
| asm volatile( | asm volatile( | ||||
| "mov %4, %%"REG_a" \n\t" | "mov %4, %%"REG_a" \n\t" | ||||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||||
| "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |||||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||||
| "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" | |||||
| "pxor %%mm7, %%mm7 \n\t" | "pxor %%mm7, %%mm7 \n\t" | ||||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | ||||
| "add %%"REG_d", %%"REG_d" \n\t" | "add %%"REG_d", %%"REG_d" \n\t" | ||||
| @@ -2270,8 +2270,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||
| "psrlw $2, %%mm0 \n\t" | "psrlw $2, %%mm0 \n\t" | ||||
| "psrlw $2, %%mm2 \n\t" | "psrlw $2, %%mm2 \n\t" | ||||
| #endif | #endif | ||||
| "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |||||
| "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |||||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||||
| "pmaddwd %%mm0, %%mm1 \n\t" | "pmaddwd %%mm0, %%mm1 \n\t" | ||||
| "pmaddwd %%mm2, %%mm3 \n\t" | "pmaddwd %%mm2, %%mm3 \n\t" | ||||
| @@ -2328,12 +2328,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||
| "paddw %%mm1, %%mm5 \n\t" | "paddw %%mm1, %%mm5 \n\t" | ||||
| "paddw %%mm3, %%mm2 \n\t" | "paddw %%mm3, %%mm2 \n\t" | ||||
| "paddw %%mm5, %%mm2 \n\t" | "paddw %%mm5, %%mm2 \n\t" | ||||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||||
| "psrlw $2, %%mm4 \n\t" | "psrlw $2, %%mm4 \n\t" | ||||
| "psrlw $2, %%mm2 \n\t" | "psrlw $2, %%mm2 \n\t" | ||||
| #endif | #endif | ||||
| "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |||||
| "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |||||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||||
| "pmaddwd %%mm4, %%mm1 \n\t" | "pmaddwd %%mm4, %%mm1 \n\t" | ||||
| "pmaddwd %%mm2, %%mm3 \n\t" | "pmaddwd %%mm2, %%mm3 \n\t" | ||||
| @@ -2357,7 +2357,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||
| "punpckldq %%mm4, %%mm0 \n\t" | "punpckldq %%mm4, %%mm0 \n\t" | ||||
| "punpckhdq %%mm4, %%mm1 \n\t" | "punpckhdq %%mm4, %%mm1 \n\t" | ||||
| "packsswb %%mm1, %%mm0 \n\t" | "packsswb %%mm1, %%mm0 \n\t" | ||||
| "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" | |||||
| "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" | |||||
| "movd %%mm0, (%2, %%"REG_a") \n\t" | "movd %%mm0, (%2, %%"REG_a") \n\t" | ||||
| "punpckhdq %%mm0, %%mm0 \n\t" | "punpckhdq %%mm0, %%mm0 \n\t" | ||||
| "movd %%mm0, (%3, %%"REG_a") \n\t" | "movd %%mm0, (%3, %%"REG_a") \n\t" | ||||
| @@ -209,22 +209,22 @@ DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; | DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; | ||||
| DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; | DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; | ||||
| DECLARE_ASM_CONST(8, uint64_t, M24A)= 0x00FF0000FF0000FFLL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, M24B)= 0xFF0000FF0000FF00LL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, M24C)= 0x0000FF0000FF0000LL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; | |||||
| #ifdef FAST_BGR2YV12 | #ifdef FAST_BGR2YV12 | ||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff) = 0x000000210041000DULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff) = 0x00000038FFD2FFF8ULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL; | |||||
| #else | #else | ||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff) = 0x000020E540830C8BULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff) = 0x0000ED0FDAC23831ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff) = 0x00003831D0E6F6EAULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; | |||||
| #endif /* FAST_BGR2YV12 */ | #endif /* FAST_BGR2YV12 */ | ||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YOffset) = 0x1010101010101010ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UVOffset) = 0x8080808080808080ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, w1111) = 0x0001000100010001ULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; | |||||
| DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; | |||||
| #endif /* defined(ARCH_X86) */ | #endif /* defined(ARCH_X86) */ | ||||
| // clipping helper table for C implementations: | // clipping helper table for C implementations: | ||||
| @@ -367,7 +367,7 @@ void in_asm_used_var_warning_killer() | |||||
| { | { | ||||
| volatile int i= bF8+bFC+w10+ | volatile int i= bF8+bFC+w10+ | ||||
| bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+ | bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+ | ||||
| M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101; | |||||
| ff_M24A+ff_M24B+ff_M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101; | |||||
| if (i) i=0; | if (i) i=0; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -855,8 +855,8 @@ | |||||
| #define WRITEBGR24MMX2(dst, dstw, index) \ | #define WRITEBGR24MMX2(dst, dstw, index) \ | ||||
| /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | ||||
| "movq "MANGLE(M24A)", %%mm0 \n\t"\ | |||||
| "movq "MANGLE(M24C)", %%mm7 \n\t"\ | |||||
| "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ | |||||
| "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ | |||||
| "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ | "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ | ||||
| "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ | "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ | ||||
| "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ | "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ | ||||
| @@ -875,7 +875,7 @@ | |||||
| "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ | "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ | ||||
| "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ | "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ | ||||
| \ | \ | ||||
| "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ | |||||
| "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ | |||||
| "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ | "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ | ||||
| "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ | "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ | ||||
| \ | \ | ||||
| @@ -889,7 +889,7 @@ | |||||
| \ | \ | ||||
| "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ | "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ | ||||
| "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ | "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ | ||||
| "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ | |||||
| "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ | |||||
| \ | \ | ||||
| "por %%mm1, %%mm3 \n\t"\ | "por %%mm1, %%mm3 \n\t"\ | ||||
| "por %%mm3, %%mm6 \n\t"\ | "por %%mm3, %%mm6 \n\t"\ | ||||
| @@ -1859,8 +1859,8 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) | |||||
| #ifdef HAVE_MMX | #ifdef HAVE_MMX | ||||
| asm volatile( | asm volatile( | ||||
| "mov %2, %%"REG_a" \n\t" | "mov %2, %%"REG_a" \n\t" | ||||
| "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | |||||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||||
| "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" | |||||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||||
| "pxor %%mm7, %%mm7 \n\t" | "pxor %%mm7, %%mm7 \n\t" | ||||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | ||||
| ASMALIGN(4) | ASMALIGN(4) | ||||
| @@ -1918,7 +1918,7 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) | |||||
| "psraw $7, %%mm4 \n\t" | "psraw $7, %%mm4 \n\t" | ||||
| "packuswb %%mm4, %%mm0 \n\t" | "packuswb %%mm4, %%mm0 \n\t" | ||||
| "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | |||||
| "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" | |||||
| "movq %%mm0, (%1, %%"REG_a") \n\t" | "movq %%mm0, (%1, %%"REG_a") \n\t" | ||||
| "add $8, %%"REG_a" \n\t" | "add $8, %%"REG_a" \n\t" | ||||
| @@ -1944,8 +1944,8 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 | |||||
| #ifdef HAVE_MMX | #ifdef HAVE_MMX | ||||
| asm volatile( | asm volatile( | ||||
| "mov %3, %%"REG_a" \n\t" | "mov %3, %%"REG_a" \n\t" | ||||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||||
| "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |||||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||||
| "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" | |||||
| "pxor %%mm7, %%mm7 \n\t" | "pxor %%mm7, %%mm7 \n\t" | ||||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | ||||
| "add %%"REG_d", %%"REG_d" \n\t" | "add %%"REG_d", %%"REG_d" \n\t" | ||||
| @@ -1977,8 +1977,8 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 | |||||
| "psrlw $1, %%mm0 \n\t" | "psrlw $1, %%mm0 \n\t" | ||||
| "psrlw $1, %%mm2 \n\t" | "psrlw $1, %%mm2 \n\t" | ||||
| #endif | #endif | ||||
| "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |||||
| "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |||||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||||
| "pmaddwd %%mm0, %%mm1 \n\t" | "pmaddwd %%mm0, %%mm1 \n\t" | ||||
| "pmaddwd %%mm2, %%mm3 \n\t" | "pmaddwd %%mm2, %%mm3 \n\t" | ||||
| @@ -2019,12 +2019,12 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 | |||||
| "punpcklbw %%mm7, %%mm5 \n\t" | "punpcklbw %%mm7, %%mm5 \n\t" | ||||
| "punpcklbw %%mm7, %%mm2 \n\t" | "punpcklbw %%mm7, %%mm2 \n\t" | ||||
| "paddw %%mm5, %%mm2 \n\t" | "paddw %%mm5, %%mm2 \n\t" | ||||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||||
| "psrlw $2, %%mm4 \n\t" | "psrlw $2, %%mm4 \n\t" | ||||
| "psrlw $2, %%mm2 \n\t" | "psrlw $2, %%mm2 \n\t" | ||||
| #endif | #endif | ||||
| "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |||||
| "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |||||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||||
| "pmaddwd %%mm4, %%mm1 \n\t" | "pmaddwd %%mm4, %%mm1 \n\t" | ||||
| "pmaddwd %%mm2, %%mm3 \n\t" | "pmaddwd %%mm2, %%mm3 \n\t" | ||||
| @@ -2048,7 +2048,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 | |||||
| "punpckldq %%mm4, %%mm0 \n\t" | "punpckldq %%mm4, %%mm0 \n\t" | ||||
| "punpckhdq %%mm4, %%mm1 \n\t" | "punpckhdq %%mm4, %%mm1 \n\t" | ||||
| "packsswb %%mm1, %%mm0 \n\t" | "packsswb %%mm1, %%mm0 \n\t" | ||||
| "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" | |||||
| "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" | |||||
| "movd %%mm0, (%1, %%"REG_a") \n\t" | "movd %%mm0, (%1, %%"REG_a") \n\t" | ||||
| "punpckhdq %%mm0, %%mm0 \n\t" | "punpckhdq %%mm0, %%mm0 \n\t" | ||||
| @@ -163,10 +163,6 @@ DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; | DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; | ||||
| DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; | DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; | ||||
| DECLARE_ASM_CONST(8, uint64_t, M24A)= 0x00FF0000FF0000FFULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, M24B)= 0xFF0000FF0000FF00ULL; | |||||
| DECLARE_ASM_CONST(8, uint64_t, M24C)= 0x0000FF0000FF0000ULL; | |||||
| // the volatile is required because gcc otherwise optimizes some writes away not knowing that these | // the volatile is required because gcc otherwise optimizes some writes away not knowing that these | ||||
| // are read in the asm block | // are read in the asm block | ||||
| static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither; | static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither; | ||||
| @@ -338,8 +338,8 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStr | |||||
| YUV2RGB | YUV2RGB | ||||
| /* mm0=B, %%mm2=G, %%mm1=R */ | /* mm0=B, %%mm2=G, %%mm1=R */ | ||||
| #ifdef HAVE_MMX2 | #ifdef HAVE_MMX2 | ||||
| "movq "MANGLE(M24A)", %%mm4 \n\t" | |||||
| "movq "MANGLE(M24C)", %%mm7 \n\t" | |||||
| "movq "MANGLE(ff_M24A)", %%mm4 \n\t" | |||||
| "movq "MANGLE(ff_M24C)", %%mm7 \n\t" | |||||
| "pshufw $0x50, %%mm0, %%mm5 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */ | "pshufw $0x50, %%mm0, %%mm5 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */ | ||||
| "pshufw $0x50, %%mm2, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */ | "pshufw $0x50, %%mm2, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */ | ||||
| "pshufw $0x00, %%mm1, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */ | "pshufw $0x00, %%mm1, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */ | ||||
| @@ -358,7 +358,7 @@ YUV2RGB | |||||
| "pshufw $0x55, %%mm2, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */ | "pshufw $0x55, %%mm2, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */ | ||||
| "pshufw $0xA5, %%mm1, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */ | "pshufw $0xA5, %%mm1, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */ | ||||
| "pand "MANGLE(M24B)", %%mm5 \n\t" /* B5 B4 B3 */ | |||||
| "pand "MANGLE(ff_M24B)", %%mm5 \n\t" /* B5 B4 B3 */ | |||||
| "pand %%mm7, %%mm3 \n\t" /* G4 G3 */ | "pand %%mm7, %%mm3 \n\t" /* G4 G3 */ | ||||
| "pand %%mm4, %%mm6 \n\t" /* R4 R3 R2 */ | "pand %%mm4, %%mm6 \n\t" /* R4 R3 R2 */ | ||||
| @@ -373,7 +373,7 @@ YUV2RGB | |||||
| "pand %%mm7, %%mm5 \n\t" /* B7 B6 */ | "pand %%mm7, %%mm5 \n\t" /* B7 B6 */ | ||||
| "pand %%mm4, %%mm3 \n\t" /* G7 G6 G5 */ | "pand %%mm4, %%mm3 \n\t" /* G7 G6 G5 */ | ||||
| "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */ | |||||
| "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */ | |||||
| "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ | "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ | ||||
| \ | \ | ||||
| "por %%mm5, %%mm3 \n\t" | "por %%mm5, %%mm3 \n\t" | ||||