Originally committed as revision 25910 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
tags/v0.5
| @@ -122,19 +122,6 @@ DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; | |||
| #ifdef FAST_BGR2YV12 | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff) = 0x000000210041000DULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff) = 0x00000038FFD2FFF8ULL; | |||
| #else | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff) = 0x000020E540830C8BULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff) = 0x0000ED0FDAC23831ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff) = 0x00003831D0E6F6EAULL; | |||
| #endif | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YOffset) = 0x1010101010101010ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UVOffset) = 0x8080808080808080ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, w1111) = 0x0001000100010001ULL; | |||
| #if 0 | |||
| static volatile uint64_t __attribute__((aligned(8))) b5Dither; | |||
| static volatile uint64_t __attribute__((aligned(8))) g5Dither; | |||
| @@ -2147,8 +2147,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| { | |||
| asm volatile( | |||
| "mov %2, %%"REG_a" \n\t" | |||
| "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | |||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |||
| ASMALIGN(4) | |||
| @@ -2206,7 +2206,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "psraw $7, %%mm4 \n\t" | |||
| "packuswb %%mm4, %%mm0 \n\t" | |||
| "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | |||
| "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" | |||
| MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | |||
| "add $8, %%"REG_a" \n\t" | |||
| @@ -2220,8 +2220,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| src -= srcStride*2; | |||
| asm volatile( | |||
| "mov %4, %%"REG_a" \n\t" | |||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |||
| "add %%"REG_d", %%"REG_d" \n\t" | |||
| @@ -2270,8 +2270,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "psrlw $2, %%mm0 \n\t" | |||
| "psrlw $2, %%mm2 \n\t" | |||
| #endif | |||
| "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||
| "pmaddwd %%mm0, %%mm1 \n\t" | |||
| "pmaddwd %%mm2, %%mm3 \n\t" | |||
| @@ -2328,12 +2328,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "paddw %%mm1, %%mm5 \n\t" | |||
| "paddw %%mm3, %%mm2 \n\t" | |||
| "paddw %%mm5, %%mm2 \n\t" | |||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "psrlw $2, %%mm4 \n\t" | |||
| "psrlw $2, %%mm2 \n\t" | |||
| #endif | |||
| "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||
| "pmaddwd %%mm4, %%mm1 \n\t" | |||
| "pmaddwd %%mm2, %%mm3 \n\t" | |||
| @@ -2357,7 +2357,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "punpckldq %%mm4, %%mm0 \n\t" | |||
| "punpckhdq %%mm4, %%mm1 \n\t" | |||
| "packsswb %%mm1, %%mm0 \n\t" | |||
| "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" | |||
| "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" | |||
| "movd %%mm0, (%2, %%"REG_a") \n\t" | |||
| "punpckhdq %%mm0, %%mm0 \n\t" | |||
| "movd %%mm0, (%3, %%"REG_a") \n\t" | |||
| @@ -209,22 +209,22 @@ DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL; | |||
| DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; | |||
| DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; | |||
| DECLARE_ASM_CONST(8, uint64_t, M24A)= 0x00FF0000FF0000FFLL; | |||
| DECLARE_ASM_CONST(8, uint64_t, M24B)= 0xFF0000FF0000FF00LL; | |||
| DECLARE_ASM_CONST(8, uint64_t, M24C)= 0x0000FF0000FF0000LL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; | |||
| #ifdef FAST_BGR2YV12 | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff) = 0x000000210041000DULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff) = 0x00000038FFD2FFF8ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL; | |||
| #else | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff) = 0x000020E540830C8BULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff) = 0x0000ED0FDAC23831ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff) = 0x00003831D0E6F6EAULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; | |||
| #endif /* FAST_BGR2YV12 */ | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2YOffset) = 0x1010101010101010ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, bgr2UVOffset) = 0x8080808080808080ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, w1111) = 0x0001000100010001ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; | |||
| #endif /* defined(ARCH_X86) */ | |||
| // clipping helper table for C implementations: | |||
| @@ -367,7 +367,7 @@ void in_asm_used_var_warning_killer() | |||
| { | |||
| volatile int i= bF8+bFC+w10+ | |||
| bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+ | |||
| M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101; | |||
| ff_M24A+ff_M24B+ff_M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101; | |||
| if (i) i=0; | |||
| } | |||
| #endif | |||
| @@ -855,8 +855,8 @@ | |||
| #define WRITEBGR24MMX2(dst, dstw, index) \ | |||
| /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |||
| "movq "MANGLE(M24A)", %%mm0 \n\t"\ | |||
| "movq "MANGLE(M24C)", %%mm7 \n\t"\ | |||
| "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ | |||
| "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ | |||
| "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ | |||
| "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ | |||
| "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ | |||
| @@ -875,7 +875,7 @@ | |||
| "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ | |||
| "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ | |||
| \ | |||
| "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ | |||
| "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ | |||
| "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ | |||
| "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ | |||
| \ | |||
| @@ -889,7 +889,7 @@ | |||
| \ | |||
| "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ | |||
| "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ | |||
| "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ | |||
| "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ | |||
| \ | |||
| "por %%mm1, %%mm3 \n\t"\ | |||
| "por %%mm3, %%mm6 \n\t"\ | |||
| @@ -1859,8 +1859,8 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) | |||
| #ifdef HAVE_MMX | |||
| asm volatile( | |||
| "mov %2, %%"REG_a" \n\t" | |||
| "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | |||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |||
| ASMALIGN(4) | |||
| @@ -1918,7 +1918,7 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) | |||
| "psraw $7, %%mm4 \n\t" | |||
| "packuswb %%mm4, %%mm0 \n\t" | |||
| "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | |||
| "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" | |||
| "movq %%mm0, (%1, %%"REG_a") \n\t" | |||
| "add $8, %%"REG_a" \n\t" | |||
| @@ -1944,8 +1944,8 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 | |||
| #ifdef HAVE_MMX | |||
| asm volatile( | |||
| "mov %3, %%"REG_a" \n\t" | |||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |||
| "add %%"REG_d", %%"REG_d" \n\t" | |||
| @@ -1977,8 +1977,8 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 | |||
| "psrlw $1, %%mm0 \n\t" | |||
| "psrlw $1, %%mm2 \n\t" | |||
| #endif | |||
| "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||
| "pmaddwd %%mm0, %%mm1 \n\t" | |||
| "pmaddwd %%mm2, %%mm3 \n\t" | |||
| @@ -2019,12 +2019,12 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 | |||
| "punpcklbw %%mm7, %%mm5 \n\t" | |||
| "punpcklbw %%mm7, %%mm2 \n\t" | |||
| "paddw %%mm5, %%mm2 \n\t" | |||
| "movq "MANGLE(w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "psrlw $2, %%mm4 \n\t" | |||
| "psrlw $2, %%mm2 \n\t" | |||
| #endif | |||
| "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||
| "pmaddwd %%mm4, %%mm1 \n\t" | |||
| "pmaddwd %%mm2, %%mm3 \n\t" | |||
| @@ -2048,7 +2048,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 | |||
| "punpckldq %%mm4, %%mm0 \n\t" | |||
| "punpckhdq %%mm4, %%mm1 \n\t" | |||
| "packsswb %%mm1, %%mm0 \n\t" | |||
| "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" | |||
| "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" | |||
| "movd %%mm0, (%1, %%"REG_a") \n\t" | |||
| "punpckhdq %%mm0, %%mm0 \n\t" | |||
| @@ -163,10 +163,6 @@ DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, M24A)= 0x00FF0000FF0000FFULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, M24B)= 0xFF0000FF0000FF00ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, M24C)= 0x0000FF0000FF0000ULL; | |||
| // the volatile is required because gcc otherwise optimizes some writes away not knowing that these | |||
| // are read in the asm block | |||
| static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither; | |||
| @@ -338,8 +338,8 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStr | |||
| YUV2RGB | |||
| /* mm0=B, %%mm2=G, %%mm1=R */ | |||
| #ifdef HAVE_MMX2 | |||
| "movq "MANGLE(M24A)", %%mm4 \n\t" | |||
| "movq "MANGLE(M24C)", %%mm7 \n\t" | |||
| "movq "MANGLE(ff_M24A)", %%mm4 \n\t" | |||
| "movq "MANGLE(ff_M24C)", %%mm7 \n\t" | |||
| "pshufw $0x50, %%mm0, %%mm5 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */ | |||
| "pshufw $0x50, %%mm2, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */ | |||
| "pshufw $0x00, %%mm1, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */ | |||
| @@ -358,7 +358,7 @@ YUV2RGB | |||
| "pshufw $0x55, %%mm2, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */ | |||
| "pshufw $0xA5, %%mm1, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */ | |||
| "pand "MANGLE(M24B)", %%mm5 \n\t" /* B5 B4 B3 */ | |||
| "pand "MANGLE(ff_M24B)", %%mm5 \n\t" /* B5 B4 B3 */ | |||
| "pand %%mm7, %%mm3 \n\t" /* G4 G3 */ | |||
| "pand %%mm4, %%mm6 \n\t" /* R4 R3 R2 */ | |||
| @@ -373,7 +373,7 @@ YUV2RGB | |||
| "pand %%mm7, %%mm5 \n\t" /* B7 B6 */ | |||
| "pand %%mm4, %%mm3 \n\t" /* G7 G6 G5 */ | |||
| "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */ | |||
| "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */ | |||
| "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ | |||
| \ | |||
| "por %%mm5, %%mm3 \n\t" | |||