Originally committed as revision 28992 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
tags/v0.6
| @@ -1339,7 +1339,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_ | |||||
| static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | ||||
| { | { | ||||
| long idx = 15 - src_size; | |||||
| x86_reg idx = 15 - src_size; | |||||
| const uint8_t *s = src-idx; | const uint8_t *s = src-idx; | ||||
| uint8_t *d = dst-idx; | uint8_t *d = dst-idx; | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| @@ -1405,7 +1405,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s | |||||
| { | { | ||||
| unsigned i; | unsigned i; | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| long mmx_size= 23 - src_size; | |||||
| x86_reg mmx_size= 23 - src_size; | |||||
| __asm__ volatile ( | __asm__ volatile ( | ||||
| "test %%"REG_a", %%"REG_a" \n\t" | "test %%"REG_a", %%"REG_a" \n\t" | ||||
| "jns 2f \n\t" | "jns 2f \n\t" | ||||
| @@ -1476,7 +1476,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u | |||||
| long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | ||||
| { | { | ||||
| long y; | long y; | ||||
| const long chromWidth= width>>1; | |||||
| const x86_reg chromWidth= width>>1; | |||||
| for (y=0; y<height; y++) | for (y=0; y<height; y++) | ||||
| { | { | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| @@ -1628,7 +1628,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u | |||||
| long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | ||||
| { | { | ||||
| long y; | long y; | ||||
| const long chromWidth= width>>1; | |||||
| const x86_reg chromWidth= width>>1; | |||||
| for (y=0; y<height; y++) | for (y=0; y<height; y++) | ||||
| { | { | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| @@ -1758,7 +1758,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||||
| long lumStride, long chromStride, long srcStride) | long lumStride, long chromStride, long srcStride) | ||||
| { | { | ||||
| long y; | long y; | ||||
| const long chromWidth= width>>1; | |||||
| const x86_reg chromWidth= width>>1; | |||||
| for (y=0; y<height; y+=2) | for (y=0; y<height; y+=2) | ||||
| { | { | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| @@ -1900,7 +1900,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi | |||||
| for (y=1; y<srcHeight; y++){ | for (y=1; y<srcHeight; y++){ | ||||
| #if HAVE_MMX2 || HAVE_AMD3DNOW | #if HAVE_MMX2 || HAVE_AMD3DNOW | ||||
| const long mmxSize= srcWidth&~15; | |||||
| const x86_reg mmxSize= srcWidth&~15; | |||||
| __asm__ volatile( | __asm__ volatile( | ||||
| "mov %4, %%"REG_a" \n\t" | "mov %4, %%"REG_a" \n\t" | ||||
| "1: \n\t" | "1: \n\t" | ||||
| @@ -1944,7 +1944,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi | |||||
| ); | ); | ||||
| #else | #else | ||||
| const long mmxSize=1; | |||||
| const x86_reg mmxSize=1; | |||||
| #endif | #endif | ||||
| dst[0 ]= (3*src[0] + src[srcStride])>>2; | dst[0 ]= (3*src[0] + src[srcStride])>>2; | ||||
| dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; | dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; | ||||
| @@ -1996,7 +1996,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | |||||
| long lumStride, long chromStride, long srcStride) | long lumStride, long chromStride, long srcStride) | ||||
| { | { | ||||
| long y; | long y; | ||||
| const long chromWidth= width>>1; | |||||
| const x86_reg chromWidth= width>>1; | |||||
| for (y=0; y<height; y+=2) | for (y=0; y<height; y+=2) | ||||
| { | { | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| @@ -2123,7 +2123,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||
| long lumStride, long chromStride, long srcStride) | long lumStride, long chromStride, long srcStride) | ||||
| { | { | ||||
| long y; | long y; | ||||
| const long chromWidth= width>>1; | |||||
| const x86_reg chromWidth= width>>1; | |||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| for (y=0; y<height-2; y+=2) | for (y=0; y<height-2; y+=2) | ||||
| { | { | ||||
| @@ -2196,7 +2196,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||||
| MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | ||||
| "add $8, %%"REG_a" \n\t" | "add $8, %%"REG_a" \n\t" | ||||
| " js 1b \n\t" | " js 1b \n\t" | ||||
| : : "r" (src+width*3), "r" (ydst+width), "g" (-width) | |||||
| : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) | |||||
| : "%"REG_a, "%"REG_d | : "%"REG_a, "%"REG_d | ||||
| ); | ); | ||||
| ydst += lumStride; | ydst += lumStride; | ||||
| @@ -2440,7 +2440,7 @@ static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | |||||
| "add $16, %%"REG_a" \n\t" | "add $16, %%"REG_a" \n\t" | ||||
| "cmp %3, %%"REG_a" \n\t" | "cmp %3, %%"REG_a" \n\t" | ||||
| " jb 1b \n\t" | " jb 1b \n\t" | ||||
| ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | |||||
| ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) | |||||
| : "memory", "%"REG_a"" | : "memory", "%"REG_a"" | ||||
| ); | ); | ||||
| #else | #else | ||||
| @@ -2466,7 +2466,7 @@ static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | |||||
| "add $16, %%"REG_a" \n\t" | "add $16, %%"REG_a" \n\t" | ||||
| "cmp %3, %%"REG_a" \n\t" | "cmp %3, %%"REG_a" \n\t" | ||||
| " jb 1b \n\t" | " jb 1b \n\t" | ||||
| ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | |||||
| ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) | |||||
| : "memory", "%"REG_a | : "memory", "%"REG_a | ||||
| ); | ); | ||||
| #endif | #endif | ||||
| @@ -2501,7 +2501,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |||||
| long srcStride1, long srcStride2, | long srcStride1, long srcStride2, | ||||
| long dstStride1, long dstStride2) | long dstStride1, long dstStride2) | ||||
| { | { | ||||
| long y,x,w,h; | |||||
| x86_reg y; | |||||
| long x,w,h; | |||||
| w=width/2; h=height/2; | w=width/2; h=height/2; | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| __asm__ volatile( | __asm__ volatile( | ||||
| @@ -2604,7 +2605,8 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 | |||||
| long srcStride1, long srcStride2, | long srcStride1, long srcStride2, | ||||
| long srcStride3, long dstStride) | long srcStride3, long dstStride) | ||||
| { | { | ||||
| long y,x,w,h; | |||||
| x86_reg x; | |||||
| long y,w,h; | |||||
| w=width/2; h=height; | w=width/2; h=height; | ||||
| for (y=0;y<h;y++){ | for (y=0;y<h;y++){ | ||||
| const uint8_t* yp=src1+srcStride1*y; | const uint8_t* yp=src1+srcStride1*y; | ||||
| @@ -1466,13 +1466,13 @@ error: | |||||
| static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits) | static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits) | ||||
| { | { | ||||
| uint8_t *fragmentA; | uint8_t *fragmentA; | ||||
| long imm8OfPShufW1A; | |||||
| long imm8OfPShufW2A; | |||||
| long fragmentLengthA; | |||||
| x86_reg imm8OfPShufW1A; | |||||
| x86_reg imm8OfPShufW2A; | |||||
| x86_reg fragmentLengthA; | |||||
| uint8_t *fragmentB; | uint8_t *fragmentB; | ||||
| long imm8OfPShufW1B; | |||||
| long imm8OfPShufW2B; | |||||
| long fragmentLengthB; | |||||
| x86_reg imm8OfPShufW1B; | |||||
| x86_reg imm8OfPShufW2B; | |||||
| x86_reg fragmentLengthB; | |||||
| int fragmentPos; | int fragmentPos; | ||||
| int xpos, i; | int xpos, i; | ||||
| @@ -961,7 +961,7 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chr | |||||
| long p= uDest ? 3 : 1; | long p= uDest ? 3 : 1; | ||||
| uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; | uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; | ||||
| uint8_t *dst[3]= {dest, uDest, vDest}; | uint8_t *dst[3]= {dest, uDest, vDest}; | ||||
| long counter[3] = {dstW, chrDstW, chrDstW}; | |||||
| x86_reg counter[3] = {dstW, chrDstW, chrDstW}; | |||||
| if (c->flags & SWS_ACCURATE_RND){ | if (c->flags & SWS_ACCURATE_RND){ | ||||
| while(p--){ | while(p--){ | ||||
| @@ -1024,7 +1024,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ | |||||
| uint8_t *dest, long dstW, long dstY) | uint8_t *dest, long dstW, long dstY) | ||||
| { | { | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| long dummy=0; | |||||
| x86_reg dummy=0; | |||||
| if(!(c->flags & SWS_BITEXACT)){ | if(!(c->flags & SWS_BITEXACT)){ | ||||
| if (c->flags & SWS_ACCURATE_RND){ | if (c->flags & SWS_ACCURATE_RND){ | ||||
| switch(c->dstFormat){ | switch(c->dstFormat){ | ||||
| @@ -1515,7 +1515,7 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint3 | |||||
| "movq %%mm0, (%2, %%"REG_a") \n\t" | "movq %%mm0, (%2, %%"REG_a") \n\t" | ||||
| "add $8, %%"REG_a" \n\t" | "add $8, %%"REG_a" \n\t" | ||||
| " js 1b \n\t" | " js 1b \n\t" | ||||
| : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |||||
| : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) | |||||
| : "%"REG_a | : "%"REG_a | ||||
| ); | ); | ||||
| #else | #else | ||||
| @@ -1546,7 +1546,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, | |||||
| "movd %%mm1, (%2, %%"REG_a") \n\t" | "movd %%mm1, (%2, %%"REG_a") \n\t" | ||||
| "add $4, %%"REG_a" \n\t" | "add $4, %%"REG_a" \n\t" | ||||
| " js 1b \n\t" | " js 1b \n\t" | ||||
| : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) | |||||
| : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) | |||||
| : "%"REG_a | : "%"REG_a | ||||
| ); | ); | ||||
| #else | #else | ||||
| @@ -1576,7 +1576,7 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint3 | |||||
| "movq %%mm0, (%2, %%"REG_a") \n\t" | "movq %%mm0, (%2, %%"REG_a") \n\t" | ||||
| "add $8, %%"REG_a" \n\t" | "add $8, %%"REG_a" \n\t" | ||||
| " js 1b \n\t" | " js 1b \n\t" | ||||
| : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |||||
| : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) | |||||
| : "%"REG_a | : "%"REG_a | ||||
| ); | ); | ||||
| #else | #else | ||||
| @@ -1607,7 +1607,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, | |||||
| "movd %%mm1, (%2, %%"REG_a") \n\t" | "movd %%mm1, (%2, %%"REG_a") \n\t" | ||||
| "add $4, %%"REG_a" \n\t" | "add $4, %%"REG_a" \n\t" | ||||
| " js 1b \n\t" | " js 1b \n\t" | ||||
| : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) | |||||
| : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) | |||||
| : "%"REG_a | : "%"REG_a | ||||
| ); | ); | ||||
| #else | #else | ||||
| @@ -1731,7 +1731,7 @@ static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, | |||||
| "add $4, %%"REG_a" \n\t" | "add $4, %%"REG_a" \n\t" | ||||
| " js 1b \n\t" | " js 1b \n\t" | ||||
| : "+r" (src) | : "+r" (src) | ||||
| : "r" (dst+width), "g" (-width) | |||||
| : "r" (dst+width), "g" ((x86_reg)-width) | |||||
| : "%"REG_a | : "%"REG_a | ||||
| ); | ); | ||||
| } | } | ||||
| @@ -1789,7 +1789,7 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t * | |||||
| "add $4, %%"REG_a" \n\t" | "add $4, %%"REG_a" \n\t" | ||||
| " js 1b \n\t" | " js 1b \n\t" | ||||
| : "+r" (src) | : "+r" (src) | ||||
| : "r" (dstU+width), "r" (dstV+width), "g" (-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0]) | |||||
| : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0]) | |||||
| : "%"REG_a | : "%"REG_a | ||||
| ); | ); | ||||
| } | } | ||||
| @@ -1951,7 +1951,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW | |||||
| assert(filterSize % 4 == 0 && filterSize>0); | assert(filterSize % 4 == 0 && filterSize>0); | ||||
| if (filterSize==4) // Always true for upscaling, sometimes for down, too. | if (filterSize==4) // Always true for upscaling, sometimes for down, too. | ||||
| { | { | ||||
| long counter= -2*dstW; | |||||
| x86_reg counter= -2*dstW; | |||||
| filter-= counter*2; | filter-= counter*2; | ||||
| filterPos-= counter/2; | filterPos-= counter/2; | ||||
| dst-= counter/2; | dst-= counter/2; | ||||
| @@ -1997,7 +1997,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW | |||||
| } | } | ||||
| else if (filterSize==8) | else if (filterSize==8) | ||||
| { | { | ||||
| long counter= -2*dstW; | |||||
| x86_reg counter= -2*dstW; | |||||
| filter-= counter*4; | filter-= counter*4; | ||||
| filterPos-= counter/2; | filterPos-= counter/2; | ||||
| dst-= counter/2; | dst-= counter/2; | ||||
| @@ -2055,7 +2055,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW | |||||
| else | else | ||||
| { | { | ||||
| uint8_t *offset = src+filterSize; | uint8_t *offset = src+filterSize; | ||||
| long counter= -2*dstW; | |||||
| x86_reg counter= -2*dstW; | |||||
| //filter-= counter*filterSize/2; | //filter-= counter*filterSize/2; | ||||
| filterPos-= counter/2; | filterPos-= counter/2; | ||||
| dst-= counter/2; | dst-= counter/2; | ||||
| @@ -2098,7 +2098,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW | |||||
| : "+r" (counter), "+r" (filter) | : "+r" (counter), "+r" (filter) | ||||
| : "m" (filterPos), "m" (dst), "m"(offset), | : "m" (filterPos), "m" (dst), "m"(offset), | ||||
| "m" (src), "r" (filterSize*2) | |||||
| "m" (src), "r" ((x86_reg)filterSize*2) | |||||
| : "%"REG_a, "%"REG_c, "%"REG_d | : "%"REG_a, "%"REG_c, "%"REG_d | ||||
| ); | ); | ||||
| } | } | ||||
| @@ -2289,7 +2289,7 @@ FUNNY_Y_CODE | |||||
| else | else | ||||
| { | { | ||||
| #endif /* HAVE_MMX2 */ | #endif /* HAVE_MMX2 */ | ||||
| long xInc_shr16 = xInc >> 16; | |||||
| x86_reg xInc_shr16 = xInc >> 16; | |||||
| uint16_t xInc_mask = xInc & 0xffff; | uint16_t xInc_mask = xInc & 0xffff; | ||||
| //NO MMX just normal asm ... | //NO MMX just normal asm ... | ||||
| __asm__ volatile( | __asm__ volatile( | ||||
| @@ -2575,7 +2575,7 @@ FUNNY_UV_CODE | |||||
| else | else | ||||
| { | { | ||||
| #endif /* HAVE_MMX2 */ | #endif /* HAVE_MMX2 */ | ||||
| long xInc_shr16 = (long) (xInc >> 16); | |||||
| x86_reg xInc_shr16 = (x86_reg) (xInc >> 16); | |||||
| uint16_t xInc_mask = xInc & 0xffff; | uint16_t xInc_mask = xInc & 0xffff; | ||||
| __asm__ volatile( | __asm__ volatile( | ||||
| "xor %%"REG_a", %%"REG_a" \n\t" // i | "xor %%"REG_a", %%"REG_a" \n\t" // i | ||||
| @@ -2613,7 +2613,7 @@ FUNNY_UV_CODE | |||||
| /* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here, | /* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here, | ||||
| which is needed to support GCC 4.0. */ | which is needed to support GCC 4.0. */ | ||||
| #if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) | #if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) | ||||
| :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), | |||||
| :: "m" (src1), "m" (dst), "g" ((x86_reg)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), | |||||
| #else | #else | ||||
| :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), | :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), | ||||
| #endif | #endif | ||||
| @@ -33,6 +33,7 @@ | |||||
| #include "rgb2rgb.h" | #include "rgb2rgb.h" | ||||
| #include "swscale.h" | #include "swscale.h" | ||||
| #include "swscale_internal.h" | #include "swscale_internal.h" | ||||
| #include "libavutil/x86_cpu.h" | |||||
| #define DITHER1XBPP // only for MMX | #define DITHER1XBPP // only for MMX | ||||
| @@ -137,7 +137,7 @@ | |||||
| uint8_t *py = src[0] + y*srcStride[0]; \ | uint8_t *py = src[0] + y*srcStride[0]; \ | ||||
| uint8_t *pu = src[1] + (y>>1)*srcStride[1]; \ | uint8_t *pu = src[1] + (y>>1)*srcStride[1]; \ | ||||
| uint8_t *pv = src[2] + (y>>1)*srcStride[2]; \ | uint8_t *pv = src[2] + (y>>1)*srcStride[2]; \ | ||||
| long index= -h_size/2; \ | |||||
| x86_reg index= -h_size/2; \ | |||||
| #define YUV2RGB_INIT \ | #define YUV2RGB_INIT \ | ||||
| /* This MMX assembly code deals with a SINGLE scan line at a time, \ | /* This MMX assembly code deals with a SINGLE scan line at a time, \ | ||||