Originally committed as revision 27190 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
tags/v0.5
| @@ -2,8 +2,8 @@ | |||
| * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> | |||
| * April 20, 2007 | |||
| * | |||
| * Blackfin Video Color Space Converters Operations | |||
| * convert I420 YV12 to RGB in various formats, | |||
| * Blackfin video color space converter operations | |||
| * convert I420 YV12 to RGB in various formats | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| @@ -24,8 +24,8 @@ | |||
| /* | |||
| YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock | |||
| and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts | |||
| YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock | |||
| and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits, packed into shorts. | |||
| The following calculation is used for the conversion: | |||
| @@ -34,36 +34,36 @@ The following calculation is used for the conversion: | |||
| g = clipz((y-oy)*cy + cgv*(v-128) + cgu*(u-128)) | |||
| b = clipz((y-oy)*cy + cbu*(u-128)) | |||
| y,u,v are pre scaled by a factor of 4 i.e. left shifted to gain precision. | |||
| y,u,v are prescaled by a factor of 4 i.e. left-shifted to gain precision. | |||
| New factorization to eliminate the truncation error which was | |||
| occuring due to the byteop3p. | |||
| occurring due to the byteop3p. | |||
| 1) use the bytop16m to subtract quad bytes we use this in U8 this | |||
| 1) Use the byteop16m to subtract quad bytes. We use this in U8, | |||
| so the offsets need to be renormalized to 8 bits. | |||
| 2) scale operands up by a factor of 4 not 8 because Blackfin | |||
| 2) Scale operands up by a factor of 4 not 8 because Blackfin | |||
| multiplies include a shift. | |||
| 3) compute into the accumulators cy*yx0, cy*yx1 | |||
| 3) Compute into the accumulators cy*yx0, cy*yx1. | |||
| 4) compute each of the linear equations | |||
| 4) Compute each of the linear equations: | |||
| r = clipz((y - oy) * cy + crv * (v - 128)) | |||
| g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128)) | |||
| b = clipz((y - oy) * cy + cbu * (u - 128)) | |||
| reuse of the accumulators requires that we actually multiply | |||
| twice once with addition and the second time with a subtaction. | |||
| Reuse of the accumulators requires that we actually multiply | |||
| twice, once with an addition and the second time with a subtraction. | |||
| because of this we need to compute the equations in the order R B | |||
| Because of this we need to compute the equations in the order R B | |||
| then G saving the writes for B in the case of 24/32 bit color | |||
| formats. | |||
| api: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, | |||
| API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, | |||
| int dW, uint32_t *coeffs); | |||
| A B | |||
| @@ -77,13 +77,13 @@ uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv; | |||
| coeffs is a pointer to oy. | |||
| the {rgb} masks are only utilized by the 565 packing algorithm. Note the data | |||
| replication is used to simplify the internal algorithms for the dual mac architecture | |||
| of BlackFin. | |||
| The {rgb} masks are only utilized by the 565 packing algorithm. Note the data | |||
| replication is used to simplify the internal algorithms for the dual-MAC | |||
| architecture of Blackfin. | |||
| All routines are exported with _ff_bfin_ as a symbol prefix | |||
| All routines are exported with _ff_bfin_ as a symbol prefix. | |||
| rough performance gain compared against -O3: | |||
| Rough performance gain compared against -O3: | |||
| 2779809/1484290 187.28% | |||
| @@ -1,10 +1,10 @@ | |||
| /* | |||
| * rgb2rgb.c, Software RGB to RGB convertor | |||
| * pluralize by Software PAL8 to RGB convertor | |||
| * Software YUV to YUV convertor | |||
| * Software YUV to RGB convertor | |||
| * Written by Nick Kurshev. | |||
| * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) | |||
| * software RGB to RGB converter | |||
| * pluralize by software PAL8 to RGB converter | |||
| * software YUV to YUV converter | |||
| * software YUV to RGB converter | |||
| * Written by Nick Kurshev. | |||
| * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| @@ -22,8 +22,8 @@ | |||
| * along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| * | |||
| * the C code (not assembly, mmx, ...) of this file can be used | |||
| * under the LGPL license too | |||
| * The C code (not assembly, MMX, ...) of this file can be used | |||
| * under the LGPL license. | |||
| */ | |||
| #include <inttypes.h> | |||
| #include "config.h" | |||
| @@ -33,7 +33,7 @@ | |||
| #include "swscale.h" | |||
| #include "swscale_internal.h" | |||
| #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit | |||
| #define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients | |||
| void (*rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size); | |||
| void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size); | |||
| @@ -149,8 +149,8 @@ static uint64_t __attribute__((aligned(8))) dither8[2]={ | |||
| #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) | |||
| #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) | |||
| //Note: we have C, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one | |||
| //Plain C versions | |||
| //Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one. | |||
| //plain C versions | |||
| #undef HAVE_MMX | |||
| #undef HAVE_MMX2 | |||
| #undef HAVE_3DNOW | |||
| @@ -190,10 +190,10 @@ static uint64_t __attribute__((aligned(8))) dither8[2]={ | |||
| #endif //ARCH_X86 || ARCH_X86_64 | |||
| /* | |||
| rgb15->rgb16 Original by Strepto/Astral | |||
| RGB15->RGB16 original by Strepto/Astral | |||
| ported to gcc & bugfixed: A'rpi | |||
| MMX2, 3DNOW optimization by Nick Kurshev | |||
| 32bit c version, and and&add trick by Michael Niedermayer | |||
| 32-bit C version, and and&add trick by Michael Niedermayer | |||
| */ | |||
| void sws_rgb2rgb_init(int flags){ | |||
| @@ -266,7 +266,7 @@ void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui | |||
| { | |||
| long i; | |||
| /* | |||
| writes 1 byte o much and might cause alignment issues on some architectures? | |||
| Writes 1 byte too much and might cause alignment issues on some architectures? | |||
| for (i=0; i<num_pixels; i++) | |||
| ((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]]; | |||
| */ | |||
| @@ -284,7 +284,7 @@ void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui | |||
| { | |||
| long i; | |||
| /* | |||
| writes 1 byte o much and might cause alignment issues on some architectures? | |||
| Writes 1 byte too much and might cause alignment issues on some architectures? | |||
| for (i=0; i<num_pixels; i++) | |||
| ((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]]; | |||
| */ | |||
| @@ -299,7 +299,7 @@ void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui | |||
| } | |||
| /** | |||
| * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette | |||
| * Palette is assumed to contain BGR16, see rgb32to16 to convert the palette. | |||
| */ | |||
| void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) | |||
| { | |||
| @@ -1,8 +1,8 @@ | |||
| /* | |||
| * rgb2rgb.h, Software RGB to RGB convertor | |||
| * pluralize by Software PAL8 to RGB convertor | |||
| * Software YUV to YUV convertor | |||
| * Software YUV to RGB convertor | |||
| * software RGB to RGB converter | |||
| * pluralize by software PAL8 to RGB converter | |||
| * software YUV to YUV converter | |||
| * software YUV to RGB converter | |||
| * Written by Nick Kurshev. | |||
| * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) | |||
| * | |||
| @@ -28,7 +28,7 @@ | |||
| #include <inttypes.h> | |||
| /* A full collection of rgb to rgb(bgr) convertors */ | |||
| /* A full collection of RGB to RGB(BGR) converters */ | |||
| extern void (*rgb24to32) (const uint8_t *src, uint8_t *dst, long src_size); | |||
| extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size); | |||
| extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size); | |||
| @@ -71,53 +71,49 @@ extern void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, c | |||
| extern void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); | |||
| /** | |||
| * | |||
| * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |||
| * problem for anyone then tell me, and ill fix it) | |||
| * chrominance data is only taken from every secound line others are ignored FIXME write HQ version | |||
| * Height should be a multiple of 2 and width should be a multiple of 16. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| * Chrominance data is only taken from every second line, others are ignored. | |||
| * FIXME: Write HQ version. | |||
| */ | |||
| //void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||
| /** | |||
| * | |||
| * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |||
| * problem for anyone then tell me, and ill fix it) | |||
| * Height should be a multiple of 2 and width should be a multiple of 16. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| */ | |||
| extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |||
| long width, long height, | |||
| long lumStride, long chromStride, long dstStride); | |||
| /** | |||
| * | |||
| * width should be a multiple of 16 | |||
| * Width should be a multiple of 16. | |||
| */ | |||
| extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |||
| long width, long height, | |||
| long lumStride, long chromStride, long dstStride); | |||
| /** | |||
| * | |||
| * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |||
| * problem for anyone then tell me, and ill fix it) | |||
| * Height should be a multiple of 2 and width should be a multiple of 16. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| */ | |||
| extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||
| long width, long height, | |||
| long lumStride, long chromStride, long srcStride); | |||
| /** | |||
| * | |||
| * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |||
| * problem for anyone then tell me, and ill fix it) | |||
| * Height should be a multiple of 2 and width should be a multiple of 16. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| */ | |||
| extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |||
| long width, long height, | |||
| long lumStride, long chromStride, long dstStride); | |||
| /** | |||
| * | |||
| * height should be a multiple of 2 and width should be a multiple of 2 (if this is a | |||
| * problem for anyone then tell me, and ill fix it) | |||
| * chrominance data is only taken from every secound line others are ignored FIXME write HQ version | |||
| * Height should be a multiple of 2 and width should be a multiple of 2. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| * Chrominance data is only taken from every second line, others are ignored. | |||
| * FIXME: Write HQ version. | |||
| */ | |||
| extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||
| long width, long height, | |||
| @@ -1,11 +1,11 @@ | |||
| /* | |||
| * rgb2rgb.c, Software RGB to RGB convertor | |||
| * pluralize by Software PAL8 to RGB convertor | |||
| * Software YUV to YUV convertor | |||
| * Software YUV to RGB convertor | |||
| * Written by Nick Kurshev. | |||
| * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) | |||
| * lot of big-endian byteorder fixes by Alex Beregszaszi | |||
| * software RGB to RGB converter | |||
| * pluralize by software PAL8 to RGB converter | |||
| * software YUV to YUV converter | |||
| * software YUV to RGB converter | |||
| * Written by Nick Kurshev. | |||
| * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) | |||
| * lot of big-endian byte order fixes by Alex Beregszaszi | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| @@ -23,7 +23,7 @@ | |||
| * along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| * | |||
| * The C code (not assembly, mmx, ...) of this file can be used | |||
| * The C code (not assembly, MMX, ...) of this file can be used | |||
| * under the LGPL license. | |||
| */ | |||
| @@ -229,10 +229,10 @@ static inline void RENAME(rgb32to24)(const uint8_t *src, uint8_t *dst, long src_ | |||
| } | |||
| /* | |||
| Original by Strepto/Astral | |||
| ported to gcc & bugfixed : A'rpi | |||
| original by Strepto/Astral | |||
| ported to gcc & bugfixed: A'rpi | |||
| MMX2, 3DNOW optimization by Nick Kurshev | |||
| 32 bit C version, and and&add trick by Michael Niedermayer | |||
| 32-bit C version, and and&add trick by Michael Niedermayer | |||
| */ | |||
| static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size) | |||
| { | |||
| @@ -926,9 +926,9 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s | |||
| ---------------- | |||
| 1 1 0 1 1 1 1 0 | |||
| |=======| |===| | |||
| | Leftmost Bits Repeated to Fill Open Bits | |||
| | leftmost bits repeated to fill open bits | |||
| | | |||
| Original Bits | |||
| original bits | |||
| */ | |||
| static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size) | |||
| { | |||
| @@ -1006,7 +1006,7 @@ static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_ | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |||
| :"memory"); | |||
| /* Borrowed 32 to 24 */ | |||
| /* borrowed 32 to 24 */ | |||
| asm volatile( | |||
| "movq %%mm0, %%mm4 \n\t" | |||
| "movq %%mm3, %%mm5 \n\t" | |||
| @@ -1147,7 +1147,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_ | |||
| :"=m"(*d) | |||
| :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |||
| :"memory"); | |||
| /* Borrowed 32 to 24 */ | |||
| /* borrowed 32 to 24 */ | |||
| asm volatile( | |||
| "movq %%mm0, %%mm4 \n\t" | |||
| "movq %%mm3, %%mm5 \n\t" | |||
| @@ -1479,7 +1479,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s | |||
| asm volatile(SFENCE:::"memory"); | |||
| asm volatile(EMMS:::"memory"); | |||
| if (mmx_size==23) return; //finihsed, was multiple of 8 | |||
| if (mmx_size==23) return; //finished, was multiple of 8 | |||
| src+= src_size; | |||
| dst+= src_size; | |||
| @@ -1638,8 +1638,8 @@ asm( EMMS" \n\t" | |||
| } | |||
| /** | |||
| * Height should be a multiple of 2 and width should be a multiple of 16 (if | |||
| * this is a problem for anyone then tell me, and I will fix it). | |||
| * Height should be a multiple of 2 and width should be a multiple of 16. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| */ | |||
| static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |||
| long width, long height, | |||
| @@ -1720,7 +1720,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u | |||
| (vc[0] << 8) + (yc[1] << 0); | |||
| #else | |||
| *idst++ = uc[0] + (yc[0] << 8) + | |||
| (vc[0] << 16) + (yc[1] << 24); | |||
| (vc[0] << 16) + (yc[1] << 24); | |||
| #endif | |||
| yc += 2; | |||
| uc++; | |||
| @@ -1744,8 +1744,8 @@ asm( EMMS" \n\t" | |||
| } | |||
| /** | |||
| * Height should be a multiple of 2 and width should be a multiple of 16 (if | |||
| * this is a problem for anyone then tell me, and I will fix it). | |||
| * Height should be a multiple of 2 and width should be a multiple of 16. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| */ | |||
| static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |||
| long width, long height, | |||
| @@ -1766,8 +1766,8 @@ static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usr | |||
| } | |||
| /** | |||
| * Height should be a multiple of 2 and width should be a multiple of 16 (if | |||
| * this is a problem for anyone then tell me, and I will fix it). | |||
| * Height should be a multiple of 2 and width should be a multiple of 16. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| */ | |||
| static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||
| long width, long height, | |||
| @@ -2002,9 +2002,9 @@ asm volatile( EMMS" \n\t" | |||
| } | |||
| /** | |||
| * Height should be a multiple of 2 and width should be a multiple of 16 (if | |||
| * this is a problem for anyone then tell me, and I will fix it). | |||
| * Chrominance data is only taken from every secound line, others are ignored. | |||
| * Height should be a multiple of 2 and width should be a multiple of 16. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| * Chrominance data is only taken from every second line, others are ignored. | |||
| * FIXME: Write HQ version. | |||
| */ | |||
| static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||
| @@ -2128,9 +2128,9 @@ asm volatile( EMMS" \n\t" | |||
| } | |||
| /** | |||
| * Height should be a multiple of 2 and width should be a multiple of 2 (if | |||
| * this is a problem for anyone then tell me, and I will fix it). | |||
| * Chrominance data is only taken from every secound line, | |||
| * Height should be a multiple of 2 and width should be a multiple of 2. | |||
| * (If this is a problem for anyone then tell me, and I will fix it.) | |||
| * Chrominance data is only taken from every second line, | |||
| * others are ignored in the C version. | |||
| * FIXME: Write HQ version. | |||
| */ | |||
| @@ -245,12 +245,12 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int | |||
| src_v = vec_mergeh(src_v, (vector signed short)vzero); | |||
| filter_v = vec_ld(i << 3, filter); | |||
| // the 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2) | |||
| // The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2). | |||
| // the neat trick : we only care for half the elements, | |||
| // The neat trick: We only care for half the elements, | |||
| // high or low depending on (i<<3)%16 (it's 0 or 8 here), | |||
| // and we're going to use vec_mule, so we chose | |||
| // carefully how to "unpack" the elements into the even slots | |||
| // and we're going to use vec_mule, so we choose | |||
| // carefully how to "unpack" the elements into the even slots. | |||
| if ((i << 3) % 16) | |||
| filter_v = vec_mergel(filter_v, (vector signed short)vzero); | |||
| else | |||
| @@ -405,12 +405,12 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int | |||
| return srcSliceH; | |||
| } | |||
| /* this code assume: | |||
| /* This code assumes: | |||
| 1) dst is 16 bytes-aligned | |||
| 2) dstStride is a multiple of 16 | |||
| 3) width is a multiple of 16 | |||
| 4) lum&chrom stride are multiple of 8 | |||
| 4) lum & chrom stride are multiples of 8 | |||
| */ | |||
| for (y=0; y<height; y++) { | |||
| @@ -482,12 +482,12 @@ static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int | |||
| return srcSliceH; | |||
| } | |||
| /* this code assume: | |||
| /* This code assumes: | |||
| 1) dst is 16 bytes-aligned | |||
| 2) dstStride is a multiple of 16 | |||
| 3) width is a multiple of 16 | |||
| 4) lum&chrom stride are multiple of 8 | |||
| 4) lum & chrom stride are multiples of 8 | |||
| */ | |||
| for (y=0; y<height; y++) { | |||
| @@ -1,7 +1,7 @@ | |||
| /* | |||
| * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> | |||
| * | |||
| * Blackfin Software Video SCALER Operations | |||
| * Blackfin software video scaler operations | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| @@ -37,7 +37,7 @@ | |||
| typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, | |||
| int srcSliceH, uint8_t* dst[], int dstStride[]); | |||
| /* this struct should be aligned on at least 32-byte boundary */ | |||
| /* This struct should be aligned on at least a 32-byte boundary. */ | |||
| typedef struct SwsContext{ | |||
| /** | |||
| * info on struct for av_log | |||
| @@ -73,7 +73,7 @@ typedef struct SwsContext{ | |||
| int16_t *vChrFilter; | |||
| int16_t *vChrFilterPos; | |||
| uint8_t formatConvBuffer[VOF]; //FIXME dynamic alloc, but we have to change a lot of code for this to be useful | |||
| uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful | |||
| int hLumFilterSize; | |||
| int hChrFilterSize; | |||
| @@ -122,7 +122,7 @@ typedef struct SwsContext{ | |||
| #define V_OFFSET "10*8" | |||
| #define LUM_MMX_FILTER_OFFSET "11*8" | |||
| #define CHR_MMX_FILTER_OFFSET "11*8+4*4*256" | |||
| #define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the asm | |||
| #define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM | |||
| #define ESP_OFFSET "11*8+4*4*256*2+8" | |||
| #define VROUNDER_OFFSET "11*8+4*4*256*2+16" | |||
| #define U_TEMP "11*8+4*4*256*2+24" | |||
| @@ -17,8 +17,8 @@ | |||
| * along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| * | |||
| * the C code (not assembly, mmx, ...) of this file can be used | |||
| * under the LGPL license too | |||
| * The C code (not assembly, MMX, ...) of this file can be used | |||
| * under the LGPL license. | |||
| */ | |||
| #undef REAL_MOVNTQ | |||
| @@ -30,7 +30,7 @@ | |||
| #undef SFENCE | |||
| #ifdef HAVE_3DNOW | |||
| /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ | |||
| /* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ | |||
| #define EMMS "femms" | |||
| #else | |||
| #define EMMS "emms" | |||
| @@ -1503,7 +1503,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t * | |||
| const int yalpha1=0; | |||
| int i; | |||
| uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 | |||
| uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 | |||
| const int yalpha= 4096; //FIXME ... | |||
| if (flags&SWS_FULL_CHR_H_INT) | |||
| @@ -1700,7 +1700,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t * | |||
| } | |||
| } | |||
| //FIXME yuy2* can read upto 7 samples to much | |||
| //FIXME yuy2* can read up to 7 samples too many | |||
| static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width) | |||
| { | |||
| @@ -2297,7 +2297,7 @@ static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, | |||
| } | |||
| } | |||
| // Bilinear / Bicubic scaling | |||
| // bilinear / bicubic scaling | |||
| static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, | |||
| int16_t *filter, int16_t *filterPos, long filterSize) | |||
| { | |||
| @@ -2544,7 +2544,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i | |||
| } | |||
| #ifdef HAVE_MMX | |||
| // use the new MMX scaler if the mmx2 can't be used (it is faster than the x86 ASM one) | |||
| // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). | |||
| if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) | |||
| #else | |||
| if (!(flags&SWS_FAST_BILINEAR)) | |||
| @@ -2552,7 +2552,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i | |||
| { | |||
| RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); | |||
| } | |||
| else // Fast Bilinear upscale / crap downscale | |||
| else // fast bilinear upscale / crap downscale | |||
| { | |||
| #if defined(ARCH_X86) | |||
| #ifdef HAVE_MMX2 | |||
| @@ -2761,7 +2761,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, | |||
| } | |||
| #ifdef HAVE_MMX | |||
| // use the new MMX scaler if the mmx2 can't be used (it is faster than the x86 ASM one) | |||
| // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). | |||
| if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) | |||
| #else | |||
| if (!(flags&SWS_FAST_BILINEAR)) | |||
| @@ -2770,7 +2770,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, | |||
| RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |||
| RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |||
| } | |||
| else // Fast Bilinear upscale / crap downscale | |||
| else // fast bilinear upscale / crap downscale | |||
| { | |||
| #if defined(ARCH_X86) | |||
| #ifdef HAVE_MMX2 | |||
| @@ -2890,8 +2890,8 @@ FUNNY_UV_CODE | |||
| "cmp %2, %%"REG_a" \n\t" | |||
| " jb 1b \n\t" | |||
| /* GCC-3.3 makes MPlayer crash on IA-32 machines when using "g" operand here, | |||
| which is needed to support GCC-4.0 */ | |||
| /* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here, | |||
| which is needed to support GCC 4.0. */ | |||
| #if defined(ARCH_X86_64) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) | |||
| :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), | |||
| #else | |||
| @@ -2963,7 +2963,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s | |||
| int lastDstY; | |||
| uint8_t *pal=NULL; | |||
| /* vars whch will change and which we need to storw back in the context */ | |||
| /* vars which will change and which we need to store back in the context */ | |||
| int dstY= c->dstY; | |||
| int lumBufIndex= c->lumBufIndex; | |||
| int chrBufIndex= c->chrBufIndex; | |||
| @@ -3004,13 +3004,14 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s | |||
| if (flags & SWS_PRINT_INFO && firstTime) | |||
| { | |||
| av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" | |||
| " ->cannot do aligned memory acesses anymore\n"); | |||
| " ->cannot do aligned memory accesses anymore\n"); | |||
| firstTime=0; | |||
| } | |||
| } | |||
| /* Note the user might start scaling the picture in the middle so this will not get executed | |||
| this is not really intended but works currently, so ppl might do it */ | |||
| /* Note the user might start scaling the picture in the middle so this | |||
| will not get executed. This is not really intended but works | |||
| currently, so people might do it. */ | |||
| if (srcSliceY ==0){ | |||
| lumBufIndex=0; | |||
| chrBufIndex=0; | |||
| @@ -3182,7 +3183,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s | |||
| { | |||
| const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |||
| if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |||
| if (vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 | |||
| if (vLumFilterSize == 1 && vChrFilterSize == 1) // unscaled YV12 | |||
| { | |||
| int16_t *lumBuf = lumPixBuf[0]; | |||
| int16_t *chrBuf= chrPixBuf[0]; | |||
| @@ -3200,13 +3201,13 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s | |||
| { | |||
| ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |||
| ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |||
| if (vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB | |||
| if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB | |||
| { | |||
| int chrAlpha= vChrFilter[2*dstY+1]; | |||
| RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), | |||
| dest, dstW, chrAlpha, dstFormat, flags, dstY); | |||
| } | |||
| else if (vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB | |||
| else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB | |||
| { | |||
| int lumAlpha= vLumFilter[2*dstY+1]; | |||
| int chrAlpha= vChrFilter[2*dstY+1]; | |||
| @@ -3217,7 +3218,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s | |||
| RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), | |||
| dest, dstW, lumAlpha, chrAlpha, dstY); | |||
| } | |||
| else //General RGB | |||
| else //general RGB | |||
| { | |||
| RENAME(yuv2packedX)(c, | |||
| vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |||
| @@ -39,7 +39,7 @@ | |||
| #include "swscale.h" | |||
| #include "swscale_internal.h" | |||
| #define DITHER1XBPP // only for mmx | |||
| #define DITHER1XBPP // only for MMX | |||
| const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={ | |||
| { 1, 3, 1, 3, 1, 3, 1, 3, }, | |||
| @@ -155,8 +155,8 @@ DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; | |||
| DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; | |||
| // the volatile is required because gcc otherwise optimizes some writes away not knowing that these | |||
| // are read in the asm block | |||
| // The volatile is required because gcc otherwise optimizes some writes away | |||
| // not knowing that these are read in the ASM block. | |||
| static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither; | |||
| static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither; | |||
| static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither; | |||
| @@ -641,7 +641,7 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c) | |||
| } | |||
| #endif | |||
| av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found\n"); | |||
| av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n"); | |||
| switch(c->dstFormat){ | |||
| case PIX_FMT_BGR32: | |||
| @@ -21,63 +21,68 @@ | |||
| */ | |||
| /* | |||
| convert I420 YV12 to RGB in various formats, | |||
| it rejects images that are not in 420 formats | |||
| it rejects images that don't have widths of multiples of 16 | |||
| it rejects images that don't have heights of multiples of 2 | |||
| reject defers to C simulation codes. | |||
| Convert I420 YV12 to RGB in various formats, | |||
| it rejects images that are not in 420 formats, | |||
| it rejects images that don't have widths of multiples of 16, | |||
| it rejects images that don't have heights of multiples of 2. | |||
| Reject defers to C simulation code. | |||
| lots of optimizations to be done here | |||
| Lots of optimizations to be done here. | |||
| 1. need to fix saturation code, I just couldn't get it to fly with packs and adds. | |||
| so we currently use max min to clip | |||
| 1. Need to fix saturation code. I just couldn't get it to fly with packs | |||
| and adds, so we currently use max/min to clip. | |||
| 2. the inefficient use of chroma loading needs a bit of brushing up | |||
| 2. The inefficient use of chroma loading needs a bit of brushing up. | |||
| 3. analysis of pipeline stalls needs to be done, use shark to identify pipeline stalls | |||
| 3. Analysis of pipeline stalls needs to be done. Use shark to identify | |||
| pipeline stalls. | |||
| MODIFIED to calculate coeffs from currently selected color space. | |||
| MODIFIED core to be a macro which you spec the output format. | |||
| ADDED UYVY conversion which is never called due to some thing in SWSCALE. | |||
| MODIFIED core to be a macro where you specify the output format. | |||
| ADDED UYVY conversion which is never called due to something in swscale. | |||
| CORRECTED algorithm selection to be strict on input formats. | |||
| ADDED runtime detection of altivec. | |||
| ADDED runtime detection of AltiVec. | |||
| ADDED altivec_yuv2packedX vertical scl + RGB converter | |||
| March 27,2004 | |||
| PERFORMANCE ANALYSIS | |||
| The C version use 25% of the processor or ~250Mips for D1 video rawvideo used as test | |||
| The ALTIVEC version uses 10% of the processor or ~100Mips for D1 video same sequence | |||
| The C version uses 25% of the processor or ~250Mips for D1 video rawvideo | |||
| used as test. | |||
| The AltiVec version uses 10% of the processor or ~100Mips for D1 video | |||
| same sequence. | |||
| 720*480*30 ~10MPS | |||
| 720 * 480 * 30 ~10MPS | |||
| so we have roughly 10clocks per pixel this is too high something has to be wrong. | |||
| so we have roughly 10 clocks per pixel. This is too high, something has | |||
| to be wrong. | |||
| OPTIMIZED clip codes to utilize vec_max and vec_packs removing the need for vec_min. | |||
| OPTIMIZED clip codes to utilize vec_max and vec_packs removing the | |||
| need for vec_min. | |||
| OPTIMIZED DST OUTPUT cache/dma controls. we are pretty much | |||
| guaranteed to have the input video frame it was just decompressed so | |||
| it probably resides in L1 caches. However we are creating the | |||
| output video stream this needs to use the DSTST instruction to | |||
| optimize for the cache. We couple this with the fact that we are | |||
| not going to be visiting the input buffer again so we mark it Least | |||
| Recently Used. This shaves 25% of the processor cycles off. | |||
| OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to have | |||
| the input video frame, it was just decompressed so it probably resides in L1 | |||
| caches. However, we are creating the output video stream. This needs to use the | |||
| DSTST instruction to optimize for the cache. We couple this with the fact that | |||
| we are not going to be visiting the input buffer again so we mark it Least | |||
| Recently Used. This shaves 25% of the processor cycles off. | |||
| Now MEMCPY is the largest mips consumer in the system, probably due | |||
| Now memcpy is the largest mips consumer in the system, probably due | |||
| to the inefficient X11 stuff. | |||
| GL libraries seem to be very slow on this machine 1.33GHz PB running | |||
| Jaguar, this is not the case for my 1GHz PB. I thought it might be | |||
| a versioning issues, however I have libGL.1.2.dylib for both | |||
| machines. ((We need to figure this out now)) | |||
| a versioning issue, however I have libGL.1.2.dylib for both | |||
| machines. (We need to figure this out now.) | |||
| GL2 libraries work now with patch for RGB32 | |||
| GL2 libraries work now with patch for RGB32. | |||
| NOTE quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor | |||
| NOTE: quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor. | |||
| Integrated luma prescaling adjustment for saturation/contrast/brightness adjustment. | |||
| Integrated luma prescaling adjustment for saturation/contrast/brightness | |||
| adjustment. | |||
| */ | |||
| #include <stdio.h> | |||
| @@ -1,9 +1,8 @@ | |||
| /* | |||
| * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> | |||
| * April 20, 2007 | |||
| * | |||
| * Blackfin Video Color Space Converters Operations | |||
| * convert I420 YV12 to RGB in various formats, | |||
| * Blackfin video color space converter operations | |||
| * convert I420 YV12 to RGB in various formats | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| @@ -200,7 +199,7 @@ SwsFunc ff_bfin_yuv2rgb_get_func_ptr (SwsContext *c) | |||
| return 0; | |||
| } | |||
| av_log(c, AV_LOG_INFO, "BlackFin Accelerated Color Space Converter %s\n", | |||
| av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", | |||
| sws_format_name (c->dstFormat)); | |||
| return f; | |||
| @@ -1,5 +1,6 @@ | |||
| /* | |||
| * yuv2rgb_mlib.c, Software YUV to RGB converter using mediaLib | |||
| * software YUV to RGB converter using mediaLib | |||
| * | |||
| * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| @@ -1,5 +1,5 @@ | |||
| /* | |||
| * yuv2rgb_mmx.c, Software YUV to RGB converter with Intel MMX "technology" | |||
| * yuv2rgb_mmx.c, software YUV to RGB converter with Intel MMX "technology" | |||
| * | |||
| * Copyright (C) 2000, Silicon Integrated System Corp. | |||
| * | |||
| @@ -31,7 +31,7 @@ | |||
| #undef SFENCE | |||
| #ifdef HAVE_3DNOW | |||
| /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ | |||
| /* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ | |||
| #define EMMS "femms" | |||
| #else | |||
| #define EMMS "emms" | |||
| @@ -147,8 +147,8 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr | |||
| g6Dither= ff_dither4[y&1]; | |||
| g5Dither= ff_dither8[y&1]; | |||
| r5Dither= ff_dither8[(y+1)&1]; | |||
| /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8 | |||
| pixels in each iteration */ | |||
| /* This MMX assembly code deals with a SINGLE scan line at a time, | |||
| * it converts 8 pixels in each iteration. */ | |||
| asm volatile ( | |||
| /* load data for start of next scan line */ | |||
| "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | |||
| @@ -156,8 +156,8 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr | |||
| "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | |||
| //".balign 16 \n\t" | |||
| "1: \n\t" | |||
| /* no speed diference on my p3@500 with prefetch, | |||
| * if it is faster for anyone with -benchmark then tell me | |||
| /* No speed difference on my p3@500 with prefetch, | |||
| * if it is faster for anyone with -benchmark then tell me. | |||
| PREFETCH" 64(%0) \n\t" | |||
| PREFETCH" 64(%1) \n\t" | |||
| PREFETCH" 64(%2) \n\t" | |||
| @@ -180,7 +180,7 @@ YUV2RGB | |||
| "movq %%mm0, %%mm5;" /* Copy B7-B0 */ | |||
| "movq %%mm2, %%mm7;" /* Copy G7-G0 */ | |||
| /* convert rgb24 plane to rgb16 pack for pixel 0-3 */ | |||
| /* convert RGB24 plane to RGB16 pack for pixel 0-3 */ | |||
| "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */ | |||
| "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ | |||
| @@ -190,7 +190,7 @@ YUV2RGB | |||
| "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | |||
| MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */ | |||
| /* convert rgb24 plane to rgb16 pack for pixel 0-3 */ | |||
| /* convert RGB24 plane to RGB16 pack for pixel 4-7 */ | |||
| "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */ | |||
| "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ | |||
| @@ -242,8 +242,8 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStr | |||
| g6Dither= ff_dither4[y&1]; | |||
| g5Dither= ff_dither8[y&1]; | |||
| r5Dither= ff_dither8[(y+1)&1]; | |||
| /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8 | |||
| pixels in each iteration */ | |||
| /* This MMX assembly code deals with a SINGLE scan line at a time, | |||
| * it converts 8 pixels in each iteration. */ | |||
| asm volatile ( | |||
| /* load data for start of next scan line */ | |||
| "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | |||
| @@ -271,7 +271,7 @@ YUV2RGB | |||
| "movq %%mm0, %%mm5;" /* Copy B7-B0 */ | |||
| "movq %%mm2, %%mm7;" /* Copy G7-G0 */ | |||
| /* convert rgb24 plane to rgb16 pack for pixel 0-3 */ | |||
| /* convert RGB24 plane to RGB15 pack for pixel 0-3 */ | |||
| "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */ | |||
| "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ | |||
| @@ -281,7 +281,7 @@ YUV2RGB | |||
| "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ | |||
| MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */ | |||
| /* convert rgb24 plane to rgb16 pack for pixel 0-3 */ | |||
| /* convert RGB24 plane to RGB15 pack for pixel 4-7 */ | |||
| "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 0_g7g6g5 g4g3_0_0 */ | |||
| "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */ | |||
| @@ -326,8 +326,8 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStr | |||
| uint8_t *pv = src[2] + (y>>1)*srcStride[2]; | |||
| long index= -h_size/2; | |||
| /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8 | |||
| pixels in each iteration */ | |||
| /* This MMX assembly code deals with a SINGLE scan line at a time, | |||
| * it converts 8 pixels in each iteration. */ | |||
| asm volatile ( | |||
| /* load data for start of next scan line */ | |||
| "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | |||
| @@ -472,8 +472,8 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStr | |||
| uint8_t *pv = src[2] + (y>>1)*srcStride[2]; | |||
| long index= -h_size/2; | |||
| /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8 | |||
| pixels in each iteration */ | |||
| /* This MMX assembly code deals with a SINGLE scan line at a time, | |||
| * it converts 8 pixels in each iteration. */ | |||
| asm volatile ( | |||
| /* load data for start of next scan line */ | |||
| "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ | |||