Originally committed as revision 3144 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc (tags/v0.5)
@@ -6,10 +6,6 @@ LIBNAME = libpostproc.a | |||||
SRCS=postprocess.c swscale.c rgb2rgb.c yuv2rgb.c | SRCS=postprocess.c swscale.c rgb2rgb.c yuv2rgb.c | ||||
OBJS=$(SRCS:.c=.o) | OBJS=$(SRCS:.c=.o) | ||||
ifeq ($(TARGET_ARCH_X86),yes) | |||||
SRCS += yuv2rgb_mmx.c | |||||
endif | |||||
CFLAGS = $(OPTFLAGS) $(MLIB_INC) -I. -I.. -Wall $(EXTRA_INC) | CFLAGS = $(OPTFLAGS) $(MLIB_INC) -I. -I.. -Wall $(EXTRA_INC) | ||||
# -I/usr/X11R6/include/ | # -I/usr/X11R6/include/ | ||||
@@ -25,6 +25,7 @@ | |||||
* along with GNU Make; see the file COPYING. If not, write to | * along with GNU Make; see the file COPYING. If not, write to | ||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||||
* | * | ||||
* MMX/MMX2 Template stuff from Michael Niedermayer (michaelni@gmx.at) (needed for fast movntq support) | |||||
*/ | */ | ||||
#include <stdio.h> | #include <stdio.h> | ||||
@@ -34,12 +35,77 @@ | |||||
#include "config.h" | #include "config.h" | ||||
//#include "video_out.h" | //#include "video_out.h" | ||||
#include "rgb2rgb.h" | #include "rgb2rgb.h" | ||||
#include "../cpudetect.h" | |||||
#ifdef HAVE_MLIB | #ifdef HAVE_MLIB | ||||
#include "yuv2rgb_mlib.c" | #include "yuv2rgb_mlib.c" | ||||
#endif | #endif | ||||
extern yuv2rgb_fun yuv2rgb_init_mmx (int bpp, int mode); | |||||
#define DITHER1XBPP // only for mmx | |||||
#ifdef ARCH_X86 | |||||
#define CAN_COMPILE_X86_ASM | |||||
#endif | |||||
#ifdef CAN_COMPILE_X86_ASM | |||||
/* hope these constant values are cache line aligned */ | |||||
uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080; | |||||
uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010; | |||||
uint64_t __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ff; | |||||
uint64_t __attribute__((aligned(8))) mmx_Y_coeff = 0x253f253f253f253f; | |||||
/* hope these constant values are cache line aligned */ | |||||
uint64_t __attribute__((aligned(8))) mmx_U_green = 0xf37df37df37df37d; | |||||
uint64_t __attribute__((aligned(8))) mmx_U_blue = 0x4093409340934093; | |||||
uint64_t __attribute__((aligned(8))) mmx_V_red = 0x3312331233123312; | |||||
uint64_t __attribute__((aligned(8))) mmx_V_green = 0xe5fce5fce5fce5fc; | |||||
/* hope these constant values are cache line aligned */ | |||||
uint64_t __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8; | |||||
uint64_t __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfc; | |||||
uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; | |||||
uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; | |||||
uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; | |||||
// the volatile is required because gcc otherwise optimizes some writes away not knowing that these | |||||
// are read in the asm block | |||||
volatile uint64_t __attribute__((aligned(8))) b5Dither; | |||||
volatile uint64_t __attribute__((aligned(8))) g5Dither; | |||||
volatile uint64_t __attribute__((aligned(8))) g6Dither; | |||||
volatile uint64_t __attribute__((aligned(8))) r5Dither; | |||||
uint64_t __attribute__((aligned(8))) dither4[2]={ | |||||
0x0103010301030103LL, | |||||
0x0200020002000200LL,}; | |||||
uint64_t __attribute__((aligned(8))) dither8[2]={ | |||||
0x0602060206020602LL, | |||||
0x0004000400040004LL,}; | |||||
#undef HAVE_MMX | |||||
#undef ARCH_X86 | |||||
//MMX versions | |||||
#undef RENAME | |||||
#define HAVE_MMX | |||||
#undef HAVE_MMX2 | |||||
#undef HAVE_3DNOW | |||||
#define ARCH_X86 | |||||
#define RENAME(a) a ## _MMX | |||||
#include "yuv2rgb_template.c" | |||||
//MMX2 versions | |||||
#undef RENAME | |||||
#define HAVE_MMX | |||||
#define HAVE_MMX2 | |||||
#undef HAVE_3DNOW | |||||
#define ARCH_X86 | |||||
#define RENAME(a) a ## _MMX2 | |||||
#include "yuv2rgb_template.c" | |||||
#endif // CAN_COMPILE_X86_ASM | |||||
uint32_t matrix_coefficients = 6; | uint32_t matrix_coefficients = 6; | ||||
@@ -63,10 +129,10 @@ static void (* yuv2rgb_c_internal) (uint8_t *, uint8_t *, | |||||
uint8_t *, uint8_t *, | uint8_t *, uint8_t *, | ||||
void *, void *, int); | void *, void *, int); | ||||
static void yuv2rgb_c (void * dst, uint8_t * py, | |||||
uint8_t * pu, uint8_t * pv, | |||||
int h_size, int v_size, | |||||
int rgb_stride, int y_stride, int uv_stride) | |||||
static void yuv2rgb_c (void * dst, uint8_t * py, | |||||
uint8_t * pu, uint8_t * pv, | |||||
int h_size, int v_size, | |||||
int rgb_stride, int y_stride, int uv_stride) | |||||
{ | { | ||||
v_size >>= 1; | v_size >>= 1; | ||||
@@ -81,16 +147,29 @@ static void yuv2rgb_c (void * dst, uint8_t * py, | |||||
} | } | ||||
} | } | ||||
void yuv2rgb_init (int bpp, int mode) | |||||
void yuv2rgb_init (int bpp, int mode) | |||||
{ | { | ||||
yuv2rgb = NULL; | yuv2rgb = NULL; | ||||
#ifdef HAVE_MMX | |||||
if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) { | |||||
yuv2rgb = yuv2rgb_init_mmx (bpp, mode); | |||||
if (yuv2rgb != NULL) | |||||
printf ("Using MMX for colorspace transform\n"); | |||||
else | |||||
printf ("Cannot init MMX colorspace transform\n"); | |||||
#ifdef CAN_COMPILE_X86_ASM | |||||
if(gCpuCaps.hasMMX2) | |||||
{ | |||||
if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) { | |||||
yuv2rgb = yuv2rgb_init_MMX2 (bpp, mode); | |||||
if (yuv2rgb != NULL) | |||||
printf ("Using MMX2 for colorspace transform\n"); | |||||
else | |||||
printf ("Cannot init MMX2 colorspace transform\n"); | |||||
} | |||||
} | |||||
else if(gCpuCaps.hasMMX) | |||||
{ | |||||
if (yuv2rgb == NULL /*&& (config.flags & VO_MMX_ENABLE)*/) { | |||||
yuv2rgb = yuv2rgb_init_MMX (bpp, mode); | |||||
if (yuv2rgb != NULL) | |||||
printf ("Using MMX for colorspace transform\n"); | |||||
else | |||||
printf ("Cannot init MMX colorspace transform\n"); | |||||
} | |||||
} | } | ||||
#endif | #endif | ||||
#ifdef HAVE_MLIB | #ifdef HAVE_MLIB | ||||
@@ -24,58 +24,27 @@ | |||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||||
* | * | ||||
* 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at) | * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at) | ||||
* MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support) | |||||
*/ | */ | ||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#include "../config.h" | |||||
//#include "libmpeg2/mpeg2.h" | |||||
//#include "libmpeg2/mpeg2_internal.h" | |||||
#include <inttypes.h> | |||||
#include "rgb2rgb.h" | |||||
#include "../mmx_defs.h" | |||||
#define DITHER1XBPP | |||||
/* hope these constant values are cache line aligned */ | |||||
uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080; | |||||
uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010; | |||||
uint64_t __attribute__((aligned(8))) mmx_00ffw = 0x00ff00ff00ff00ff; | |||||
uint64_t __attribute__((aligned(8))) mmx_Y_coeff = 0x253f253f253f253f; | |||||
/* hope these constant values are cache line aligned */ | |||||
uint64_t __attribute__((aligned(8))) mmx_U_green = 0xf37df37df37df37d; | |||||
uint64_t __attribute__((aligned(8))) mmx_U_blue = 0x4093409340934093; | |||||
uint64_t __attribute__((aligned(8))) mmx_V_red = 0x3312331233123312; | |||||
uint64_t __attribute__((aligned(8))) mmx_V_green = 0xe5fce5fce5fce5fc; | |||||
/* hope these constant values are cache line aligned */ | |||||
uint64_t __attribute__((aligned(8))) mmx_redmask = 0xf8f8f8f8f8f8f8f8; | |||||
uint64_t __attribute__((aligned(8))) mmx_grnmask = 0xfcfcfcfcfcfcfcfc; | |||||
uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; | |||||
uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; | |||||
uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; | |||||
// the volatile is required because gcc otherwise optimizes some writes away not knowing that these | |||||
// are read in the asm block | |||||
volatile uint64_t __attribute__((aligned(8))) b5Dither; | |||||
volatile uint64_t __attribute__((aligned(8))) g5Dither; | |||||
volatile uint64_t __attribute__((aligned(8))) g6Dither; | |||||
volatile uint64_t __attribute__((aligned(8))) r5Dither; | |||||
uint64_t __attribute__((aligned(8))) dither4[2]={ | |||||
0x0103010301030103LL, | |||||
0x0200020002000200LL,}; | |||||
uint64_t __attribute__((aligned(8))) dither8[2]={ | |||||
0x0602060206020602LL, | |||||
0x0004000400040004LL,}; | |||||
#undef MOVNTQ | |||||
#undef EMMS | |||||
#undef SFENCE | |||||
#ifdef HAVE_3DNOW | |||||
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ | |||||
#define EMMS "femms" | |||||
#else | |||||
#define EMMS "emms" | |||||
#endif | |||||
#ifdef HAVE_MMX2 | |||||
#define MOVNTQ "movntq" | |||||
#define SFENCE "sfence" | |||||
#else | |||||
#define MOVNTQ "movq" | |||||
#define SFENCE "/nop" | |||||
#endif | |||||
#define YUV2RGB \ | #define YUV2RGB \ | ||||
/* Do the multiply part of the conversion for even and odd pixels, | /* Do the multiply part of the conversion for even and odd pixels, | ||||
@@ -152,7 +121,7 @@ uint64_t __attribute__((aligned(8))) dither8[2]={ | |||||
"punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\ | "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\ | ||||
static void yuv420_rgb16_mmx (uint8_t * image, uint8_t * py, | |||||
static inline void RENAME(yuv420_rgb16) (uint8_t * image, uint8_t * py, | |||||
uint8_t * pu, uint8_t * pv, | uint8_t * pu, uint8_t * pv, | ||||
int h_size, int v_size, | int h_size, int v_size, | ||||
int rgb_stride, int y_stride, int uv_stride) | int rgb_stride, int y_stride, int uv_stride) | ||||
@@ -253,7 +222,7 @@ YUV2RGB | |||||
__asm__ __volatile__ (EMMS); | __asm__ __volatile__ (EMMS); | ||||
} | } | ||||
static void yuv420_rgb15_mmx (uint8_t * image, uint8_t * py, | |||||
static inline void RENAME(yuv420_rgb15) (uint8_t * image, uint8_t * py, | |||||
uint8_t * pu, uint8_t * pv, | uint8_t * pu, uint8_t * pv, | ||||
int h_size, int v_size, | int h_size, int v_size, | ||||
int rgb_stride, int y_stride, int uv_stride) | int rgb_stride, int y_stride, int uv_stride) | ||||
@@ -350,7 +319,7 @@ YUV2RGB | |||||
__asm__ __volatile__ (EMMS); | __asm__ __volatile__ (EMMS); | ||||
} | } | ||||
static void yuv420_rgb24_mmx (uint8_t * image, uint8_t * py, | |||||
static inline void RENAME(yuv420_rgb24) (uint8_t * image, uint8_t * py, | |||||
uint8_t * pu, uint8_t * pv, | uint8_t * pu, uint8_t * pv, | ||||
int h_size, int v_size, | int h_size, int v_size, | ||||
int rgb_stride, int y_stride, int uv_stride) | int rgb_stride, int y_stride, int uv_stride) | ||||
@@ -505,7 +474,7 @@ YUV2RGB | |||||
} | } | ||||
static void yuv420_argb32_mmx (uint8_t * image, uint8_t * py, | |||||
static inline void RENAME(yuv420_argb32) (uint8_t * image, uint8_t * py, | |||||
uint8_t * pu, uint8_t * pv, | uint8_t * pu, uint8_t * pv, | ||||
int h_size, int v_size, | int h_size, int v_size, | ||||
int rgb_stride, int y_stride, int uv_stride) | int rgb_stride, int y_stride, int uv_stride) | ||||
@@ -599,12 +568,12 @@ YUV2RGB | |||||
__asm__ __volatile__ (EMMS); | __asm__ __volatile__ (EMMS); | ||||
} | } | ||||
yuv2rgb_fun yuv2rgb_init_mmx (int bpp, int mode) | |||||
yuv2rgb_fun RENAME(yuv2rgb_init) (int bpp, int mode) | |||||
{ | { | ||||
if (bpp == 15 && mode == MODE_RGB) return yuv420_rgb15_mmx; | |||||
if (bpp == 16 && mode == MODE_RGB) return yuv420_rgb16_mmx; | |||||
if (bpp == 24 && mode == MODE_RGB) return yuv420_rgb24_mmx; | |||||
if (bpp == 32 && mode == MODE_RGB) return yuv420_argb32_mmx; | |||||
if (bpp == 15 && mode == MODE_RGB) return RENAME(yuv420_rgb15); | |||||
if (bpp == 16 && mode == MODE_RGB) return RENAME(yuv420_rgb16); | |||||
if (bpp == 24 && mode == MODE_RGB) return RENAME(yuv420_rgb24); | |||||
if (bpp == 32 && mode == MODE_RGB) return RENAME(yuv420_argb32); | |||||
return NULL; // Fallback to C. | return NULL; // Fallback to C. | ||||
} | } | ||||