| @@ -253,7 +253,7 @@ Optimization guide for ARM11 (used in Nokia N800 Internet Tablet): | |||||
| http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211j/DDI0211J_arm1136_r1p5_trm.pdf | http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211j/DDI0211J_arm1136_r1p5_trm.pdf | ||||
| Optimization guide for Intel XScale (used in Sharp Zaurus PDA): | Optimization guide for Intel XScale (used in Sharp Zaurus PDA): | ||||
| http://download.intel.com/design/intelxscale/27347302.pdf | http://download.intel.com/design/intelxscale/27347302.pdf | ||||
| Intel Wireless MMX2 Coprocessor: Programmers Reference Manual | |||||
| Intel Wireless MMX 2 Coprocessor: Programmers Reference Manual | |||||
| http://download.intel.com/design/intelxscale/31451001.pdf | http://download.intel.com/design/intelxscale/31451001.pdf | ||||
| PowerPC-specific: | PowerPC-specific: | ||||
| @@ -58,7 +58,7 @@ Input to YUV Converter | |||||
| Horizontal scaler | Horizontal scaler | ||||
| There are several horizontal scalers. A special case worth mentioning is | There are several horizontal scalers. A special case worth mentioning is | ||||
| the fast bilinear scaler that is made of runtime-generated MMX2 code | |||||
| the fast bilinear scaler that is made of runtime-generated MMXEXT code | |||||
| using specially tuned pshufw instructions. | using specially tuned pshufw instructions. | ||||
| The remaining scalers are specially-tuned for various filter lengths. | The remaining scalers are specially-tuned for various filter lengths. | ||||
| They scale 8-bit unsigned planar data to 16-bit signed planar data. | They scale 8-bit unsigned planar data to 16-bit signed planar data. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /* | /* | ||||
| * DSP utils : average functions are compiled twice for 3dnow/mmx2 | |||||
| * DSP utils : average functions are compiled twice for 3dnow/mmxext | |||||
| * Copyright (c) 2000, 2001 Fabrice Bellard | * Copyright (c) 2000, 2001 Fabrice Bellard | ||||
| * Copyright (c) 2002-2004 Michael Niedermayer | * Copyright (c) 2002-2004 Michael Niedermayer | ||||
| * | * | ||||
| @@ -205,11 +205,11 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; | |||||
| #undef OP_AVG | #undef OP_AVG | ||||
| /***********************************/ | /***********************************/ | ||||
| /* MMX2 specific */ | |||||
| /* MMXEXT specific */ | |||||
| #define DEF(x) x ## _mmx2 | #define DEF(x) x ## _mmx2 | ||||
| /* Introduced only in MMX2 set */ | |||||
| /* Introduced only in MMXEXT set */ | |||||
| #define PAVGB "pavgb" | #define PAVGB "pavgb" | ||||
| #define OP_AVG PAVGB | #define OP_AVG PAVGB | ||||
| @@ -122,7 +122,7 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||||
| /* | /* | ||||
| * RGB15->RGB16 original by Strepto/Astral | * RGB15->RGB16 original by Strepto/Astral | ||||
| * ported to gcc & bugfixed : A'rpi | * ported to gcc & bugfixed : A'rpi | ||||
| * MMX2, 3DNOW optimization by Nick Kurshev | |||||
| * MMXEXT, 3DNOW optimization by Nick Kurshev | |||||
| * 32-bit C version, and and&add trick by Michael Niedermayer | * 32-bit C version, and and&add trick by Michael Niedermayer | ||||
| */ | */ | ||||
| @@ -77,7 +77,7 @@ static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, | |||||
| /* | /* | ||||
| * original by Strepto/Astral | * original by Strepto/Astral | ||||
| * ported to gcc & bugfixed: A'rpi | * ported to gcc & bugfixed: A'rpi | ||||
| * MMX2, 3DNOW optimization by Nick Kurshev | |||||
| * MMXEXT, 3DNOW optimization by Nick Kurshev | |||||
| * 32-bit C version, and and&add trick by Michael Niedermayer | * 32-bit C version, and and&add trick by Michael Niedermayer | ||||
| */ | */ | ||||
| static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size) | static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size) | ||||
| @@ -307,10 +307,10 @@ typedef struct SwsContext { | |||||
| int vChrFilterSize; ///< Vertical filter size for chroma pixels. | int vChrFilterSize; ///< Vertical filter size for chroma pixels. | ||||
| //@} | //@} | ||||
| int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes. | |||||
| int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes. | |||||
| uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes. | |||||
| uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes. | |||||
| int lumMmx2FilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes. | |||||
| int chrMmx2FilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes. | |||||
| uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes. | |||||
| uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes. | |||||
| int canMMX2BeUsed; | int canMMX2BeUsed; | ||||
| @@ -616,7 +616,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, | |||||
| int xpos, i; | int xpos, i; | ||||
| // create an optimized horizontal scaling routine | // create an optimized horizontal scaling routine | ||||
| /* This scaler is made of runtime-generated MMX2 code using specially tuned | |||||
| /* This scaler is made of runtime-generated MMXEXT code using specially tuned | |||||
| * pshufw instructions. For every four output pixels, if four input pixels | * pshufw instructions. For every four output pixels, if four input pixels | ||||
| * are enough for the fast bilinear scaling, then a chunk of fragmentB is | * are enough for the fast bilinear scaling, then a chunk of fragmentB is | ||||
| * used. If five input pixels are needed, then a chunk of fragmentA is used. | * used. If five input pixels are needed, then a chunk of fragmentA is used. | ||||
| @@ -1007,7 +1007,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | |||||
| && (flags & SWS_FAST_BILINEAR)) { | && (flags & SWS_FAST_BILINEAR)) { | ||||
| if (flags & SWS_PRINT_INFO) | if (flags & SWS_PRINT_INFO) | ||||
| av_log(c, AV_LOG_INFO, | av_log(c, AV_LOG_INFO, | ||||
| "output width is not a multiple of 32 -> no MMX2 scaler\n"); | |||||
| "output width is not a multiple of 32 -> no MMXEXT scaler\n"); | |||||
| } | } | ||||
| if (usesHFilter) | if (usesHFilter) | ||||
| c->canMMX2BeUsed = 0; | c->canMMX2BeUsed = 0; | ||||
| @@ -1237,7 +1237,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | |||||
| sws_format_name(dstFormat)); | sws_format_name(dstFormat)); | ||||
| if (INLINE_MMXEXT(cpu_flags)) | if (INLINE_MMXEXT(cpu_flags)) | ||||
| av_log(c, AV_LOG_INFO, "using MMX2\n"); | |||||
| av_log(c, AV_LOG_INFO, "using MMXEXT\n"); | |||||
| else if (INLINE_AMD3DNOW(cpu_flags)) | else if (INLINE_AMD3DNOW(cpu_flags)) | ||||
| av_log(c, AV_LOG_INFO, "using 3DNOW\n"); | av_log(c, AV_LOG_INFO, "using 3DNOW\n"); | ||||
| else if (INLINE_MMX(cpu_flags)) | else if (INLINE_MMX(cpu_flags)) | ||||
| @@ -218,10 +218,10 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset | |||||
| %else ; %1 == 9/10 | %else ; %1 == 9/10 | ||||
| %if cpuflag(sse4) | %if cpuflag(sse4) | ||||
| packusdw m2, m1 | packusdw m2, m1 | ||||
| %else ; mmx2/sse2 | |||||
| %else ; mmxext/sse2 | |||||
| packssdw m2, m1 | packssdw m2, m1 | ||||
| pmaxsw m2, m6 | pmaxsw m2, m6 | ||||
| %endif ; mmx2/sse2/sse4/avx | |||||
| %endif ; mmxext/sse2/sse4/avx | |||||
| pminsw m2, [yuv2yuvX_%1_upper] | pminsw m2, [yuv2yuvX_%1_upper] | ||||
| %endif ; %1 == 9/10/16 | %endif ; %1 == 9/10/16 | ||||
| mova [dstq+r5*2], m2 | mova [dstq+r5*2], m2 | ||||
| @@ -84,7 +84,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; | |||||
| #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) | #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) | ||||
| #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) | #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) | ||||
| //Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one. | |||||
| // Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one. | |||||
| #define COMPILE_TEMPLATE_MMXEXT 0 | #define COMPILE_TEMPLATE_MMXEXT 0 | ||||
| #define COMPILE_TEMPLATE_AMD3DNOW 0 | #define COMPILE_TEMPLATE_AMD3DNOW 0 | ||||
| @@ -95,7 +95,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; | |||||
| #define RENAME(a) a ## _MMX | #define RENAME(a) a ## _MMX | ||||
| #include "rgb2rgb_template.c" | #include "rgb2rgb_template.c" | ||||
| //MMX2 versions | |||||
| // MMXEXT versions | |||||
| #undef RENAME | #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | #undef COMPILE_TEMPLATE_MMXEXT | ||||
| #define COMPILE_TEMPLATE_MMXEXT 1 | #define COMPILE_TEMPLATE_MMXEXT 1 | ||||
| @@ -123,7 +123,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; | |||||
| /* | /* | ||||
| RGB15->RGB16 original by Strepto/Astral | RGB15->RGB16 original by Strepto/Astral | ||||
| ported to gcc & bugfixed : A'rpi | ported to gcc & bugfixed : A'rpi | ||||
| MMX2, 3DNOW optimization by Nick Kurshev | |||||
| MMXEXT, 3DNOW optimization by Nick Kurshev | |||||
| 32-bit C version, and and&add trick by Michael Niedermayer | 32-bit C version, and and&add trick by Michael Niedermayer | ||||
| */ | */ | ||||
| @@ -181,7 +181,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr | |||||
| /* | /* | ||||
| original by Strepto/Astral | original by Strepto/Astral | ||||
| ported to gcc & bugfixed: A'rpi | ported to gcc & bugfixed: A'rpi | ||||
| MMX2, 3DNOW optimization by Nick Kurshev | |||||
| MMXEXT, 3DNOW optimization by Nick Kurshev | |||||
| 32-bit C version, and and&add trick by Michael Niedermayer | 32-bit C version, and and&add trick by Michael Niedermayer | ||||
| */ | */ | ||||
| static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size) | static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size) | ||||
| @@ -78,7 +78,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; | |||||
| #include "swscale_template.c" | #include "swscale_template.c" | ||||
| #endif | #endif | ||||
| //MMX2 versions | |||||
| // MMXEXT versions | |||||
| #if HAVE_MMXEXT_INLINE | #if HAVE_MMXEXT_INLINE | ||||
| #undef RENAME | #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | #undef COMPILE_TEMPLATE_MMXEXT | ||||
| @@ -1615,7 +1615,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) | |||||
| } | } | ||||
| if (c->srcBpc == 8 && c->dstBpc <= 10) { | if (c->srcBpc == 8 && c->dstBpc <= 10) { | ||||
| // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). | |||||
| // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one). | |||||
| #if COMPILE_TEMPLATE_MMXEXT | #if COMPILE_TEMPLATE_MMXEXT | ||||
| if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) | if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) | ||||
| { | { | ||||
| @@ -3,7 +3,7 @@ | |||||
| * | * | ||||
| * Copyright (C) 2009 Konstantin Shishkov | * Copyright (C) 2009 Konstantin Shishkov | ||||
| * | * | ||||
| * MMX/MMX2 template stuff (needed for fast movntq support), | |||||
| * MMX/MMXEXT template stuff (needed for fast movntq support), | |||||
| * 1,4,8bpp support and context / deglobalize stuff | * 1,4,8bpp support and context / deglobalize stuff | ||||
| * by Michael Niedermayer (michaelni@gmx.at) | * by Michael Niedermayer (michaelni@gmx.at) | ||||
| * | * | ||||
| @@ -58,7 +58,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; | |||||
| #include "yuv2rgb_template.c" | #include "yuv2rgb_template.c" | ||||
| #endif /* HAVE_MMX_INLINE */ | #endif /* HAVE_MMX_INLINE */ | ||||
| //MMX2 versions | |||||
| // MMXEXT versions | |||||
| #if HAVE_MMXEXT_INLINE | #if HAVE_MMXEXT_INLINE | ||||
| #undef RENAME | #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | #undef COMPILE_TEMPLATE_MMXEXT | ||||