* qatar/master:
  lavr: fix handling of custom mix matrices
  fate: force pix_fmt in lagarith-rgb32 test
  fate: add tests for lagarith lossless video codec.
  ARMv6: vp8: fix stack allocation with Apple's assembler
  ARM: vp56: allow inline asm to build with clang
  fft: 3dnow: fix register name typo in DECL_IMDCT macro
  x86: dct32: port to cpuflags
  x86: build: replace mmx2 by mmxext
  Revert "wmapro: prevent division by zero when sample rate is unspecified"
  wmapro: prevent division by zero when sample rate is unspecified
  lagarith: fix color plane inversion for YUY2 output.
  lagarith: pad RGB buffer by 1 byte.
  dsputil: make add_hfyu_left_prediction_sse4() support unaligned src.

Conflicts:
	doc/APIchanges
	libavcodec/lagarith.c
	libavfilter/x86/gradfun.c
	libavutil/cpu.h
	libavutil/version.h
	libswscale/utils.c
	libswscale/version.h
	libswscale/x86/yuv2rgb.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
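Note: most hunks below are the mechanical mmx2 -> mmxext rename from qatar/master. The flag bit itself is unchanged: AV_CPU_FLAG_MMX2 is kept as an alias of the new AV_CPU_FLAG_MMXEXT (both 0x0002, see the libavutil/cpu.h hunk), and configure emits "#define HAVE_MMX2 HAVE_MMXEXT" for the same reason. A minimal sketch, not part of this merge, of how a caller probes the renamed flag at runtime:

#include <stdio.h>
#include "libavutil/cpu.h"

int main(void)
{
    int flags = av_get_cpu_flags();

    /* same bit as the old AV_CPU_FLAG_MMX2; either name works after the rename */
    if (flags & AV_CPU_FLAG_MMXEXT)
        printf("MMXEXT (SSE integer functions / AMD MMX ext) available\n");
    return 0;
}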
@@ -1378,7 +1378,7 @@ PREDEFINED             = "__attribute__(x)=" \
                          "DEF(x)=x ## _TMPL" \
                          HAVE_AV_CONFIG_H \
                          HAVE_MMX \
-                         HAVE_MMX2 \
+                         HAVE_MMXEXT \
                          HAVE_AMD3DNOW \
                          "DECLARE_ALIGNED(a,t,n)=t n" \
                          "offsetof(x,y)=0x42"
@@ -267,7 +267,7 @@ Optimization options (experts only):
   --disable-amd3dnow       disable 3DNow! optimizations
   --disable-amd3dnowext    disable 3DNow! extended optimizations
   --disable-mmx            disable MMX optimizations
-  --disable-mmx2           disable MMX2 optimizations
+  --disable-mmxext         disable MMXEXT optimizations
   --disable-sse            disable SSE optimizations
   --disable-ssse3          disable SSSE3 optimizations
   --disable-avx            disable AVX optimizations
@@ -1182,7 +1182,7 @@ ARCH_EXT_LIST='
     fma4
     mmi
     mmx
-    mmx2
+    mmxext
     neon
     ppc4xx
     sse
@@ -1459,7 +1459,7 @@ x86_64_suggest="cmov fast_cmov"
 amd3dnow_deps="mmx"
 amd3dnowext_deps="amd3dnow"
 mmx_deps="x86"
-mmx2_deps="mmx"
+mmxext_deps="mmx"
 sse_deps="mmx"
 ssse3_deps="sse"
 avx_deps="ssse3"
@@ -3194,9 +3194,9 @@ EOF
 # check whether xmm clobbers are supported
 check_asm xmm_clobbers '"":::"%xmm0"'

-# check whether binutils is new enough to compile SSSE3/MMX2
+# check whether binutils is new enough to compile SSSE3/MMXEXT
 enabled ssse3 && check_asm ssse3 '"pabsw %xmm0, %xmm0"'
-enabled mmx2  && check_asm mmx2  '"pmaxub %mm0, %mm1"'
+enabled mmxext && check_asm mmxext '"pmaxub %mm0, %mm1"'

 if ! disabled_any asm mmx yasm; then
     if check_cmd $yasmexe --version; then
@@ -3748,7 +3748,7 @@ echo "runtime cpu detection     ${runtime_cpudetect-no}"
 if enabled x86; then
     echo "${yasmexe}                ${yasm-no}"
     echo "MMX enabled               ${mmx-no}"
-    echo "MMX2 enabled              ${mmx2-no}"
+    echo "MMXEXT enabled            ${mmxext-no}"
     echo "3DNow! enabled            ${amd3dnow-no}"
     echo "3DNow! extended enabled   ${amd3dnowext-no}"
     echo "SSE enabled               ${sse-no}"
@@ -4019,6 +4019,7 @@ cat > $TMPH <<EOF
 #define EXTERN_PREFIX "${extern_prefix}"
 #define EXTERN_ASM ${extern_prefix}
 #define SLIBSUF "$SLIBSUF"
+#define HAVE_MMX2 HAVE_MMXEXT
 EOF

 test -n "$assert_level" &&
@@ -70,6 +70,11 @@ API changes, most recent first:
 2012-03-26 - a67d9cf - lavfi 2.66.100
   Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions.

+2012-08-03 - xxxxxxx - lavu 51.37.1 - cpu.h
+                       lsws 2.1.1   - swscale.h
+  Rename AV_CPU_FLAG_MMX2  ---> AV_CPU_FLAG_MMXEXT.
+  Rename SWS_CPU_CAPS_MMX2 ---> SWS_CPU_CAPS_MMXEXT.
+
 2012-07-xx - xxxxxxx - lavf 54.13.0 - avformat.h
   Add AVFMT_FLAG_NOBUFFER for low latency use cases.
@@ -29,6 +29,14 @@
 #   define T(x)
 #endif

+#if CONFIG_THUMB || defined __clang__
+#   define L(x)
+#   define U(x) x
+#else
+#   define L(x) x
+#   define U(x)
+#endif
+
 #if HAVE_ARMV6 && HAVE_INLINE_ASM

 #define vp56_rac_get_prob vp56_rac_get_prob_armv6
@@ -42,8 +50,8 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
     __asm__ ("adds    %3,  %3,  %0           \n"
              "itt     cs                     \n"
              "cmpcs   %7,  %4                \n"
-           A("ldrcsh  %2,  [%4], #2          \n")
-           T("ldrhcs  %2,  [%4], #2          \n")
+           L("ldrcsh  %2,  [%4], #2          \n")
+           U("ldrhcs  %2,  [%4], #2          \n")
              "rsb     %0,  %6,  #256         \n"
              "smlabb  %0,  %5,  %6,  %0      \n"
            T("itttt   cs                     \n")
@@ -80,8 +88,8 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
     __asm__ ("adds    %3,  %3,  %0           \n"
              "itt     cs                     \n"
              "cmpcs   %7,  %4                \n"
-           A("ldrcsh  %2,  [%4], #2          \n")
-           T("ldrhcs  %2,  [%4], #2          \n")
+           L("ldrcsh  %2,  [%4], #2          \n")
+           U("ldrhcs  %2,  [%4], #2          \n")
              "rsb     %0,  %6,  #256         \n"
              "smlabb  %0,  %5,  %6,  %0      \n"
            T("itttt   cs                     \n")
@@ -1226,7 +1226,13 @@ vp8_mc_1 bilin, 8, v
 vp8_mc_1 bilin, 4, h
 vp8_mc_1 bilin, 4, v

-#define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1)
+/* True relational expressions have the value -1 in the GNU assembler,
+   +1 in Apple's. */
+#ifdef __APPLE__
+#   define TMPSIZE \size * (8 + 8*(\size > 4) + \ytaps - 1)
+#else
+#   define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1)
+#endif

 .macro vp8_mc_hv name, size, h, v, ytaps
 function ff_put_vp8_\name\size\()_\h\v\()_armv6, export=1
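Worked example of the fix above: with \size = 8 and \ytaps = 6, (\size > 4) is true. GNU as evaluates a true relational as -1, so 8 - 8*(-1) = 16 and TMPSIZE = 8 * (16 + 6 - 1) = 168 bytes of scratch. Apple's assembler evaluates true as +1, so the unmodified formula would give 8 - 8*1 = 0, i.e. only 8 * (0 + 6 - 1) = 40 bytes, under-allocating the stack buffer; flipping the sign (8 + 8*1 = 16) restores the same 168 bytes. For \size = 4 the relational is 0 under both assemblers, so the two variants agree.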
@@ -87,7 +87,7 @@ static const struct algo fdct_tab[] = {
 #if HAVE_MMX && HAVE_INLINE_ASM
     { "MMX",    ff_fdct_mmx,  NO_PERM, AV_CPU_FLAG_MMX  },
-    { "MMX2",   ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMX2 },
+    { "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT },
     { "SSE2",   ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
 #endif
@@ -132,7 +132,7 @@ static const struct algo idct_tab[] = {
 #endif
     { "SIMPLE-MMX",  ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
     { "XVID-MMX",    ff_idct_xvid_mmx,   NO_PERM,   AV_CPU_FLAG_MMX,    1 },
-    { "XVID-MMX2",   ff_idct_xvid_mmx2,  NO_PERM,   AV_CPU_FLAG_MMX2,   1 },
+    { "XVID-MMXEXT", ff_idct_xvid_mmx2,  NO_PERM,   AV_CPU_FLAG_MMXEXT, 1 },
     { "XVID-SSE2",   ff_idct_xvid_sse2,  SSE2_PERM, AV_CPU_FLAG_SSE2,   1 },
 #if ARCH_X86_64 && HAVE_YASM
     { "PR-SSE2",     ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
@@ -116,8 +116,8 @@ int main(int argc, char **argv)
     AVCodecContext *ctx;
     int c;
     DSPContext cctx, mmxctx;
-    int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMX2 };
-    int flags_size = HAVE_MMX2 ? 2 : 1;
+    int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMXEXT };
+    int flags_size = HAVE_MMXEXT ? 2 : 1;

     if (argc > 1) {
         help();
@@ -68,7 +68,7 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
 %define LOOP_ALIGN
 INIT_MMX
 AC3_EXPONENT_MIN mmx
-%if HAVE_MMX2
+%if HAVE_MMXEXT
 %define PMINUB PMINUB_MMXEXT
 %define LOOP_ALIGN ALIGN 16
 AC3_EXPONENT_MIN mmxext
@@ -65,7 +65,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
             c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
         }
     }
-    if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) {
         c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
     }
@@ -486,7 +486,7 @@ void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
     int mm_flags = av_get_cpu_flags();

 #if HAVE_INLINE_ASM
-    if (mm_flags & AV_CPU_FLAG_MMX2)  ff_cavsdsp_init_mmx2 (c, avctx);
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx);
     if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
 #endif /* HAVE_INLINE_ASM */
 }
@@ -42,39 +42,24 @@ ps_cos_vec: dd   0.500603,  0.505471,  0.515447,  0.531043
 align 32
 ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000

-%macro BUTTERFLY_SSE 4
-    movaps %4, %1
-    subps  %1, %2
-    addps  %2, %4
-    mulps  %1, %3
-%endmacro
-
-%macro BUTTERFLY_AVX 4
-    vsubps %4, %1, %2
-    vaddps %2, %2, %1
-    vmulps %1, %4, %3
-%endmacro
-
-%macro BUTTERFLY0_SSE 5
-    movaps %4, %1
-    shufps %1, %1, %5
-    xorps  %4, %2
-    addps  %1, %4
-    mulps  %1, %3
+%macro BUTTERFLY 4
+    subps  %4, %1, %2
+    addps  %2, %2, %1
+    mulps  %1, %4, %3
 %endmacro

-%macro BUTTERFLY0_SSE2 5
+%macro BUTTERFLY0 5
+%if cpuflag(sse2) && notcpuflag(avx)
     pshufd %4, %1, %5
     xorps  %1, %2
     addps  %1, %4
     mulps  %1, %3
-%endmacro
-
-%macro BUTTERFLY0_AVX 5
-    vshufps %4, %1, %1, %5
-    vxorps  %1, %1, %2
-    vaddps  %4, %4, %1
-    vmulps  %1, %4, %3
+%else
+    shufps %4, %1, %1, %5
+    xorps  %1, %1, %2
+    addps  %4, %4, %1
+    mulps  %1, %4, %3
+%endif
 %endmacro

 %macro BUTTERFLY2 4
@@ -206,14 +191,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
     movss  [outq+116], m6
 %endmacro

-%define BUTTERFLY  BUTTERFLY_AVX
-%define BUTTERFLY0 BUTTERFLY0_AVX
-INIT_YMM
+INIT_YMM avx
 SECTION_TEXT
 %if HAVE_AVX
 ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_avx, 2,3,8, out, in, tmp
+cglobal dct32_float, 2,3,8, out, in, tmp
     ; pass 1
     vmovaps     m4, [inq+0]
     vinsertf128 m5, m5, [inq+96], 1
@@ -286,9 +268,6 @@ INIT_XMM
     RET
 %endif

-%define BUTTERFLY  BUTTERFLY_SSE
-%define BUTTERFLY0 BUTTERFLY0_SSE
-
 %if ARCH_X86_64
 %define SPILL SWAP
 %define UNSPILL SWAP
@@ -411,10 +390,9 @@ INIT_XMM
 %endif

-INIT_XMM
-%macro DCT32_FUNC 1
 ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_%1, 2,3,16, out, in, tmp
+%macro DCT32_FUNC 0
+cglobal dct32_float, 2, 3, 16, out, in, tmp
     ; pass 1

     movaps      m0, [inq+0]
@@ -498,18 +476,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp
     RET
 %endmacro

-%macro LOAD_INV_SSE 2
+%macro LOAD_INV 2
+%if cpuflag(sse2)
+    pshufd      %1, %2, 0x1b
+%elif cpuflag(sse)
     movaps      %1, %2
     shufps      %1, %1, 0x1b
+%endif
 %endmacro

-%define LOAD_INV LOAD_INV_SSE
-DCT32_FUNC sse
-
-%macro LOAD_INV_SSE2 2
-    pshufd      %1, %2, 0x1b
-%endmacro
-
-%define LOAD_INV LOAD_INV_SSE2
-%define BUTTERFLY0 BUTTERFLY0_SSE2
-DCT32_FUNC sse2
+INIT_XMM sse
+DCT32_FUNC
+INIT_XMM sse2
+DCT32_FUNC
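Note on the cpuflags port above: with x86inc's cpuflags scheme, INIT_XMM sse / INIT_XMM sse2 / INIT_YMM avx record the active instruction set, cglobal dct32_float then emits ff_dct32_float_sse / _sse2 / _avx automatically, and a single macro body can branch at assembly time via cpuflag(sse2) / notcpuflag(avx) instead of keeping separate per-ISA BUTTERFLY*_SSE/_SSE2/_AVX copies.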
@@ -3171,7 +3171,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
                 c->idct_add              = ff_idct_xvid_sse2_add;
                 c->idct                  = ff_idct_xvid_sse2;
                 c->idct_permutation_type = FF_SSE2_IDCT_PERM;
-            } else if (mm_flags & AV_CPU_FLAG_MMX2) {
+            } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
                 c->idct_put              = ff_idct_xvid_mmx2_put;
                 c->idct_add              = ff_idct_xvid_mmx2_add;
                 c->idct                  = ff_idct_xvid_mmx2;
@@ -3187,7 +3187,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
         dsputil_init_mmx(c, avctx, mm_flags);
     }

-    if (mm_flags & AV_CPU_FLAG_MMX2)
+    if (mm_flags & AV_CPU_FLAG_MMXEXT)
         dsputil_init_mmx2(c, avctx, mm_flags);

     if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW)
@@ -388,12 +388,16 @@ cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_to
     RET

-%macro ADD_HFYU_LEFT_LOOP 1 ; %1 = is_aligned
+%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
     add     srcq, wq
     add     dstq, wq
     neg     wq
 %%.loop:
+%if %2
     mova    m1, [srcq+wq]
+%else
+    movu    m1, [srcq+wq]
+%endif
     mova    m2, m1
     psllw   m1, 8
     paddb   m1, m2
@@ -435,7 +439,7 @@ cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left
     mova    m3, [pb_zz11zz55zz99zzdd]
     movd    m0, leftm
     psllq   m0, 56
-    ADD_HFYU_LEFT_LOOP 1
+    ADD_HFYU_LEFT_LOOP 1, 1

 INIT_XMM
 cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
@@ -446,12 +450,14 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
     movd    m0, leftm
     pslldq  m0, 15
     test    srcq, 15
-    jnz add_hfyu_left_prediction_ssse3.skip_prologue
+    jnz .src_unaligned
     test    dstq, 15
-    jnz .unaligned
-    ADD_HFYU_LEFT_LOOP 1
-.unaligned:
-    ADD_HFYU_LEFT_LOOP 0
+    jnz .dst_unaligned
+    ADD_HFYU_LEFT_LOOP 1, 1
+.dst_unaligned:
+    ADD_HFYU_LEFT_LOOP 0, 1
+.src_unaligned:
+    ADD_HFYU_LEFT_LOOP 0, 0

 ; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
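For reference, the left-prediction semantics that both the aligned (mova) and unaligned (movu) paths above must reproduce: each output byte is the previous output byte plus the source byte, modulo 256, with the accumulator carried between calls. A plain-C sketch of the semantics, not the exact scalar code from dsputil.c:

#include <stdint.h>

static int add_hfyu_left_prediction_ref(uint8_t *dst, const uint8_t *src,
                                        int w, int acc)
{
    int i;
    for (i = 0; i < w; i++) {
        acc    = (acc + src[i]) & 0xFF; /* running byte-wise prefix sum */
        dst[i] = acc;
    }
    return acc; /* fed back as 'left' for the next row */
}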
@@ -1112,7 +1112,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
        (dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
         if(mm_flags & AV_CPU_FLAG_SSE2){
             c->fdct = ff_fdct_sse2;
-        }else if(mm_flags & AV_CPU_FLAG_MMX2){
+        } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->fdct = ff_fdct_mmx2;
         }else{
             c->fdct = ff_fdct_mmx;
@@ -1145,8 +1145,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
-
-        if (mm_flags & AV_CPU_FLAG_MMX2) {
+        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->sum_abs_dctelem= sum_abs_dctelem_mmx2;
             c->vsad[4]= vsad_intra16_mmx2;
@@ -1187,7 +1186,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
         c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;

-        if (mm_flags & AV_CPU_FLAG_MMX2) {
+        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2;
             c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2;
         }
@@ -1041,7 +1041,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
     mova [r1+r5*8], m0
     mova [r1+r6*8], m2
     add  r4, 2
-    sub  r4, 2
+    sub  r3, 2
 %else
 %if ARCH_X86_64
     movzx r5, word [rrevtab+r4-4]
@@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
         }
     }

-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         h->pred16x16[HOR_PRED8x8         ] = ff_pred16x16_horizontal_mmx2;
         h->pred16x16[DC_PRED8x8          ] = ff_pred16x16_dc_mmx2;
         if (chroma_format_idc == 1)
@@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
             }
         }
     } else if (bit_depth == 10) {
-        if (mm_flags & AV_CPU_FLAG_MMX2) {
+        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
             h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
@@ -218,7 +218,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 #if HAVE_YASM
     int mm_flags = av_get_cpu_flags();

-    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2)
+    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT)
         c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;

     if (bit_depth == 8) {
@@ -236,7 +236,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             if (mm_flags & AV_CPU_FLAG_CMOV)
                 c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;

-            if (mm_flags & AV_CPU_FLAG_MMX2) {
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
                 c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmx2;
                 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
                 c->h264_idct_add16   = ff_h264_idct_add16_8_mmx2;
@@ -304,7 +304,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             }
         }
     } else if (bit_depth == 10) {
         if (mm_flags & AV_CPU_FLAG_MMX) {
-            if (mm_flags & AV_CPU_FLAG_MMX2) {
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
 #if ARCH_X86_32
                 c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmx2;
                 c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
@@ -444,7 +444,7 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
         c->sad[0]= sad16_mmx;
         c->sad[1]= sad8_mmx;
     }
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         c->pix_abs[0][0] = sad16_mmx2;
         c->pix_abs[1][0] = sad8_mmx2;
@@ -595,15 +595,15 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
 #define HAVE_SSSE3 0
 #undef HAVE_SSE2
-#undef HAVE_MMX2
+#undef HAVE_MMXEXT
 #define HAVE_SSE2 0
-#define HAVE_MMX2 0
+#define HAVE_MMXEXT 0
 #define RENAME(a) a ## _MMX
 #define RENAMEl(a) a ## _mmx
 #include "mpegvideo_mmx_template.c"

-#undef HAVE_MMX2
-#define HAVE_MMX2 1
+#undef HAVE_MMXEXT
+#define HAVE_MMXEXT 1
 #undef RENAME
 #undef RENAMEl
 #define RENAME(a) a ## _MMX2
@@ -660,7 +660,7 @@ void ff_MPV_common_init_mmx(MpegEncContext *s)
 #endif
         if(mm_flags & AV_CPU_FLAG_SSE2){
             s->dct_quantize= dct_quantize_SSE2;
-        } else if(mm_flags & AV_CPU_FLAG_MMX2){
+        } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             s->dct_quantize= dct_quantize_MMX2;
         } else {
             s->dct_quantize= dct_quantize_MMX;
@@ -48,7 +48,7 @@
 #define MMREG_WIDTH "8"
 #define MM "%%mm"
 #define MOVQ "movq"
-#if HAVE_MMX2
+#if HAVE_MMXEXT
 #define SPREADW(a) "pshufw $0, "a", "a" \n\t"
 #define PMAXW(a,b) "pmaxsw "a", "b" \n\t"
 #define PMAX(a,b) \
@@ -41,7 +41,7 @@ void ff_pngdsp_init_x86(PNGDSPContext *dsp)
     if (flags & AV_CPU_FLAG_MMX)
         dsp->add_bytes_l2         = ff_add_bytes_l2_mmx;
 #endif
-    if (flags & AV_CPU_FLAG_MMX2)
+    if (flags & AV_CPU_FLAG_MMXEXT)
         dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
     if (flags & AV_CPU_FLAG_SSE2)
         dsp->add_bytes_l2         = ff_add_bytes_l2_sse2;
@@ -37,7 +37,7 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
     if (mm_flags & AV_CPU_FLAG_MMX)
         c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
         c->rv34_idct_add         = ff_rv34_idct_add_mmx2;
     }
@@ -204,7 +204,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
         QPEL_MC_SET(put_, _mmx)
 #endif
     }
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         c->avg_chroma_pixels_tab[0]     = ff_avg_rv40_chroma_mc8_mmx2;
         c->avg_chroma_pixels_tab[1]     = ff_avg_rv40_chroma_mc4_mmx2;
         c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2;
@@ -889,7 +889,7 @@ void ff_dwt_init_x86(DWTContext *c)
             c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
     }
     else{
-        if(mm_flags & AV_CPU_FLAG_MMX2){
+        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
 #if HAVE_7REGS
             c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
@@ -760,7 +760,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
     }

-    if (mm_flags & AV_CPU_FLAG_MMX2){
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
         dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2;
         dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2;
@@ -810,7 +810,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
     if (mm_flags & AV_CPU_FLAG_MMX) {
     }

-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         ASSIGN_LF(mmx2);
     }
@@ -49,7 +49,7 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
     }
 #endif

-    if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) {
+    if (HAVE_MMXEXT && cpuflags & AV_CPU_FLAG_MMXEXT) {
         c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;

         if (!(flags & CODEC_FLAG_BITEXACT)) {
@@ -350,7 +350,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
     /* note that 4-tap width=16 functions are missing because w=16
      * is only used for luma, and luma is always a copy or sixtap. */

-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         VP8_MC_FUNC(2, 4, mmx2);
         VP8_BILINEAR_MC_FUNC(2, 4, mmx2);
 #if ARCH_X86_32
@@ -28,7 +28,7 @@
 DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
 DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};

-#if HAVE_MMX2
+#if HAVE_MMXEXT
 static void gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
 {
     intptr_t x;
@@ -173,8 +173,8 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf)
     int cpu_flags = av_get_cpu_flags();

 #if HAVE_INLINE_ASM
-#if HAVE_MMX2
-    if (cpu_flags & AV_CPU_FLAG_MMX2)
+#if HAVE_MMXEXT
+    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
         gf->filter_line = gradfun_filter_line_mmx2;
 #endif
 #if HAVE_SSSE3
@@ -45,7 +45,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010
 #undef COMPILE_TEMPLATE_SSE
 #endif

-#if HAVE_MMX2
+#if HAVE_MMXEXT
 #undef RENAME
 #define RENAME(a) a ## _mmx2
 #include "yadif_template.c"
@@ -58,8 +58,8 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
     int cpu_flags = av_get_cpu_flags();

 #if HAVE_INLINE_ASM
-#if HAVE_MMX2
-    if (cpu_flags & AV_CPU_FLAG_MMX2)
+#if HAVE_MMXEXT
+    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
         yadif->filter_line = yadif_filter_line_mmx2;
 #endif
 #if HAVE_SSE
@@ -314,7 +314,15 @@ int ff_audio_mix_init(AVAudioResampleContext *avr)
     }

     /* build matrix if the user did not already set one */
-    if (!avr->am->matrix) {
+    if (avr->am->matrix) {
+        if (avr->am->coeff_type != avr->mix_coeff_type    ||
+            avr->am->in_layout  != avr->in_channel_layout ||
+            avr->am->out_layout != avr->out_channel_layout) {
+            av_log(avr, AV_LOG_ERROR,
+                   "Custom matrix does not match current parameters\n");
+            return AVERROR(EINVAL);
+        }
+    } else {
         int i, j;
         char in_layout_name[128];
         char out_layout_name[128];
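ff_audio_mix_init() now rejects a previously installed custom matrix whose coefficient type or channel layouts no longer match the context, instead of silently rebuilding one. A usage sketch with illustrative parameters and error handling trimmed; the call order (options, then matrix, then open) is what the check above assumes:

#include "libavutil/opt.h"
#include "libavresample/avresample.h"

static int open_with_custom_matrix(void)
{
    AVAudioResampleContext *avr = avresample_alloc_context();
    /* identity stereo matrix: out[o] += matrix[o * stride + i] * in[i] */
    double matrix[2 * 2] = { 1.0, 0.0,
                             0.0, 1.0 };

    av_opt_set_int(avr, "in_channel_layout",  AV_CH_LAYOUT_STEREO, 0);
    av_opt_set_int(avr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
    av_opt_set_int(avr, "in_sample_rate",     48000,               0);
    av_opt_set_int(avr, "out_sample_rate",    48000,               0);
    av_opt_set_int(avr, "in_sample_fmt",      AV_SAMPLE_FMT_S16,   0);
    av_opt_set_int(avr, "out_sample_fmt",     AV_SAMPLE_FMT_S16,   0);

    if (avresample_set_matrix(avr, matrix, 2) < 0) /* stride = input channels */
        return -1;
    return avresample_open(avr); /* ff_audio_mix_init() validates the matrix */
}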
@@ -294,8 +294,8 @@ int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix,
     in_channels  = av_get_channel_layout_nb_channels(avr->in_channel_layout);
     out_channels = av_get_channel_layout_nb_channels(avr->out_channel_layout);

-    if ( in_channels < 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS ||
-        out_channels < 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) {
+    if ( in_channels <= 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS ||
+        out_channels <= 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) {
         av_log(avr, AV_LOG_ERROR, "Invalid channel layouts\n");
         return AVERROR(EINVAL);
     }
@@ -332,6 +332,7 @@ int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix,
         av_log(avr, AV_LOG_ERROR, "Invalid mix coeff type\n");
         return AVERROR(EINVAL);
     }
+
     return 0;
 }
@@ -343,14 +344,16 @@ int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix,
     in_channels  = av_get_channel_layout_nb_channels(avr->in_channel_layout);
     out_channels = av_get_channel_layout_nb_channels(avr->out_channel_layout);

-    if ( in_channels < 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS ||
-        out_channels < 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) {
+    if ( in_channels <= 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS ||
+        out_channels <= 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) {
         av_log(avr, AV_LOG_ERROR, "Invalid channel layouts\n");
         return AVERROR(EINVAL);
     }

-    if (avr->am->matrix)
-        av_freep(avr->am->matrix);
+    if (avr->am->matrix) {
+        av_free(avr->am->matrix[0]);
+        avr->am->matrix = NULL;
+    }

 #define CONVERT_MATRIX(type, expr)                                          \
     avr->am->matrix_## type[0] = av_mallocz(out_channels * in_channels *    \
@@ -386,5 +389,11 @@ int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix,
     /* TODO: detect situations where we can just swap around pointers
        instead of doing matrix multiplications with 0.0 and 1.0 */

+    /* set AudioMix params */
+    avr->am->in_layout    = avr->in_channel_layout;
+    avr->am->out_layout   = avr->out_channel_layout;
+    avr->am->in_channels  = in_channels;
+    avr->am->out_channels = out_channels;
+
     return 0;
 }
@@ -48,9 +48,8 @@ int avresample_open(AVAudioResampleContext *avr)
     avr->resample_channels = FFMIN(avr->in_channels, avr->out_channels);
     avr->downmix_needed    = avr->in_channels  > avr->out_channels;
     avr->upmix_needed      = avr->out_channels > avr->in_channels ||
-                             avr->am->matrix                      ||
-                             (avr->out_channels == avr->in_channels &&
-                              avr->in_channel_layout != avr->out_channel_layout);
+                             (!avr->downmix_needed && (avr->am->matrix ||
+                              avr->in_channel_layout != avr->out_channel_layout));
     avr->mixing_needed     = avr->downmix_needed || avr->upmix_needed;

     /* set resampling parameters */
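The restructured condition above also fixes a logic bug: previously a custom matrix (or equal channel counts with differing layouts) forced upmix_needed even when a downmix was required, so both flags could be set at once. Now the matrix and layout tests only select the upmix path when no downmix is needed, while mixing_needed still covers both cases.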
@@ -49,10 +49,10 @@ void av_set_cpu_flags_mask(int mask)
 int av_parse_cpu_flags(const char *s)
 {
-#define CPUFLAG_MMX2     (AV_CPU_FLAG_MMX      | AV_CPU_FLAG_MMX2   | AV_CPU_FLAG_CMOV)
+#define CPUFLAG_MMXEXT   (AV_CPU_FLAG_MMX      | AV_CPU_FLAG_MMXEXT | AV_CPU_FLAG_CMOV)
 #define CPUFLAG_3DNOW    (AV_CPU_FLAG_3DNOW    | AV_CPU_FLAG_MMX)
 #define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW)
-#define CPUFLAG_SSE      (AV_CPU_FLAG_SSE      | CPUFLAG_MMX2)
+#define CPUFLAG_SSE      (AV_CPU_FLAG_SSE      | CPUFLAG_MMXEXT)
 #define CPUFLAG_SSE2     (AV_CPU_FLAG_SSE2     | CPUFLAG_SSE)
 #define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2)
 #define CPUFLAG_SSE3     (AV_CPU_FLAG_SSE3     | CPUFLAG_SSE2)
@@ -69,7 +69,7 @@ int av_parse_cpu_flags(const char *s)
         { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC  },    .unit = "flags" },
 #elif ARCH_X86
         { "mmx"     , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX      },    .unit = "flags" },
-        { "mmx2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2         },    .unit = "flags" },
+        { "mmxext"  , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMXEXT       },    .unit = "flags" },
         { "sse"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE          },    .unit = "flags" },
         { "sse2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2         },    .unit = "flags" },
         { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW     },    .unit = "flags" },
@@ -174,7 +174,7 @@ static const struct {
     { AV_CPU_FLAG_ALTIVEC,  "altivec"    },
 #elif ARCH_X86
     { AV_CPU_FLAG_MMX,      "mmx"        },
-    { AV_CPU_FLAG_MMX2,     "mmx2"       },
+    { AV_CPU_FLAG_MMXEXT,   "mmxext"     },
     { AV_CPU_FLAG_SSE,      "sse"        },
     { AV_CPU_FLAG_SSE2,     "sse2"       },
     { AV_CPU_FLAG_SSE2SLOW, "sse2(slow)" },
@@ -27,6 +27,7 @@

 /* lower 16 bits - CPU features */
 #define AV_CPU_FLAG_MMX          0x0001 ///< standard MMX
+#define AV_CPU_FLAG_MMXEXT       0x0002 ///< SSE integer functions or AMD MMX ext
 #define AV_CPU_FLAG_MMX2         0x0002 ///< SSE integer functions or AMD MMX ext
 #define AV_CPU_FLAG_3DNOW        0x0004 ///< AMD 3DNOW
 #define AV_CPU_FLAG_SSE          0x0008 ///< SSE functions
@@ -33,6 +33,7 @@ unsigned avutil_version(void)
     av_assert0(AVMEDIA_TYPE_ATTACHMENT == 4);
     av_assert0(AV_PICTURE_TYPE_BI == 7);
     av_assert0(LIBAVUTIL_VERSION_MICRO >= 100);
+    av_assert0(HAVE_MMX2 == HAVE_MMXEXT);

     return LIBAVUTIL_VERSION_INT;
 }
@@ -40,7 +40,7 @@

 #define LIBAVUTIL_VERSION_MAJOR 51
 #define LIBAVUTIL_VERSION_MINOR 66
-#define LIBAVUTIL_VERSION_MICRO 100
+#define LIBAVUTIL_VERSION_MICRO 101

 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
@@ -122,7 +122,7 @@ int ff_get_cpu_flags_x86(void)
         if (std_caps & (1 << 23))
             rval |= AV_CPU_FLAG_MMX;
         if (std_caps & (1 << 25))
-            rval |= AV_CPU_FLAG_MMX2;
+            rval |= AV_CPU_FLAG_MMXEXT;
 #if HAVE_SSE
         if (std_caps & (1 << 25))
             rval |= AV_CPU_FLAG_SSE;
@@ -159,7 +159,7 @@ int ff_get_cpu_flags_x86(void)
         if (ext_caps & (1 << 23))
             rval |= AV_CPU_FLAG_MMX;
         if (ext_caps & (1 << 22))
-            rval |= AV_CPU_FLAG_MMX2;
+            rval |= AV_CPU_FLAG_MMXEXT;

         /* Allow for selectively disabling SSE2 functions on AMD processors
            with SSE2 support but not SSE4a. This includes Athlon64, some
@@ -663,8 +663,8 @@ static int swScale(SwsContext *c, const uint8_t *src[],
     if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);

-#if HAVE_MMX2 && HAVE_INLINE_ASM
-    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
+#if HAVE_MMXEXT && HAVE_INLINE_ASM
+    if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
         __asm__ volatile ("sfence" ::: "memory");
 #endif
     emms_c();
@@ -82,7 +82,10 @@ const char *swscale_license(void);
  * are only provided for API compatibility.
  */
 #define SWS_CPU_CAPS_MMX      0x80000000
+#define SWS_CPU_CAPS_MMXEXT   0x20000000
+#if LIBSWSCALE_VERSION_MAJOR < 3
 #define SWS_CPU_CAPS_MMX2     0x20000000
+#endif
 #define SWS_CPU_CAPS_3DNOW    0x40000000
 #define SWS_CPU_CAPS_ALTIVEC  0x10000000
 #define SWS_CPU_CAPS_BFIN     0x01000000
@@ -599,7 +599,7 @@ fail:
     return ret;
 }

-#if HAVE_MMX2 && HAVE_INLINE_ASM
+#if HAVE_MMXEXT && HAVE_INLINE_ASM
 static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
                            int16_t *filter, int32_t *filterPos, int numSplits)
 {
@@ -762,7 +762,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
     return fragmentPos + 1;
 }
-#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
+#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */

 static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
 {
@@ -1024,7 +1024,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
             c->srcBpc = 16;
         if (c->dstBpc == 16)
             dst_stride <<= 1;
-        if (HAVE_MMX2 && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMX2 &&
+        if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT &&
             c->srcBpc == 8 && c->dstBpc <= 14) {
             c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
                                 (srcW & 15) == 0) ? 1 : 0;
@@ -1063,7 +1063,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
         /* precalculate horizontal scaler filter coefficients */
         {
-#if HAVE_MMX2 && HAVE_INLINE_ASM
+#if HAVE_MMXEXT && HAVE_INLINE_ASM
             // can't downscale !!!
             if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
                 c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
@@ -1107,7 +1107,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                 mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
 #endif
             } else
-#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
+#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */
             {
                 const int filterAlign =
                     (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
@@ -1273,7 +1273,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
 #endif
                    av_get_pix_fmt_name(dstFormat));

-        if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
+        if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT)
             av_log(c, AV_LOG_INFO, "using MMX2\n");
         else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
             av_log(c, AV_LOG_INFO, "using 3DNOW\n");
@@ -28,7 +28,7 @@

 #define LIBSWSCALE_VERSION_MAJOR 2
 #define LIBSWSCALE_VERSION_MINOR 1
-#define LIBSWSCALE_VERSION_MICRO 100
+#define LIBSWSCALE_VERSION_MICRO 101

 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
                                                LIBSWSCALE_VERSION_MINOR, \
@@ -88,7 +88,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)  = 0x2080208020802080ULL;

 //Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.

-#define COMPILE_TEMPLATE_MMX2 0
+#define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define COMPILE_TEMPLATE_SSE2 0
@@ -99,8 +99,8 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)  = 0x2080208020802080ULL;
 //MMX2 versions
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 1
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _MMX2
 #include "rgb2rgb_template.c"
@@ -113,10 +113,10 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)  = 0x2080208020802080ULL;
 //3DNOW versions
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
+#undef COMPILE_TEMPLATE_MMXEXT
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMX2 0
+#define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_SSE2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNOW
@@ -140,7 +140,7 @@ av_cold void rgb2rgb_init_x86(void)
         rgb2rgb_init_MMX();
     if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
         rgb2rgb_init_3DNOW();
-    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
+    if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT)
         rgb2rgb_init_MMX2();
     if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
         rgb2rgb_init_SSE2();
@@ -35,7 +35,7 @@
 #if COMPILE_TEMPLATE_AMD3DNOW
 #define PREFETCH  "prefetch"
 #define PAVGB     "pavgusb"
-#elif COMPILE_TEMPLATE_MMX2
+#elif COMPILE_TEMPLATE_MMXEXT
 #define PREFETCH "prefetchnta"
 #define PAVGB     "pavgb"
 #else
@@ -49,7 +49,7 @@
 #define EMMS     "emms"
 #endif

-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #define MOVNTQ "movntq"
 #define SFENCE "sfence"
 #else
@@ -1136,7 +1136,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst,
         PREFETCH" 32(%1, %0)            \n\t"
         "movq      (%1, %0), %%mm0      \n\t"
         "movq     8(%1, %0), %%mm1      \n\t"
-# if COMPILE_TEMPLATE_MMX2
+# if COMPILE_TEMPLATE_MMXEXT
         "pshufw $177, %%mm0, %%mm3      \n\t"
         "pshufw $177, %%mm1, %%mm5      \n\t"
         "pand    %%mm7, %%mm0           \n\t"
@@ -1500,7 +1500,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
 }
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */

-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
 {
     int x,y;
@@ -1590,7 +1590,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
             SFENCE"                 \n\t"
             :::"memory");
     }
-#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */

 #if !COMPILE_TEMPLATE_AMD3DNOW
 /**
@@ -1798,7 +1798,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
         "1:                             \n\t"
         PREFETCH" 64(%0, %%"REG_d")     \n\t"
         PREFETCH" 64(%1, %%"REG_d")     \n\t"
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
         "movq       (%0, %%"REG_d"), %%mm0 \n\t"
         "movq       (%1, %%"REG_d"), %%mm1 \n\t"
         "movq      6(%0, %%"REG_d"), %%mm2 \n\t"
@@ -1859,7 +1859,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
         "packssdw %%mm1, %%mm0          \n\t" // V1 V0 U1 U0
         "psraw     $7, %%mm0            \n\t"

-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
         "movq     12(%0, %%"REG_d"), %%mm4 \n\t"
         "movq     12(%1, %%"REG_d"), %%mm1 \n\t"
         "movq     18(%0, %%"REG_d"), %%mm2 \n\t"
@@ -2580,9 +2580,9 @@ static inline void RENAME(rgb2rgb_init)(void)
         yuyvtoyuv422       = RENAME(yuyvtoyuv422);
 #endif /* !COMPILE_TEMPLATE_SSE2 */

-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
     planar2x = RENAME(planar2x);
-#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */

     rgb24toyv12        = RENAME(rgb24toyv12);
     yuyvtoyuv420       = RENAME(yuyvtoyuv420);
| @@ -74,16 +74,16 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; | |||||
| //MMX versions | //MMX versions | ||||
| #if HAVE_MMX | #if HAVE_MMX | ||||
| #undef RENAME | #undef RENAME | ||||
| #define COMPILE_TEMPLATE_MMX2 0 | |||||
| #define COMPILE_TEMPLATE_MMXEXT 0 | |||||
| #define RENAME(a) a ## _MMX | #define RENAME(a) a ## _MMX | ||||
| #include "swscale_template.c" | #include "swscale_template.c" | ||||
| #endif | #endif | ||||
| //MMX2 versions | //MMX2 versions | ||||
| #if HAVE_MMX2 | |||||
| #if HAVE_MMXEXT | |||||
| #undef RENAME | #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMX2 | |||||
| #define COMPILE_TEMPLATE_MMX2 1 | |||||
| #undef COMPILE_TEMPLATE_MMXEXT | |||||
| #define COMPILE_TEMPLATE_MMXEXT 1 | |||||
| #define RENAME(a) a ## _MMX2 | #define RENAME(a) a ## _MMX2 | ||||
| #include "swscale_template.c" | #include "swscale_template.c" | ||||
| #endif | #endif | ||||
@@ -375,8 +375,8 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
 #if HAVE_INLINE_ASM
     if (cpu_flags & AV_CPU_FLAG_MMX)
         sws_init_swScale_MMX(c);
-#if HAVE_MMX2
-    if (cpu_flags & AV_CPU_FLAG_MMX2)
+#if HAVE_MMXEXT
+    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
         sws_init_swScale_MMX2(c);
     if (cpu_flags & AV_CPU_FLAG_SSE3){
         if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
@@ -439,7 +439,7 @@ switch(c->dstBpc){ \
     if (cpu_flags & AV_CPU_FLAG_MMX) {
         ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
         ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2);
+        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT);
 
         switch (c->srcFormat) {
         case PIX_FMT_Y400A:
@@ -471,7 +471,7 @@ switch(c->dstBpc){ \
             break;
         }
     }
-    if (cpu_flags & AV_CPU_FLAG_MMX2) {
+    if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
         ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1);
     }
 #endif
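This init function, like ff_yuv2rgb_init_mmx further down, follows a two-level pattern: HAVE_MMXEXT decides at build time whether a variant is compiled in at all, and AV_CPU_FLAG_MMXEXT gates it at run time on the actual CPU. A minimal sketch of the pattern, assuming the FFmpeg include path for libavutil/cpu.h; do_scale_MMX and do_scale_MMX2 are stand-ins for the template-generated variants, not real swscale symbols:

#include <stdint.h>
#include "libavutil/cpu.h"   /* av_get_cpu_flags(), AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMXEXT */

typedef void (*scale_fn)(int16_t *dst, const uint8_t *src, int w);

/* Portable fallback; the MMX/MMX2 stand-ins would be the template outputs. */
static void do_scale_c(int16_t *dst, const uint8_t *src, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = src[i] << 7;   /* 8-bit input widened to a 15-bit intermediate */
}

static scale_fn select_scale(void)
{
    int cpu_flags = av_get_cpu_flags();    /* runtime CPUID result, cached */
    scale_fn fn = do_scale_c;
#if HAVE_MMX                               /* build-time gate */
    if (cpu_flags & AV_CPU_FLAG_MMX)       /* runtime gate */
        fn = do_scale_MMX;
#endif
#if HAVE_MMXEXT                            /* was HAVE_MMX2 */
    if (cpu_flags & AV_CPU_FLAG_MMXEXT)    /* was AV_CPU_FLAG_MMX2 */
        fn = do_scale_MMX2;
#endif
    return fn;
}

The later checks deliberately overwrite the earlier ones, so the best variant the CPU supports wins.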
@@ -23,13 +23,13 @@
 #undef MOVNTQ2
 #undef PREFETCH
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #define PREFETCH "prefetchnta"
 #else
 #define PREFETCH " # nop"
 #endif
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
 #define MOVNTQ2 "movntq "
 #else
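The two macros renamed here select MMXEXT's streaming memory instructions when available: prefetchnta pulls source cachelines in early, and movntq stores without allocating cache lines, which suits large frame buffers that will not be re-read soon. A rough x86-only illustration of the pair in GNU inline asm (copy_nt is a hypothetical helper, not swscale code):

#include <stddef.h>
#include <stdint.h>

/* Copy 8 bytes at a time with non-temporal stores so the writes bypass
 * the cache, prefetching the source ahead of use.
 * Assumes dst/src 8-byte aligned and n a multiple of 8. */
static void copy_nt(uint8_t *dst, const uint8_t *src, size_t n)
{
    for (size_t i = 0; i < n; i += 8) {
        __asm__ volatile (
            "prefetchnta 64(%1)      \n\t"  /* hint: stream source in early */
            "movq        (%1), %%mm0 \n\t"  /* load 8 bytes                 */
            "movntq      %%mm0, (%0) \n\t"  /* store without cache fill     */
            :
            : "r"(dst + i), "r"(src + i)
            : "mm0", "memory");
    }
    __asm__ volatile ("sfence \n\t emms" ::: "memory"); /* order NT stores, leave MMX state */
}

On plain MMX builds the " # nop" fallback simply turns every PREFETCH into an assembler comment, so the same asm blocks assemble either way.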
@@ -38,7 +38,7 @@
 #endif
 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
 
-#if !COMPILE_TEMPLATE_MMX2
+#if !COMPILE_TEMPLATE_MMXEXT
 static av_always_inline void
 dither_8to16(const uint8_t *srcDither, int rot)
 {
@@ -641,7 +641,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
     "cmp "#dstw", "#index"          \n\t"\
     " jb 1b                         \n\t"
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #undef WRITEBGR24
 #define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
 #else
@@ -1445,7 +1445,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
     }
 }
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
                                  int dstWidth, const uint8_t *src,
                                  int srcW, int xInc)
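hyscale_fast, whose guard changes here, is the MMXEXT-only fast-bilinear horizontal luma scaler (hcscale_fast below is its two-plane chroma twin). In scalar terms it walks the source with a 16.16 fixed-point position and blends each pair of adjacent pixels; roughly the following, glossing over the right-edge padding the real code performs:

#include <stdint.h>

/* Approximate scalar equivalent of the fast bilinear horizontal scale:
 * xInc is a 16.16 fixed-point step; each output blends two neighbouring
 * source pixels into a 15-bit intermediate. A sketch, not the exact code. */
static void hyscale_fast_c(int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
{
    unsigned xpos = 0;
    for (int i = 0; i < dstWidth; i++) {
        unsigned xx     = xpos >> 16;            /* integer source index */
        unsigned xalpha = (xpos & 0xFFFF) >> 9;  /* 7-bit blend weight   */
        dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
        xpos  += xInc;
    }
    (void)srcW;  /* the real code additionally clamps/pads at the right edge */
}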
@@ -1627,7 +1627,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
         dst2[i] = src2[srcW-1]*128;
     }
 }
-#endif /* COMPILE_TEMPLATE_MMX2 */
+#endif /* COMPILE_TEMPLATE_MMXEXT */
 
 static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 {
@@ -1691,17 +1691,17 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
     if (c->srcBpc == 8 && c->dstBpc <= 14) {
         // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
         if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
         {
             c->hyscale_fast = RENAME(hyscale_fast);
             c->hcscale_fast = RENAME(hcscale_fast);
         } else {
-#endif /* COMPILE_TEMPLATE_MMX2 */
+#endif /* COMPILE_TEMPLATE_MMXEXT */
             c->hyscale_fast = NULL;
             c->hcscale_fast = NULL;
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
         }
-#endif /* COMPILE_TEMPLATE_MMX2 */
+#endif /* COMPILE_TEMPLATE_MMXEXT */
     }
 }
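This hunk is trickier than a plain rename because the if/else itself is split across the preprocessor guards: with the fast path compiled in there is a real two-way branch, and without it only the NULL assignments survive preprocessing, the dangling else disappearing with them. The same shape in miniature, with invented names:

#include <stddef.h>

#define HAVE_FAST_PATH 1   /* stand-in for COMPILE_TEMPLATE_MMXEXT */

static void fast_scale(void) { /* would be the MMXEXT fast path */ }

static void init_scaler(int can_use_fast, void (**scale)(void))
{
#if HAVE_FAST_PATH
    if (can_use_fast) {
        *scale = fast_scale;
    } else {
#endif
        *scale = NULL;   /* caller then falls back to the generic scaler */
#if HAVE_FAST_PATH
    }
#endif
}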
@@ -52,20 +52,20 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
 #if HAVE_MMX
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 0
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 0
 #define RENAME(a) a ## _MMX
 #include "yuv2rgb_template.c"
 #endif /* HAVE_MMX */
 
 //MMX2 versions
-#if HAVE_MMX2
+#if HAVE_MMXEXT
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 1
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _MMX2
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMX2 */
+#endif /* HAVE_MMXEXT */
 
 #endif /* HAVE_INLINE_ASM */
@@ -74,8 +74,8 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
 #if HAVE_INLINE_ASM
     int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMX2
-    if (cpu_flags & AV_CPU_FLAG_MMX2) {
+#if HAVE_MMXEXT
+    if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB24:  return yuv420_rgb24_MMX2;
         case PIX_FMT_BGR24:  return yuv420_bgr24_MMX2;
@@ -25,7 +25,7 @@
 #undef EMMS
 #undef SFENCE
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #define MOVNTQ "movntq"
 #define SFENCE "sfence"
 #else
@@ -181,7 +181,7 @@
     "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \
     "paddusb "RED_DITHER"(%4), %%mm1\n\t" \
 
-#if !COMPILE_TEMPLATE_MMX2
+#if !COMPILE_TEMPLATE_MMXEXT
 static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
@@ -237,7 +237,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
     YUV2RGB_OPERANDS
     YUV2RGB_ENDFUNC
 }
-#endif /* !COMPILE_TEMPLATE_MMX2 */
+#endif /* !COMPILE_TEMPLATE_MMXEXT */
 
 #define RGB_PACK24(blue, red)\
     "packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
@@ -254,7 +254,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
     "punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
     RGB_PACK24_B
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
 DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
 DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
@@ -361,7 +361,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
     MOVNTQ "    %%mm5, 16(%1)\n\t" \
     MOVNTQ "    %%mm"alpha", 24(%1)\n\t" \
 
-#if !COMPILE_TEMPLATE_MMX2
+#if !COMPILE_TEMPLATE_MMXEXT
 static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
@@ -448,4 +448,4 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
 }
 #endif
 
-#endif /* !COMPILE_TEMPLATE_MMX2 */
+#endif /* !COMPILE_TEMPLATE_MMXEXT */
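All the yuv420_rgb* templates guarded above implement the same fixed-point YUV-to-RGB transform, eight pixels at a time in MMX registers. As a rough per-pixel scalar model, using the common BT.601 limited-range 8-bit integer coefficients (an approximation of what the asm vectorizes, not the exact swscale constants):

#include <stdint.h>

static uint8_t clamp8(int v) { return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v; }

/* Rough scalar model of yuv420 -> rgb conversion (BT.601, limited range),
 * with the usual 8.8 fixed-point coefficients and rounding before the shift. */
static void yuv2rgb_pixel(uint8_t y, uint8_t u, uint8_t v,
                          uint8_t *r, uint8_t *g, uint8_t *b)
{
    int c = 298 * (y - 16), d = u - 128, e = v - 128;
    *r = clamp8((c           + 409 * e + 128) >> 8);
    *g = clamp8((c - 100 * d - 208 * e + 128) >> 8);
    *b = clamp8((c + 516 * d           + 128) >> 8);
}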
@@ -1,3 +1,18 @@
+FATE_LAGARITH += fate-lagarith-rgb24
+fate-lagarith-rgb24: CMD = framecrc -i $(SAMPLES)/lagarith/lag-rgb24.avi
+
+FATE_LAGARITH += fate-lagarith-rgb32
+fate-lagarith-rgb32: CMD = framecrc -i $(SAMPLES)/lagarith/lag-rgb32.avi -pix_fmt bgra
+
+FATE_LAGARITH += fate-lagarith-yuy2
+fate-lagarith-yuy2: CMD = framecrc -i $(SAMPLES)/lagarith/lag-yuy2.avi
+
+FATE_LAGARITH += fate-lagarith-yv12
+fate-lagarith-yv12: CMD = framecrc -i $(SAMPLES)/lagarith/lag-yv12.avi
+
+FATE_SAMPLES_AVCONV += $(FATE_LAGARITH)
+fate-lagarith: $(FATE_LAGARITH)
+
 FATE_LOCO += fate-loco-rgb
 fate-loco-rgb: CMD = framecrc -i $(SAMPLES)/loco/pig-loco-rgb.avi
 
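Each new test decodes one Lagarith sample and compares against a framecrc reference file, added below: a "#tb" header gives the stream timebase, then each line records one frame as stream index, dts, pts, duration, size in bytes, and an Adler-32 checksum of the raw frame data. A sketch of producing such a line, assuming a decoded frame buffer and using zlib's adler32() as a stand-in for libavutil's av_adler32_update():

#include <stdio.h>
#include <zlib.h>   /* adler32(); link with -lz */

/* Print a framecrc-style line for one decoded frame.
 * pts/dts/duration are in units of the stream timebase from the #tb line. */
static void print_framecrc(int stream, long dts, long pts, long dur,
                           const unsigned char *data, size_t size)
{
    unsigned long crc = adler32(0L, Z_NULL, 0);   /* initial Adler-32 value */
    crc = adler32(crc, data, (uInt)size);         /* checksum the frame bytes */
    printf("%d, %ld, %ld, %ld, %zu, 0x%08lx\n",
           stream, dts, pts, dur, size, crc);
}

The all-zero checksums at the start of the rgb32 reference below correspond to all-black lead-in frames, whose uniform content happens to hash to 0x00000000 here.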
@@ -0,0 +1,5 @@
+#tb 0: 100/2997
+0, 0, 0, 1, 368640, 0x26f74db2
+0, 1, 1, 1, 368640, 0x63b29ea4
+0, 2, 2, 1, 368640, 0x19467f03
+0, 3, 3, 1, 368640, 0x5fdc3575
@@ -0,0 +1,26 @@
+#tb 0: 1001/24000
+0, 0, 0, 1, 1382400, 0x00000000
+0, 1, 1, 1, 1382400, 0x00000000
+0, 2, 2, 1, 1382400, 0x00000000
+0, 3, 3, 1, 1382400, 0x00000000
+0, 4, 4, 1, 1382400, 0x00000000
+0, 5, 5, 1, 1382400, 0xf95bde46
+0, 6, 6, 1, 1382400, 0x4f4c0393
+0, 7, 7, 1, 1382400, 0xe5aa40db
+0, 8, 8, 1, 1382400, 0xc25a8ba2
+0, 9, 9, 1, 1382400, 0x9db3150d
+0, 10, 10, 1, 1382400, 0x730e64b3
+0, 11, 11, 1, 1382400, 0xf8fd7edf
+0, 12, 12, 1, 1382400, 0x0114798a
+0, 13, 13, 1, 1382400, 0x7571210f
+0, 14, 14, 1, 1382400, 0x552ae59d
+0, 15, 15, 1, 1382400, 0x7ae0c946
+0, 16, 16, 1, 1382400, 0x0818c3ef
+0, 17, 17, 1, 1382400, 0x8257cac4
+0, 18, 18, 1, 1382400, 0x7762a979
+0, 19, 19, 1, 1382400, 0x282af57a
+0, 20, 20, 1, 1382400, 0x3f42de50
+0, 21, 21, 1, 1382400, 0xc42d5f93
+0, 22, 22, 1, 1382400, 0x18775c90
+0, 23, 23, 1, 1382400, 0x34befa90
+0, 24, 24, 1, 1382400, 0xd33d5f53
@@ -0,0 +1,2 @@
+#tb 0: 1/10
+0, 0, 0, 1, 1572864, 0xeed76a7d
@@ -0,0 +1,3 @@
+#tb 0: 1/60
+0, 0, 0, 1, 92160, 0x1dfdf5c1
+0, 1, 1, 1, 92160, 0x6965884f