Originally committed as revision 19864 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.6)
| @@ -465,6 +465,7 @@ OBJS-$(HAVE_MMX) += x86/cpuid.o \ | |||||
| x86/dnxhd_mmx.o \ | x86/dnxhd_mmx.o \ | ||||
| x86/dsputil_mmx.o \ | x86/dsputil_mmx.o \ | ||||
| x86/fdct_mmx.o \ | x86/fdct_mmx.o \ | ||||
| x86/fft.o \ | |||||
| x86/idct_mmx_xvid.o \ | x86/idct_mmx_xvid.o \ | ||||
| x86/idct_sse2_xvid.o \ | x86/idct_sse2_xvid.o \ | ||||
| x86/motion_est_mmx.o \ | x86/motion_est_mmx.o \ | ||||
| @@ -480,6 +481,7 @@ OBJS-$(ARCH_ALPHA) += alpha/dsputil_alpha.o \ | |||||
| OBJS-$(ARCH_ARM) += arm/dsputil_arm.o \ | OBJS-$(ARCH_ARM) += arm/dsputil_arm.o \ | ||||
| arm/dsputil_arm_s.o \ | arm/dsputil_arm_s.o \ | ||||
| arm/fft_init_arm.o \ | |||||
| arm/jrevdct_arm.o \ | arm/jrevdct_arm.o \ | ||||
| arm/mpegvideo_arm.o \ | arm/mpegvideo_arm.o \ | ||||
| arm/simple_idct_arm.o \ | arm/simple_idct_arm.o \ | ||||
| @@ -0,0 +1,39 @@ | |||||
| /* | |||||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||||
| * | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavcodec/dsputil.h" | |||||
| void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); /* NEON routine; installed as s->fft_permute by ff_fft_init_arm() */ | |||||
| void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); /* NEON routine; installed as s->fft_calc by ff_fft_init_arm() */ | |||||
| void ff_imdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); /* NEON routine; installed as s->imdct_calc */ | |||||
| void ff_imdct_half_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); /* NEON routine; installed as s->imdct_half */ | |||||
| void ff_mdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); /* NEON routine; installed as s->mdct_calc */ | |||||
| /** | |||||
|  * Select the ARM-specific FFT/MDCT routines for a transform context. | |||||
|  * | |||||
|  * When the build has NEON enabled (HAVE_NEON is non-zero) every transform | |||||
|  * callback in @p s is pointed at its NEON implementation; otherwise @p s is | |||||
|  * left exactly as the caller prepared it. | |||||
|  * | |||||
|  * @param s context whose function pointers may be overridden | |||||
|  */ | |||||
| av_cold void ff_fft_init_arm(FFTContext *s) | |||||
| { | |||||
|     /* HAVE_NEON is a build-time constant: nothing to install without it. */ | |||||
|     if (!HAVE_NEON) | |||||
|         return; | |||||
|     /* Complex FFT entry points. */ | |||||
|     s->fft_calc    = ff_fft_calc_neon; | |||||
|     s->fft_permute = ff_fft_permute_neon; | |||||
|     /* MDCT / inverse MDCT entry points. */ | |||||
|     s->mdct_calc   = ff_mdct_calc_neon; | |||||
|     s->imdct_half  = ff_imdct_half_neon; | |||||
|     s->imdct_calc  = ff_imdct_calc_neon; | |||||
| } | |||||
| @@ -683,6 +683,7 @@ typedef struct FFTContext { | |||||
| void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | ||||
| void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | ||||
| void (*mdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | void (*mdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | ||||
| int split_radix; | |||||
| } FFTContext; | } FFTContext; | ||||
| extern FFTSample* const ff_cos_tabs[13]; | extern FFTSample* const ff_cos_tabs[13]; | ||||
| @@ -694,14 +695,11 @@ extern FFTSample* const ff_cos_tabs[13]; | |||||
| */ | */ | ||||
| int ff_fft_init(FFTContext *s, int nbits, int inverse); | int ff_fft_init(FFTContext *s, int nbits, int inverse); | ||||
| void ff_fft_permute_c(FFTContext *s, FFTComplex *z); | void ff_fft_permute_c(FFTContext *s, FFTComplex *z); | ||||
| void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); | |||||
| void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); | |||||
| void ff_fft_calc_c(FFTContext *s, FFTComplex *z); | void ff_fft_calc_c(FFTContext *s, FFTComplex *z); | ||||
| void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); | |||||
| void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); | |||||
| void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); | |||||
| void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); | |||||
| void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); | |||||
| /* | |||||
|  * Per-architecture FFT setup hooks. ff_fft_init() calls the ones enabled | |||||
|  * for the build after it has installed the C defaults; each hook may | |||||
|  * replace function pointers (and, for AltiVec, split_radix) in s. | |||||
|  */ | |||||
| void ff_fft_init_altivec(FFTContext *s); | |||||
| void ff_fft_init_mmx(FFTContext *s); | |||||
| void ff_fft_init_arm(FFTContext *s); | |||||
| /** | /** | ||||
| * Do the permutation needed BEFORE calling ff_fft_calc(). | * Do the permutation needed BEFORE calling ff_fft_calc(). | ||||
| @@ -774,15 +772,6 @@ int ff_mdct_init(MDCTContext *s, int nbits, int inverse, double scale); | |||||
| void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||||
| void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||||
| void ff_mdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | void ff_mdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | ||||
| void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_imdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_imdct_half_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_mdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||||
| void ff_mdct_end(MDCTContext *s); | void ff_mdct_end(MDCTContext *s); | ||||
| /* Real Discrete Fourier Transform */ | /* Real Discrete Fourier Transform */ | ||||
| @@ -62,7 +62,6 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) | |||||
| { | { | ||||
| int i, j, m, n; | int i, j, m, n; | ||||
| float alpha, c1, s1, s2; | float alpha, c1, s1, s2; | ||||
| int split_radix = 1; | |||||
| int av_unused has_vectors; | int av_unused has_vectors; | ||||
| if (nbits < 2 || nbits > 16) | if (nbits < 2 || nbits > 16) | ||||
| @@ -87,41 +86,13 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) | |||||
| s->imdct_half = ff_imdct_half_c; | s->imdct_half = ff_imdct_half_c; | ||||
| s->mdct_calc = ff_mdct_calc_c; | s->mdct_calc = ff_mdct_calc_c; | ||||
| s->exptab1 = NULL; | s->exptab1 = NULL; | ||||
| s->split_radix = 1; | |||||
| #if HAVE_MMX && HAVE_YASM | |||||
| has_vectors = mm_support(); | |||||
| if (has_vectors & FF_MM_SSE && HAVE_SSE) { | |||||
| /* SSE for P3/P4/K8 */ | |||||
| s->imdct_calc = ff_imdct_calc_sse; | |||||
| s->imdct_half = ff_imdct_half_sse; | |||||
| s->fft_permute = ff_fft_permute_sse; | |||||
| s->fft_calc = ff_fft_calc_sse; | |||||
| } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) { | |||||
| /* 3DNowEx for K7 */ | |||||
| s->imdct_calc = ff_imdct_calc_3dn2; | |||||
| s->imdct_half = ff_imdct_half_3dn2; | |||||
| s->fft_calc = ff_fft_calc_3dn2; | |||||
| } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) { | |||||
| /* 3DNow! for K6-2/3 */ | |||||
| s->imdct_calc = ff_imdct_calc_3dn; | |||||
| s->imdct_half = ff_imdct_half_3dn; | |||||
| s->fft_calc = ff_fft_calc_3dn; | |||||
| } | |||||
| #elif HAVE_ALTIVEC | |||||
| has_vectors = mm_support(); | |||||
| if (has_vectors & FF_MM_ALTIVEC) { | |||||
| s->fft_calc = ff_fft_calc_altivec; | |||||
| split_radix = 0; | |||||
| } | |||||
| #elif HAVE_NEON | |||||
| s->fft_permute = ff_fft_permute_neon; | |||||
| s->fft_calc = ff_fft_calc_neon; | |||||
| s->imdct_calc = ff_imdct_calc_neon; | |||||
| s->imdct_half = ff_imdct_half_neon; | |||||
| s->mdct_calc = ff_mdct_calc_neon; | |||||
| #endif | |||||
| if (ARCH_ARM) ff_fft_init_arm(s); | |||||
| if (HAVE_ALTIVEC) ff_fft_init_altivec(s); | |||||
| if (HAVE_MMX) ff_fft_init_mmx(s); | |||||
| if (split_radix) { | |||||
| if (s->split_radix) { | |||||
| for(j=4; j<=nbits; j++) { | for(j=4; j<=nbits; j++) { | ||||
| int m = 1<<j; | int m = 1<<j; | ||||
| double freq = 2*M_PI/m; | double freq = 2*M_PI/m; | ||||
| @@ -133,3 +133,9 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); | |||||
| POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | ||||
| } | } | ||||
| /** | |||||
|  * Install the AltiVec FFT routine when the running CPU supports AltiVec. | |||||
|  * | |||||
|  * On success s->fft_calc is replaced and s->split_radix is cleared, so that | |||||
|  * ff_fft_init() does not take its `if (s->split_radix)` setup branch for | |||||
|  * this context. If the CPU lacks AltiVec, @p s is left untouched. | |||||
|  * | |||||
|  * @param s context whose fft_calc/split_radix fields may be overridden | |||||
|  */ | |||||
| av_cold void ff_fft_init_altivec(FFTContext *s) | |||||
| { | |||||
|     /* | |||||
|      * HAVE_ALTIVEC only says the code was compiled in. Keep the runtime | |||||
|      * probe that the generic init performed before this hook existed, so | |||||
|      * AltiVec code is never installed on a CPU that cannot run it. | |||||
|      */ | |||||
|     if (!(mm_support() & FF_MM_ALTIVEC)) | |||||
|         return; | |||||
|     s->fft_calc = ff_fft_calc_altivec; | |||||
|     s->split_radix = 0; | |||||
| } | |||||
| @@ -0,0 +1,44 @@ | |||||
| /* | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavcodec/dsputil.h" | |||||
| #include "fft.h" | |||||
| /** | |||||
|  * Select the x86 SIMD FFT/IMDCT routines for a transform context. | |||||
|  * | |||||
|  * Only active when built with HAVE_YASM. The CPU flags reported by | |||||
|  * mm_support() are tested in order of preference (SSE, then extended | |||||
|  * 3DNow!, then 3DNow!); the first variant that is both compiled in and | |||||
|  * reported by the CPU wins. If none matches, @p s is left unchanged. | |||||
|  * | |||||
|  * @param s context whose function pointers may be overridden | |||||
|  */ | |||||
| av_cold void ff_fft_init_mmx(FFTContext *s) | |||||
| { | |||||
| #if HAVE_YASM | |||||
|     const int cpu_flags = mm_support(); | |||||
|     /* SSE for P3/P4/K8 */ | |||||
|     if (HAVE_SSE && (cpu_flags & FF_MM_SSE)) { | |||||
|         s->fft_permute = ff_fft_permute_sse; | |||||
|         s->fft_calc    = ff_fft_calc_sse; | |||||
|         s->imdct_half  = ff_imdct_half_sse; | |||||
|         s->imdct_calc  = ff_imdct_calc_sse; | |||||
|         return; | |||||
|     } | |||||
|     /* 3DNowEx for K7 */ | |||||
|     if (HAVE_AMD3DNOWEXT && (cpu_flags & FF_MM_3DNOWEXT)) { | |||||
|         s->fft_calc   = ff_fft_calc_3dn2; | |||||
|         s->imdct_half = ff_imdct_half_3dn2; | |||||
|         s->imdct_calc = ff_imdct_calc_3dn2; | |||||
|         return; | |||||
|     } | |||||
|     /* 3DNow! for K6-2/3 */ | |||||
|     if (HAVE_AMD3DNOW && (cpu_flags & FF_MM_3DNOW)) { | |||||
|         s->fft_calc   = ff_fft_calc_3dn; | |||||
|         s->imdct_half = ff_imdct_half_3dn; | |||||
|         s->imdct_calc = ff_imdct_calc_3dn; | |||||
|     } | |||||
| #endif | |||||
| } | |||||
| @@ -0,0 +1,36 @@ | |||||
| /* | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_X86_FFT_H /* x86 SIMD FFT/IMDCT entry points selected by ff_fft_init_mmx() */ | |||||
| #define AVCODEC_X86_FFT_H | |||||
| #include "libavcodec/dsputil.h" /* only include here, so the FFT/MDCT types used below must come from it */ | |||||
| void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); /* SSE variant for fft_permute */ | |||||
| void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); /* SSE variant for fft_calc */ | |||||
| void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); /* 3DNow! variant for fft_calc */ | |||||
| void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); /* extended 3DNow! variant for fft_calc */ | |||||
| void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); /* 3DNow! variant for imdct_calc */ | |||||
| void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); /* 3DNow! variant for imdct_half */ | |||||
| void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); /* extended 3DNow! variant for imdct_calc */ | |||||
| void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); /* extended 3DNow! variant for imdct_half */ | |||||
| void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); /* SSE variant for imdct_calc */ | |||||
| void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); /* SSE variant for imdct_half */ | |||||
| #endif /* AVCODEC_X86_FFT_H */ | |||||
| @@ -21,6 +21,7 @@ | |||||
| #include "libavutil/x86_cpu.h" | #include "libavutil/x86_cpu.h" | ||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
| #include "fft.h" | |||||
| DECLARE_ALIGNED_8(static const int, m1m1[2]) = { 1<<31, 1<<31 }; | DECLARE_ALIGNED_8(static const int, m1m1[2]) = { 1<<31, 1<<31 }; | ||||
| @@ -21,6 +21,7 @@ | |||||
| #include "libavutil/x86_cpu.h" | #include "libavutil/x86_cpu.h" | ||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
| #include "fft.h" | |||||
| DECLARE_ALIGNED(16, static const int, m1m1m1m1[4]) = | DECLARE_ALIGNED(16, static const int, m1m1m1m1[4]) = | ||||
| { 1 << 31, 1 << 31, 1 << 31, 1 << 31 }; | { 1 << 31, 1 << 31, 1 << 31, 1 << 31 }; | ||||