Originally committed as revision 19864 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.6)
| @@ -465,6 +465,7 @@ OBJS-$(HAVE_MMX) += x86/cpuid.o \ | |||
| x86/dnxhd_mmx.o \ | |||
| x86/dsputil_mmx.o \ | |||
| x86/fdct_mmx.o \ | |||
| x86/fft.o \ | |||
| x86/idct_mmx_xvid.o \ | |||
| x86/idct_sse2_xvid.o \ | |||
| x86/motion_est_mmx.o \ | |||
| @@ -480,6 +481,7 @@ OBJS-$(ARCH_ALPHA) += alpha/dsputil_alpha.o \ | |||
| OBJS-$(ARCH_ARM) += arm/dsputil_arm.o \ | |||
| arm/dsputil_arm_s.o \ | |||
| arm/fft_init_arm.o \ | |||
| arm/jrevdct_arm.o \ | |||
| arm/mpegvideo_arm.o \ | |||
| arm/simple_idct_arm.o \ | |||
| @@ -0,0 +1,39 @@ | |||
| /* | |||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavcodec/dsputil.h" | |||
| /* Prototypes of the NEON routines installed by ff_fft_init_arm(); they are only declared in this file, not defined. */ | |||
| /* Complex FFT */ | |||
| void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); | |||
| /* Forward and inverse MDCT */ | |||
| void ff_mdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| /** | |||
| * Install the ARM-specific FFT/MDCT routines into an FFTContext. | |||
| * | |||
| * When the build has NEON enabled (HAVE_NEON is non-zero) every transform | |||
| * hook of the context is pointed at its NEON implementation; otherwise the | |||
| * context is left exactly as the caller set it up. | |||
| * | |||
| * @param s context whose function pointers are to be overridden | |||
| */ | |||
| av_cold void ff_fft_init_arm(FFTContext *s) | |||
| { | |||
|     if (HAVE_NEON) { | |||
|         /* MDCT hooks */ | |||
|         s->mdct_calc   = ff_mdct_calc_neon; | |||
|         s->imdct_half  = ff_imdct_half_neon; | |||
|         s->imdct_calc  = ff_imdct_calc_neon; | |||
|         /* complex FFT hooks */ | |||
|         s->fft_calc    = ff_fft_calc_neon; | |||
|         s->fft_permute = ff_fft_permute_neon; | |||
|     } | |||
| } | |||
| @@ -683,6 +683,7 @@ typedef struct FFTContext { | |||
| void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void (*mdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| int split_radix; | |||
| } FFTContext; | |||
| extern FFTSample* const ff_cos_tabs[13]; | |||
| @@ -694,14 +695,11 @@ extern FFTSample* const ff_cos_tabs[13]; | |||
| */ | |||
| int ff_fft_init(FFTContext *s, int nbits, int inverse); | |||
| void ff_fft_permute_c(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_c(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_init_altivec(FFTContext *s); | |||
| void ff_fft_init_mmx(FFTContext *s); | |||
| void ff_fft_init_neon(FFTContext *s); | |||
| /** | |||
| * Do the permutation needed BEFORE calling ff_fft_calc(). | |||
| @@ -774,15 +772,6 @@ int ff_mdct_init(MDCTContext *s, int nbits, int inverse, double scale); | |||
| void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_mdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_mdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_mdct_end(MDCTContext *s); | |||
| /* Real Discrete Fourier Transform */ | |||
| @@ -62,7 +62,6 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) | |||
| { | |||
| int i, j, m, n; | |||
| float alpha, c1, s1, s2; | |||
| int split_radix = 1; | |||
| int av_unused has_vectors; | |||
| if (nbits < 2 || nbits > 16) | |||
| @@ -87,41 +86,13 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) | |||
| s->imdct_half = ff_imdct_half_c; | |||
| s->mdct_calc = ff_mdct_calc_c; | |||
| s->exptab1 = NULL; | |||
| s->split_radix = 1; | |||
| #if HAVE_MMX && HAVE_YASM | |||
| has_vectors = mm_support(); | |||
| if (has_vectors & FF_MM_SSE && HAVE_SSE) { | |||
| /* SSE for P3/P4/K8 */ | |||
| s->imdct_calc = ff_imdct_calc_sse; | |||
| s->imdct_half = ff_imdct_half_sse; | |||
| s->fft_permute = ff_fft_permute_sse; | |||
| s->fft_calc = ff_fft_calc_sse; | |||
| } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) { | |||
| /* 3DNowEx for K7 */ | |||
| s->imdct_calc = ff_imdct_calc_3dn2; | |||
| s->imdct_half = ff_imdct_half_3dn2; | |||
| s->fft_calc = ff_fft_calc_3dn2; | |||
| } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) { | |||
| /* 3DNow! for K6-2/3 */ | |||
| s->imdct_calc = ff_imdct_calc_3dn; | |||
| s->imdct_half = ff_imdct_half_3dn; | |||
| s->fft_calc = ff_fft_calc_3dn; | |||
| } | |||
| #elif HAVE_ALTIVEC | |||
| has_vectors = mm_support(); | |||
| if (has_vectors & FF_MM_ALTIVEC) { | |||
| s->fft_calc = ff_fft_calc_altivec; | |||
| split_radix = 0; | |||
| } | |||
| #elif HAVE_NEON | |||
| s->fft_permute = ff_fft_permute_neon; | |||
| s->fft_calc = ff_fft_calc_neon; | |||
| s->imdct_calc = ff_imdct_calc_neon; | |||
| s->imdct_half = ff_imdct_half_neon; | |||
| s->mdct_calc = ff_mdct_calc_neon; | |||
| #endif | |||
| if (ARCH_ARM) ff_fft_init_arm(s); | |||
| if (HAVE_ALTIVEC) ff_fft_init_altivec(s); | |||
| if (HAVE_MMX) ff_fft_init_mmx(s); | |||
| if (split_radix) { | |||
| if (s->split_radix) { | |||
| for(j=4; j<=nbits; j++) { | |||
| int m = 1<<j; | |||
| double freq = 2*M_PI/m; | |||
| @@ -133,3 +133,9 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| } | |||
| /** | |||
| * Select the AltiVec FFT for an FFTContext when the running CPU supports it. | |||
| * | |||
| * The AltiVec routine is installed only if mm_support() reports | |||
| * FF_MM_ALTIVEC; otherwise the context is left untouched, so the routines | |||
| * and the split_radix value chosen by the caller stay in effect. | |||
| * When the AltiVec routine is installed, split_radix is cleared as well; | |||
| * ff_fft_init() tests that field after the per-architecture init calls. | |||
| * | |||
| * @param s context whose fft_calc hook and split_radix flag may be overridden | |||
| */ | |||
| av_cold void ff_fft_init_altivec(FFTContext *s) | |||
| { | |||
|     /* HAVE_ALTIVEC only says the build contains AltiVec code; the CPU must be checked at run time too. */ | |||
|     if (!(mm_support() & FF_MM_ALTIVEC)) | |||
|         return; | |||
|     s->fft_calc    = ff_fft_calc_altivec; | |||
|     s->split_radix = 0; | |||
| } | |||
| @@ -0,0 +1,44 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "fft.h" | |||
| /** | |||
| * Pick the x86 SIMD FFT/IMDCT routines for an FFTContext. | |||
| * | |||
| * Nothing is done unless the build has HAVE_YASM set. Otherwise the CPU | |||
| * flags returned by mm_support() are examined and the first matching | |||
| * instruction set wins, in this order: SSE, 3DNowExt, 3DNow!. Each choice | |||
| * also requires the corresponding HAVE_* build switch. Only the SSE case | |||
| * replaces fft_permute; the 3DNow! variants leave it as the caller set it. | |||
| * | |||
| * @param s context whose function pointers are to be overridden | |||
| */ | |||
| av_cold void ff_fft_init_mmx(FFTContext *s) | |||
| { | |||
| #if HAVE_YASM | |||
|     int cpu_flags = mm_support(); | |||
|     if (HAVE_SSE && (cpu_flags & FF_MM_SSE)) { | |||
|         /* SSE for P3/P4/K8 */ | |||
|         s->fft_calc    = ff_fft_calc_sse; | |||
|         s->fft_permute = ff_fft_permute_sse; | |||
|         s->imdct_half  = ff_imdct_half_sse; | |||
|         s->imdct_calc  = ff_imdct_calc_sse; | |||
|     } else if (HAVE_AMD3DNOWEXT && (cpu_flags & FF_MM_3DNOWEXT)) { | |||
|         /* 3DNowEx for K7 */ | |||
|         s->fft_calc    = ff_fft_calc_3dn2; | |||
|         s->imdct_half  = ff_imdct_half_3dn2; | |||
|         s->imdct_calc  = ff_imdct_calc_3dn2; | |||
|     } else if (HAVE_AMD3DNOW && (cpu_flags & FF_MM_3DNOW)) { | |||
|         /* 3DNow! for K6-2/3 */ | |||
|         s->fft_calc    = ff_fft_calc_3dn; | |||
|         s->imdct_half  = ff_imdct_half_3dn; | |||
|         s->imdct_calc  = ff_imdct_calc_3dn; | |||
|     } | |||
| #endif /* HAVE_YASM */ | |||
| } | |||
| @@ -0,0 +1,36 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_X86_FFT_H | |||
| #define AVCODEC_X86_FFT_H | |||
| #include "libavcodec/dsputil.h" | |||
| /* Prototypes of the x86 SIMD FFT and inverse MDCT routines, grouped by instruction set. */ | |||
| /* SSE */ | |||
| void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); | |||
| void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| /* 3DNow! */ | |||
| void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); | |||
| void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| /* 3DNowExt */ | |||
| void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); | |||
| void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); | |||
| #endif /* AVCODEC_X86_FFT_H */ | |||
| @@ -21,6 +21,7 @@ | |||
| #include "libavutil/x86_cpu.h" | |||
| #include "libavcodec/dsputil.h" | |||
| #include "fft.h" | |||
| DECLARE_ALIGNED_8(static const int, m1m1[2]) = { 1<<31, 1<<31 }; | |||
| @@ -21,6 +21,7 @@ | |||
| #include "libavutil/x86_cpu.h" | |||
| #include "libavcodec/dsputil.h" | |||
| #include "fft.h" | |||
| DECLARE_ALIGNED(16, static const int, m1m1m1m1[4]) = | |||
| { 1 << 31, 1 << 31, 1 << 31, 1 << 31 }; | |||