|
@@ -29,6 +29,7 @@ |
|
|
; i.e. {4x real, 4x imaginary, 4x real, ...} (or 2x respectively) |
|
|
; i.e. {4x real, 4x imaginary, 4x real, ...} (or 2x respectively) |
|
|
|
|
|
|
|
|
%include "x86inc.asm" |
|
|
%include "x86inc.asm" |
|
|
|
|
|
%include "x86util.asm" |
|
|
|
|
|
|
|
|
%if ARCH_X86_64 |
|
|
%if ARCH_X86_64 |
|
|
%define pointer resq |
|
|
%define pointer resq |
|
@@ -105,7 +106,7 @@ SECTION_TEXT |
|
|
pfadd %5, %4 ; {t6,t5} |
|
|
pfadd %5, %4 ; {t6,t5} |
|
|
pxor %3, [ps_m1p1] ; {t8,t7} |
|
|
pxor %3, [ps_m1p1] ; {t8,t7} |
|
|
mova %6, %1 |
|
|
mova %6, %1 |
|
|
pswapd %3, %3 |
|
|
|
|
|
|
|
|
PSWAPD %3, %3 |
|
|
pfadd %1, %5 ; {r0,i0} |
|
|
pfadd %1, %5 ; {r0,i0} |
|
|
pfsub %6, %5 ; {r2,i2} |
|
|
pfsub %6, %5 ; {r2,i2} |
|
|
mova %4, %2 |
|
|
mova %4, %2 |
|
@@ -396,7 +397,6 @@ fft32_interleave_avx: |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
|
|
|
INIT_XMM sse |
|
|
INIT_XMM sse |
|
|
%define movdqa movaps |
|
|
|
|
|
|
|
|
|
|
|
align 16 |
|
|
align 16 |
|
|
fft4_avx: |
|
|
fft4_avx: |
|
@@ -469,8 +469,8 @@ fft8 %+ SUFFIX: |
|
|
mova Z(2), m2 |
|
|
mova Z(2), m2 |
|
|
T2_3DN m4, m5, Z(4), Z(5) |
|
|
T2_3DN m4, m5, Z(4), Z(5) |
|
|
T2_3DN m6, m7, Z2(6), Z2(7) |
|
|
T2_3DN m6, m7, Z2(6), Z2(7) |
|
|
pswapd m0, m5 |
|
|
|
|
|
pswapd m2, m7 |
|
|
|
|
|
|
|
|
PSWAPD m0, m5 |
|
|
|
|
|
PSWAPD m2, m7 |
|
|
pxor m0, [ps_m1p1] |
|
|
pxor m0, [ps_m1p1] |
|
|
pxor m2, [ps_m1p1] |
|
|
pxor m2, [ps_m1p1] |
|
|
pfsub m5, m0 |
|
|
pfsub m5, m0 |
|
@@ -498,11 +498,11 @@ fft8 %+ SUFFIX: |
|
|
ret |
|
|
ret |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
INIT_MMX 3dnow2 |
|
|
|
|
|
FFT48_3DN |
|
|
|
|
|
|
|
|
|
|
|
%macro pswapd 2 |
|
|
|
|
|
%ifidn %1, %2 |
|
|
|
|
|
|
|
|
%if ARCH_X86_32 |
|
|
|
|
|
%macro PSWAPD 2 |
|
|
|
|
|
%if cpuflag(3dnow2) |
|
|
|
|
|
pswapd %1, %2 |
|
|
|
|
|
%elifidn %1, %2 |
|
|
movd [r0+12], %1 |
|
|
movd [r0+12], %1 |
|
|
punpckhdq %1, [r0+8] |
|
|
punpckhdq %1, [r0+8] |
|
|
%else |
|
|
%else |
|
@@ -512,9 +512,12 @@ FFT48_3DN |
|
|
%endif |
|
|
%endif |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
INIT_MMX 3dnow |
|
|
|
|
|
|
|
|
INIT_MMX 3dnow2 |
|
|
FFT48_3DN |
|
|
FFT48_3DN |
|
|
|
|
|
|
|
|
|
|
|
INIT_MMX 3dnow |
|
|
|
|
|
FFT48_3DN |
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
|
%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] |
|
|
%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] |
|
|
%define Z2(x) [zcq + o3q + mmsize*(x&1)] |
|
|
%define Z2(x) [zcq + o3q + mmsize*(x&1)] |
|
@@ -588,6 +591,7 @@ INIT_XMM sse |
|
|
DECL_PASS pass_sse, PASS_BIG 1 |
|
|
DECL_PASS pass_sse, PASS_BIG 1 |
|
|
DECL_PASS pass_interleave_sse, PASS_BIG 0 |
|
|
DECL_PASS pass_interleave_sse, PASS_BIG 0 |
|
|
|
|
|
|
|
|
|
|
|
%macro FFT_CALC_FUNC 0 |
|
|
cglobal fft_calc, 2,5,8 |
|
|
cglobal fft_calc, 2,5,8 |
|
|
mov r3d, [r0 + FFTContext.nbits] |
|
|
mov r3d, [r0 + FFTContext.nbits] |
|
|
PUSH r1 |
|
|
PUSH r1 |
|
@@ -597,23 +601,43 @@ cglobal fft_calc, 2,5,8 |
|
|
FFT_DISPATCH _interleave %+ SUFFIX, r1 |
|
|
FFT_DISPATCH _interleave %+ SUFFIX, r1 |
|
|
POP rcx |
|
|
POP rcx |
|
|
POP r4 |
|
|
POP r4 |
|
|
cmp rcx, 4 |
|
|
|
|
|
|
|
|
cmp rcx, 3+(mmsize/16) |
|
|
jg .end |
|
|
jg .end |
|
|
mov r2, -1 |
|
|
mov r2, -1 |
|
|
add rcx, 3 |
|
|
add rcx, 3 |
|
|
shl r2, cl |
|
|
shl r2, cl |
|
|
sub r4, r2 |
|
|
sub r4, r2 |
|
|
.loop |
|
|
.loop |
|
|
|
|
|
%if mmsize == 8 |
|
|
|
|
|
PSWAPD m0, [r4 + r2 + 4] |
|
|
|
|
|
mova [r4 + r2 + 4], m0 |
|
|
|
|
|
%else |
|
|
movaps xmm0, [r4 + r2] |
|
|
movaps xmm0, [r4 + r2] |
|
|
movaps xmm1, xmm0 |
|
|
movaps xmm1, xmm0 |
|
|
unpcklps xmm0, [r4 + r2 + 16] |
|
|
unpcklps xmm0, [r4 + r2 + 16] |
|
|
unpckhps xmm1, [r4 + r2 + 16] |
|
|
unpckhps xmm1, [r4 + r2 + 16] |
|
|
movaps [r4 + r2], xmm0 |
|
|
movaps [r4 + r2], xmm0 |
|
|
movaps [r4 + r2 + 16], xmm1 |
|
|
movaps [r4 + r2 + 16], xmm1 |
|
|
add r2, 32 |
|
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
add r2, mmsize*2 |
|
|
jl .loop |
|
|
jl .loop |
|
|
.end: |
|
|
.end: |
|
|
|
|
|
%if cpuflag(3dnow) |
|
|
|
|
|
femms |
|
|
|
|
|
RET |
|
|
|
|
|
%else |
|
|
REP_RET |
|
|
REP_RET |
|
|
|
|
|
%endif |
|
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
|
|
%if ARCH_X86_32 |
|
|
|
|
|
INIT_MMX 3dnow |
|
|
|
|
|
FFT_CALC_FUNC |
|
|
|
|
|
INIT_MMX 3dnow2 |
|
|
|
|
|
FFT_CALC_FUNC |
|
|
|
|
|
%endif |
|
|
|
|
|
INIT_XMM sse |
|
|
|
|
|
FFT_CALC_FUNC |
|
|
|
|
|
|
|
|
cglobal fft_permute, 2,7,1 |
|
|
cglobal fft_permute, 2,7,1 |
|
|
mov r4, [r0 + FFTContext.revtab] |
|
|
mov r4, [r0 + FFTContext.revtab] |
|
@@ -648,6 +672,7 @@ cglobal fft_permute, 2,7,1 |
|
|
jl .loopcopy |
|
|
jl .loopcopy |
|
|
REP_RET |
|
|
REP_RET |
|
|
|
|
|
|
|
|
|
|
|
%macro IMDCT_CALC_FUNC 0 |
|
|
cglobal imdct_calc, 3,5,3 |
|
|
cglobal imdct_calc, 3,5,3 |
|
|
mov r3d, [r0 + FFTContext.mdctsize] |
|
|
mov r3d, [r0 + FFTContext.mdctsize] |
|
|
mov r4, [r0 + FFTContext.imdcthalf] |
|
|
mov r4, [r0 + FFTContext.imdcthalf] |
|
@@ -671,22 +696,45 @@ cglobal imdct_calc, 3,5,3 |
|
|
POP r3 |
|
|
POP r3 |
|
|
lea r0, [r1 + 2*r3] |
|
|
lea r0, [r1 + 2*r3] |
|
|
mov r2, r3 |
|
|
mov r2, r3 |
|
|
sub r3, 16 |
|
|
|
|
|
|
|
|
sub r3, mmsize |
|
|
neg r2 |
|
|
neg r2 |
|
|
movaps xmm2, [ps_m1m1m1m1] |
|
|
|
|
|
|
|
|
mova m2, [ps_m1m1m1m1] |
|
|
.loop: |
|
|
.loop: |
|
|
movaps xmm0, [r1 + r3] |
|
|
|
|
|
movaps xmm1, [r0 + r2] |
|
|
|
|
|
shufps xmm0, xmm0, 0x1b |
|
|
|
|
|
shufps xmm1, xmm1, 0x1b |
|
|
|
|
|
xorps xmm0, xmm2 |
|
|
|
|
|
movaps [r0 + r3], xmm1 |
|
|
|
|
|
movaps [r1 + r2], xmm0 |
|
|
|
|
|
sub r3, 16 |
|
|
|
|
|
add r2, 16 |
|
|
|
|
|
|
|
|
%if mmsize == 8 |
|
|
|
|
|
PSWAPD m0, [r1 + r3] |
|
|
|
|
|
PSWAPD m1, [r0 + r2] |
|
|
|
|
|
pxor m0, m2 |
|
|
|
|
|
%else |
|
|
|
|
|
mova m0, [r1 + r3] |
|
|
|
|
|
mova m1, [r0 + r2] |
|
|
|
|
|
shufps m0, m0, 0x1b |
|
|
|
|
|
shufps m1, m1, 0x1b |
|
|
|
|
|
xorps m0, m2 |
|
|
|
|
|
%endif |
|
|
|
|
|
mova [r0 + r3], m1 |
|
|
|
|
|
mova [r1 + r2], m0 |
|
|
|
|
|
sub r3, mmsize |
|
|
|
|
|
add r2, mmsize |
|
|
jl .loop |
|
|
jl .loop |
|
|
|
|
|
%if cpuflag(3dnow) |
|
|
|
|
|
femms |
|
|
|
|
|
RET |
|
|
|
|
|
%else |
|
|
REP_RET |
|
|
REP_RET |
|
|
|
|
|
%endif |
|
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
|
|
%if ARCH_X86_32 |
|
|
|
|
|
INIT_MMX 3dnow |
|
|
|
|
|
IMDCT_CALC_FUNC |
|
|
|
|
|
INIT_MMX 3dnow2 |
|
|
|
|
|
IMDCT_CALC_FUNC |
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
|
|
|
|
INIT_XMM sse |
|
|
|
|
|
IMDCT_CALC_FUNC |
|
|
|
|
|
|
|
|
|
|
|
%if ARCH_X86_32 |
|
|
INIT_MMX 3dnow |
|
|
INIT_MMX 3dnow |
|
|
%define mulps pfmul |
|
|
%define mulps pfmul |
|
|
%define addps pfadd |
|
|
%define addps pfadd |
|
@@ -697,6 +745,7 @@ DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q] |
|
|
DECL_PASS pass_interleave_3dnow, PASS_BIG 0 |
|
|
DECL_PASS pass_interleave_3dnow, PASS_BIG 0 |
|
|
%define pass_3dnow2 pass_3dnow |
|
|
%define pass_3dnow2 pass_3dnow |
|
|
%define pass_interleave_3dnow2 pass_interleave_3dnow |
|
|
%define pass_interleave_3dnow2 pass_interleave_3dnow |
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
|
%ifdef PIC |
|
|
%ifdef PIC |
|
|
%define SECTION_REL - $$ |
|
|
%define SECTION_REL - $$ |
|
@@ -760,12 +809,14 @@ DECL_FFT 6, _interleave |
|
|
INIT_XMM sse |
|
|
INIT_XMM sse |
|
|
DECL_FFT 5 |
|
|
DECL_FFT 5 |
|
|
DECL_FFT 5, _interleave |
|
|
DECL_FFT 5, _interleave |
|
|
|
|
|
%if ARCH_X86_32 |
|
|
INIT_MMX 3dnow |
|
|
INIT_MMX 3dnow |
|
|
DECL_FFT 4 |
|
|
DECL_FFT 4 |
|
|
DECL_FFT 4, _interleave |
|
|
DECL_FFT 4, _interleave |
|
|
INIT_MMX 3dnow2 |
|
|
INIT_MMX 3dnow2 |
|
|
DECL_FFT 4 |
|
|
DECL_FFT 4 |
|
|
DECL_FFT 4, _interleave |
|
|
DECL_FFT 4, _interleave |
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
|
INIT_XMM sse |
|
|
INIT_XMM sse |
|
|
%undef mulps |
|
|
%undef mulps |
|
@@ -775,6 +826,37 @@ INIT_XMM sse |
|
|
%undef unpckhps |
|
|
%undef unpckhps |
|
|
|
|
|
|
|
|
%macro PREROTATER 5 ;-2*k, 2*k, input+n4, tcos+n8, tsin+n8 |
|
|
%macro PREROTATER 5 ;-2*k, 2*k, input+n4, tcos+n8, tsin+n8 |
|
|
|
|
|
%if mmsize == 8 ; j*2+2-n4, n4-2-j*2, input+n4, tcos+n8, tsin+n8 |
|
|
|
|
|
PSWAPD m0, [%3+%2*4] |
|
|
|
|
|
movq m2, [%3+%1*4-8] |
|
|
|
|
|
movq m3, m0 |
|
|
|
|
|
punpckldq m0, m2 |
|
|
|
|
|
punpckhdq m2, m3 |
|
|
|
|
|
movd m1, [%4+%1*2-4] ; tcos[j] |
|
|
|
|
|
movd m3, [%4+%2*2] ; tcos[n4-j-1] |
|
|
|
|
|
punpckldq m1, [%5+%1*2-4] ; tsin[j] |
|
|
|
|
|
punpckldq m3, [%5+%2*2] ; tsin[n4-j-1] |
|
|
|
|
|
|
|
|
|
|
|
mova m4, m0 |
|
|
|
|
|
PSWAPD m5, m1 |
|
|
|
|
|
pfmul m0, m1 |
|
|
|
|
|
pfmul m4, m5 |
|
|
|
|
|
mova m6, m2 |
|
|
|
|
|
PSWAPD m5, m3 |
|
|
|
|
|
pfmul m2, m3 |
|
|
|
|
|
pfmul m6, m5 |
|
|
|
|
|
%if cpuflag(3dnow2) |
|
|
|
|
|
pfpnacc m0, m4 |
|
|
|
|
|
pfpnacc m2, m6 |
|
|
|
|
|
%else |
|
|
|
|
|
SBUTTERFLY dq, 0, 4, 1 |
|
|
|
|
|
SBUTTERFLY dq, 2, 6, 3 |
|
|
|
|
|
pxor m4, m7 |
|
|
|
|
|
pxor m6, m7 |
|
|
|
|
|
pfadd m0, m4 |
|
|
|
|
|
pfadd m2, m6 |
|
|
|
|
|
%endif |
|
|
|
|
|
%else |
|
|
movaps xmm0, [%3+%2*4] |
|
|
movaps xmm0, [%3+%2*4] |
|
|
movaps xmm1, [%3+%1*4-0x10] |
|
|
movaps xmm1, [%3+%1*4-0x10] |
|
|
movaps xmm2, xmm0 |
|
|
movaps xmm2, xmm0 |
|
@@ -795,6 +877,7 @@ INIT_XMM sse |
|
|
movaps xmm0, xmm1 |
|
|
movaps xmm0, xmm1 |
|
|
unpcklps xmm1, xmm2 |
|
|
unpcklps xmm1, xmm2 |
|
|
unpckhps xmm0, xmm2 |
|
|
unpckhps xmm0, xmm2 |
|
|
|
|
|
%endif |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 |
|
|
%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 |
|
@@ -863,6 +946,40 @@ INIT_XMM sse |
|
|
jl .post |
|
|
jl .post |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
|
|
%macro CMUL_3DNOW 6 |
|
|
|
|
|
mova m6, [%1+%2*2] |
|
|
|
|
|
mova %3, [%1+%2*2+8] |
|
|
|
|
|
mova %4, m6 |
|
|
|
|
|
mova m7, %3 |
|
|
|
|
|
pfmul m6, [%5+%2] |
|
|
|
|
|
pfmul %3, [%6+%2] |
|
|
|
|
|
pfmul %4, [%6+%2] |
|
|
|
|
|
pfmul m7, [%5+%2] |
|
|
|
|
|
pfsub %3, m6 |
|
|
|
|
|
pfadd %4, m7 |
|
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
|
|
%macro POSROTATESHUF_3DNOW 5 ;j, k, z+n8, tcos+n8, tsin+n8 |
|
|
|
|
|
.post: |
|
|
|
|
|
CMUL_3DNOW %3, %1, m0, m1, %4, %5 |
|
|
|
|
|
CMUL_3DNOW %3, %2, m2, m3, %4, %5 |
|
|
|
|
|
movd [%3+%1*2+ 0], m0 |
|
|
|
|
|
movd [%3+%2*2+12], m1 |
|
|
|
|
|
movd [%3+%2*2+ 0], m2 |
|
|
|
|
|
movd [%3+%1*2+12], m3 |
|
|
|
|
|
psrlq m0, 32 |
|
|
|
|
|
psrlq m1, 32 |
|
|
|
|
|
psrlq m2, 32 |
|
|
|
|
|
psrlq m3, 32 |
|
|
|
|
|
movd [%3+%1*2+ 8], m0 |
|
|
|
|
|
movd [%3+%2*2+ 4], m1 |
|
|
|
|
|
movd [%3+%2*2+ 8], m2 |
|
|
|
|
|
movd [%3+%1*2+ 4], m3 |
|
|
|
|
|
sub %2, 8 |
|
|
|
|
|
add %1, 8 |
|
|
|
|
|
jl .post |
|
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
%macro DECL_IMDCT 1 |
|
|
%macro DECL_IMDCT 1 |
|
|
cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input |
|
|
cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input |
|
|
%if ARCH_X86_64 |
|
|
%if ARCH_X86_64 |
|
@@ -892,22 +1009,34 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i |
|
|
push rrevtab |
|
|
push rrevtab |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
|
|
|
sub r3, 4 |
|
|
|
|
|
%if ARCH_X86_64 |
|
|
|
|
|
|
|
|
sub r3, mmsize/4 |
|
|
|
|
|
%if ARCH_X86_64 || mmsize == 8 |
|
|
xor r4, r4 |
|
|
xor r4, r4 |
|
|
sub r4, r3 |
|
|
sub r4, r3 |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
%if notcpuflag(3dnow2) && mmsize == 8 |
|
|
|
|
|
movd m7, [ps_m1m1m1m1] |
|
|
|
|
|
%endif |
|
|
.pre: |
|
|
.pre: |
|
|
%if ARCH_X86_64 == 0 |
|
|
%if ARCH_X86_64 == 0 |
|
|
;unspill |
|
|
;unspill |
|
|
|
|
|
%if mmsize != 8 |
|
|
xor r4, r4 |
|
|
xor r4, r4 |
|
|
sub r4, r3 |
|
|
sub r4, r3 |
|
|
mov rtsin, [esp+4] |
|
|
|
|
|
|
|
|
%endif |
|
|
mov rtcos, [esp+8] |
|
|
mov rtcos, [esp+8] |
|
|
|
|
|
mov rtsin, [esp+4] |
|
|
%endif |
|
|
%endif |
|
|
|
|
|
|
|
|
PREROTATER r4, r3, r2, rtcos, rtsin |
|
|
PREROTATER r4, r3, r2, rtcos, rtsin |
|
|
%if ARCH_X86_64 |
|
|
|
|
|
|
|
|
%if mmsize == 8 |
|
|
|
|
|
mov r6, [esp] ; rrevtab = ptr+n8 |
|
|
|
|
|
movzx r5, word [rrevtab+r4-2] ; rrevtab[j] |
|
|
|
|
|
movzx r6, word [rrevtab+r3] ; rrevtab[n4-j-1] |
|
|
|
|
|
mova [r1+r5*8], m0 |
|
|
|
|
|
mova [r1+r6*8], m2 |
|
|
|
|
|
add r4, 2 |
|
|
|
|
|
%elif ARCH_X86_64 |
|
|
movzx r5, word [rrevtab+r4-4] |
|
|
movzx r5, word [rrevtab+r4-4] |
|
|
movzx r6, word [rrevtab+r4-2] |
|
|
movzx r6, word [rrevtab+r4-2] |
|
|
movzx r10, word [rrevtab+r3] |
|
|
movzx r10, word [rrevtab+r3] |
|
@@ -928,7 +1057,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i |
|
|
movlps [r1+r5*8], xmm1 |
|
|
movlps [r1+r5*8], xmm1 |
|
|
movhps [r1+r4*8], xmm1 |
|
|
movhps [r1+r4*8], xmm1 |
|
|
%endif |
|
|
%endif |
|
|
sub r3, 4 |
|
|
|
|
|
|
|
|
sub r3, mmsize/4 |
|
|
jns .pre |
|
|
jns .pre |
|
|
|
|
|
|
|
|
mov r5, r0 |
|
|
mov r5, r0 |
|
@@ -953,12 +1082,23 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i |
|
|
%1 r0, r1, r6, rtcos, rtsin |
|
|
%1 r0, r1, r6, rtcos, rtsin |
|
|
%if ARCH_X86_64 == 0 |
|
|
%if ARCH_X86_64 == 0 |
|
|
add esp, 12 |
|
|
add esp, 12 |
|
|
|
|
|
%endif |
|
|
|
|
|
%if mmsize == 8 |
|
|
|
|
|
femms |
|
|
%endif |
|
|
%endif |
|
|
RET |
|
|
RET |
|
|
%endmacro |
|
|
%endmacro |
|
|
|
|
|
|
|
|
DECL_IMDCT POSROTATESHUF |
|
|
DECL_IMDCT POSROTATESHUF |
|
|
|
|
|
|
|
|
|
|
|
%if ARCH_X86_32 |
|
|
|
|
|
INIT_MMX 3dnow |
|
|
|
|
|
DECL_IMDCT POSROTATESHUF_3DNOW |
|
|
|
|
|
|
|
|
|
|
|
INIT_MMX 3dnow2 |
|
|
|
|
|
DECL_IMDCT POSROTATESHUF_3DNOW |
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
|
INIT_YMM avx |
|
|
INIT_YMM avx |
|
|
|
|
|
|
|
|
%if HAVE_AVX |
|
|
%if HAVE_AVX |
|
|