@@ -1158,12 +1158,7 @@ ALIGN 16
     add     src1q, 2*mmsize
     sub     lenq,  2*mmsize
     jge .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -1193,12 +1188,7 @@ ALIGN 16
     sub     lenq,  2*mmsize
     jge .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -1243,10 +1233,6 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len
 %endif
     add     lenq, mmsize
     jl .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%endif
 .end:
     REP_RET
 %endmacro
@@ -749,9 +749,6 @@ section .text
 ; The others pass args in registers and don't spill anything.
 cglobal fft_dispatch%2, 2,5,8, z, nbits
     FFT_DISPATCH fullsuffix, nbits
-%if mmsize == 32
-    vzeroupper
-%endif
     RET
 %endmacro ; DECL_FFT
@@ -956,9 +953,6 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
     %1 r0, r1, r6, rtcos, rtsin
 %if ARCH_X86_64 == 0
     add esp, 12
-%endif
-%if mmsize == 32
-    vzeroupper
 %endif
     RET
 %endmacro
@@ -145,12 +145,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len
     mova  [dstq+lenq+mmsize], m2
     add     lenq, mmsize*2
     jl .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse2
@@ -218,12 +213,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len
     mova  [dstq+lenq+3*mmsize], m3
     add     lenq, mmsize*4
     jl .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse2
@@ -51,12 +51,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
     add     srcq, mmsize*2
     sub     lend, mmsize*2/4
     jg .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -175,12 +170,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
     add     src0q, mmsize
     sub     lend, mmsize/4
     jg .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -40,12 +40,7 @@ ALIGN 16
     sub     lenq, 2*mmsize
     jge .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
@@ -86,12 +81,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
     mova  [dstq+lenq+mmsize], m2
     sub     lenq, 2*mmsize
     jge .loop
-%if mmsize == 32
-    vzeroupper
-    RET
-%else
     REP_RET
-%endif
 %endmacro

 INIT_XMM sse
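
Note on the hunks above: each one deletes the same hand-rolled AVX epilogue, because the x86inc.asm changes that follow make the return macros handle it themselves (RET gains the vzeroupper, and REP_RET routes to RET whenever mmsize == 32). As a rough sketch of what a caller looks like afterwards, here is a made-up YMM/XMM function; the name, argument list and loop body are illustrative only and do not appear in this patch:

; Illustrative only, assuming the updated x86inc/x86util macros are included.
%include "x86util.asm"        ; path depends on tree layout

section .text

%macro EXAMPLE_COPY 0
; void example_copy(float *dst, const float *src, int len)
; assumes aligned pointers and len a multiple of mmsize/4
cglobal example_copy, 3,3,1, dst, src, len
    lea    lenq, [4*lend]     ; element count -> byte count
    add    srcq, lenq
    add    dstq, lenq
    neg    lenq
.loop:
    mova   m0, [srcq+lenq]
    mova   [dstq+lenq], m0
    add    lenq, mmsize
    jl .loop
    REP_RET                   ; emits vzeroupper + ret when mmsize == 32
%endmacro

INIT_XMM sse
EXAMPLE_COPY
INIT_YMM avx
EXAMPLE_COPY
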
@@ -369,11 +369,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL rsp
     POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro

 %macro REP_RET 0
-    %if regs_used > 7 || xmm_regs_used > 6
+    %if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
         RET
     %else
         rep ret
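
For a function declared with INIT_YMM (mmsize == 32) that has no GPRs to pop and no XMM registers to restore, the WIN64 macros above now boil down to the sequence below; the same change is repeated for the other calling conventions in the two hunks that follow. This is a paraphrase of the macro output, not a literal assembler listing:

    ; RET, mmsize == 32, nothing to restore
    vzeroupper
    ret

    ; REP_RET, mmsize == 32: the added "|| mmsize == 32" forces the RET path,
    ; so the two-byte rep ret (used to dodge a return mispredict on some AMD
    ; CPUs when ret is a branch target) is replaced by
    vzeroupper
    ret

With mmsize == 16 nothing changes: RET is still a plain ret and REP_RET still emits rep ret when no registers were pushed.
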
@@ -410,11 +413,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
 %macro RET 0
     POP_IF_USED 14, 13, 12, 11, 10, 9
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro

 %macro REP_RET 0
-    %if regs_used > 9
+    %if regs_used > 9 || mmsize == 32
         RET
     %else
         rep ret
@@ -456,11 +462,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %macro RET 0
     POP_IF_USED 6, 5, 4, 3
+%if mmsize == 32
+    vzeroupper
+%endif
     ret
 %endmacro

 %macro REP_RET 0
-    %if regs_used > 3
+    %if regs_used > 3 || mmsize == 32
         RET
     %else
         rep ret