* commit '566b7a20fd0cab44d344329538d314454a0bcc2f': x86: float dsp: butterflies_float SSE; Conflicts: libavutil/x86/float_dsp.asm; Merged-by: Michael Niedermayer <michaelni@gmx.at> (tags/n2.0)
| @@ -268,21 +268,23 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| INIT_XMM sse | INIT_XMM sse | ||||
| cglobal butterflies_float, 3,3,3, src0, src1, len | cglobal butterflies_float, 3,3,3, src0, src1, len | ||||
| movsxdifnidn lenq, lend | |||||
| test lenq, lenq | |||||
| jz .end | |||||
| shl lenq, 2 | |||||
| add src0q, lenq | |||||
| add src1q, lenq | |||||
| neg lenq | |||||
| %if ARCH_X86_64 | |||||
| movsxd lenq, lend | |||||
| %endif | |||||
| test lenq, lenq | |||||
| jz .end | |||||
| shl lenq, 2 | |||||
| add src0q, lenq | |||||
| add src1q, lenq | |||||
| neg lenq | |||||
| .loop: | .loop: | ||||
| mova m0, [src0q + lenq] | |||||
| mova m1, [src1q + lenq] | |||||
| subps m2, m0, m1 | |||||
| addps m0, m0, m1 | |||||
| mova [src1q + lenq], m2 | |||||
| mova [src0q + lenq], m0 | |||||
| add lenq, mmsize | |||||
| jl .loop | |||||
| mova m0, [src0q + lenq] | |||||
| mova m1, [src1q + lenq] | |||||
| subps m2, m0, m1 | |||||
| addps m0, m0, m1 | |||||
| mova [src1q + lenq], m2 | |||||
| mova [src0q + lenq], m0 | |||||
| add lenq, mmsize | |||||
| jl .loop | |||||
| .end: | .end: | ||||
| REP_RET | REP_RET | ||||