|
|
|
@@ -38,17 +38,7 @@ cglobal ps_add_squares, 3, 3, %1, dst, src, n |
|
|
|
movaps m1, [srcq+mmsize] |
|
|
|
mulps m0, m0 |
|
|
|
mulps m1, m1 |
|
|
|
%if cpuflag(sse3) |
|
|
|
haddps m0, m1 |
|
|
|
%else |
|
|
|
movaps m3, m0 |
|
|
|
movaps m4, m1 |
|
|
|
shufps m3, m3, q0301 |
|
|
|
shufps m4, m4, q0301 |
|
|
|
addps m0, m3 |
|
|
|
addps m1, m4 |
|
|
|
shufps m0, m1, q2020 |
|
|
|
%endif |
|
|
|
HADDPS m0, m1, m2 |
|
|
|
addps m0, [dstq] |
|
|
|
movaps [dstq], m0 |
|
|
|
add dstq, mmsize |
|
|
|
@@ -59,9 +49,9 @@ cglobal ps_add_squares, 3, 3, %1, dst, src, n |
|
|
|
%endmacro |
|
|
|
|
|
|
|
INIT_XMM sse |
|
|
|
PS_ADD_SQUARES 3 |
|
|
|
PS_ADD_SQUARES 2 |
|
|
|
INIT_XMM sse3 |
|
|
|
PS_ADD_SQUARES 5 |
|
|
|
PS_ADD_SQUARES 3 |
|
|
|
|
|
|
|
;******************************************************************* |
|
|
|
;void ff_ps_mul_pair_single_sse(float (*dst)[2], float (*src0)[2], |
|
|
|
|