| @@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | |||||
| # GCC inline assembly optimizations | # GCC inline assembly optimizations | ||||
| # subsystems | # subsystems | ||||
| MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o | |||||
| MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o | MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o | ||||
| MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ | MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ | ||||
| x86/hpeldsp_mmx.o | x86/hpeldsp_mmx.o | ||||
| @@ -135,3 +135,46 @@ VECTOR_CLIP_INT32 11, 1, 1, 0 | |||||
| %else | %else | ||||
| VECTOR_CLIP_INT32 6, 1, 0, 0 | VECTOR_CLIP_INT32 6, 1, 0, 0 | ||||
| %endif | %endif | ||||
| ; void ff_vector_clipf_sse(float *dst, const float *src, | |||||
| ;                          int len, float min, float max) | |||||
| ; | |||||
| ; Clamp every float of src to the range [min, max] and store it to dst. | |||||
| ; Each iteration handles four vector registers, i.e. mmsize floats, and the | |||||
| ; buffers are walked from their end towards their start. The loop body runs | |||||
| ; before len is tested, so len has to be a positive multiple of mmsize. | |||||
| ; All loads and stores use mova (the aligned move), so both pointers are | |||||
| ; expected to be mmsize-byte aligned. | |||||
| ; | |||||
| ; Registers: m0 = min in every lane, m1 = max in every lane, m2-m5 = data. | |||||
| INIT_XMM sse | |||||
| cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max | |||||
| ; min and max arrive in different places depending on the target ABI | |||||
| %if ARCH_X86_32 | |||||
|     VBROADCASTSS m0, minm ; both floats are read from their memory slots | |||||
|     VBROADCASTSS m1, maxm | |||||
| %elif WIN64 | |||||
|     VBROADCASTSS m0, m3 ; min is the 4th argument, held in xmm3 | |||||
|     VBROADCASTSS m1, maxm ; max is the 5th argument, read from memory | |||||
| %else ; 64bit sysv | |||||
|     VBROADCASTSS m0, m0 ; min is already in xmm0, max in xmm1: splat in place | |||||
|     VBROADCASTSS m1, m1 | |||||
| %endif | |||||
|     movsxdifnidn lenq, lend ; x86inc helper: widen the int len where lenq differs from lend | |||||
| .loop: | |||||
|     mova m2, [srcq + 4 * lenq - 4 * mmsize] ; last 4 * mmsize bytes not yet processed | |||||
|     mova m3, [srcq + 4 * lenq - 3 * mmsize] | |||||
|     mova m4, [srcq + 4 * lenq - 2 * mmsize] | |||||
|     mova m5, [srcq + 4 * lenq - 1 * mmsize] | |||||
|     maxps m2, m0 ; apply the lower bound | |||||
|     maxps m3, m0 | |||||
|     maxps m4, m0 | |||||
|     maxps m5, m0 | |||||
|     minps m2, m1 ; apply the upper bound | |||||
|     minps m3, m1 | |||||
|     minps m4, m1 | |||||
|     minps m5, m1 | |||||
|     mova [dstq + 4 * lenq - 4 * mmsize], m2 | |||||
|     mova [dstq + 4 * lenq - 3 * mmsize], m3 | |||||
|     mova [dstq + 4 * lenq - 2 * mmsize], m4 | |||||
|     mova [dstq + 4 * lenq - 1 * mmsize], m5 | |||||
|     sub lenq, mmsize ; 4 registers * (mmsize / 4) floats = mmsize floats done | |||||
|     jg .loop ; keep going while elements remain | |||||
|     RET | |||||
| @@ -49,7 +49,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) | |||||
| if (EXTERNAL_MMXEXT(cpu_flags)) | if (EXTERNAL_MMXEXT(cpu_flags)) | ||||
| c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; | c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; | ||||
| if (INLINE_SSE(cpu_flags)) | |||||
| if (EXTERNAL_SSE(cpu_flags)) | |||||
| c->vector_clipf = ff_vector_clipf_sse; | c->vector_clipf = ff_vector_clipf_sse; | ||||
| if (EXTERNAL_SSE2(cpu_flags)) { | if (EXTERNAL_SSE2(cpu_flags)) { | ||||
| @@ -1,58 +0,0 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/x86/asm.h" | |||||
| #include "audiodsp.h" | |||||
| #if HAVE_INLINE_ASM | |||||
| /* | |||||
| * Clamp every float of src to the range [min, max] and write it to dst, | |||||
| * using SSE through GCC inline assembly. | |||||
| * | |||||
| * The buffers are processed 64 bytes (16 floats) at a time, starting with | |||||
| * the last group and stepping back until the byte offset drops below zero. | |||||
| * Consequences visible in the code: | |||||
| * - if len is not a multiple of 16, the leading len % 16 elements are | |||||
| * never touched; | |||||
| * - if len < 16, the initial offset is negative and the first pass | |||||
| * accesses memory in front of both buffers; | |||||
| * - every access uses movaps, so dst and src must be 16-byte aligned. | |||||
| * | |||||
| * dst: destination buffer (asm operand %1) | |||||
| * src: source buffer (asm operand %2) | |||||
| * len: number of floats; presumably always a positive multiple of 16 - | |||||
| * verify against callers | |||||
| * min: lower bound (asm operand %3), broadcast into xmm4 | |||||
| * max: upper bound (asm operand %4), broadcast into xmm5 | |||||
| */ void ff_vector_clipf_sse(float *dst, const float *src, | |||||
| int len, float min, float max) | |||||
| { | |||||
| x86_reg i = (len - 16) * 4; /* byte offset of the last group of 16 floats */ | |||||
| __asm__ volatile ( | |||||
| "movss %3, %%xmm4 \n\t" /* low lane of xmm4 = min */ | |||||
| "movss %4, %%xmm5 \n\t" /* low lane of xmm5 = max */ | |||||
| "shufps $0, %%xmm4, %%xmm4 \n\t" /* copy lane 0 into all four lanes */ | |||||
| "shufps $0, %%xmm5, %%xmm5 \n\t" | |||||
| "1: \n\t" /* one pass handles 64 bytes */ | |||||
| "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel | |||||
| "movaps 16(%2, %0), %%xmm1 \n\t" | |||||
| "movaps 32(%2, %0), %%xmm2 \n\t" | |||||
| "movaps 48(%2, %0), %%xmm3 \n\t" | |||||
| "maxps %%xmm4, %%xmm0 \n\t" /* raise values below min up to min */ | |||||
| "maxps %%xmm4, %%xmm1 \n\t" | |||||
| "maxps %%xmm4, %%xmm2 \n\t" | |||||
| "maxps %%xmm4, %%xmm3 \n\t" | |||||
| "minps %%xmm5, %%xmm0 \n\t" /* lower values above max down to max */ | |||||
| "minps %%xmm5, %%xmm1 \n\t" | |||||
| "minps %%xmm5, %%xmm2 \n\t" | |||||
| "minps %%xmm5, %%xmm3 \n\t" | |||||
| "movaps %%xmm0, (%1, %0) \n\t" /* store to dst at the same offset */ | |||||
| "movaps %%xmm1, 16(%1, %0) \n\t" | |||||
| "movaps %%xmm2, 32(%1, %0) \n\t" | |||||
| "movaps %%xmm3, 48(%1, %0) \n\t" | |||||
| "sub $64, %0 \n\t" /* step back to the previous 64-byte group */ | |||||
| "jge 1b \n\t" /* repeat while the offset is still >= 0 */ | |||||
| : "+&r" (i) | |||||
| : "r" (dst), "r" (src), "m" (min), "m" (max) | |||||
| : "memory"); /* NOTE(review): xmm0-xmm5 are written above but not listed as clobbers - confirm this is intended */ | |||||
| } | |||||
| #endif /* HAVE_INLINE_ASM */ | |||||