Also add an SSE2 version.

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

tags/n2.3
| @@ -77,7 +77,7 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type) | |||||
| #define THRESHOLD_MULTIPLIER 0.6 | #define THRESHOLD_MULTIPLIER 0.6 | ||||
| static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, | static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, | ||||
| int size) | |||||
| intptr_t size) | |||||
| { | { | ||||
| int score = 0, i; | int score = 0, i; | ||||
| @@ -69,7 +69,7 @@ typedef struct SVQ1EncContext { | |||||
| uint8_t *scratchbuf; | uint8_t *scratchbuf; | ||||
| int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, | int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, | ||||
| int size); | |||||
| intptr_t size); | |||||
| } SVQ1EncContext; | } SVQ1EncContext; | ||||
| void ff_svq1enc_init_ppc(SVQ1EncContext *c); | void ff_svq1enc_init_ppc(SVQ1EncContext *c); | ||||
| @@ -45,7 +45,7 @@ OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o | |||||
| OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o | OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o | ||||
| OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \ | OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \ | ||||
| x86/rv40dsp_init.o | x86/rv40dsp_init.o | ||||
| OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o | |||||
| OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o | |||||
| OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o | OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o | ||||
| OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o | OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o | ||||
| OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | ||||
| @@ -122,6 +122,7 @@ YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o | |||||
| YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o | YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o | ||||
| YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \ | YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \ | ||||
| x86/rv40dsp.o | x86/rv40dsp.o | ||||
| YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o | |||||
| YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o | YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o | ||||
| YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o | YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o | ||||
| YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o | YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o | ||||
| @@ -0,0 +1,61 @@ | |||||
| ;****************************************************************************** | |||||
| ;* SIMD-optimized SVQ1 encoder functions | |||||
| ;* Copyright (c) 2007 Loren Merritt | |||||
| ;* | |||||
| ;* This file is part of FFmpeg. | |||||
| ;* | |||||
| ;* FFmpeg is free software; you can redistribute it and/or | |||||
| ;* modify it under the terms of the GNU Lesser General Public | |||||
| ;* License as published by the Free Software Foundation; either | |||||
| ;* version 2.1 of the License, or (at your option) any later version. | |||||
| ;* | |||||
| ;* FFmpeg is distributed in the hope that it will be useful, | |||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| ;* Lesser General Public License for more details. | |||||
| ;* | |||||
| ;* You should have received a copy of the GNU Lesser General Public | |||||
| ;* License along with FFmpeg; if not, write to the Free Software | |||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| ;****************************************************************************** | |||||
| %include "libavutil/x86/x86util.asm" | |||||
| SECTION_TEXT | |||||
; SSD_INT8_VS_INT16: expands to the function ssd_int8_vs_int16(pix1, pix2, size)
; for whichever register width (mmsize) is active at expansion time.
; The function accumulates (pix2[i] - pix1[i])^2 over the input, reading pix1
; as bytes and pix2 as 16-bit words, and returns the 32-bit total in eax.
; The loop body always executes at least once and consumes 8 elements per
; pass, so size is expected to be a positive multiple of 8.
| %macro SSD_INT8_VS_INT16 0 | |||||
; NOTE(review): the three numbers are presumably x86inc's argument, GPR and
; SIMD register counts. The mmsize == 8 path below also touches m3 and m4;
; confirm that the SIMD count is irrelevant for MMX builds.
| cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size | |||||
; m0 holds the running 32-bit partial sums.
| pxor m0, m0 | |||||
; NOTE(review): this label has no trailing colon; NASM can warn about a
; colon-less label that is alone on its line. Consider ".loop:".
| .loop | |||||
; Step backwards through the buffers. The flags produced by this sub are the
; ones tested by the jg at the bottom; none of the SIMD instructions in
; between modify EFLAGS.
| sub sizeq, 8 | |||||
; Load 8 bytes of pix1 and one register's worth of pix2 words.
| movq m1, [pix1q + sizeq] | |||||
| mova m2, [pix2q + sizeq*2] | |||||
; 64-bit registers: a register holds only 4 words, so the 8 elements are
; handled as two halves (m2 = low 4 words, m3 = high 4 words of pix2).
| %if mmsize == 8 | |||||
| movq m3, [pix2q + sizeq*2 + mmsize] | |||||
; Place each pix1 byte in the high byte of a word; the low byte comes from
; the destination register (m4 is not initialised, m1 is interleaved with
; itself). The arithmetic shift right by 8 below then leaves the
; sign-extended pix1 value and discards that low byte.
| punpckhbw m4, m1 | |||||
| punpcklbw m1, m1 | |||||
| psraw m4, 8 | |||||
| psraw m1, 8 | |||||
; Word differences pix2 - pix1 for both halves.
| psubw m3, m4 | |||||
| psubw m2, m1 | |||||
; Multiplying each register by itself squares the differences and adds
; adjacent pairs into 32-bit lanes.
| pmaddwd m3, m3 | |||||
| pmaddwd m2, m2 | |||||
| paddd m0, m3 | |||||
| paddd m0, m2 | |||||
; Wider registers: all 8 words fit in m2, so a single unpack/shift/subtract/
; multiply-add sequence covers the whole iteration.
| %else | |||||
| punpcklbw m1, m1 | |||||
| psraw m1, 8 | |||||
| psubw m2, m1 | |||||
| pmaddwd m2, m2 | |||||
| paddd m0, m2 | |||||
| %endif | |||||
; Continue while the decremented size is still greater than zero.
| jg .loop | |||||
; HADDD is not defined in this file (presumably provided by x86util.asm); it
; is expected to reduce the 32-bit lanes of m0 to one sum, using m1 as scratch.
| HADDD m0, m1 | |||||
; Return value: low 32 bits of m0.
| movd eax, m0 | |||||
| RET | |||||
| %endmacro | |||||
; Expand the macro twice: once after INIT_MMX (the mmsize == 8 branch) and
; once after INIT_XMM (the %else branch). The C init code declares the
; resulting functions as ff_ssd_int8_vs_int16_mmx and
; ff_ssd_int8_vs_int16_sse2; the name decoration is presumably applied by
; cglobal -- confirm against x86inc.
| INIT_MMX mmx | |||||
| SSD_INT8_VS_INT16 | |||||
| INIT_XMM sse2 | |||||
| SSD_INT8_VS_INT16 | |||||
| @@ -1,75 +0,0 @@ | |||||
| /* | |||||
| * Copyright (c) 2007 Loren Merritt | |||||
| * | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/x86/asm.h" | |||||
| #include "libavutil/x86/cpu.h" | |||||
| #include "libavcodec/svq1enc.h" | |||||
| #if HAVE_INLINE_ASM | |||||
/*
 * Inline-assembly MMX implementation (removed by this commit) of the sum of
 * squared differences between int8 samples in pix1 and int16 samples in pix2.
 *
 * The loop runs at least once and consumes 8 elements per pass, walking from
 * the end of the buffers towards the start, so size is expected to be a
 * positive multiple of 8.
 *
 * Returns the 32-bit sum of (pix2[i] - pix1[i])^2.
 *
 * NOTE(review): the asm statement lists no clobbers (no MMX registers, no
 * "cc", no "memory") although it writes mm0-mm4, changes the flags and reads
 * memory through pix1/pix2.
 */
| static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, | |||||
| int size) | |||||
| { | |||||
| int sum; | |||||
| x86_reg i = size; | |||||
| __asm__ volatile ( | |||||
/* mm4 accumulates the 32-bit partial sums. */
| "pxor %%mm4, %%mm4 \n" | |||||
| "1: \n" | |||||
/* The flags set by this sub are the ones tested by "jg 1b" below. */
| "sub $8, %0 \n" | |||||
/* mm2 = 8 bytes of pix1; mm0/mm1 = low/high 4 words of pix2. */
| "movq (%2, %0), %%mm2 \n" | |||||
| "movq (%3, %0, 2), %%mm0 \n" | |||||
| "movq 8(%3, %0, 2), %%mm1 \n" | |||||
/* Move each pix1 byte into the high byte of a word, then shift right
 * arithmetically by 8 to sign-extend it; the low byte (uninitialised in the
 * mm3 case) is shifted out. */
| "punpckhbw %%mm2, %%mm3 \n" | |||||
| "punpcklbw %%mm2, %%mm2 \n" | |||||
| "psraw $8, %%mm3 \n" | |||||
| "psraw $8, %%mm2 \n" | |||||
/* Differences pix2 - pix1, squared and pair-summed into 32-bit lanes. */
| "psubw %%mm3, %%mm1 \n" | |||||
| "psubw %%mm2, %%mm0 \n" | |||||
| "pmaddwd %%mm1, %%mm1 \n" | |||||
| "pmaddwd %%mm0, %%mm0 \n" | |||||
| "paddd %%mm1, %%mm4 \n" | |||||
| "paddd %%mm0, %%mm4 \n" | |||||
| "jg 1b \n" | |||||
/* Fold the upper 32-bit lane of mm4 into the lower one and store it. */
| "movq %%mm4, %%mm3 \n" | |||||
| "psrlq $32, %%mm3 \n" | |||||
| "paddd %%mm3, %%mm4 \n" | |||||
| "movd %%mm4, %1 \n" | |||||
| : "+r" (i), "=r" (sum) | |||||
| : "r" (pix1), "r" (pix2)); | |||||
| return sum; | |||||
| } | |||||
| #endif /* HAVE_INLINE_ASM */ | |||||
/*
 * Previous x86 initialiser (removed by this commit): installs the inline-asm
 * MMX routine into c->ssd_int8_vs_int16 when INLINE_MMX(cpu_flags) is true.
 * When HAVE_INLINE_ASM is 0 the body is empty and c is left untouched.
 */
| av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c) | |||||
| { | |||||
| #if HAVE_INLINE_ASM | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| if (INLINE_MMX(cpu_flags)) { | |||||
| c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; | |||||
| } | |||||
| #endif /* HAVE_INLINE_ASM */ | |||||
| } | |||||
| @@ -0,0 +1,42 @@ | |||||
| /* | |||||
| * Copyright (c) 2007 Loren Merritt | |||||
| * | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/x86/cpu.h" | |||||
| #include "libavcodec/svq1enc.h" | |||||
/*
 * Prototypes for the implementations that are not defined in this file;
 * presumably they are the two expansions of SSD_INT8_VS_INT16 in
 * x86/svq1enc.asm -- confirm. size is intptr_t, matching the updated
 * ssd_int8_vs_int16 function-pointer type in SVQ1EncContext.
 */
| int ff_ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, | |||||
| intptr_t size); | |||||
| int ff_ssd_int8_vs_int16_sse2(const int8_t *pix1, const int16_t *pix2, | |||||
| intptr_t size); | |||||
/*
 * Select the x86 implementation of c->ssd_int8_vs_int16 from the CPU flags
 * returned by av_get_cpu_flags(). The checks run in order and the later
 * assignment wins, so the SSE2 routine replaces the MMX one when both tests
 * pass. If neither test passes, c is not modified.
 */
| av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c) | |||||
| { | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| if (EXTERNAL_MMX(cpu_flags)) { | |||||
| c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_mmx; | |||||
| } | |||||
/* Checked second so that it overrides the MMX choice above. */
| if (EXTERNAL_SSE2(cpu_flags)) { | |||||
| c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_sse2; | |||||
| } | |||||
| } | |||||