| @@ -51,6 +51,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o | |||
| OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | |||
| OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o | |||
| OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o | |||
| OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o | |||
| OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o | |||
| OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | |||
| @@ -122,5 +123,6 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o | |||
| YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o | |||
| YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o | |||
| YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o | |||
| YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o | |||
| YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o | |||
| YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp.o | |||
| @@ -142,53 +142,6 @@ INIT_MMX 3dnow | |||
| PUT_NO_RND_PIXELS8_X2 | |||
; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels,
;                                     ptrdiff_t line_size, int h)
;
; Horizontal half-pel interpolation, 8 pixels wide, truncating average:
;     block[x] = (pixels[x] + pixels[x + 1]) >> 1
;
; PAVGB is a macro defined outside this view; it is presumably the byte
; average that rounds up (pavgb on mmxext, pavgusb on 3dnow).  Under that
; assumption, inverting both inputs and the result turns the round-up average
; into an exact round-down one: ~avg(~a, ~b) == (a + b) >> 1.
;
; Registers (x86inc cglobal argument order, matching the prototype above):
;     r0 = block, r1 = pixels, r2 = line_size, r3 = h, r4 = 3 * line_size
;     m6 = all-ones mask used for the bitwise NOT
;
; Each iteration handles four rows, so the last iteration always writes four
; rows even if fewer remain.  Every row reads 9 source bytes ([x] and [x+1]).
%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
cglobal put_no_rnd_pixels8_x2_exact, 4,5
    lea          r4, [r2*3]         ; offset of the fourth row
    pcmpeqb      m6, m6             ; m6 = 0xFF in every byte
.loop:
    ; ---- rows 0 and 1 ----
    mova         m0, [r1]           ; row 0, pixels x
    mova         m2, [r1+r2]        ; row 1, pixels x
    mova         m1, [r1+1]         ; row 0, pixels x + 1
    mova         m3, [r1+r2+1]      ; row 1, pixels x + 1
    pxor         m0, m6             ; invert all four inputs
    pxor         m2, m6
    pxor         m1, m6
    pxor         m3, m6
    PAVGB        m0, m1             ; average of the inverted values
    PAVGB        m2, m3
    pxor         m0, m6             ; invert back
    pxor         m2, m6
    mova       [r0], m0
    mova    [r0+r2], m2
    ; ---- rows 2 and 3 ----
    mova         m0, [r1+r2*2]      ; row 2, pixels x
    mova         m1, [r1+r2*2+1]    ; row 2, pixels x + 1
    mova         m2, [r1+r4]        ; row 3, pixels x
    mova         m3, [r1+r4+1]      ; row 3, pixels x + 1
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m1
    PAVGB        m2, m3
    pxor         m0, m6
    pxor         m2, m6
    mova  [r0+r2*2], m0
    mova    [r0+r4], m2
    ; ---- advance source and destination by four rows ----
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    sub         r3d, 4              ; four rows done
    jg .loop                        ; continue while rows remain
    REP_RET
%endmacro

; Instantiate the routine once per instruction set.
INIT_MMX mmxext
PUT_NO_RND_PIXELS8_X2_EXACT
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2_EXACT
| ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||
| %macro PUT_PIXELS8_Y2 0 | |||
| cglobal put_pixels8_y2, 4,5 | |||
| @@ -260,48 +213,6 @@ INIT_MMX 3dnow | |||
| PUT_NO_RND_PIXELS8_Y2 | |||
; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels,
;                                     ptrdiff_t line_size, int h)
;
; Vertical half-pel interpolation, 8 pixels wide, truncating average:
;     block[y][x] = (pixels[y][x] + pixels[y + 1][x]) >> 1
;
; PAVGB is a macro defined outside this view; it is presumably the byte
; average that rounds up (pavgb on mmxext, pavgusb on 3dnow).  Under that
; assumption, inverting both inputs and the result turns the round-up average
; into an exact round-down one: ~avg(~a, ~b) == (a + b) >> 1.
;
; Registers (x86inc cglobal argument order, matching the prototype above):
;     r0 = block, r1 = pixels, r2 = line_size, r3 = h, r4 = 3 * line_size
;     m6 = all-ones mask used for the bitwise NOT
;     m0 = inverted copy of the last source row read, carried across
;          iterations so that every source row is loaded only once
;
; Each iteration writes four rows and reads one source row beyond them.
%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
cglobal put_no_rnd_pixels8_y2_exact, 4,5
    lea          r4, [r2*3]         ; offset of the fourth row
    mova         m0, [r1]           ; source row 0
    pcmpeqb      m6, m6             ; m6 = 0xFF in every byte
    add          r1, r2             ; r1 now points at source row 1
    pxor         m0, m6             ; keep row 0 in inverted form
.loop:
    ; ---- output rows 0 and 1 ----
    mova         m1, [r1]           ; source row 1
    mova         m2, [r1+r2]        ; source row 2
    pxor         m1, m6
    pxor         m2, m6
    PAVGB        m0, m1             ; inverted rows 0 and 1
    PAVGB        m1, m2             ; inverted rows 1 and 2 (m2 is preserved)
    pxor         m0, m6             ; invert back
    pxor         m1, m6
    mova       [r0], m0
    mova    [r0+r2], m1
    ; ---- output rows 2 and 3 ----
    mova         m1, [r1+r2*2]      ; source row 3
    mova         m0, [r1+r4]        ; source row 4, reused by the next iteration
    pxor         m1, m6
    pxor         m0, m6             ; m0 stays inverted for the next pass
    PAVGB        m2, m1             ; inverted rows 2 and 3
    PAVGB        m1, m0             ; inverted rows 3 and 4
    pxor         m2, m6
    pxor         m1, m6
    mova  [r0+r2*2], m2
    mova    [r0+r4], m1
    ; ---- advance source and destination by four rows ----
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    sub         r3d, 4              ; four rows done
    jg .loop                        ; continue while rows remain
    REP_RET
%endmacro

; Instantiate the routine once per instruction set.
INIT_MMX mmxext
PUT_NO_RND_PIXELS8_Y2_EXACT
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2_EXACT
| ; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) | |||
| %macro AVG_PIXELS8 0 | |||
| cglobal avg_pixels8, 4,5 | |||
| @@ -22,6 +22,8 @@ | |||
| #include <stddef.h> | |||
| #include <stdint.h> | |||
| #include "libavcodec/hpeldsp.h" | |||
| void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| @@ -35,4 +37,6 @@ void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, | |||
| void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags); | |||
| #endif /* AVCODEC_X86_HPELDSP_H */ | |||
| @@ -43,12 +43,6 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, | |||
| const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, | |||
| const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, | |||
| @@ -57,12 +51,6 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, | |||
| const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, | |||
| const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | |||
| @@ -209,11 +197,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags) | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext; | |||
| c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext; | |||
| } | |||
| if (flags & AV_CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) { | |||
| c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; | |||
| c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; | |||
| } | |||
| #endif /* HAVE_MMXEXT_EXTERNAL */ | |||
| } | |||
| @@ -243,11 +226,6 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags) | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; | |||
| c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow; | |||
| } | |||
| if (flags & AV_CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) { | |||
| c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; | |||
| c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; | |||
| } | |||
| #endif /* HAVE_AMD3DNOW_EXTERNAL */ | |||
| } | |||
| @@ -278,4 +256,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) | |||
| if (EXTERNAL_SSE2(cpu_flags)) | |||
| hpeldsp_init_sse2(c, flags, cpu_flags); | |||
| if (CONFIG_VP3_DECODER) | |||
| ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags); | |||
| } | |||
| @@ -0,0 +1,111 @@ | |||
| ;****************************************************************************** | |||
| ;* SIMD-optimized halfpel functions for VP3 | |||
| ;* | |||
| ;* This file is part of Libav. | |||
| ;* | |||
| ;* Libav is free software; you can redistribute it and/or | |||
| ;* modify it under the terms of the GNU Lesser General Public | |||
| ;* License as published by the Free Software Foundation; either | |||
| ;* version 2.1 of the License, or (at your option) any later version. | |||
| ;* | |||
| ;* Libav is distributed in the hope that it will be useful, | |||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| ;* Lesser General Public License for more details. | |||
| ;* | |||
| ;* You should have received a copy of the GNU Lesser General Public | |||
| ;* License along with Libav; if not, write to the Free Software | |||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| ;****************************************************************************** | |||
| %include "libavutil/x86/x86util.asm" | |||
| SECTION .text | |||
; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels,
;                                     ptrdiff_t line_size, int h)
;
; Horizontal half-pel interpolation, 8 pixels wide, truncating average:
;     block[x] = (pixels[x] + pixels[x + 1]) >> 1
;
; PAVGB is a macro defined outside this view; it is presumably the byte
; average that rounds up (pavgb on mmxext, pavgusb on 3dnow).  Under that
; assumption, inverting both inputs and the result turns the round-up average
; into an exact round-down one: ~avg(~a, ~b) == (a + b) >> 1.
;
; Registers (x86inc cglobal argument order, matching the prototype above):
;     r0 = block, r1 = pixels, r2 = line_size, r3 = h, r4 = 3 * line_size
;     m6 = all-ones mask used for the bitwise NOT
;
; Each iteration handles four rows, so the last iteration always writes four
; rows even if fewer remain.  Every row reads 9 source bytes ([x] and [x+1]).
%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
cglobal put_no_rnd_pixels8_x2_exact, 4,5
    lea          r4, [r2*3]         ; offset of the fourth row
    pcmpeqb      m6, m6             ; m6 = 0xFF in every byte
.loop:
    ; ---- rows 0 and 1 ----
    mova         m0, [r1]           ; row 0, pixels x
    mova         m2, [r1+r2]        ; row 1, pixels x
    mova         m1, [r1+1]         ; row 0, pixels x + 1
    mova         m3, [r1+r2+1]      ; row 1, pixels x + 1
    pxor         m0, m6             ; invert all four inputs
    pxor         m2, m6
    pxor         m1, m6
    pxor         m3, m6
    PAVGB        m0, m1             ; average of the inverted values
    PAVGB        m2, m3
    pxor         m0, m6             ; invert back
    pxor         m2, m6
    mova       [r0], m0
    mova    [r0+r2], m2
    ; ---- rows 2 and 3 ----
    mova         m0, [r1+r2*2]      ; row 2, pixels x
    mova         m1, [r1+r2*2+1]    ; row 2, pixels x + 1
    mova         m2, [r1+r4]        ; row 3, pixels x
    mova         m3, [r1+r4+1]      ; row 3, pixels x + 1
    pxor         m0, m6
    pxor         m1, m6
    pxor         m2, m6
    pxor         m3, m6
    PAVGB        m0, m1
    PAVGB        m2, m3
    pxor         m0, m6
    pxor         m2, m6
    mova  [r0+r2*2], m0
    mova    [r0+r4], m2
    ; ---- advance source and destination by four rows ----
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    sub         r3d, 4              ; four rows done
    jg .loop                        ; continue while rows remain
    REP_RET
%endmacro

; Instantiate the routine once per instruction set.
INIT_MMX mmxext
PUT_NO_RND_PIXELS8_X2_EXACT
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2_EXACT
; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels,
;                                     ptrdiff_t line_size, int h)
;
; Vertical half-pel interpolation, 8 pixels wide, truncating average:
;     block[y][x] = (pixels[y][x] + pixels[y + 1][x]) >> 1
;
; PAVGB is a macro defined outside this view; it is presumably the byte
; average that rounds up (pavgb on mmxext, pavgusb on 3dnow).  Under that
; assumption, inverting both inputs and the result turns the round-up average
; into an exact round-down one: ~avg(~a, ~b) == (a + b) >> 1.
;
; Registers (x86inc cglobal argument order, matching the prototype above):
;     r0 = block, r1 = pixels, r2 = line_size, r3 = h, r4 = 3 * line_size
;     m6 = all-ones mask used for the bitwise NOT
;     m0 = inverted copy of the last source row read, carried across
;          iterations so that every source row is loaded only once
;
; Each iteration writes four rows and reads one source row beyond them.
%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
cglobal put_no_rnd_pixels8_y2_exact, 4,5
    lea          r4, [r2*3]         ; offset of the fourth row
    mova         m0, [r1]           ; source row 0
    pcmpeqb      m6, m6             ; m6 = 0xFF in every byte
    add          r1, r2             ; r1 now points at source row 1
    pxor         m0, m6             ; keep row 0 in inverted form
.loop:
    ; ---- output rows 0 and 1 ----
    mova         m1, [r1]           ; source row 1
    mova         m2, [r1+r2]        ; source row 2
    pxor         m1, m6
    pxor         m2, m6
    PAVGB        m0, m1             ; inverted rows 0 and 1
    PAVGB        m1, m2             ; inverted rows 1 and 2 (m2 is preserved)
    pxor         m0, m6             ; invert back
    pxor         m1, m6
    mova       [r0], m0
    mova    [r0+r2], m1
    ; ---- output rows 2 and 3 ----
    mova         m1, [r1+r2*2]      ; source row 3
    mova         m0, [r1+r4]        ; source row 4, reused by the next iteration
    pxor         m1, m6
    pxor         m0, m6             ; m0 stays inverted for the next pass
    PAVGB        m2, m1             ; inverted rows 2 and 3
    PAVGB        m1, m0             ; inverted rows 3 and 4
    pxor         m2, m6
    pxor         m1, m6
    mova  [r0+r2*2], m2
    mova    [r0+r4], m1
    ; ---- advance source and destination by four rows ----
    lea          r1, [r1+r2*4]
    lea          r0, [r0+r2*4]
    sub         r3d, 4              ; four rows done
    jg .loop                        ; continue while rows remain
    REP_RET
%endmacro

; Instantiate the routine once per instruction set.
INIT_MMX mmxext
PUT_NO_RND_PIXELS8_Y2_EXACT
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2_EXACT
| @@ -0,0 +1,56 @@ | |||
| /* | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/hpeldsp.h" | |||
| #include "hpeldsp.h" | |||
| void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, | |||
| const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, | |||
| const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, | |||
| const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, | |||
| const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags) | |||
| { | |||
| if (EXTERNAL_AMD3DNOW(cpu_flags)) { | |||
| if (flags & AV_CODEC_FLAG_BITEXACT) { | |||
| c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; | |||
| c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; | |||
| } | |||
| } | |||
| if (EXTERNAL_MMXEXT(cpu_flags)) { | |||
| if (flags & AV_CODEC_FLAG_BITEXACT) { | |||
| c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; | |||
| c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; | |||
| } | |||
| } | |||
| } | |||