@@ -24,61 +24,7 @@
SECTION .text
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
    movd    m4, maskd
    SPLATW  m4, m4
    add     wd, wd
    test    wq, 2*mmsize - 1
    jz %%.tomainloop
    push  tmpq
%%.wordloop:
    sub     wq, 2
%ifidn %2, add
    mov     tmpw, [srcq+wq]
    add     tmpw, [dstq+wq]
%else
    mov     tmpw, [src1q+wq]
    sub     tmpw, [src2q+wq]
%endif
    and     tmpw, maskw
    mov     [dstq+wq], tmpw
    test    wq, 2*mmsize - 1
    jnz %%.wordloop
    pop     tmpq
%%.tomainloop:
%ifidn %2, add
    add     srcq, wq
%else
    add     src1q, wq
    add     src2q, wq
%endif
    add     dstq, wq
    neg     wq
    jz      %%.end
%%.loop:
%ifidn %2, add
    mov%1   m0, [srcq+wq]
    mov%1   m1, [dstq+wq]
    mov%1   m2, [srcq+wq+mmsize]
    mov%1   m3, [dstq+wq+mmsize]
%else
    mov%1   m0, [src1q+wq]
    mov%1   m1, [src2q+wq]
    mov%1   m2, [src1q+wq+mmsize]
    mov%1   m3, [src2q+wq+mmsize]
%endif
    p%2w    m0, m1
    p%2w    m2, m3
    pand    m0, m4
    pand    m2, m4
    mov%1   [dstq+wq]       , m0
    mov%1   [dstq+wq+mmsize], m2
    add     wq, 2*mmsize
    jl %%.loop
%%.end:
    RET
%endmacro
%include "libavcodec/x86/huffyuvdsp_template.asm"
%if ARCH_X86_32
INIT_MMX mmx
@@ -28,6 +28,7 @@
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
                                     intptr_t w, uint8_t *left);
void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
@@ -0,0 +1,76 @@
;******************************************************************************
;* SIMD-optimized HuffYUV functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Christophe Gisquet
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
    movd    m4, maskd
    SPLATW  m4, m4
    add     wd, wd
    test    wq, 2*mmsize - 1
    jz %%.tomainloop
    push  tmpq
%%.wordloop:
    sub     wq, 2
%ifidn %2, add
    mov     tmpw, [srcq+wq]
    add     tmpw, [dstq+wq]
%else
    mov     tmpw, [src1q+wq]
    sub     tmpw, [src2q+wq]
%endif
    and     tmpw, maskw
    mov     [dstq+wq], tmpw
    test    wq, 2*mmsize - 1
    jnz %%.wordloop
    pop     tmpq
%%.tomainloop:
%ifidn %2, add
    add     srcq, wq
%else
    add     src1q, wq
    add     src2q, wq
%endif
    add     dstq, wq
    neg     wq
    jz      %%.end
%%.loop:
%ifidn %2, add
    mov%1   m0, [srcq+wq]
    mov%1   m1, [dstq+wq]
    mov%1   m2, [srcq+wq+mmsize]
    mov%1   m3, [dstq+wq+mmsize]
%else
    mov%1   m0, [src1q+wq]
    mov%1   m1, [src2q+wq]
    mov%1   m2, [src1q+wq+mmsize]
    mov%1   m3, [src2q+wq+mmsize]
%endif
    p%2w    m0, m1
    p%2w    m2, m3
    pand    m0, m4
    pand    m2, m4
    mov%1   [dstq+wq]       , m0
    mov%1   [dstq+wq+mmsize], m2
    add     wq, 2*mmsize
    jl %%.loop
%%.end:
    RET
%endmacro
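
For context (not part of the patch): the INT16_LOOP macro above implements a masked, element-wise add or subtract over 16-bit words. A scalar word loop first consumes elements from the end of the buffer until the remaining byte count is a multiple of 2*mmsize, then the main loop processes two SIMD registers per iteration and ANDs the splatted mask so results stay within the sample bit depth implied by mask. A minimal plain-C sketch of the equivalent semantics, matching the ff_add_int16_* prototypes above (the *_c names here are illustrative only and are not introduced by this patch):

#include <stdint.h>

/* dst[i] = (dst[i] + src[i]) & mask -- what INT16_LOOP does for %2 = add */
static void add_int16_c(uint16_t *dst, const uint16_t *src,
                        unsigned mask, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = (dst[i] + src[i]) & mask;
}

/* dst[i] = (src1[i] - src2[i]) & mask -- what INT16_LOOP does for %2 = sub */
static void diff_int16_c(uint16_t *dst, const uint16_t *src1,
                         const uint16_t *src2, unsigned mask, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = (src1[i] - src2[i]) & mask;
}
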
@@ -27,62 +27,10 @@
SECTION .text
%include "libavcodec/x86/huffyuvdsp_template.asm"
; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
;                    unsigned mask, int w);
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
    movd    m4, maskd
    SPLATW  m4, m4
    add     wd, wd
    test    wq, 2*mmsize - 1
    jz %%.tomainloop
    push  tmpq
%%.wordloop:
    sub     wq, 2
%ifidn %2, add
    mov     tmpw, [srcq+wq]
    add     tmpw, [dstq+wq]
%else
    mov     tmpw, [src1q+wq]
    sub     tmpw, [src2q+wq]
%endif
    and     tmpw, maskw
    mov     [dstq+wq], tmpw
    test    wq, 2*mmsize - 1
    jnz %%.wordloop
    pop     tmpq
%%.tomainloop:
%ifidn %2, add
    add     srcq, wq
%else
    add     src1q, wq
    add     src2q, wq
%endif
    add     dstq, wq
    neg     wq
    jz      %%.end
%%.loop:
%ifidn %2, add
    mov%1   m0, [srcq+wq]
    mov%1   m1, [dstq+wq]
    mov%1   m2, [srcq+wq+mmsize]
    mov%1   m3, [dstq+wq+mmsize]
%else
    mov%1   m0, [src1q+wq]
    mov%1   m1, [src2q+wq]
    mov%1   m2, [src1q+wq+mmsize]
    mov%1   m3, [src2q+wq+mmsize]
%endif
    p%2w    m0, m1
    p%2w    m2, m3
    pand    m0, m4
    pand    m2, m4
    mov%1   [dstq+wq]       , m0
    mov%1   [dstq+wq+mmsize], m2
    add     wq, 2*mmsize
    jl %%.loop
%%.end:
    RET
%endmacro
%if ARCH_X86_32
INIT_MMX mmx