Browse Source

avcodec/huffyuvdsp(enc) : move duplicate macro to a template file

tags/n4.0
Martin Vignali 7 years ago
parent
commit
7f9b67bcb6
4 changed files with 80 additions and 109 deletions
  1. +1
    -55
      libavcodec/x86/huffyuvdsp.asm
  2. +1
    -0
      libavcodec/x86/huffyuvdsp_init.c
  3. +76
    -0
      libavcodec/x86/huffyuvdsp_template.asm
  4. +2
    -54
      libavcodec/x86/huffyuvencdsp.asm

+ 1
- 55
libavcodec/x86/huffyuvdsp.asm View File

@@ -24,61 +24,7 @@

SECTION .text


%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
movd m4, maskd
SPLATW m4, m4
add wd, wd
test wq, 2*mmsize - 1
jz %%.tomainloop
push tmpq
%%.wordloop:
sub wq, 2
%ifidn %2, add
mov tmpw, [srcq+wq]
add tmpw, [dstq+wq]
%else
mov tmpw, [src1q+wq]
sub tmpw, [src2q+wq]
%endif
and tmpw, maskw
mov [dstq+wq], tmpw
test wq, 2*mmsize - 1
jnz %%.wordloop
pop tmpq
%%.tomainloop:
%ifidn %2, add
add srcq, wq
%else
add src1q, wq
add src2q, wq
%endif
add dstq, wq
neg wq
jz %%.end
%%.loop:
%ifidn %2, add
mov%1 m0, [srcq+wq]
mov%1 m1, [dstq+wq]
mov%1 m2, [srcq+wq+mmsize]
mov%1 m3, [dstq+wq+mmsize]
%else
mov%1 m0, [src1q+wq]
mov%1 m1, [src2q+wq]
mov%1 m2, [src1q+wq+mmsize]
mov%1 m3, [src2q+wq+mmsize]
%endif
p%2w m0, m1
p%2w m2, m3
pand m0, m4
pand m2, m4
mov%1 [dstq+wq] , m0
mov%1 [dstq+wq+mmsize], m2
add wq, 2*mmsize
jl %%.loop
%%.end:
RET
%endmacro
%include "libavcodec/x86/huffyuvdsp_template.asm"

%if ARCH_X86_32
INIT_MMX mmx


+ 1
- 0
libavcodec/x86/huffyuvdsp_init.c View File

@@ -28,6 +28,7 @@

void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);

void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
intptr_t w, uint8_t *left);
void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,


+ 76
- 0
libavcodec/x86/huffyuvdsp_template.asm View File

@@ -0,0 +1,76 @@
;******************************************************************************
;* SIMD-optimized HuffYUV functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Christophe Gisquet
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
movd m4, maskd
SPLATW m4, m4
add wd, wd
test wq, 2*mmsize - 1
jz %%.tomainloop
push tmpq
%%.wordloop:
sub wq, 2
%ifidn %2, add
mov tmpw, [srcq+wq]
add tmpw, [dstq+wq]
%else
mov tmpw, [src1q+wq]
sub tmpw, [src2q+wq]
%endif
and tmpw, maskw
mov [dstq+wq], tmpw
test wq, 2*mmsize - 1
jnz %%.wordloop
pop tmpq
%%.tomainloop:
%ifidn %2, add
add srcq, wq
%else
add src1q, wq
add src2q, wq
%endif
add dstq, wq
neg wq
jz %%.end
%%.loop:
%ifidn %2, add
mov%1 m0, [srcq+wq]
mov%1 m1, [dstq+wq]
mov%1 m2, [srcq+wq+mmsize]
mov%1 m3, [dstq+wq+mmsize]
%else
mov%1 m0, [src1q+wq]
mov%1 m1, [src2q+wq]
mov%1 m2, [src1q+wq+mmsize]
mov%1 m3, [src2q+wq+mmsize]
%endif
p%2w m0, m1
p%2w m2, m3
pand m0, m4
pand m2, m4
mov%1 [dstq+wq] , m0
mov%1 [dstq+wq+mmsize], m2
add wq, 2*mmsize
jl %%.loop
%%.end:
RET
%endmacro

+ 2
- 54
libavcodec/x86/huffyuvencdsp.asm View File

@@ -27,62 +27,10 @@

SECTION .text

%include "libavcodec/x86/huffyuvdsp_template.asm"

; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
; unsigned mask, int w);
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
movd m4, maskd
SPLATW m4, m4
add wd, wd
test wq, 2*mmsize - 1
jz %%.tomainloop
push tmpq
%%.wordloop:
sub wq, 2
%ifidn %2, add
mov tmpw, [srcq+wq]
add tmpw, [dstq+wq]
%else
mov tmpw, [src1q+wq]
sub tmpw, [src2q+wq]
%endif
and tmpw, maskw
mov [dstq+wq], tmpw
test wq, 2*mmsize - 1
jnz %%.wordloop
pop tmpq
%%.tomainloop:
%ifidn %2, add
add srcq, wq
%else
add src1q, wq
add src2q, wq
%endif
add dstq, wq
neg wq
jz %%.end
%%.loop:
%ifidn %2, add
mov%1 m0, [srcq+wq]
mov%1 m1, [dstq+wq]
mov%1 m2, [srcq+wq+mmsize]
mov%1 m3, [dstq+wq+mmsize]
%else
mov%1 m0, [src1q+wq]
mov%1 m1, [src2q+wq]
mov%1 m2, [src1q+wq+mmsize]
mov%1 m3, [src2q+wq+mmsize]
%endif
p%2w m0, m1
p%2w m2, m3
pand m0, m4
pand m2, m4
mov%1 [dstq+wq] , m0
mov%1 [dstq+wq+mmsize], m2
add wq, 2*mmsize
jl %%.loop
%%.end:
RET
%endmacro

%if ARCH_X86_32
INIT_MMX mmx


Loading…
Cancel
Save