Browse Source

libavcodec/exr : add x86 SIMD for predictor

Signed-off-by: James Almer <jamrial@gmail.com>
tags/n3.4
Martin Vignali James Almer 8 years ago
parent
commit
ac5908b13f
6 changed files with 109 additions and 15 deletions
  1. +2
    -14
      libavcodec/exr.c
  2. +9
    -0
      libavcodec/exrdsp.c
  3. +1
    -0
      libavcodec/exrdsp.h
  4. +61
    -1
      libavcodec/x86/exrdsp.asm
  5. +13
    -0
      libavcodec/x86/exrdsp_init.c
  6. +23
    -0
      tests/checkasm/exrdsp.c

+ 2
- 14
libavcodec/exr.c View File

@@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v)
return (v + (1 << 16)) >> (exp + 1);
}

/**
 * Undo the byte-wise delta coding of a buffer, in place.
 *
 * Every byte after the first is replaced by the sum of the (already
 * updated) previous byte and itself, minus 128, truncated to 8 bits.
 * The first byte is left untouched; nothing happens when size <= 1.
 *
 * @param src  buffer to transform in place
 * @param size number of bytes in src
 */
static void predictor(uint8_t *src, int size)
{
    int i;

    for (i = 1; i < size; i++)
        src[i] = (uint8_t)(src[i - 1] + src[i] - 128);
}

static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
int uncompressed_size, EXRThreadData *td)
{
@@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size

av_assert1(uncompressed_size % 2 == 0);

predictor(td->tmp, uncompressed_size);
s->dsp.predictor(td->tmp, uncompressed_size);
s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);

return 0;
@@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si

av_assert1(uncompressed_size % 2 == 0);

predictor(td->tmp, uncompressed_size);
ctx->dsp.predictor(td->tmp, uncompressed_size);
ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);

return 0;


+ 9
- 0
libavcodec/exrdsp.c View File

@@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
}
}

/**
 * Scalar reference predictor: in-place running sum over the buffer.
 *
 * For each position from 1 to size - 1 the byte becomes
 * (previous output byte + current byte - 128) modulo 256.
 * Byte 0 is never modified, and buffers shorter than two bytes are
 * left as they are.
 *
 * @param src  buffer to transform in place
 * @param size number of bytes in src
 */
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
{
    ptrdiff_t pos;
    uint8_t running;

    if (size < 2)
        return;

    /* Carry the last written value instead of re-reading it from memory. */
    running = src[0];
    for (pos = 1; pos < size; pos++) {
        running = (uint8_t)(src[pos] + running - 128);
        src[pos] = running;
    }
}

av_cold void ff_exrdsp_init(ExrDSPContext *c)
{
c->reorder_pixels = reorder_pixels_scalar;
c->predictor = predictor_scalar;

if (ARCH_X86)
ff_exrdsp_init_x86(c);


+ 1
- 0
libavcodec/exrdsp.h View File

@@ -24,6 +24,7 @@

/*
 * Table of function pointers for the EXR DSP routines. Callers invoke the
 * routines through these pointers so that an initializer can select which
 * implementation is used.
 */
typedef struct ExrDSPContext {
/* Reads from src and writes to dst; size is a byte count. The exact byte
 * layout produced is defined by the implementation (not visible here). */
void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
/* Transforms src in place; size is a byte count. The scalar reference
 * computes src[i] += src[i-1] - 128 for every i in [1, size). */
void (*predictor)(uint8_t *src, ptrdiff_t size);
} ExrDSPContext;

void ff_exrdsp_init(ExrDSPContext *c);


+ 61
- 1
libavcodec/x86/exrdsp.asm View File

@@ -2,9 +2,11 @@
;* X86 Optimized functions for Open Exr Decoder
;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
;*
;* reorder_pixels based on patch by John Loy
;* reorder_pixels, predictor based on patch by John Loy
;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
;*
;* predictor AVX/AVX2 by Henrik Gramner
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
@@ -24,6 +26,9 @@

%include "libavutil/x86/x86util.asm"

cextern pb_15
cextern pb_80

SECTION .text

;------------------------------------------------------------------------------
@@ -60,3 +65,58 @@ REORDER_PIXELS
INIT_YMM avx2
REORDER_PIXELS
%endif


;------------------------------------------------------------------------------
; void ff_predictor(uint8_t *src, ptrdiff_t size);
;------------------------------------------------------------------------------

%macro PREDICTOR 0
; In-place byte-wise running sum over src, mmsize bytes per iteration.
; Each input byte is first XORed with pb_80, then an inclusive prefix sum is
; formed and a carry from the previous chunk is added.
; NOTE(review): pb_80 / pb_15 are defined elsewhere; presumably every byte is
; 0x80 resp. 15, which makes the XOR equal to "- 128 (mod 256)" and the pshufb
; below a broadcast of byte 15 - confirm against the constants' definition.
; The loop is a do-while stepping by mmsize, so it always runs at least once
; and touches size rounded up to a multiple of mmsize bytes.
; NOTE(review): mova is x86inc's aligned move, so src is presumably expected
; to be suitably aligned and padded by the caller - confirm.
cglobal predictor, 2,2,5, src, size
%if mmsize == 32
vbroadcasti128 m0, [pb_80]
%else
mova xm0, [pb_80]
%endif
mova xm1, [pb_15]
; xm2 is the running carry; it starts as pb_80 so that (assuming 0x80 bytes)
; the bias added to the very first byte cancels and that byte is stored unchanged.
mova xm2, xm0
; Point srcq at the end of the buffer and count a negative offset up to zero.
add srcq, sizeq
neg sizeq
.loop:
pxor m3, m0, [srcq + sizeq]
; Log-step prefix sum inside each 128-bit lane: add copies shifted by 1, 2, 4
; bytes; m4 finally holds the copy shifted by 8 bytes for the last step.
pslldq m4, m3, 1
paddb m3, m4
pslldq m4, m3, 2
paddb m3, m4
pslldq m4, m3, 4
paddb m3, m4
pslldq m4, m3, 8
%if mmsize == 32
; Finish the per-lane prefix sums, then chain the two lanes by hand:
; low lane + carry -> first 16 bytes, its last byte becomes the carry for
; the high lane -> second 16 bytes.
paddb m3, m4
paddb xm2, xm3
vextracti128 xm4, m3, 1
mova [srcq + sizeq], xm2
pshufb xm2, xm1
paddb xm2, xm4
mova [srcq + sizeq + 16], xm2
%else
; Fold the last prefix-sum step (m4) and the carry (m2) together.
paddb m2, m3
paddb m2, m4
mova [srcq + sizeq], m2
%endif
; Turn the bytes just stored into the carry for the next iteration.
pshufb xm2, xm1
add sizeq, mmsize
jl .loop
RET
%endmacro

INIT_XMM ssse3
PREDICTOR

INIT_XMM avx
PREDICTOR

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
PREDICTOR
%endif

+ 13
- 0
libavcodec/x86/exrdsp_init.c View File

@@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);

void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);

void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);

void ff_predictor_avx(uint8_t *src, ptrdiff_t size);

void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);

av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
{
int cpu_flags = av_get_cpu_flags();
@@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
if (EXTERNAL_SSE2(cpu_flags)) {
dsp->reorder_pixels = ff_reorder_pixels_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {
dsp->predictor = ff_predictor_ssse3;
}
if (EXTERNAL_AVX(cpu_flags)) {
dsp->predictor = ff_predictor_avx;
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
dsp->reorder_pixels = ff_reorder_pixels_avx2;
dsp->predictor = ff_predictor_avx2;
}
}

+ 23
- 0
tests/checkasm/exrdsp.c View File

@@ -55,6 +55,24 @@ static void check_reorder_pixels(void) {
bench_new(dst_new, src, BUF_SIZE);
}

/*
 * Run the reference and the tested predictor on identical copies of one
 * input buffer, fail if their first BUF_SIZE output bytes differ, then
 * benchmark the tested version.
 */
static void check_predictor(void) {
LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);

declare_func(void, uint8_t *src, ptrdiff_t size);

/* Zero the whole padded buffer first, then let randomize_buffers() fill it.
 * NOTE(review): randomize_buffers() is a macro defined elsewhere in this
 * file; presumably it writes random data into src - confirm how much. */
memset(src, 0, PADDED_BUF_SIZE);
randomize_buffers();
/* The predictor signature takes a single buffer, so each implementation
 * gets its own copy of the same input. */
memcpy(dst_ref, src, PADDED_BUF_SIZE);
memcpy(dst_new, src, PADDED_BUF_SIZE);
call_ref(dst_ref, BUF_SIZE);
call_new(dst_new, BUF_SIZE);
/* Only the first BUF_SIZE bytes are compared; the padding is not checked. */
if (memcmp(dst_ref, dst_new, BUF_SIZE))
fail();
/* The benchmark reuses dst_new as it was left by the call above. */
bench_new(dst_new, BUF_SIZE);
}

void checkasm_check_exrdsp(void)
{
ExrDSPContext h;
@@ -65,4 +83,9 @@ void checkasm_check_exrdsp(void)
check_reorder_pixels();

report("reorder_pixels");

if (check_func(h.predictor, "predictor"))
check_predictor();

report("predictor");
}

Loading…
Cancel
Save