Signed-off-by: James Almer <jamrial@gmail.com>
tags/n3.4
| @@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v) | |||
| return (v + (1 << 16)) >> (exp + 1); | |||
| } | |||
/**
 * Undo the byte-wise delta coding of a buffer, in place.
 *
 * Every byte from index 1 onwards is replaced by
 * (previous decoded byte + current byte - 128), truncated to 8 bits.
 * The first byte is left untouched.
 *
 * @param src  buffer to decode in place
 * @param size number of bytes in src; nothing is done when size <= 1
 */
static void predictor(uint8_t *src, int size)
{
    int pos;

    for (pos = 1; pos < size; pos++) {
        /* src[pos - 1] already holds its decoded value at this point. */
        const int sum = src[pos - 1] + src[pos] - 128;

        src[pos] = (uint8_t) sum;
    }
}
| static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size, | |||
| int uncompressed_size, EXRThreadData *td) | |||
| { | |||
| @@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size | |||
| av_assert1(uncompressed_size % 2 == 0); | |||
| predictor(td->tmp, uncompressed_size); | |||
| s->dsp.predictor(td->tmp, uncompressed_size); | |||
| s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size); | |||
| return 0; | |||
| @@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si | |||
| av_assert1(uncompressed_size % 2 == 0); | |||
| predictor(td->tmp, uncompressed_size); | |||
| ctx->dsp.predictor(td->tmp, uncompressed_size); | |||
| ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size); | |||
| return 0; | |||
| @@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si | |||
| } | |||
| } | |||
/**
 * Portable C implementation of the ExrDSPContext predictor hook.
 *
 * Walks the buffer front to back and adds (previous byte - 128) to each
 * byte starting at index 1, with 8-bit wrap-around. Because the previous
 * byte has already been updated, the result is a running sum.
 *
 * @param src  buffer to transform in place
 * @param size number of bytes in src; nothing is done when size <= 1
 */
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
{
    ptrdiff_t pos = 1;

    while (pos < size) {
        const int prev = src[pos - 1];

        src[pos] = (uint8_t) (src[pos] + prev - 128);
        pos++;
    }
}
| av_cold void ff_exrdsp_init(ExrDSPContext *c) | |||
| { | |||
| c->reorder_pixels = reorder_pixels_scalar; | |||
| c->predictor = predictor_scalar; | |||
| if (ARCH_X86) | |||
| ff_exrdsp_init_x86(c); | |||
| @@ -24,6 +24,7 @@ | |||
/**
 * Table of EXR decoder primitives that may have optimized implementations.
 *
 * ff_exrdsp_init() fills both entries with the scalar C versions and then
 * lets the x86 init routine override them according to the CPU flags.
 */
typedef struct ExrDSPContext {
    /**
     * Produce size bytes in dst from src.
     * NOTE(review): the exact byte layout is defined by
     * reorder_pixels_scalar, whose body is not visible here - confirm.
     */
    void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);

    /**
     * In-place predictor: each byte from index 1 onwards gets
     * (previous byte - 128) added to it, modulo 256.
     */
    void (*predictor)(uint8_t *src, ptrdiff_t size);
} ExrDSPContext;
| void ff_exrdsp_init(ExrDSPContext *c); | |||
| @@ -2,9 +2,11 @@ | |||
| ;* X86 Optimized functions for Open Exr Decoder | |||
| ;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC | |||
| ;* | |||
| ;* reorder_pixels based on patch by John Loy | |||
| ;* reorder_pixels, predictor based on patch by John Loy | |||
| ;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema | |||
| ;* | |||
| ;* predictor AVX/AVX2 by Henrik Gramner | |||
| ;* | |||
| ;* This file is part of FFmpeg. | |||
| ;* | |||
| ;* FFmpeg is free software; you can redistribute it and/or | |||
| @@ -24,6 +26,9 @@ | |||
| %include "libavutil/x86/x86util.asm" | |||
| cextern pb_15 | |||
| cextern pb_80 | |||
| SECTION .text | |||
| ;------------------------------------------------------------------------------ | |||
| @@ -60,3 +65,58 @@ REORDER_PIXELS | |||
| INIT_YMM avx2 | |||
| REORDER_PIXELS | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_predictor(uint8_t *src, ptrdiff_t size); | |||
| ;------------------------------------------------------------------------------ | |||
; PREDICTOR: emits one SIMD version of predictor(src, size) for the
; instruction set selected by the preceding INIT_XMM / INIT_YMM.
;
; Each iteration loads mmsize bytes, builds a byte-wise running sum of them
; with a shift-and-add ladder (shifts of 1, 2, 4 and 8 bytes), adds the carry
; from the previous chunk and stores the result back in place.
;
; Register roles as used below:
;   m0 - constant loaded from pb_80 (presumably 0x80 in every byte - confirm)
;   m1 - constant loaded from pb_15 (presumably 15 in every byte, so that
;        pshufb replicates byte 15 of its destination - confirm)
;   m2 - running carry; starts as a copy of the pb_80 constant
;   m3 - current chunk / partial sums, m4 - shifted temporary
;
; NOTE(review): the loop always works on whole mmsize-byte chunks and uses
; mova for the accesses, so callers presumably need a suitably aligned and
; padded buffer - confirm against the call sites.
| %macro PREDICTOR 0 | |||
| cglobal predictor, 2,2,5, src, size | |||
| %if mmsize == 32 | |||
| vbroadcasti128 m0, [pb_80] | |||
| %else | |||
| mova xm0, [pb_80] | |||
| %endif | |||
| mova xm1, [pb_15] | |||
; Seed the carry with the pb_80 constant.
| mova xm2, xm0 | |||
; Point srcq at the end of the buffer and count a negative index up to zero.
| add srcq, sizeq | |||
| neg sizeq | |||
| .loop: | |||
; XOR each input byte with the pb_80 constant while loading the chunk.
| pxor m3, m0, [srcq + sizeq] | |||
; Running sum inside each 16-byte lane: add the chunk to copies of itself
; shifted up by 1, 2 and 4 bytes; the 8-byte step is finished below.
| pslldq m4, m3, 1 | |||
| paddb m3, m4 | |||
| pslldq m4, m3, 2 | |||
| paddb m3, m4 | |||
| pslldq m4, m3, 4 | |||
| paddb m3, m4 | |||
| pslldq m4, m3, 8 | |||
| %if mmsize == 32 | |||
; 32-byte path: finish the ladder, then handle the two 16-byte halves one
; after the other so the carry flows from the low half into the high half.
| paddb m3, m4 | |||
| paddb xm2, xm3 | |||
| vextracti128 xm4, m3, 1 | |||
| mova [srcq + sizeq], xm2 | |||
; Turn the low-half result into the carry for the high half.
| pshufb xm2, xm1 | |||
| paddb xm2, xm4 | |||
| mova [srcq + sizeq + 16], xm2 | |||
| %else | |||
; 16-byte path: fold the last ladder step and the carry in together.
| paddb m2, m3 | |||
| paddb m2, m4 | |||
| mova [srcq + sizeq], m2 | |||
| %endif | |||
; Turn the chunk just stored into the carry for the next iteration.
| pshufb xm2, xm1 | |||
; Advance by one vector; keep looping while the index is still negative.
| add sizeq, mmsize | |||
| jl .loop | |||
| RET | |||
| %endmacro | |||
| INIT_XMM ssse3 | |||
| PREDICTOR | |||
| INIT_XMM avx | |||
| PREDICTOR | |||
| %if HAVE_AVX2_EXTERNAL | |||
| INIT_YMM avx2 | |||
| PREDICTOR | |||
| %endif | |||
| @@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size); | |||
| void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size); | |||
| void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size); | |||
| void ff_predictor_avx(uint8_t *src, ptrdiff_t size); | |||
| void ff_predictor_avx2(uint8_t *src, ptrdiff_t size); | |||
| av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| @@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp) | |||
| if (EXTERNAL_SSE2(cpu_flags)) { | |||
| dsp->reorder_pixels = ff_reorder_pixels_sse2; | |||
| } | |||
| if (EXTERNAL_SSSE3(cpu_flags)) { | |||
| dsp->predictor = ff_predictor_ssse3; | |||
| } | |||
| if (EXTERNAL_AVX(cpu_flags)) { | |||
| dsp->predictor = ff_predictor_avx; | |||
| } | |||
| if (EXTERNAL_AVX2_FAST(cpu_flags)) { | |||
| dsp->reorder_pixels = ff_reorder_pixels_avx2; | |||
| dsp->predictor = ff_predictor_avx2; | |||
| } | |||
| } | |||
| @@ -55,6 +55,24 @@ static void check_reorder_pixels(void) { | |||
| bench_new(dst_new, src, BUF_SIZE); | |||
| } | |||
| static void check_predictor(void) { | |||
| LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]); | |||
| LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]); | |||
| LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]); | |||
| declare_func(void, uint8_t *src, ptrdiff_t size); | |||
| memset(src, 0, PADDED_BUF_SIZE); | |||
| randomize_buffers(); | |||
| memcpy(dst_ref, src, PADDED_BUF_SIZE); | |||
| memcpy(dst_new, src, PADDED_BUF_SIZE); | |||
| call_ref(dst_ref, BUF_SIZE); | |||
| call_new(dst_new, BUF_SIZE); | |||
| if (memcmp(dst_ref, dst_new, BUF_SIZE)) | |||
| fail(); | |||
| bench_new(dst_new, BUF_SIZE); | |||
| } | |||
| void checkasm_check_exrdsp(void) | |||
| { | |||
| ExrDSPContext h; | |||
| @@ -65,4 +83,9 @@ void checkasm_check_exrdsp(void) | |||
| check_reorder_pixels(); | |||
| report("reorder_pixels"); | |||
| if (check_func(h.predictor, "predictor")) | |||
| check_predictor(); | |||
| report("predictor"); | |||
| } | |||