Signed-off-by: James Almer <jamrial@gmail.com>
tags/n3.4
| @@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v) | |||||
| return (v + (1 << 16)) >> (exp + 1); | return (v + (1 << 16)) >> (exp + 1); | ||||
| } | } | ||||
/**
 * Apply the byte predictor to a buffer in place.
 *
 * Every byte after the first is replaced by
 * (previously written byte + current byte - 128) modulo 256, so the
 * buffer ends up holding a running sum of its biased byte deltas.
 * The first byte is left unchanged.
 *
 * @param src  buffer to transform in place; must hold at least size bytes
 *             when size > 1
 * @param size number of bytes to process; values <= 1 leave src untouched
 */
static void predictor(uint8_t *src, int size)
{
    int i;

    /* Index-based loop: unlike walking a pointer from src + 1 up to
     * src + size, this never forms an out-of-range pointer when size is
     * zero or negative (or when src is NULL with size == 0). */
    for (i = 1; i < size; i++)
        src[i] = (uint8_t)(src[i - 1] + src[i] - 128);
}
| static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size, | static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size, | ||||
| int uncompressed_size, EXRThreadData *td) | int uncompressed_size, EXRThreadData *td) | ||||
| { | { | ||||
| @@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size | |||||
| av_assert1(uncompressed_size % 2 == 0); | av_assert1(uncompressed_size % 2 == 0); | ||||
| predictor(td->tmp, uncompressed_size); | |||||
| s->dsp.predictor(td->tmp, uncompressed_size); | |||||
| s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size); | s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size); | ||||
| return 0; | return 0; | ||||
| @@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si | |||||
| av_assert1(uncompressed_size % 2 == 0); | av_assert1(uncompressed_size % 2 == 0); | ||||
| predictor(td->tmp, uncompressed_size); | |||||
| ctx->dsp.predictor(td->tmp, uncompressed_size); | |||||
| ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size); | ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size); | ||||
| return 0; | return 0; | ||||
| @@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si | |||||
| } | } | ||||
| } | } | ||||
/**
 * Portable C version of the predictor step.
 *
 * Walks the buffer front to back and turns each byte (except the first)
 * into the sum of itself and the already-updated preceding byte, minus
 * 128, truncated to 8 bits.
 *
 * @param src  buffer modified in place
 * @param size number of bytes in src to process
 */
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
{
    ptrdiff_t pos = 1;

    while (pos < size) {
        /* src[pos - 1] already holds its final value at this point */
        src[pos] = (uint8_t)(src[pos] + src[pos - 1] - 128);
        pos++;
    }
}
| av_cold void ff_exrdsp_init(ExrDSPContext *c) | av_cold void ff_exrdsp_init(ExrDSPContext *c) | ||||
| { | { | ||||
| c->reorder_pixels = reorder_pixels_scalar; | c->reorder_pixels = reorder_pixels_scalar; | ||||
| c->predictor = predictor_scalar; | |||||
| if (ARCH_X86) | if (ARCH_X86) | ||||
| ff_exrdsp_init_x86(c); | ff_exrdsp_init_x86(c); | ||||
| @@ -24,6 +24,7 @@ | |||||
/**
 * Table of function pointers for the EXR DSP routines.
 * ff_exrdsp_init() fills it in.
 */
typedef struct ExrDSPContext {
    /** Writes size bytes derived from src into dst. */
    void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);

    /** Runs the predictor step over size bytes of src, in place. */
    void (*predictor)(uint8_t *src, ptrdiff_t size);
} ExrDSPContext;
| void ff_exrdsp_init(ExrDSPContext *c); | void ff_exrdsp_init(ExrDSPContext *c); | ||||
| @@ -2,9 +2,11 @@ | |||||
| ;* X86 Optimized functions for Open Exr Decoder | ;* X86 Optimized functions for Open Exr Decoder | ||||
| ;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC | ;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC | ||||
| ;* | ;* | ||||
| ;* reorder_pixels based on patch by John Loy | |||||
| ;* reorder_pixels, predictor based on patch by John Loy | |||||
| ;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema | ;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema | ||||
| ;* | ;* | ||||
| ;* predictor AVX/AVX2 by Henrik Gramner | |||||
| ;* | |||||
| ;* This file is part of FFmpeg. | ;* This file is part of FFmpeg. | ||||
| ;* | ;* | ||||
| ;* FFmpeg is free software; you can redistribute it and/or | ;* FFmpeg is free software; you can redistribute it and/or | ||||
| @@ -24,6 +26,9 @@ | |||||
| %include "libavutil/x86/x86util.asm" | %include "libavutil/x86/x86util.asm" | ||||
| cextern pb_15 | |||||
| cextern pb_80 | |||||
| SECTION .text | SECTION .text | ||||
| ;------------------------------------------------------------------------------ | ;------------------------------------------------------------------------------ | ||||
| @@ -60,3 +65,58 @@ REORDER_PIXELS | |||||
| INIT_YMM avx2 | INIT_YMM avx2 | ||||
| REORDER_PIXELS | REORDER_PIXELS | ||||
| %endif | %endif | ||||
| ;------------------------------------------------------------------------------ | |||||
| ; void ff_predictor(uint8_t *src, ptrdiff_t size); | |||||
| ;------------------------------------------------------------------------------ | |||||
; PREDICTOR: emits one `predictor(src, size)` function for whichever
; instruction set the preceding INIT_XMM / INIT_YMM selected.
;
; The buffer is rewritten in place, one vector (mmsize bytes) per iteration:
; each loaded byte is XORed with pb_80, a running byte-wise sum (mod 256) is
; formed, and the carry from the previous vector (kept in xm2) is added.
;
; NOTE(review): pb_80 and pb_15 are external constants not visible here;
; presumably 16 bytes of 0x80 and 16 bytes of 0x0f - confirm. If so, XOR with
; 0x80 equals "- 128" mod 256, and seeding the carry with 0x80 cancels that
; bias on the very first byte.
;
; The loop is bottom-tested and steps by mmsize, so it always processes at
; least one full vector and rounds the processed length up to a multiple of
; mmsize; bytes up to that boundary are both read and written.
; NOTE(review): loads/stores use mova - presumably requires src to be
; 16-byte aligned; confirm callers provide aligned, padded buffers.
| %macro PREDICTOR 0 | |||||
| cglobal predictor, 2,2,5, src, size | |||||
| %if mmsize == 32 | |||||
| vbroadcasti128 m0, [pb_80] | |||||
| %else | |||||
| mova xm0, [pb_80] | |||||
| %endif | |||||
| mova xm1, [pb_15] | |||||
; xm2 is the carry into the next 16-byte block; it starts out as pb_80.
| mova xm2, xm0 | |||||
; Point srcq at the end of the buffer and index with a negative offset that
; counts up towards zero, so the loop needs only a single add + jl.
| add srcq, sizeq | |||||
| neg sizeq | |||||
| .loop: | |||||
| pxor m3, m0, [srcq + sizeq] | |||||
; Log-step inclusive prefix sum over bytes: add copies of the vector shifted
; up by 1, 2 and 4 bytes (the 8-byte step is completed below). pslldq shifts
; within each 128-bit lane, so with ymm registers both halves are summed
; independently.
| pslldq m4, m3, 1 | |||||
| paddb m3, m4 | |||||
| pslldq m4, m3, 2 | |||||
| paddb m3, m4 | |||||
| pslldq m4, m3, 4 | |||||
| paddb m3, m4 | |||||
| pslldq m4, m3, 8 | |||||
| %if mmsize == 32 | |||||
; 32-byte path: finish both lane sums, add the carry to the low lane and
; store it, then shuffle that result with pb_15 to get the carry for the
; high lane, add and store the high 16 bytes.
| paddb m3, m4 | |||||
| paddb xm2, xm3 | |||||
| vextracti128 xm4, m3, 1 | |||||
| mova [srcq + sizeq], xm2 | |||||
| pshufb xm2, xm1 | |||||
| paddb xm2, xm4 | |||||
| mova [srcq + sizeq + 16], xm2 | |||||
| %else | |||||
; 16-byte path: the partial sum (m3) and its 8-byte-shifted copy (m4) are
; both added straight onto the carry, which completes the prefix sum.
| paddb m2, m3 | |||||
| paddb m2, m4 | |||||
| mova [srcq + sizeq], m2 | |||||
| %endif | |||||
; Turn the block just written into the carry for the next iteration.
; Presumably pb_15 makes pshufb replicate byte 15 (the last output byte)
; into every byte of xm2 - confirm against the pb_15 definition.
| pshufb xm2, xm1 | |||||
| add sizeq, mmsize | |||||
| jl .loop | |||||
| RET | |||||
| %endmacro | |||||
; Instantiate PREDICTOR once per instruction set. The C side declares the
; resulting symbols as ff_predictor_ssse3 / ff_predictor_avx /
; ff_predictor_avx2. The ssse3 and avx builds use INIT_XMM, the avx2 build
; uses INIT_YMM (the macro's mmsize == 32 path).
| INIT_XMM ssse3 | |||||
| PREDICTOR | |||||
| INIT_XMM avx | |||||
| PREDICTOR | |||||
; The AVX2 variant is only assembled when HAVE_AVX2_EXTERNAL is set.
| %if HAVE_AVX2_EXTERNAL | |||||
| INIT_YMM avx2 | |||||
| PREDICTOR | |||||
| %endif | |||||
| @@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size); | |||||
| void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size); | void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size); | ||||
| void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size); | |||||
| void ff_predictor_avx(uint8_t *src, ptrdiff_t size); | |||||
| void ff_predictor_avx2(uint8_t *src, ptrdiff_t size); | |||||
| av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp) | av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp) | ||||
| { | { | ||||
| int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
| @@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp) | |||||
| if (EXTERNAL_SSE2(cpu_flags)) { | if (EXTERNAL_SSE2(cpu_flags)) { | ||||
| dsp->reorder_pixels = ff_reorder_pixels_sse2; | dsp->reorder_pixels = ff_reorder_pixels_sse2; | ||||
| } | } | ||||
| if (EXTERNAL_SSSE3(cpu_flags)) { | |||||
| dsp->predictor = ff_predictor_ssse3; | |||||
| } | |||||
| if (EXTERNAL_AVX(cpu_flags)) { | |||||
| dsp->predictor = ff_predictor_avx; | |||||
| } | |||||
| if (EXTERNAL_AVX2_FAST(cpu_flags)) { | if (EXTERNAL_AVX2_FAST(cpu_flags)) { | ||||
| dsp->reorder_pixels = ff_reorder_pixels_avx2; | dsp->reorder_pixels = ff_reorder_pixels_avx2; | ||||
| dsp->predictor = ff_predictor_avx2; | |||||
| } | } | ||||
| } | } | ||||
| @@ -55,6 +55,24 @@ static void check_reorder_pixels(void) { | |||||
| bench_new(dst_new, src, BUF_SIZE); | bench_new(dst_new, src, BUF_SIZE); | ||||
| } | } | ||||
/*
 * checkasm test for the predictor function: runs the reference and the
 * implementation under test on identical copies of the same input and
 * fails if the first BUF_SIZE bytes of the two results differ, then
 * benchmarks the implementation under test.
 */
| static void check_predictor(void) { | |||||
/* All three buffers are 32-byte aligned and PADDED_BUF_SIZE bytes long. */
| LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]); | |||||
| LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]); | |||||
| LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]); | |||||
/* Signature shared by the reference and the tested function. */
| declare_func(void, uint8_t *src, ptrdiff_t size); | |||||
/* Zero the whole buffer first so every byte, padding included, is defined.
 * NOTE(review): randomize_buffers() takes no arguments; presumably it
 * fills the local named src - confirm against the macro definition. */
| memset(src, 0, PADDED_BUF_SIZE); | |||||
| randomize_buffers(); | |||||
/* The function works in place, so each implementation gets its own copy. */
| memcpy(dst_ref, src, PADDED_BUF_SIZE); | |||||
| memcpy(dst_new, src, PADDED_BUF_SIZE); | |||||
| call_ref(dst_ref, BUF_SIZE); | |||||
| call_new(dst_new, BUF_SIZE); | |||||
/* Only the BUF_SIZE bytes that were requested are compared; the padding
 * beyond them is not checked. */
| if (memcmp(dst_ref, dst_new, BUF_SIZE)) | |||||
| fail(); | |||||
| bench_new(dst_new, BUF_SIZE); | |||||
| } | |||||
| void checkasm_check_exrdsp(void) | void checkasm_check_exrdsp(void) | ||||
| { | { | ||||
| ExrDSPContext h; | ExrDSPContext h; | ||||
| @@ -65,4 +83,9 @@ void checkasm_check_exrdsp(void) | |||||
| check_reorder_pixels(); | check_reorder_pixels(); | ||||
| report("reorder_pixels"); | report("reorder_pixels"); | ||||
| if (check_func(h.predictor, "predictor")) | |||||
| check_predictor(); | |||||
| report("predictor"); | |||||
| } | } | ||||