Browse Source

avcodec/jpeg2000dsp: add ff_rct_int_{sse2,avx2}

Reviewed-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: James Almer <jamrial@gmail.com>
tags/n2.8
James Almer 10 years ago
parent
commit
9f815bc2c2
3 changed files with 47 additions and 0 deletions
  1. +1
    -0
      libavcodec/jpeg2000.c
  2. +36
    -0
      libavcodec/x86/jpeg2000dsp.asm
  3. +10
    -0
      libavcodec/x86/jpeg2000dsp_init.c

+ 1
- 0
libavcodec/jpeg2000.c View File

@@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
if (!comp->f_data)
return AVERROR(ENOMEM);
} else {
csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
comp->f_data = NULL;
comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
if (!comp->i_data)


+ 36
- 0
libavcodec/x86/jpeg2000dsp.asm View File

@@ -106,3 +106,39 @@ INIT_XMM sse
ICT_FLOAT 10
INIT_YMM avx
ICT_FLOAT 9

;***************************************************************************
; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
;
; In-place integer transform over three int32 planes. Per element, with
; a = src0[i], b = src1[i], c = src2[i] on entry:
;     t       = a - ((b + c) >> 2)      (32-bit wrapping add, arithmetic shift)
;     src1[i] = t
;     src2[i] = t + b
;     src0[i] = t + c
; Data is processed mmsize bytes per iteration and the loop body always runs
; at least once, so whole vectors are read and written even when csize is not
; a multiple of the vector width.
; NOTE(review): mova presumably requires all three buffers to be aligned to
; mmsize and padded to a whole vector -- confirm against the caller's
; allocation.
;***************************************************************************
%macro RCT_INT 0
cglobal rct_int, 4, 4, 4, src0, src1, src2, csize
; csize arrives as an element count; scale by 4 (sizeof(int32_t)) to bytes.
shl csized, 2
; Advance each pointer to the end of its buffer ...
add src0q, csizeq
add src1q, csizeq
add src2q, csizeq
; ... and index with a negative byte offset that counts up towards zero, so
; the single add at the bottom of the loop also sets the flags for the exit test.
neg csizeq

align 16
.loop:
; Load one vector from each plane: m0 = a, m1 = b, m2 = c.
mova m1, [src1q+csizeq]
mova m2, [src2q+csizeq]
mova m0, [src0q+csizeq]
; m3 = (b + c) >> 2; m1 and m2 are left intact for the later adds.
paddd m3, m1, m2
psrad m3, 2
; m0 = t = a - ((b + c) >> 2)
psubd m0, m3
; m1 = t + b, m2 = t + c
paddd m1, m0
paddd m2, m0
; Write back with the planes rotated: src1 <- t, src2 <- t + b, src0 <- t + c.
mova [src1q+csizeq], m0
mova [src2q+csizeq], m1
mova [src0q+csizeq], m2
; Step to the next vector; keep looping while the offset is still negative.
add csizeq, mmsize
jl .loop
REP_RET
%endmacro

; Emit the kernel once per instruction set. The XMM/sse2 version is always
; assembled.
INIT_XMM sse2
RCT_INT
; The YMM/avx2 version is only assembled when HAVE_AVX2_EXTERNAL is non-zero.
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RCT_INT
%endif

+ 10
- 0
libavcodec/x86/jpeg2000dsp_init.c View File

@@ -26,6 +26,8 @@

/* Entry points implemented in libavcodec/x86/jpeg2000dsp.asm. Each takes
 * three plane pointers, which it transforms in place, and an element count.
 * The rct_int variants treat the planes as int32_t. */
void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);

av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
{
@@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
c->mct_decode[FF_DWT97] = ff_ict_float_sse;
}

if (EXTERNAL_SSE2(cpu_flags)) {
c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
}

if (EXTERNAL_AVX_FAST(cpu_flags)) {
c->mct_decode[FF_DWT97] = ff_ict_float_avx;
}

if (EXTERNAL_AVX2(cpu_flags)) {
c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
}
}

Loading…
Cancel
Save