Browse Source

avcodec/x86/dsp: add_int16_mmx / add_int16_sse2

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
tags/n2.2-rc1
Michael Niedermayer 11 years ago
parent
commit
a493f8541d
3 changed files with 70 additions and 0 deletions
  1. +65
    -0
      libavcodec/x86/dsputil.asm
  2. +3
    -0
      libavcodec/x86/dsputil_init.c
  3. +2
    -0
      libavcodec/x86/dsputil_x86.h

+ 65
- 0
libavcodec/x86/dsputil.asm View File

@@ -465,6 +465,71 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
.src_unaligned:
ADD_HFYU_LEFT_LOOP 0, 0


;-----------------------------------------------------------------------------
; ADD_INT16_LOOP is_aligned
;
; Shared body of the add_int16 entry points:
;     dst[i] = (dst[i] + src[i]) & mask        for 0 <= i < w
; where dst/src point to 16-bit elements and w is the element count.
;
; %1 = 1: vector loads/stores use mova, %1 = 0: they use movu.
;
; Expects the cglobal argument names dst, src, mask and w to be in registers.
; Uses m0-m4. r4 serves as a scalar temporary in the tail loop and is
; saved/restored with push/pop, so callers need not declare it.
; The macro ends with RET.
;-----------------------------------------------------------------------------
%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
    movd      m4, maskd                 ; mask is a 32-bit argument
    punpcklwd m4, m4
    punpcklwd m4, m4
    punpcklwd m4, m4                    ; low word of mask now fills every word lane
    ; Element count -> byte count. The 32-bit form zero-extends into wq on
    ; x86-64, where the upper half of an int argument register is not
    ; guaranteed to be defined.
    add       wd, wd
    test      wq, 2*mmsize - 1
    jz %%.tomainloop
    ; Tail: handle trailing words one at a time, from the end of the buffers,
    ; until the remaining byte count is a multiple of 2*mmsize.
    ; A dedicated temporary is required: on x86-32 x86inc maps r0 (dst) to
    ; eax, so using ax as scratch would corrupt the destination pointer.
    push      r4
%%.wordloop:
    sub       wq, 2
    mov       r4w, [srcq+wq]
    add       r4w, [dstq+wq]
    and       r4w, maskw
    mov       [dstq+wq], r4w
    test      wq, 2*mmsize - 1
    jnz %%.wordloop
    pop       r4
%%.tomainloop:
    ; Point src/dst at the end of the vectorised region and walk a negative
    ; byte offset up towards zero.
    add       srcq, wq
    add       dstq, wq
    neg       wq
    jz %%.end                           ; nothing left for the vector loop
%%.loop:
%if %1
    mova      m0, [srcq+wq]
    mova      m1, [dstq+wq]
    mova      m2, [srcq+wq+mmsize]
    mova      m3, [dstq+wq+mmsize]
%else
    movu      m0, [srcq+wq]
    movu      m1, [dstq+wq]
    movu      m2, [srcq+wq+mmsize]
    movu      m3, [dstq+wq+mmsize]
%endif
    paddw     m0, m1
    paddw     m2, m3
    pand      m0, m4
    pand      m2, m4
%if %1
    mova      [dstq+wq]       , m0
    mova      [dstq+wq+mmsize], m2
%else
    movu      [dstq+wq]       , m0
    movu      [dstq+wq+mmsize], m2
%endif
    add       wq, 2*mmsize              ; two vectors per iteration
    jl %%.loop
%%.end:
    RET
%endmacro

;-----------------------------------------------------------------------------
; void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src,
;                       unsigned mask, int w)
;
; MMX entry point. Instantiates the shared loop with is_aligned = 1 without
; inspecting the alignment of dst or src.
;-----------------------------------------------------------------------------
INIT_MMX mmx
cglobal add_int16, 4,4,5, dst, src, mask, w
    ADD_INT16_LOOP 1                    ; macro supplies the RET

;-----------------------------------------------------------------------------
; void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src,
;                        unsigned mask, int w)
;
; SSE2 entry point. Selects the mova variant of the shared loop only when
; both src and dst are multiples of mmsize; otherwise the movu variant runs.
;-----------------------------------------------------------------------------
INIT_XMM sse2
cglobal add_int16, 4,4,5, dst, src, mask, w
    test      srcq, mmsize-1
    jnz .any_unaligned
    test      dstq, mmsize-1
    jnz .any_unaligned
    ; Both pointers aligned. The macro ends in RET, so control never falls
    ; through into the unaligned instantiation below.
    ADD_INT16_LOOP 1
.any_unaligned:
    ADD_INT16_LOOP 0

;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
; int32_t max, unsigned int len)


+ 3
- 0
libavcodec/x86/dsputil_init.c View File

@@ -542,6 +542,7 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_MMX_INLINE */

#if HAVE_MMX_EXTERNAL
c->add_int16 = ff_add_int16_mmx;
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
#endif /* HAVE_MMX_EXTERNAL */
}
@@ -625,6 +626,8 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
}
c->bswap_buf = ff_bswap32_buf_sse2;

c->add_int16 = ff_add_int16_sse2;
#endif /* HAVE_SSE2_EXTERNAL */
}



+ 2
- 0
libavcodec/x86/dsputil_x86.h View File

@@ -116,6 +116,8 @@ void ff_clear_blocks_mmx(int16_t *blocks);
void ff_clear_blocks_sse(int16_t *blocks);

void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
/* dst[i] = (dst[i] + src[i]) & mask for each of the w 16-bit elements.
 * Implemented in dsputil.asm; the SSE2 version picks aligned or unaligned
 * vector accesses at run time based on the dst/src addresses. */
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);

void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w,


Loading…
Cancel
Save