Browse Source

avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction

asm code by Henrik Gramner
tags/n4.0
Martin Vignali 8 years ago
parent
commit
8f9c38b196
5 changed files with 71 additions and 19 deletions
  1. +15
    -0
      libavcodec/lossless_videoencdsp.c
  2. +5
    -0
      libavcodec/lossless_videoencdsp.h
  3. +1
    -19
      libavcodec/utvideoenc.c
  4. +43
    -0
      libavcodec/x86/lossless_videoencdsp.asm
  5. +7
    -0
      libavcodec/x86/lossless_videoencdsp_init.c

+ 15
- 0
libavcodec/lossless_videoencdsp.c View File

@@ -74,10 +74,25 @@ static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1,
*left_top = lt;
}

/**
 * Reference C implementation of left prediction over a plane.
 *
 * Every output byte is the (wrapping, 8-bit) difference between a source
 * byte and the source byte visited just before it.  The predictor starts at
 * 0x80 and is NOT reset at the start of a row: the last byte of one row
 * predicts the first byte of the next one.
 *
 * @param dst    output buffer, written contiguously (width bytes per row)
 * @param src    first row of the input plane
 * @param stride distance in bytes between consecutive input rows
 * @param width  number of bytes processed per row
 * @param height number of rows
 */
static void sub_left_predict_c(uint8_t *dst, uint8_t *src,
                               ptrdiff_t stride, ptrdiff_t width, int height)
{
    uint8_t left = 0x80; /* predictor for the very first sample */
    int row, col;

    for (row = 0; row < height; row++, src += stride) {
        for (col = 0; col < width; col++, dst++) {
            *dst = src[col] - left;
            /* Load the source byte again after the store, exactly as the
             * sequence "store difference, then fetch predictor" requires. */
            left = src[col];
        }
    }
}

av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c)
{
c->diff_bytes = diff_bytes_c;
c->sub_median_pred = sub_median_pred_c;
c->sub_left_predict = sub_left_predict_c;

if (ARCH_X86)
ff_llvidencdsp_init_x86(c);


+ 5
- 0
libavcodec/lossless_videoencdsp.h View File

@@ -21,6 +21,8 @@

#include <stdint.h>

#include "avcodec.h"

typedef struct LLVidEncDSPContext {
void (*diff_bytes)(uint8_t *dst /* align 16 */,
const uint8_t *src1 /* align 16 */,
@@ -33,6 +35,9 @@ typedef struct LLVidEncDSPContext {
void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, intptr_t w,
int *left, int *left_top);

void (*sub_left_predict)(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, ptrdiff_t width, int height);
} LLVidEncDSPContext;

void ff_llvidencdsp_init(LLVidEncDSPContext *c);


+ 1
- 19
libavcodec/utvideoenc.c View File

@@ -283,23 +283,6 @@ static void mangle_rgb_planes(uint8_t *dst[4], ptrdiff_t dst_stride,
}
}

/*
 * Write data to a plane with left prediction.
 *
 * Each byte stored to dst is the 8-bit difference between the current source
 * byte and the previously visited source byte.  The predictor is seeded with
 * 0x80 and carries over from the end of one row to the start of the next.
 * dst is filled contiguously; src advances by stride after every row.
 */
static void left_predict(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                         int width, int height)
{
    uint8_t last = 0x80; /* Set the initial value */
    int y = 0;

    while (y < height) {
        int x = 0;

        while (x < width) {
            *dst = src[x] - last;
            last = src[x];
            dst++;
            x++;
        }

        src += stride;
        y++;
    }
}

#undef A
#undef B

@@ -436,8 +419,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
for (i = 0; i < c->slices; i++) {
sstart = send;
send = height * (i + 1) / c->slices & cmask;
left_predict(src + sstart * stride, dst + sstart * width,
stride, width, send - sstart);
c->llvidencdsp.sub_left_predict(dst + sstart * width, src + sstart * stride, stride, width, send - sstart);
}
break;
case PRED_MEDIAN:


+ 43
- 0
libavcodec/x86/lossless_videoencdsp.asm View File

@@ -25,6 +25,8 @@

%include "libavutil/x86/x86util.asm"

cextern pb_80

SECTION .text

; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
@@ -149,3 +151,44 @@ DIFF_BYTES_PROLOGUE
DIFF_BYTES_BODY u, u
%undef i
%endif


;--------------------------------------------------------------------------------------------------
;void sub_left_predict(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
;--------------------------------------------------------------------------------------------------
; Left prediction over a plane: every output byte is the source byte minus the
; source byte that precedes it. The predecessor of the very first byte comes
; from pb_80, and the last byte of each row is carried over as the predecessor
; of the first byte of the next row. dst advances by width per row, src by
; stride.
;
; Each inner iteration loads and stores 2 * 16 bytes with unaligned moves and
; the inner loop only stops once the negative column offset reaches >= 0, so
; the last iteration of a row can read and write past the row end when width
; is not a multiple of 32.
; NOTE(review): presumably callers provide enough padding in src/dst - confirm.
; The row body runs once before heightd is tested, so height <= 0 still
; processes one row.

INIT_XMM avx
cglobal sub_left_predict, 5,6,5, dst, src, stride, width, height, x
mova m1, [pb_80] ; m1 = "previous bytes" vector, seeded from pb_80
add dstq, widthq ; dst -> end of first output row
add srcq, widthq ; src -> end of first input row
lea xd, [widthq-1]
neg widthq ; widthq = -width: negative offset that counts up towards 0
and xd, 15 ; xd = (width - 1) & 15: lane of the row's last byte inside its 16-byte vector
pinsrb m4, m1, xd, 15 ; m4 = pshufb mask whose byte 15 selects that lane
mov xq, widthq ; x is reused: keep -width to rewind widthq and to advance dst

.loop:
movu m0, [srcq + widthq] ; first 16 source bytes of this step
palignr m2, m0, m1, 15 ; m2 = { m1[15], m0[0..14] }: left neighbours of m0
movu m1, [srcq + widthq + 16] ; next 16 source bytes
palignr m3, m1, m0, 15 ; m3 = { m0[15], m1[0..14] }: left neighbours of m1
psubb m2, m0, m2 ; current - left (bytewise, wrapping)
psubb m3, m1, m3
movu [dstq + widthq], m2
movu [dstq + widthq + 16], m3
add widthq, 2 * 16
jl .loop ; continue while the offset is still negative

add srcq, strideq ; next input row
sub dstq, xq ; dst + width
test xd, 16 ; bit 4 of -width: set when the row's last byte was loaded into m0
jz .mod32 ; otherwise it is in m1 already
mova m1, m0

.mod32:
pshufb m1, m4 ; move the row's last byte into lane 15 (the only lane palignr consumes)
mov widthq, xq ; rewind the column offset to -width
dec heightd
jg .loop ; more rows left
RET

+ 7
- 0
libavcodec/x86/lossless_videoencdsp_init.c View File

@@ -36,6 +36,9 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w);

/* AVX variant of the sub_left_predict callback; ff_llvidencdsp_init_x86()
 * installs it in LLVidEncDSPContext when EXTERNAL_AVX(cpu_flags) is true. */
void ff_sub_left_predict_avx(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, ptrdiff_t width, int height);

#if HAVE_INLINE_ASM

static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
@@ -98,6 +101,10 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
c->diff_bytes = ff_diff_bytes_sse2;
}

if (EXTERNAL_AVX(cpu_flags)) {
c->sub_left_predict = ff_sub_left_predict_avx;
}

if (EXTERNAL_AVX2_FAST(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_avx2;
}


Loading…
Cancel
Save