Signed-off-by: James Almer <jamrial@gmail.com>
tags/n3.3
| @@ -882,7 +882,7 @@ static int left_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, int | |||||
| if (s->bps <= 8) { | if (s->bps <= 8) { | ||||
| return s->llviddsp.add_left_pred(dst, src, w, acc); | return s->llviddsp.add_left_pred(dst, src, w, acc); | ||||
| } else { | } else { | ||||
| return s->llviddsp.add_hfyu_left_pred_int16(( uint16_t *)dst, (const uint16_t *)src, s->n-1, w, acc); | |||||
| return s->llviddsp.add_left_pred_int16(( uint16_t *)dst, (const uint16_t *)src, s->n-1, w, acc); | |||||
| } | } | ||||
| } | } | ||||
| @@ -79,7 +79,7 @@ static int add_left_pred_c(uint8_t *dst, const uint8_t *src, intptr_t w, | |||||
| return acc; | return acc; | ||||
| } | } | ||||
| static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc){ | |||||
| static int add_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc){ | |||||
| int i; | int i; | ||||
| for(i=0; i<w-1; i++){ | for(i=0; i<w-1; i++){ | ||||
| @@ -105,7 +105,7 @@ void ff_llviddsp_init(LLVidDSPContext *c) | |||||
| c->add_median_pred = add_median_pred_c; | c->add_median_pred = add_median_pred_c; | ||||
| c->add_left_pred = add_left_pred_c; | c->add_left_pred = add_left_pred_c; | ||||
| c->add_hfyu_left_pred_int16 = add_hfyu_left_pred_int16_c; | |||||
| c->add_left_pred_int16 = add_left_pred_int16_c; | |||||
| if (ARCH_X86) | if (ARCH_X86) | ||||
| ff_llviddsp_init_x86(c); | ff_llviddsp_init_x86(c); | ||||
| @@ -34,7 +34,8 @@ typedef struct LLVidDSPContext { | |||||
| int (*add_left_pred)(uint8_t *dst, const uint8_t *src, | int (*add_left_pred)(uint8_t *dst, const uint8_t *src, | ||||
| intptr_t w, int left); | intptr_t w, int left); | ||||
| int (*add_hfyu_left_pred_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned left); | |||||
| int (*add_left_pred_int16)(uint16_t *dst, const uint16_t *src, | |||||
| unsigned mask, int w, unsigned left); | |||||
| } LLVidDSPContext; | } LLVidDSPContext; | ||||
| void ff_llviddsp_init(LLVidDSPContext *llviddsp); | void ff_llviddsp_init(LLVidDSPContext *llviddsp); | ||||
| @@ -221,24 +221,24 @@ static int magy_decode_slice10(AVCodecContext *avctx, void *tdata, | |||||
| switch (pred) { | switch (pred) { | ||||
| case LEFT: | case LEFT: | ||||
| dst = (uint16_t *)p->data[i] + j * sheight * stride; | dst = (uint16_t *)p->data[i] + j * sheight * stride; | ||||
| s->llviddsp.add_hfyu_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| s->llviddsp.add_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| dst += stride; | dst += stride; | ||||
| if (interlaced) { | if (interlaced) { | ||||
| s->llviddsp.add_hfyu_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| s->llviddsp.add_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| for (k = 1 + interlaced; k < height; k++) { | for (k = 1 + interlaced; k < height; k++) { | ||||
| s->llviddsp.add_hfyu_left_pred_int16(dst, dst, 1023, width, dst[-fake_stride]); | |||||
| s->llviddsp.add_left_pred_int16(dst, dst, 1023, width, dst[-fake_stride]); | |||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| break; | break; | ||||
| case GRADIENT: | case GRADIENT: | ||||
| dst = (uint16_t *)p->data[i] + j * sheight * stride; | dst = (uint16_t *)p->data[i] + j * sheight * stride; | ||||
| s->llviddsp.add_hfyu_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| s->llviddsp.add_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| left = lefttop = 0; | left = lefttop = 0; | ||||
| dst += stride; | dst += stride; | ||||
| if (interlaced) { | if (interlaced) { | ||||
| s->llviddsp.add_hfyu_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| s->llviddsp.add_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| left = lefttop = 0; | left = lefttop = 0; | ||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| @@ -258,11 +258,11 @@ static int magy_decode_slice10(AVCodecContext *avctx, void *tdata, | |||||
| case MEDIAN: | case MEDIAN: | ||||
| dst = (uint16_t *)p->data[i] + j * sheight * stride; | dst = (uint16_t *)p->data[i] + j * sheight * stride; | ||||
| lefttop = left = dst[0]; | lefttop = left = dst[0]; | ||||
| s->llviddsp.add_hfyu_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| s->llviddsp.add_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| dst += stride; | dst += stride; | ||||
| if (interlaced) { | if (interlaced) { | ||||
| lefttop = left = dst[0]; | lefttop = left = dst[0]; | ||||
| s->llviddsp.add_hfyu_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| s->llviddsp.add_left_pred_int16(dst, dst, 1023, width, 0); | |||||
| dst += stride; | dst += stride; | ||||
| } | } | ||||
| for (k = 1 + interlaced; k < height; k++) { | for (k = 1 + interlaced; k < height; k++) { | ||||
| @@ -258,9 +258,9 @@ ADD_BYTES | |||||
| RET | RET | ||||
| %endmacro | %endmacro | ||||
| ; int add_hfyu_left_pred_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int left) | |||||
| ; int add_left_pred_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int left) | |||||
| INIT_MMX ssse3 | INIT_MMX ssse3 | ||||
| cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left | |||||
| cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left | |||||
| .skip_prologue: | .skip_prologue: | ||||
| mova m5, [pb_67] | mova m5, [pb_67] | ||||
| mova m3, [pb_zzzz2323zzzzabab] | mova m3, [pb_zzzz2323zzzzabab] | ||||
| @@ -271,7 +271,7 @@ cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left | |||||
| ADD_HFYU_LEFT_LOOP_INT16 a, a | ADD_HFYU_LEFT_LOOP_INT16 a, a | ||||
| INIT_XMM sse4 | INIT_XMM sse4 | ||||
| cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left | |||||
| cglobal add_left_pred_int16, 4,4,8, dst, src, mask, w, left | |||||
| mova m5, [pb_ef] | mova m5, [pb_ef] | ||||
| mova m4, [pb_zzzzzzzz67676767] | mova m4, [pb_zzzzzzzz67676767] | ||||
| mova m3, [pb_zzzz2323zzzzabab] | mova m3, [pb_zzzz2323zzzzabab] | ||||
| @@ -38,8 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src, | |||||
| int ff_add_left_pred_sse4(uint8_t *dst, const uint8_t *src, | int ff_add_left_pred_sse4(uint8_t *dst, const uint8_t *src, | ||||
| intptr_t w, int left); | intptr_t w, int left); | ||||
| int ff_add_hfyu_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); | |||||
| int ff_add_hfyu_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); | |||||
| int ff_add_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); | |||||
| int ff_add_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); | |||||
| #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 | #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 | ||||
| static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top, | static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top, | ||||
| @@ -108,10 +108,10 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c) | |||||
| if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe | if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe | ||||
| c->add_left_pred = ff_add_left_pred_sse4; | c->add_left_pred = ff_add_left_pred_sse4; | ||||
| c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_ssse3; | |||||
| c->add_left_pred_int16 = ff_add_left_pred_int16_ssse3; | |||||
| } | } | ||||
| if (EXTERNAL_SSE4(cpu_flags)) { | if (EXTERNAL_SSE4(cpu_flags)) { | ||||
| c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_sse4; | |||||
| c->add_left_pred_int16 = ff_add_left_pred_int16_sse4; | |||||
| } | } | ||||
| } | } | ||||