* qatar/master:
  proresdec: support mixed interlaced/non-interlaced content
  vp3/5: move put_no_rnd_pixels_l2 from dsputil to VP3DSPContext.

Merged-by: Michael Niedermayer <michaelni@gmx.at>
tags/n1.2
| @@ -2946,7 +2946,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||
| c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ | |||
| c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\ | |||
| c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\ | |||
| c->put_no_rnd_pixels_l2 = FUNCC(put_no_rnd_pixels8_l2 , depth);\ | |||
| \ | |||
| c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\ | |||
| c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\ | |||
| @@ -308,8 +308,6 @@ typedef struct DSPContext { | |||
| */ | |||
| op_pixels_func avg_no_rnd_pixels_tab[4][4]; | |||
| void (*put_no_rnd_pixels_l2)(uint8_t *block/*align 8*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); | |||
| /** | |||
| * Thirdpel motion compensation with rounding (a+b+1)>>1. | |||
| * this is an array[12] of motion compensation functions for the 9 thirdpe | |||
| @@ -582,10 +582,6 @@ PIXOP2(put, op_put) | |||
| #define put_no_rnd_pixels8_c put_pixels8_c | |||
| #define put_no_rnd_pixels16_c put_pixels16_c | |||
| static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ | |||
| FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h); | |||
| } | |||
| #define H264_CHROMA_MC(OPNAME, OP)\ | |||
| static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\ | |||
| pixel *dst = (pixel*)p_dst;\ | |||
| @@ -164,6 +164,8 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf, | |||
| if (ctx->frame_type) { /* if interlaced */ | |||
| ctx->picture.interlaced_frame = 1; | |||
| ctx->picture.top_field_first = ctx->frame_type & 1; | |||
| } else { | |||
| ctx->picture.interlaced_frame = 0; | |||
| } | |||
| avctx->color_primaries = buf[14]; | |||
| @@ -1570,7 +1570,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) | |||
| motion_source, stride, 8); | |||
| }else{ | |||
| int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1 | |||
| s->dsp.put_no_rnd_pixels_l2( | |||
| s->vp3dsp.put_no_rnd_pixels_l2( | |||
| output_plane + first_pixel, | |||
| motion_source - d, | |||
| motion_source + stride + 1 + d, | |||
| @@ -274,8 +274,27 @@ static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, | |||
| } | |||
| } | |||
/**
 * C reference for VP3DSPContext.put_no_rnd_pixels_l2: combine two 8-byte-wide
 * source blocks with no_rnd_avg32() and store the result to dst, one row at a
 * time. Per the VP3DSPContext contract this is the truncating average
 * (*a + *b) >> 1.
 *
 * @param dst    destination block; written with aligned 32-bit stores
 * @param src1   first source block; no alignment requirement
 * @param src2   second source block; no alignment requirement
 * @param stride byte distance between consecutive rows, shared by all three
 *               buffers
 * @param h      number of rows to process
 */
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
                                 const uint8_t *src2, ptrdiff_t stride, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        uint32_t a, b;

        /* The sources carry no alignment guarantee, so they must be read
         * with the unaligned accessor AV_RN32; only dst may use the aligned
         * AV_WN32A store. */
        a = AV_RN32(&src1[i * stride]);
        b = AV_RN32(&src2[i * stride]);
        AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b));

        a = AV_RN32(&src1[i * stride + 4]);
        b = AV_RN32(&src2[i * stride + 4]);
        AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
    }
}
| av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) | |||
| { | |||
| c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2; | |||
| c->idct_put = vp3_idct_put_c; | |||
| c->idct_add = vp3_idct_add_c; | |||
| c->idct_dc_add = vp3_idct_dc_add_c; | |||
| @@ -19,10 +19,26 @@ | |||
| #ifndef AVCODEC_VP3DSP_H | |||
| #define AVCODEC_VP3DSP_H | |||
| #include <stddef.h> | |||
| #include <stdint.h> | |||
| #include "dsputil.h" | |||
| typedef struct VP3DSPContext { | |||
| /** | |||
| * Copy 8xH pixels from source to destination buffer using a bilinear | |||
| * filter with no rounding (i.e. *dst = (*a + *b) >> 1). | |||
| * | |||
| * @param dst destination buffer, aligned by 8 | |||
| * @param a first source buffer, no alignment | |||
| * @param b second source buffer, no alignment | |||
| * @param stride distance between two lines in source/dest buffers | |||
| * @param h height | |||
| */ | |||
| void (*put_no_rnd_pixels_l2)(uint8_t *dst, | |||
| const uint8_t *a, | |||
| const uint8_t *b, | |||
| ptrdiff_t stride, int h); | |||
| void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block); | |||
| void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block); | |||
| void (*idct_dc_add)(uint8_t *dest, int line_size, DCTELEM *block); | |||
| @@ -373,9 +373,9 @@ static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src, | |||
| s->filter(s, dst, src_block, src_offset, src_offset+overlap_offset, | |||
| stride, s->mv[b], mask, s->filter_selection, b<4); | |||
| else | |||
| s->dsp.put_no_rnd_pixels_l2(dst, src_block+src_offset, | |||
| src_block+src_offset+overlap_offset, | |||
| stride, 8); | |||
| s->vp3dsp.put_no_rnd_pixels_l2(dst, src_block+src_offset, | |||
| src_block+src_offset+overlap_offset, | |||
| stride, 8); | |||
| } else { | |||
| s->dsp.put_pixels_tab[1][0](dst, src_block+src_offset, stride, 8); | |||
| } | |||
| @@ -1839,39 +1839,6 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, | |||
| avg_pixels8_mmxext(dst, src, stride, 8); | |||
| } | |||
| /* only used in VP3/5/6 */ | |||
| static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h) | |||
| { | |||
| // START_TIMER | |||
| MOVQ_BFE(mm6); | |||
| __asm__ volatile( | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "movq (%1,%4), %%mm2 \n\t" | |||
| "movq (%2,%4), %%mm3 \n\t" | |||
| PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "movq %%mm5, (%3,%4) \n\t" | |||
| "movq (%1,%4,2), %%mm0 \n\t" | |||
| "movq (%2,%4,2), %%mm1 \n\t" | |||
| "movq (%1,%5), %%mm2 \n\t" | |||
| "movq (%2,%5), %%mm3 \n\t" | |||
| "lea (%1,%4,4), %1 \n\t" | |||
| "lea (%2,%4,4), %2 \n\t" | |||
| PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3,%4,2) \n\t" | |||
| "movq %%mm5, (%3,%5) \n\t" | |||
| "lea (%3,%4,4), %3 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+r"(h), "+r"(a), "+r"(b), "+r"(dst) | |||
| :"r"((x86_reg)stride), "r"((x86_reg)3L*stride) | |||
| :"memory"); | |||
| // STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx") | |||
| } | |||
| #if CONFIG_DIRAC_DECODER | |||
| #define DIRAC_PIXOP(OPNAME, EXT)\ | |||
| void ff_ ## OPNAME ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ | |||
| @@ -2115,8 +2082,6 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) | |||
| c->add_bytes = add_bytes_mmx; | |||
| c->put_no_rnd_pixels_l2= put_vp_no_rnd_pixels8_l2_mmx; | |||
| if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { | |||
| c->h263_v_loop_filter = h263_v_loop_filter_mmx; | |||
| c->h263_h_loop_filter = h263_h_loop_filter_mmx; | |||
| @@ -1,4 +1,6 @@ | |||
| /* | |||
| * Copyright (c) 2009 David Conrad <lessen42@gmail.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| @@ -21,6 +23,7 @@ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/vp3dsp.h" | |||
| #include "config.h" | |||
| @@ -39,10 +42,68 @@ void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride, | |||
| void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride, | |||
| int *bounding_values); | |||
| #if HAVE_INLINE_ASM | |||
| #define MOVQ_BFE(regd) \ | |||
| __asm__ volatile ( \ | |||
| "pcmpeqd %%"#regd", %%"#regd" \n\t" \ | |||
| "paddb %%"#regd", %%"#regd" \n\t" ::) | |||
| #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \ | |||
| "movq "#rega", "#regr" \n\t" \ | |||
| "movq "#regc", "#regp" \n\t" \ | |||
| "pand "#regb", "#regr" \n\t" \ | |||
| "pand "#regd", "#regp" \n\t" \ | |||
| "pxor "#rega", "#regb" \n\t" \ | |||
| "pxor "#regc", "#regd" \n\t" \ | |||
| "pand %%mm6, "#regb" \n\t" \ | |||
| "pand %%mm6, "#regd" \n\t" \ | |||
| "psrlq $1, "#regb" \n\t" \ | |||
| "psrlq $1, "#regd" \n\t" \ | |||
| "paddb "#regb", "#regr" \n\t" \ | |||
| "paddb "#regd", "#regp" \n\t" | |||
| static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h) | |||
| { | |||
| // START_TIMER | |||
| MOVQ_BFE(mm6); | |||
| __asm__ volatile( | |||
| "1: \n\t" | |||
| "movq (%1), %%mm0 \n\t" | |||
| "movq (%2), %%mm1 \n\t" | |||
| "movq (%1,%4), %%mm2 \n\t" | |||
| "movq (%2,%4), %%mm3 \n\t" | |||
| PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3) \n\t" | |||
| "movq %%mm5, (%3,%4) \n\t" | |||
| "movq (%1,%4,2), %%mm0 \n\t" | |||
| "movq (%2,%4,2), %%mm1 \n\t" | |||
| "movq (%1,%5), %%mm2 \n\t" | |||
| "movq (%2,%5), %%mm3 \n\t" | |||
| "lea (%1,%4,4), %1 \n\t" | |||
| "lea (%2,%4,4), %2 \n\t" | |||
| PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |||
| "movq %%mm4, (%3,%4,2) \n\t" | |||
| "movq %%mm5, (%3,%5) \n\t" | |||
| "lea (%3,%4,4), %3 \n\t" | |||
| "subl $4, %0 \n\t" | |||
| "jnz 1b \n\t" | |||
| :"+r"(h), "+r"(a), "+r"(b), "+r"(dst) | |||
| :"r"((x86_reg)stride), "r"((x86_reg)3L*stride) | |||
| :"memory"); | |||
| // STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx") | |||
| } | |||
| #endif /* HAVE_INLINE_ASM */ | |||
| av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) | |||
| { | |||
| int cpuflags = av_get_cpu_flags(); | |||
| #if HAVE_INLINE_ASM | |||
| c->put_no_rnd_pixels_l2 = put_vp_no_rnd_pixels8_l2_mmx; | |||
| #endif /* HAVE_INLINE_ASM */ | |||
| #if ARCH_X86_32 | |||
| if (EXTERNAL_MMX(cpuflags)) { | |||
| c->idct_put = ff_vp3_idct_put_mmx; | |||