This moves all VP3-specific function pointers from dsputil to a new vp3dsp context. There is no reason to ever use the VP3 IDCT where an MPEG2 IDCT is expected or vice versa. Signed-off-by: Mans Rullgard <mans@mansr.com> (tags/n1.0)
| @@ -11,6 +11,7 @@ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o | |||||
| OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o | OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o | ||||
| ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | ||||
| OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o | |||||
| OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o | OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o | ||||
| OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o | OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o | ||||
| OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o | OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o | ||||
| @@ -29,11 +29,6 @@ void ff_simple_idct_neon(DCTELEM *data); | |||||
| void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); | void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); | ||||
| void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); | void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); | ||||
| void ff_vp3_idct_neon(DCTELEM *data); | |||||
| void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); | |||||
| void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); | |||||
| void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data); | |||||
| void ff_clear_block_neon(DCTELEM *block); | void ff_clear_block_neon(DCTELEM *block); | ||||
| void ff_clear_blocks_neon(DCTELEM *blocks); | void ff_clear_blocks_neon(DCTELEM *blocks); | ||||
| @@ -147,9 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
| void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
| void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
| void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); | |||||
| void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); | |||||
| void ff_vector_fmul_window_neon(float *dst, const float *src0, | void ff_vector_fmul_window_neon(float *dst, const float *src0, | ||||
| const float *src1, const float *win, int len); | const float *src1, const float *win, int len); | ||||
| void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, | void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, | ||||
| @@ -186,13 +178,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | |||||
| c->idct_add = ff_simple_idct_add_neon; | c->idct_add = ff_simple_idct_add_neon; | ||||
| c->idct = ff_simple_idct_neon; | c->idct = ff_simple_idct_neon; | ||||
| c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; | c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; | ||||
| } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || | |||||
| CONFIG_VP6_DECODER) && | |||||
| avctx->idct_algo == FF_IDCT_VP3) { | |||||
| c->idct_put = ff_vp3_idct_put_neon; | |||||
| c->idct_add = ff_vp3_idct_add_neon; | |||||
| c->idct = ff_vp3_idct_neon; | |||||
| c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |||||
| } | } | ||||
| } | } | ||||
| @@ -319,12 +304,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | |||||
| c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; | c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; | ||||
| } | } | ||||
| if (CONFIG_VP3_DECODER) { | |||||
| c->vp3_v_loop_filter = ff_vp3_v_loop_filter_neon; | |||||
| c->vp3_h_loop_filter = ff_vp3_h_loop_filter_neon; | |||||
| c->vp3_idct_dc_add = ff_vp3_idct_dc_add_neon; | |||||
| } | |||||
| c->vector_fmul_window = ff_vector_fmul_window_neon; | c->vector_fmul_window = ff_vector_fmul_window_neon; | ||||
| c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; | c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; | ||||
| c->butterflies_float = ff_butterflies_float_neon; | c->butterflies_float = ff_butterflies_float_neon; | ||||
| @@ -0,0 +1,45 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/arm/cpu.h" | |||||
| #include "libavcodec/vp3dsp.h" | |||||
| void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); | |||||
| void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); | |||||
| void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data); | |||||
| void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); | |||||
| void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); | |||||
/**
 * ARM-specific setup of a VP3DSPContext.
 *
 * Queries the runtime CPU flags and, when have_neon() accepts them, points
 * all five function slots of the context at the NEON routines declared
 * above and records FF_TRANSPOSE_IDCT_PERM as the IDCT permutation.
 * When NEON is not reported the context is left exactly as the caller
 * passed it in.
 *
 * @param c     context to update
 * @param flags not read by this function
 */
| av_cold void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags) | |||||
| { | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| if (have_neon(cpu_flags)) { | |||||
| c->idct_put = ff_vp3_idct_put_neon; | |||||
| c->idct_add = ff_vp3_idct_add_neon; | |||||
| c->idct_dc_add = ff_vp3_idct_dc_add_neon; | |||||
| c->v_loop_filter = ff_vp3_v_loop_filter_neon; | |||||
| c->h_loop_filter = ff_vp3_h_loop_filter_neon; | |||||
| c->idct_perm = FF_TRANSPOSE_IDCT_PERM; /* set together with the NEON IDCT pointers above */ | |||||
| } | |||||
| } | |||||
| @@ -260,32 +260,6 @@ endfunc | |||||
| VP3_IDCT_END row | VP3_IDCT_END row | ||||
| VP3_IDCT_END col | VP3_IDCT_END col | ||||
| function ff_vp3_idct_neon, export=1 | |||||
| mov ip, lr | |||||
| mov r2, r0 | |||||
| bl vp3_idct_start_neon | |||||
| bl vp3_idct_end_row_neon | |||||
| mov r3, #8 | |||||
| bl vp3_idct_core_neon | |||||
| bl vp3_idct_end_col_neon | |||||
| mov lr, ip | |||||
| vpop {d8-d15} | |||||
| vshr.s16 q8, q8, #4 | |||||
| vshr.s16 q9, q9, #4 | |||||
| vshr.s16 q10, q10, #4 | |||||
| vshr.s16 q11, q11, #4 | |||||
| vshr.s16 q12, q12, #4 | |||||
| vst1.64 {d16-d19}, [r0,:128]! | |||||
| vshr.s16 q13, q13, #4 | |||||
| vshr.s16 q14, q14, #4 | |||||
| vst1.64 {d20-d23}, [r0,:128]! | |||||
| vshr.s16 q15, q15, #4 | |||||
| vst1.64 {d24-d27}, [r0,:128]! | |||||
| vst1.64 {d28-d31}, [r0,:128]! | |||||
| bx lr | |||||
| endfunc | |||||
| function ff_vp3_idct_put_neon, export=1 | function ff_vp3_idct_put_neon, export=1 | ||||
| mov ip, lr | mov ip, lr | ||||
| bl vp3_idct_start_neon | bl vp3_idct_start_neon | ||||
| @@ -2701,12 +2701,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
| c->idct_add= ff_jref_idct_add; | c->idct_add= ff_jref_idct_add; | ||||
| c->idct = ff_j_rev_dct; | c->idct = ff_j_rev_dct; | ||||
| c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | ||||
| }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) && | |||||
| avctx->idct_algo==FF_IDCT_VP3){ | |||||
| c->idct_put= ff_vp3_idct_put_c; | |||||
| c->idct_add= ff_vp3_idct_add_c; | |||||
| c->idct = ff_vp3_idct_c; | |||||
| c->idct_permutation_type= FF_NO_IDCT_PERM; | |||||
| }else if(avctx->idct_algo==FF_IDCT_WMV2){ | }else if(avctx->idct_algo==FF_IDCT_WMV2){ | ||||
| c->idct_put= ff_wmv2_idct_put_c; | c->idct_put= ff_wmv2_idct_put_c; | ||||
| c->idct_add= ff_wmv2_idct_add_c; | c->idct_add= ff_wmv2_idct_add_c; | ||||
| @@ -2867,12 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
| c->h263_v_loop_filter= h263_v_loop_filter_c; | c->h263_v_loop_filter= h263_v_loop_filter_c; | ||||
| } | } | ||||
| if (CONFIG_VP3_DECODER) { | |||||
| c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c; | |||||
| c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; | |||||
| c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c; | |||||
| } | |||||
| c->h261_loop_filter= h261_loop_filter_c; | c->h261_loop_filter= h261_loop_filter_c; | ||||
| c->try_8x8basis= try_8x8basis_c; | c->try_8x8basis= try_8x8basis_c; | ||||
| @@ -101,15 +101,6 @@ PUTAVG_PIXELS(10) | |||||
| #define ff_put_pixels16x16_c ff_put_pixels16x16_8_c | #define ff_put_pixels16x16_c ff_put_pixels16x16_8_c | ||||
| #define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c | #define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c | ||||
| /* VP3 DSP functions */ | |||||
| void ff_vp3_idct_c(DCTELEM *block/* align 16*/); | |||||
| void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |||||
| void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | |||||
| void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/); | |||||
| void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values); | |||||
| void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); | |||||
| /* EA functions */ | /* EA functions */ | ||||
| void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); | void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); | ||||
| @@ -391,10 +382,6 @@ typedef struct DSPContext { | |||||
| void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); | void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); | ||||
| void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); | void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); | ||||
| void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/); | |||||
| void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); | |||||
| void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); | |||||
| /* assume len is a multiple of 4, and arrays are 16-byte aligned */ | /* assume len is a multiple of 4, and arrays are 16-byte aligned */ | ||||
| void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); | void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); | ||||
| void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); | void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); | ||||
| @@ -1,14 +1,13 @@ | |||||
| OBJS += ppc/dsputil_ppc.o \ | OBJS += ppc/dsputil_ppc.o \ | ||||
| OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o | |||||
| FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o | FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o | ||||
| ALTIVEC-OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o \ | ALTIVEC-OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o \ | ||||
| $(FFT-OBJS-yes) | $(FFT-OBJS-yes) | ||||
| ALTIVEC-OBJS-$(CONFIG_H264DSP) += ppc/h264_altivec.o | ALTIVEC-OBJS-$(CONFIG_H264DSP) += ppc/h264_altivec.o | ||||
| ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodec_altivec.o | ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodec_altivec.o | ||||
| ALTIVEC-OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o | ALTIVEC-OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o | ||||
| ALTIVEC-OBJS-$(CONFIG_VP3_DECODER) += ppc/vp3dsp_altivec.o | |||||
| ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o | |||||
| ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o | |||||
| ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o | ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o | ||||
| ALTIVEC-OBJS += ppc/dsputil_altivec.o \ | ALTIVEC-OBJS += ppc/dsputil_altivec.o \ | ||||
| @@ -36,10 +36,6 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, | |||||
| void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | ||||
| void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | ||||
| void ff_vp3_idct_altivec(DCTELEM *block); | |||||
| void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); | void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); | ||||
| void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); | void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); | ||||
| @@ -185,12 +185,6 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) | |||||
| c->idct_put = ff_idct_put_altivec; | c->idct_put = ff_idct_put_altivec; | ||||
| c->idct_add = ff_idct_add_altivec; | c->idct_add = ff_idct_add_altivec; | ||||
| c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | ||||
| }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) && | |||||
| avctx->idct_algo==FF_IDCT_VP3){ | |||||
| c->idct_put = ff_vp3_idct_put_altivec; | |||||
| c->idct_add = ff_vp3_idct_add_altivec; | |||||
| c->idct = ff_vp3_idct_altivec; | |||||
| c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |||||
| } | } | ||||
| } | } | ||||
| @@ -18,6 +18,13 @@ | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
| */ | */ | ||||
| #include "config.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavcodec/vp3dsp.h" | |||||
| #if HAVE_ALTIVEC | |||||
| #include "libavutil/ppc/types_altivec.h" | #include "libavutil/ppc/types_altivec.h" | ||||
| #include "libavutil/ppc/util_altivec.h" | #include "libavutil/ppc/util_altivec.h" | ||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
| @@ -107,25 +114,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C) | |||||
| #define ADD8(a) vec_add(a, eight) | #define ADD8(a) vec_add(a, eight) | ||||
| #define SHIFT4(a) vec_sra(a, four) | #define SHIFT4(a) vec_sra(a, four) | ||||
| void ff_vp3_idct_altivec(DCTELEM block[64]) | |||||
| { | |||||
| IDCT_START | |||||
| IDCT_1D(NOP, NOP) | |||||
| TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7); | |||||
| IDCT_1D(ADD8, SHIFT4) | |||||
| vec_st(b0, 0x00, block); | |||||
| vec_st(b1, 0x10, block); | |||||
| vec_st(b2, 0x20, block); | |||||
| vec_st(b3, 0x30, block); | |||||
| vec_st(b4, 0x40, block); | |||||
| vec_st(b5, 0x50, block); | |||||
| vec_st(b6, 0x60, block); | |||||
| vec_st(b7, 0x70, block); | |||||
| } | |||||
| void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) | |||||
| static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) | |||||
| { | { | ||||
| vec_u8 t; | vec_u8 t; | ||||
| IDCT_START | IDCT_START | ||||
| @@ -153,7 +142,7 @@ void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) | |||||
| PUT(b7) | PUT(b7) | ||||
| } | } | ||||
| void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) | |||||
| static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) | |||||
| { | { | ||||
| LOAD_ZERO; | LOAD_ZERO; | ||||
| vec_u8 t, vdst; | vec_u8 t, vdst; | ||||
| @@ -183,3 +172,14 @@ void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) | |||||
| ADD(b6) dst += stride; | ADD(b6) dst += stride; | ||||
| ADD(b7) | ADD(b7) | ||||
| } | } | ||||
| #endif /* HAVE_ALTIVEC */ | |||||
| av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags) | |||||
| { | |||||
| if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { | |||||
| c->idct_put = vp3_idct_put_altivec; | |||||
| c->idct_add = vp3_idct_add_altivec; | |||||
| c->idct_perm = FF_TRANSPOSE_IDCT_PERM; | |||||
| } | |||||
| } | |||||
| @@ -40,6 +40,7 @@ | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "vp3data.h" | #include "vp3data.h" | ||||
| #include "vp3dsp.h" | |||||
| #include "xiph.h" | #include "xiph.h" | ||||
| #include "thread.h" | #include "thread.h" | ||||
| @@ -135,6 +136,7 @@ typedef struct Vp3DecodeContext { | |||||
| AVFrame current_frame; | AVFrame current_frame; | ||||
| int keyframe; | int keyframe; | ||||
| DSPContext dsp; | DSPContext dsp; | ||||
| VP3DSPContext vp3dsp; | |||||
| int flipped_image; | int flipped_image; | ||||
| int last_slice_end; | int last_slice_end; | ||||
| int skip_loop_filter; | int skip_loop_filter; | ||||
| @@ -1302,14 +1304,14 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye | |||||
| { | { | ||||
| /* do not perform left edge filter for left columns frags */ | /* do not perform left edge filter for left columns frags */ | ||||
| if (x > 0) { | if (x > 0) { | ||||
| s->dsp.vp3_h_loop_filter( | |||||
| s->vp3dsp.h_loop_filter( | |||||
| plane_data + 8*x, | plane_data + 8*x, | ||||
| stride, bounding_values); | stride, bounding_values); | ||||
| } | } | ||||
| /* do not perform top edge filter for top row fragments */ | /* do not perform top edge filter for top row fragments */ | ||||
| if (y > 0) { | if (y > 0) { | ||||
| s->dsp.vp3_v_loop_filter( | |||||
| s->vp3dsp.v_loop_filter( | |||||
| plane_data + 8*x, | plane_data + 8*x, | ||||
| stride, bounding_values); | stride, bounding_values); | ||||
| } | } | ||||
| @@ -1319,7 +1321,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye | |||||
| * in this frame (it will be filtered in next iteration) */ | * in this frame (it will be filtered in next iteration) */ | ||||
| if ((x < width - 1) && | if ((x < width - 1) && | ||||
| (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { | (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { | ||||
| s->dsp.vp3_h_loop_filter( | |||||
| s->vp3dsp.h_loop_filter( | |||||
| plane_data + 8*x + 8, | plane_data + 8*x + 8, | ||||
| stride, bounding_values); | stride, bounding_values); | ||||
| } | } | ||||
| @@ -1329,7 +1331,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye | |||||
| * in this frame (it will be filtered in the next row) */ | * in this frame (it will be filtered in the next row) */ | ||||
| if ((y < height - 1) && | if ((y < height - 1) && | ||||
| (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { | (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { | ||||
| s->dsp.vp3_v_loop_filter( | |||||
| s->vp3dsp.v_loop_filter( | |||||
| plane_data + 8*x + 8*stride, | plane_data + 8*x + 8*stride, | ||||
| stride, bounding_values); | stride, bounding_values); | ||||
| } | } | ||||
| @@ -1577,9 +1579,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) | |||||
| index = vp3_dequant(s, s->all_fragments + i, plane, 0, block); | index = vp3_dequant(s, s->all_fragments + i, plane, 0, block); | ||||
| if (index > 63) | if (index > 63) | ||||
| continue; | continue; | ||||
| if(s->avctx->idct_algo!=FF_IDCT_VP3) | |||||
| block[0] += 128<<3; | |||||
| s->dsp.idct_put( | |||||
| s->vp3dsp.idct_put( | |||||
| output_plane + first_pixel, | output_plane + first_pixel, | ||||
| stride, | stride, | ||||
| block); | block); | ||||
| @@ -1588,12 +1588,12 @@ static void render_slice(Vp3DecodeContext *s, int slice) | |||||
| if (index > 63) | if (index > 63) | ||||
| continue; | continue; | ||||
| if (index > 0) { | if (index > 0) { | ||||
| s->dsp.idct_add( | |||||
| s->vp3dsp.idct_add( | |||||
| output_plane + first_pixel, | output_plane + first_pixel, | ||||
| stride, | stride, | ||||
| block); | block); | ||||
| } else { | } else { | ||||
| s->dsp.vp3_idct_dc_add(output_plane + first_pixel, stride, block); | |||||
| s->vp3dsp.idct_dc_add(output_plane + first_pixel, stride, block); | |||||
| } | } | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -1676,10 +1676,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx) | |||||
| if (avctx->pix_fmt == PIX_FMT_NONE) | if (avctx->pix_fmt == PIX_FMT_NONE) | ||||
| avctx->pix_fmt = PIX_FMT_YUV420P; | avctx->pix_fmt = PIX_FMT_YUV420P; | ||||
| avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; | avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; | ||||
| if(avctx->idct_algo==FF_IDCT_AUTO) | |||||
| avctx->idct_algo=FF_IDCT_VP3; | |||||
| ff_dsputil_init(&s->dsp, avctx); | ff_dsputil_init(&s->dsp, avctx); | ||||
| ff_vp3dsp_init(&s->vp3dsp, avctx->flags); | |||||
| ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | ||||
| /* initialize to an impossible value which will force a recalculation | /* initialize to an impossible value which will force a recalculation | ||||
| @@ -24,8 +24,10 @@ | |||||
| * source code. | * source code. | ||||
| */ | */ | ||||
| #include "libavutil/attributes.h" | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | #include "dsputil.h" | ||||
| #include "vp3dsp.h" | |||||
| #define IdctAdjustBeforeShift 8 | #define IdctAdjustBeforeShift 8 | ||||
| #define xC1S7 64277 | #define xC1S7 64277 | ||||
| @@ -210,19 +212,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int | |||||
| } | } | ||||
| } | } | ||||
| void ff_vp3_idct_c(DCTELEM *block/* align 16*/){ | |||||
| idct(NULL, 0, block, 0); | |||||
| } | |||||
| void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ | |||||
| static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ | |||||
| idct(dest, line_size, block, 1); | idct(dest, line_size, block, 1); | ||||
| } | } | ||||
| void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ | |||||
| static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ | |||||
| idct(dest, line_size, block, 2); | idct(dest, line_size, block, 2); | ||||
| } | } | ||||
| void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){ | |||||
| static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, | |||||
| const DCTELEM *block/*align 16*/){ | |||||
| int i, dc = (block[0] + 15) >> 5; | int i, dc = (block[0] + 15) >> 5; | ||||
| for(i = 0; i < 8; i++){ | for(i = 0; i < 8; i++){ | ||||
| @@ -238,7 +237,8 @@ void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM | |||||
| } | } | ||||
| } | } | ||||
| void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values) | |||||
| static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, | |||||
| int *bounding_values) | |||||
| { | { | ||||
| unsigned char *end; | unsigned char *end; | ||||
| int filter_value; | int filter_value; | ||||
| @@ -254,7 +254,8 @@ void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu | |||||
| } | } | ||||
| } | } | ||||
| void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values) | |||||
| static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, | |||||
| int *bounding_values) | |||||
| { | { | ||||
| unsigned char *end; | unsigned char *end; | ||||
| int filter_value; | int filter_value; | ||||
| @@ -268,3 +269,21 @@ void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu | |||||
| first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value); | first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value); | ||||
| } | } | ||||
| } | } | ||||
/**
 * Initialise a VP3DSPContext.
 *
 * First fills every slot with the C implementations defined in this file
 * and sets the IDCT permutation to FF_NO_IDCT_PERM, then hands the context
 * to the architecture-specific init routine(s) selected by ARCH_ARM,
 * ARCH_PPC and ARCH_X86 so they can overwrite individual slots.
 * NOTE(review): ARCH_* are presumably build-time 0/1 constants from the
 * generated config - confirm.
 *
 * @param c     context to fill in
 * @param flags forwarded unchanged to the architecture-specific init
 *              functions; not otherwise read here
 */
| av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) | |||||
| { | |||||
| c->idct_put = vp3_idct_put_c; | |||||
| c->idct_add = vp3_idct_add_c; | |||||
| c->idct_dc_add = vp3_idct_dc_add_c; | |||||
| c->v_loop_filter = vp3_v_loop_filter_c; | |||||
| c->h_loop_filter = vp3_h_loop_filter_c; | |||||
| c->idct_perm = FF_NO_IDCT_PERM; /* default paired with the C IDCT; arch inits below may replace it */ | |||||
| if (ARCH_ARM) | |||||
| ff_vp3dsp_init_arm(c, flags); | |||||
| if (ARCH_PPC) | |||||
| ff_vp3dsp_init_ppc(c, flags); | |||||
| if (ARCH_X86) | |||||
| ff_vp3dsp_init_x86(c, flags); | |||||
| } | |||||
| @@ -0,0 +1,40 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_VP3DSP_H | |||||
| #define AVCODEC_VP3DSP_H | |||||
| #include <stdint.h> | |||||
| #include "dsputil.h" | |||||
/**
 * Table of VP3-specific DSP routines plus the coefficient permutation the
 * selected IDCT expects. Filled in by ff_vp3dsp_init() and the
 * architecture-specific init functions declared below.
 */
| typedef struct VP3DSPContext { | |||||
| void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block); /* IDCT "put" variant - presumably stores the transformed block to dest; confirm */ | |||||
| void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block); /* IDCT "add" variant - presumably adds the transformed block onto dest; confirm */ | |||||
| void (*idct_dc_add)(uint8_t *dest, int line_size, const DCTELEM *block); /* DC-only add; block is not modified (const) */ | |||||
| void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values); /* vertical loop filter */ | |||||
| void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values); /* horizontal loop filter */ | |||||
| int idct_perm; /* one of the FF_*_IDCT_PERM values; the init functions store FF_NO_IDCT_PERM or FF_TRANSPOSE_IDCT_PERM here */ | |||||
| } VP3DSPContext; | |||||
| void ff_vp3dsp_init(VP3DSPContext *c, int flags); | |||||
| void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags); | |||||
| void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags); | |||||
| void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags); | |||||
| #endif /* AVCODEC_VP3DSP_H */ | |||||
| @@ -411,7 +411,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) | |||||
| case VP56_MB_INTRA: | case VP56_MB_INTRA: | ||||
| for (b=0; b<b_max; b++) { | for (b=0; b<b_max; b++) { | ||||
| plane = ff_vp56_b2p[b+ab]; | plane = ff_vp56_b2p[b+ab]; | ||||
| s->dsp.idct_put(frame_current->data[plane] + s->block_offset[b], | |||||
| s->vp3dsp.idct_put(frame_current->data[plane] + s->block_offset[b], | |||||
| s->stride[plane], s->block_coeff[b]); | s->stride[plane], s->block_coeff[b]); | ||||
| } | } | ||||
| break; | break; | ||||
| @@ -424,7 +424,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) | |||||
| s->dsp.put_pixels_tab[1][0](frame_current->data[plane] + off, | s->dsp.put_pixels_tab[1][0](frame_current->data[plane] + off, | ||||
| frame_ref->data[plane] + off, | frame_ref->data[plane] + off, | ||||
| s->stride[plane], 8); | s->stride[plane], 8); | ||||
| s->dsp.idct_add(frame_current->data[plane] + off, | |||||
| s->vp3dsp.idct_add(frame_current->data[plane] + off, | |||||
| s->stride[plane], s->block_coeff[b]); | s->stride[plane], s->block_coeff[b]); | ||||
| } | } | ||||
| break; | break; | ||||
| @@ -442,7 +442,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) | |||||
| plane = ff_vp56_b2p[b+ab]; | plane = ff_vp56_b2p[b+ab]; | ||||
| vp56_mc(s, b, plane, frame_ref->data[plane], s->stride[plane], | vp56_mc(s, b, plane, frame_ref->data[plane], s->stride[plane], | ||||
| 16*col+x_off, 16*row+y_off); | 16*col+x_off, 16*row+y_off); | ||||
| s->dsp.idct_add(frame_current->data[plane] + s->block_offset[b], | |||||
| s->vp3dsp.idct_add(frame_current->data[plane] + s->block_offset[b], | |||||
| s->stride[plane], s->block_coeff[b]); | s->stride[plane], s->block_coeff[b]); | ||||
| } | } | ||||
| break; | break; | ||||
| @@ -666,10 +666,10 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha) | |||||
| s->avctx = avctx; | s->avctx = avctx; | ||||
| avctx->pix_fmt = has_alpha ? PIX_FMT_YUVA420P : PIX_FMT_YUV420P; | avctx->pix_fmt = has_alpha ? PIX_FMT_YUVA420P : PIX_FMT_YUV420P; | ||||
| if (avctx->idct_algo == FF_IDCT_AUTO) | |||||
| avctx->idct_algo = FF_IDCT_VP3; | |||||
| ff_dsputil_init(&s->dsp, avctx); | ff_dsputil_init(&s->dsp, avctx); | ||||
| ff_vp3dsp_init(&s->vp3dsp, avctx->flags); | |||||
| ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id); | ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id); | ||||
| ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct); | ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct); | ||||
| for (i=0; i<4; i++) | for (i=0; i<4; i++) | ||||
| @@ -30,6 +30,7 @@ | |||||
| #include "dsputil.h" | #include "dsputil.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "bytestream.h" | #include "bytestream.h" | ||||
| #include "vp3dsp.h" | |||||
| #include "vp56dsp.h" | #include "vp56dsp.h" | ||||
| typedef struct vp56_context VP56Context; | typedef struct vp56_context VP56Context; | ||||
| @@ -91,6 +92,7 @@ typedef struct { | |||||
| struct vp56_context { | struct vp56_context { | ||||
| AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
| DSPContext dsp; | DSPContext dsp; | ||||
| VP3DSPContext vp3dsp; | |||||
| VP56DSPContext vp56dsp; | VP56DSPContext vp56dsp; | ||||
| ScanTable scantable; | ScanTable scantable; | ||||
| AVFrame frames[4]; | AVFrame frames[4]; | ||||
| @@ -1,5 +1,6 @@ | |||||
| OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o | OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o | ||||
| OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | ||||
| OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | |||||
| OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o | OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o | ||||
| MMX-OBJS += x86/dsputil_mmx.o \ | MMX-OBJS += x86/dsputil_mmx.o \ | ||||
| @@ -2476,20 +2476,6 @@ static void vector_clipf_sse(float *dst, const float *src, | |||||
| ); | ); | ||||
| } | } | ||||
| void ff_vp3_idct_mmx(int16_t *input_data); | |||||
| void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, | |||||
| const DCTELEM *block); | |||||
| void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); | |||||
| void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); | |||||
| void ff_vp3_idct_sse2(int16_t *input_data); | |||||
| void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, | int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, | ||||
| int order); | int order); | ||||
| int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, | int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, | ||||
| @@ -2681,14 +2667,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, | |||||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | ||||
| c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | ||||
| } | } | ||||
| if (CONFIG_VP3_DECODER && HAVE_YASM) { | |||||
| c->vp3_v_loop_filter = ff_vp3_v_loop_filter_mmx2; | |||||
| c->vp3_h_loop_filter = ff_vp3_h_loop_filter_mmx2; | |||||
| } | |||||
| } | } | ||||
| if (CONFIG_VP3_DECODER && HAVE_YASM) | |||||
| c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; | |||||
| if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 || | if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 || | ||||
| avctx->codec_id == CODEC_ID_THEORA)) { | avctx->codec_id == CODEC_ID_THEORA)) { | ||||
| @@ -3064,20 +3043,6 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) | |||||
| } | } | ||||
| c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | ||||
| #endif | #endif | ||||
| } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || | |||||
| CONFIG_VP6_DECODER) && | |||||
| idct_algo == FF_IDCT_VP3 && HAVE_YASM) { | |||||
| if (mm_flags & AV_CPU_FLAG_SSE2) { | |||||
| c->idct_put = ff_vp3_idct_put_sse2; | |||||
| c->idct_add = ff_vp3_idct_add_sse2; | |||||
| c->idct = ff_vp3_idct_sse2; | |||||
| c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |||||
| } else { | |||||
| c->idct_put = ff_vp3_idct_put_mmx; | |||||
| c->idct_add = ff_vp3_idct_add_mmx; | |||||
| c->idct = ff_vp3_idct_mmx; | |||||
| c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; | |||||
| } | |||||
| } else if (idct_algo == FF_IDCT_CAVS) { | } else if (idct_algo == FF_IDCT_CAVS) { | ||||
| c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | ||||
| } else if (idct_algo == FF_IDCT_XVIDMMX) { | } else if (idct_algo == FF_IDCT_XVIDMMX) { | ||||
| @@ -524,10 +524,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 | |||||
| %endmacro | %endmacro | ||||
| %macro vp3_idct_funcs 3 | %macro vp3_idct_funcs 3 | ||||
| cglobal vp3_idct_%1, 1, 1, %2 | |||||
| VP3_IDCT_%1 r0 | |||||
| RET | |||||
| cglobal vp3_idct_put_%1, 3, %3, %2 | cglobal vp3_idct_put_%1, 3, %3, %2 | ||||
| VP3_IDCT_%1 r2 | VP3_IDCT_%1 r2 | ||||
| %if ARCH_X86_64 | %if ARCH_X86_64 | ||||
| @@ -0,0 +1,65 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavcodec/avcodec.h" | |||||
| #include "libavcodec/vp3dsp.h" | |||||
| #include "config.h" | |||||
| void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); | |||||
| void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, | |||||
| const DCTELEM *block); | |||||
| void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); | |||||
| void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); | |||||
| av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) | |||||
| { | |||||
| #if HAVE_YASM | |||||
| int cpuflags = av_get_cpu_flags(); | |||||
| if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) { | |||||
| c->idct_put = ff_vp3_idct_put_mmx; | |||||
| c->idct_add = ff_vp3_idct_add_mmx; | |||||
| c->idct_perm = FF_PARTTRANS_IDCT_PERM; | |||||
| } | |||||
| if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) { | |||||
| c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; | |||||
| if (!(flags & CODEC_FLAG_BITEXACT)) { | |||||
| c->v_loop_filter = ff_vp3_v_loop_filter_mmx2; | |||||
| c->h_loop_filter = ff_vp3_h_loop_filter_mmx2; | |||||
| } | |||||
| } | |||||
| if (cpuflags & AV_CPU_FLAG_SSE2) { | |||||
| c->idct_put = ff_vp3_idct_put_sse2; | |||||
| c->idct_add = ff_vp3_idct_add_sse2; | |||||
| c->idct_perm = FF_TRANSPOSE_IDCT_PERM; | |||||
| } | |||||
| #endif | |||||
| } | |||||