* commit 'e74433a8e6fc00c8dbde293c97a3e45384c2c1d9':
  dsputil: Split clear_block*/fill_block* off into a separate context

Conflicts:
  configure
  libavcodec/asvdec.c
  libavcodec/dnxhddec.c
  libavcodec/dnxhdenc.c
  libavcodec/dsputil.h
  libavcodec/eamad.c
  libavcodec/intrax8.c
  libavcodec/mjpegdec.c
  libavcodec/ppc/dsputil_ppc.c
  libavcodec/vc1dec.c
  libavcodec/x86/dsputil_init.c
  libavcodec/x86/dsputil_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

tags/n2.3
| @@ -1796,6 +1796,7 @@ CONFIG_EXTRA=" | |||
| aandcttables | |||
| ac3dsp | |||
| audio_frame_queue | |||
| blockdsp | |||
| cabac | |||
| dsputil | |||
| exif | |||
| @@ -1995,7 +1996,7 @@ mdct_select="fft" | |||
| rdft_select="fft" | |||
| mpegaudio_select="mpegaudiodsp" | |||
| mpegaudiodsp_select="dct" | |||
| mpegvideo_select="dsputil h264chroma hpeldsp videodsp" | |||
| mpegvideo_select="blockdsp dsputil h264chroma hpeldsp videodsp" | |||
| mpegvideoenc_select="dsputil mpegvideo qpeldsp" | |||
| # decoders / encoders | |||
| @@ -2014,18 +2015,18 @@ amrwb_decoder_select="lsp" | |||
| amv_decoder_select="sp5x_decoder exif" | |||
| amv_encoder_select="aandcttables" | |||
| ape_decoder_select="dsputil llauddsp" | |||
| asv1_decoder_select="dsputil" | |||
| asv1_decoder_select="blockdsp dsputil" | |||
| asv1_encoder_select="dsputil" | |||
| asv2_decoder_select="dsputil" | |||
| asv2_decoder_select="blockdsp dsputil" | |||
| asv2_encoder_select="dsputil" | |||
| atrac1_decoder_select="mdct sinewin" | |||
| atrac3_decoder_select="mdct" | |||
| atrac3p_decoder_select="mdct sinewin" | |||
| avrn_decoder_select="exif" | |||
| bink_decoder_select="dsputil hpeldsp" | |||
| bink_decoder_select="blockdsp hpeldsp" | |||
| binkaudio_dct_decoder_select="mdct rdft dct sinewin" | |||
| binkaudio_rdft_decoder_select="mdct rdft sinewin" | |||
| cavs_decoder_select="dsputil golomb h264chroma qpeldsp videodsp" | |||
| cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp" | |||
| cllc_decoder_select="dsputil" | |||
| comfortnoise_encoder_select="lpc" | |||
| cook_decoder_select="dsputil mdct sinewin" | |||
| @@ -2033,16 +2034,16 @@ cscd_decoder_select="lzo" | |||
| cscd_decoder_suggest="zlib" | |||
| dca_decoder_select="mdct" | |||
| dirac_decoder_select="dsputil dwt golomb videodsp" | |||
| dnxhd_decoder_select="dsputil" | |||
| dnxhd_encoder_select="aandcttables dsputil mpegvideoenc" | |||
| dnxhd_decoder_select="blockdsp dsputil" | |||
| dnxhd_encoder_select="aandcttables blockdsp dsputil mpegvideoenc" | |||
| dvvideo_decoder_select="dsputil" | |||
| dvvideo_encoder_select="dsputil" | |||
| dxa_decoder_select="zlib" | |||
| eac3_decoder_select="ac3_decoder" | |||
| eac3_encoder_select="ac3_encoder" | |||
| eamad_decoder_select="aandcttables dsputil mpegvideo" | |||
| eamad_decoder_select="aandcttables blockdsp dsputil mpegvideo" | |||
| eatgq_decoder_select="aandcttables" | |||
| eatqi_decoder_select="aandcttables dsputil error_resilience mpegvideo" | |||
| eatqi_decoder_select="aandcttables blockdsp dsputil error_resilience mpegvideo" | |||
| exr_decoder_select="zlib" | |||
| ffv1_decoder_select="golomb rangecoder" | |||
| ffv1_encoder_select="rangecoder" | |||
| @@ -2057,9 +2058,9 @@ flashsv2_encoder_select="zlib" | |||
| flashsv2_decoder_select="zlib" | |||
| flv_decoder_select="h263_decoder" | |||
| flv_encoder_select="h263_encoder" | |||
| fourxm_decoder_select="dsputil" | |||
| fourxm_decoder_select="blockdsp dsputil" | |||
| fraps_decoder_select="dsputil huffman" | |||
| g2m_decoder_select="dsputil zlib" | |||
| g2m_decoder_select="blockdsp dsputil zlib" | |||
| g729_decoder_select="dsputil" | |||
| h261_decoder_select="error_resilience mpegvideo" | |||
| h261_encoder_select="aandcttables mpegvideoenc" | |||
| @@ -2078,14 +2079,14 @@ indeo3_decoder_select="hpeldsp" | |||
| interplay_video_decoder_select="hpeldsp" | |||
| jpegls_decoder_select="golomb mjpeg_decoder" | |||
| jpegls_encoder_select="golomb" | |||
| jv_decoder_select="dsputil" | |||
| jv_decoder_select="blockdsp" | |||
| lagarith_decoder_select="huffyuvdsp" | |||
| ljpeg_encoder_select="aandcttables mpegvideoenc" | |||
| loco_decoder_select="golomb" | |||
| mdec_decoder_select="dsputil error_resilience mpegvideo" | |||
| mdec_decoder_select="blockdsp dsputil error_resilience mpegvideo" | |||
| metasound_decoder_select="lsp mdct sinewin" | |||
| mimic_decoder_select="dsputil hpeldsp" | |||
| mjpeg_decoder_select="dsputil hpeldsp exif" | |||
| mimic_decoder_select="blockdsp dsputil hpeldsp" | |||
| mjpeg_decoder_select="blockdsp dsputil hpeldsp exif" | |||
| mjpeg_encoder_select="aandcttables mpegvideoenc" | |||
| mjpegb_decoder_select="mjpeg_decoder" | |||
| mlp_decoder_select="mlp_parser" | |||
| @@ -2124,7 +2125,7 @@ on2avc_decoder_select="mdct" | |||
| opus_decoder_deps="swresample" | |||
| png_decoder_select="zlib" | |||
| png_encoder_select="huffyuvencdsp zlib" | |||
| prores_decoder_select="dsputil" | |||
| prores_decoder_select="blockdsp dsputil" | |||
| prores_encoder_select="dsputil" | |||
| qcelp_decoder_select="lsp" | |||
| qdm2_decoder_select="mdct rdft mpegaudiodsp" | |||
| @@ -2163,7 +2164,7 @@ twinvq_decoder_select="mdct lsp sinewin" | |||
| utvideo_decoder_select="dsputil" | |||
| utvideo_encoder_select="dsputil huffman huffyuvencdsp" | |||
| vble_decoder_select="huffyuvdsp" | |||
| vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp" | |||
| vc1_decoder_select="blockdsp error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp" | |||
| vc1image_decoder_select="vc1_decoder" | |||
| vorbis_decoder_select="mdct" | |||
| vorbis_encoder_select="mdct" | |||
| @@ -2185,7 +2186,7 @@ wmav2_encoder_select="mdct sinewin" | |||
| wmavoice_decoder_select="lsp rdft dct mdct sinewin" | |||
| wmv1_decoder_select="h263_decoder" | |||
| wmv1_encoder_select="h263_encoder" | |||
| wmv2_decoder_select="h263_decoder intrax8 videodsp" | |||
| wmv2_decoder_select="blockdsp h263_decoder intrax8 videodsp" | |||
| wmv2_encoder_select="h263_encoder" | |||
| wmv3_decoder_select="vc1_decoder" | |||
| wmv3image_decoder_select="wmv3_decoder" | |||
| @@ -31,6 +31,7 @@ | |||
| #include "libavutil/imgutils.h" | |||
| #include "libavutil/intreadwrite.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "bytestream.h" | |||
| #include "dsputil.h" | |||
| #include "get_bits.h" | |||
| @@ -134,6 +135,7 @@ typedef struct CFrameBuffer { | |||
| typedef struct FourXContext { | |||
| AVCodecContext *avctx; | |||
| DSPContext dsp; | |||
| BlockDSPContext bdsp; | |||
| uint16_t *frame_buffer; | |||
| uint16_t *last_frame_buffer; | |||
| GetBitContext pre_gb; ///< ac/dc prefix | |||
| @@ -592,7 +594,7 @@ static int decode_i_mb(FourXContext *f) | |||
| int ret; | |||
| int i; | |||
| f->dsp.clear_blocks(f->block[0]); | |||
| f->bdsp.clear_blocks(f->block[0]); | |||
| for (i = 0; i < 6; i++) | |||
| if ((ret = decode_i_block(f, f->block[i])) < 0) | |||
| @@ -998,6 +1000,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||
| } | |||
| f->version = AV_RL32(avctx->extradata) >> 16; | |||
| ff_blockdsp_init(&f->bdsp, avctx); | |||
| ff_dsputil_init(&f->dsp, avctx); | |||
| f->avctx = avctx; | |||
| init_vlcs(f); | |||
| @@ -33,6 +33,7 @@ OBJS = allcodecs.o \ | |||
| OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o | |||
| OBJS-$(CONFIG_AC3DSP) += ac3dsp.o | |||
| OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o | |||
| OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o | |||
| OBJS-$(CONFIG_CABAC) += cabac.o | |||
| OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o | |||
| OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o | |||
| @@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o | |||
| OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ | |||
| arm/ac3dsp_arm.o | |||
| OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o | |||
| OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ | |||
| arm/dsputil_arm.o \ | |||
| arm/jrevdct_arm.o \ | |||
| @@ -79,6 +80,8 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ | |||
| NEON-OBJS += arm/fmtconvert_neon.o | |||
| NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o | |||
| NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ | |||
| arm/blockdsp_neon.o | |||
| NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ | |||
| arm/dsputil_neon.o \ | |||
| arm/int_neon.o \ | |||
| @@ -0,0 +1,26 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_ARM_BLOCKDSP_ARM_H | |||
| #define AVCODEC_ARM_BLOCKDSP_ARM_H | |||
| #include "libavcodec/blockdsp.h" | |||
/* NEON-specific BlockDSPContext setup; the ARM init code in this diff calls
 * it when have_neon() holds. */
| void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth); | |||
| #endif /* AVCODEC_ARM_BLOCKDSP_ARM_H */ | |||
| @@ -0,0 +1,33 @@ | |||
| /* | |||
| * ARM optimized block operations | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/arm/cpu.h" | |||
| #include "libavcodec/blockdsp.h" | |||
| #include "blockdsp_arm.h" | |||
/*
 * ARM entry point for BlockDSPContext setup.
 *
 * Reads the CPU flags through av_get_cpu_flags() and, when
 * have_neon(cpu_flags) is true, forwards c and high_bit_depth to
 * ff_blockdsp_init_neon(). Nothing in c is touched otherwise.
 */
| av_cold void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (have_neon(cpu_flags)) | |||
| ff_blockdsp_init_neon(c, high_bit_depth); | |||
| } | |||
| @@ -0,0 +1,37 @@ | |||
| /* | |||
| * ARM NEON optimised block operations | |||
| * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include "libavutil/attributes.h" | |||
| #include "libavcodec/blockdsp.h" | |||
| #include "blockdsp_arm.h" | |||
/* Block-clearing routines that are only declared here; their definitions
 * are not part of this C code. */
| void ff_clear_block_neon(int16_t *block); | |||
| void ff_clear_blocks_neon(int16_t *blocks); | |||
/*
 * Install the NEON block-clearing routines into c.
 *
 * c->clear_block and c->clear_blocks are overwritten only when
 * high_bit_depth is zero; for a non-zero high_bit_depth c is left as the
 * caller set it up. fill_block_tab is never modified here.
 */
| av_cold void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth) | |||
| { | |||
| if (!high_bit_depth) { | |||
| c->clear_block = ff_clear_block_neon; | |||
| c->clear_blocks = ff_clear_blocks_neon; | |||
| } | |||
| } | |||
| @@ -0,0 +1,38 @@ | |||
| /* | |||
| * ARM NEON optimised block functions | |||
| * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/arm/asm.S" | |||
/*
 * ff_clear_block_neon: zero one coefficient block.
 *
 * q0 is set to zero and stored 8 times through r0 with post-increment
 * writeback: 8 x 16 bytes = 128 bytes, i.e. 64 16-bit values.
 * The :128 qualifier on each store is an alignment specifier, so the
 * address in r0 has to be 16-byte aligned.
 * r0 is presumably the block pointer (first argument of the C prototype
 * void ff_clear_block_neon(int16_t *block)) -- confirm against the ABI in use.
 */
| function ff_clear_block_neon, export=1 | |||
| vmov.i16 q0, #0 | |||
| .rept 8 | |||
| vst1.16 {q0}, [r0,:128]! | |||
| .endr | |||
| bx lr | |||
| endfunc | |||
/*
 * ff_clear_blocks_neon: zero six consecutive coefficient blocks.
 *
 * Same store sequence as the single-block routine, repeated 8*6 = 48
 * times: 48 x 16 bytes = 768 bytes, i.e. 6 * 64 16-bit values.
 * The :128 alignment specifier requires a 16-byte aligned address in r0.
 */
| function ff_clear_blocks_neon, export=1 | |||
| vmov.i16 q0, #0 | |||
| .rept 8*6 | |||
| vst1.16 {q0}, [r0,:128]! | |||
| .endr | |||
| bx lr | |||
| endfunc | |||
| @@ -30,9 +30,6 @@ void ff_simple_idct_neon(int16_t *data); | |||
| void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); | |||
| void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); | |||
| void ff_clear_block_neon(int16_t *block); | |||
| void ff_clear_blocks_neon(int16_t *blocks); | |||
| void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); | |||
| void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | |||
| void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); | |||
| @@ -61,11 +58,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, | |||
| c->put_pixels_clamped = ff_put_pixels_clamped_neon; | |||
| c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; | |||
| if (!high_bit_depth) { | |||
| c->clear_block = ff_clear_block_neon; | |||
| c->clear_blocks = ff_clear_blocks_neon; | |||
| } | |||
| c->vector_clipf = ff_vector_clipf_neon; | |||
| c->vector_clip_int32 = ff_vector_clip_int32_neon; | |||
| @@ -21,22 +21,6 @@ | |||
| #include "libavutil/arm/asm.S" | |||
/*
 * ff_clear_block_neon: zero q0 and store it 8 times through r0 with
 * post-increment (8 x 16 bytes = 128 bytes); :128 requires 16-byte alignment.
 * NOTE(review): the enclosing hunk header (-21,22 +21,6) indicates these
 * lines are on the removed side of the diff.
 */
| function ff_clear_block_neon, export=1 | |||
| vmov.i16 q0, #0 | |||
| .rept 8 | |||
| vst1.16 {q0}, [r0,:128]! | |||
| .endr | |||
| bx lr | |||
| endfunc | |||
/*
 * ff_clear_blocks_neon: zero q0 and store it 8*6 = 48 times through r0 with
 * post-increment (48 x 16 bytes = 768 bytes); :128 requires 16-byte alignment.
 * NOTE(review): the enclosing hunk header (-21,22 +21,6) indicates these
 * lines are on the removed side of the diff.
 */
| function ff_clear_blocks_neon, export=1 | |||
| vmov.i16 q0, #0 | |||
| .rept 8*6 | |||
| vst1.16 {q0}, [r0,:128]! | |||
| .endr | |||
| bx lr | |||
| endfunc | |||
| function ff_put_pixels_clamped_neon, export=1 | |||
| vld1.16 {d16-d19}, [r0,:128]! | |||
| vqmovun.s16 d0, q8 | |||
| @@ -31,12 +31,14 @@ | |||
| #include "libavutil/mem.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "dsputil.h" | |||
| #include "get_bits.h" | |||
| #include "put_bits.h" | |||
| typedef struct ASV1Context{ | |||
| AVCodecContext *avctx; | |||
| BlockDSPContext bdsp; | |||
| DSPContext dsp; | |||
| PutBitContext pb; | |||
| GetBitContext gb; | |||
| @@ -28,6 +28,7 @@ | |||
| #include "asv.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "internal.h" | |||
| #include "mathops.h" | |||
| #include "mpeg12data.h" | |||
| @@ -163,7 +164,7 @@ static inline int decode_mb(ASV1Context *a, int16_t block[6][64]) | |||
| { | |||
| int i; | |||
| a->dsp.clear_blocks(block[0]); | |||
| a->bdsp.clear_blocks(block[0]); | |||
| if (a->avctx->codec_id == AV_CODEC_ID_ASV1) { | |||
| for (i = 0; i < 6; i++) { | |||
| @@ -276,6 +277,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||
| } | |||
| ff_asv_common_init(avctx); | |||
| ff_blockdsp_init(&a->bdsp, avctx); | |||
| init_vlcs(a); | |||
| ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab); | |||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | |||
| @@ -24,9 +24,9 @@ | |||
| #include "libavutil/imgutils.h" | |||
| #include "libavutil/internal.h" | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| #include "binkdata.h" | |||
| #include "binkdsp.h" | |||
| #include "blockdsp.h" | |||
| #include "hpeldsp.h" | |||
| #include "internal.h" | |||
| #include "mathops.h" | |||
| @@ -113,7 +113,7 @@ typedef struct Bundle { | |||
| */ | |||
| typedef struct BinkContext { | |||
| AVCodecContext *avctx; | |||
| DSPContext dsp; | |||
| BlockDSPContext bdsp; | |||
| HpelDSPContext hdsp; | |||
| BinkDSPContext binkdsp; | |||
| AVFrame *last; | |||
| @@ -886,7 +886,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||
| } else { | |||
| put_pixels8x8_overlapped(dst, ref, stride); | |||
| } | |||
| c->dsp.clear_block(block); | |||
| c->bdsp.clear_block(block); | |||
| v = binkb_get_value(c, BINKB_SRC_INTER_COEFS); | |||
| read_residue(gb, block, v); | |||
| c->binkdsp.add_pixels8(dst, block, stride); | |||
| @@ -910,7 +910,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||
| break; | |||
| case 5: | |||
| v = binkb_get_value(c, BINKB_SRC_COLORS); | |||
| c->dsp.fill_block_tab[1](dst, v, stride, 8); | |||
| c->bdsp.fill_block_tab[1](dst, v, stride, 8); | |||
| break; | |||
| case 6: | |||
| for (i = 0; i < 2; i++) | |||
| @@ -1053,7 +1053,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||
| break; | |||
| case FILL_BLOCK: | |||
| v = get_value(c, BINK_SRC_COLORS); | |||
| c->dsp.fill_block_tab[0](dst, v, stride, 16); | |||
| c->bdsp.fill_block_tab[0](dst, v, stride, 16); | |||
| break; | |||
| case PATTERN_BLOCK: | |||
| for (i = 0; i < 2; i++) | |||
| @@ -1123,7 +1123,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||
| return AVERROR_INVALIDDATA; | |||
| } | |||
| c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8); | |||
| c->dsp.clear_block(block); | |||
| c->bdsp.clear_block(block); | |||
| v = get_bits(gb, 7); | |||
| read_residue(gb, block, v); | |||
| c->binkdsp.add_pixels8(dst, block, stride); | |||
| @@ -1136,7 +1136,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||
| break; | |||
| case FILL_BLOCK: | |||
| v = get_value(c, BINK_SRC_COLORS); | |||
| c->dsp.fill_block_tab[1](dst, v, stride, 8); | |||
| c->bdsp.fill_block_tab[1](dst, v, stride, 8); | |||
| break; | |||
| case INTER_BLOCK: | |||
| xoff = get_value(c, BINK_SRC_X_OFF); | |||
| @@ -1306,7 +1306,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||
| avctx->pix_fmt = c->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P; | |||
| ff_dsputil_init(&c->dsp, avctx); | |||
| ff_blockdsp_init(&c->bdsp, avctx); | |||
| ff_hpeldsp_init(&c->hdsp, avctx->flags); | |||
| ff_binkdsp_init(&c->binkdsp); | |||
| @@ -0,0 +1,78 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include <string.h> | |||
| #include "config.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "version.h" | |||
/* C fallback for clear_block: zero 64 int16_t values starting at block. */
| static void clear_block_8_c(int16_t *block) | |||
| { | |||
| memset(block, 0, sizeof(int16_t) * 64); | |||
| } | |||
/* C fallback for clear_blocks: zero 6 * 64 contiguous int16_t values
 * starting at blocks (six 64-entry blocks stored back to back). */
| static void clear_blocks_8_c(int16_t *blocks) | |||
| { | |||
| memset(blocks, 0, sizeof(int16_t) * 6 * 64); | |||
| } | |||
/*
 * Fill h rows of 16 bytes each with value.
 *
 * block      first byte of the first row
 * value      byte written to every position
 * line_size  offset in bytes from one row to the next
 * h          number of rows; nothing is written when h <= 0
 */
| static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h) | |||
| { | |||
| int i; | |||
| for (i = 0; i < h; i++) { | |||
| memset(block, value, 16); | |||
| block += line_size; | |||
| } | |||
| } | |||
/*
 * Fill h rows of 8 bytes each with value.
 *
 * block      first byte of the first row
 * value      byte written to every position
 * line_size  offset in bytes from one row to the next
 * h          number of rows; nothing is written when h <= 0
 */
| static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h) | |||
| { | |||
| int i; | |||
| for (i = 0; i < h; i++) { | |||
| memset(block, value, 8); | |||
| block += line_size; | |||
| } | |||
| } | |||
/*
 * Initialise a BlockDSPContext.
 *
 * First installs the C implementations: clear_block, clear_blocks,
 * fill_block_tab[0] (16-byte rows) and fill_block_tab[1] (8-byte rows).
 * Then calls the ARM, PPC and x86 init functions, each guarded by its
 * ARCH_* constant, passing high_bit_depth along.
 *
 * c      context to fill in
 * avctx  codec context; high_bit_depth is avctx->bits_per_raw_sample > 8.
 *        avctx itself is also handed to the x86 init while FF_API_XVMC
 *        is set.
 *
 * Note: the C defaults are always the *_8_c functions, whatever the value
 * of high_bit_depth.
 */
| av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx) | |||
| { | |||
| const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; | |||
| c->clear_block = clear_block_8_c; | |||
| c->clear_blocks = clear_blocks_8_c; | |||
| c->fill_block_tab[0] = fill_block16_c; | |||
| c->fill_block_tab[1] = fill_block8_c; | |||
| if (ARCH_ARM) | |||
| ff_blockdsp_init_arm(c, high_bit_depth); | |||
| if (ARCH_PPC) | |||
| ff_blockdsp_init_ppc(c, high_bit_depth); | |||
| if (ARCH_X86) | |||
| #if FF_API_XVMC | |||
| ff_blockdsp_init_x86(c, high_bit_depth, avctx); | |||
| #else | |||
| ff_blockdsp_init_x86(c, high_bit_depth); | |||
| #endif /* FF_API_XVMC */ | |||
| } | |||
| @@ -0,0 +1,52 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_BLOCKDSP_H | |||
| #define AVCODEC_BLOCKDSP_H | |||
| #include <stdint.h> | |||
| #include "avcodec.h" | |||
| #include "version.h" | |||
| /* Fill a block of pixels with one byte value. | |||
| * block points at the first row, line_size is the offset in bytes from | |||
| * one row to the next and h is the number of rows to write; the row | |||
| * width (16 or 8 bytes) is fixed by the implementation that is installed. | |||
| * NOTE(review): this comment used to describe op_pixels_func (block sizes | |||
| * 8x4, 8x8, 16x8, 16x16; h limited to { width / 2, width }, between 4 and | |||
| * 16), which is not declared in this header -- confirm whether those | |||
| * limits are meant to apply to op_fill_func as well. */ | |||
| typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */, | |||
| uint8_t value, int line_size, int h); | |||
/*
 * Function pointers for block-level helpers, filled in by ff_blockdsp_init().
 *
 * clear_block / clear_blocks zero int16_t coefficient storage; the C
 * versions clear 64 and 6 * 64 values respectively.
 * fill_block_tab[0] fills 16-byte-wide rows, fill_block_tab[1] fills
 * 8-byte-wide rows.
 */
| typedef struct BlockDSPContext { | |||
| void (*clear_block)(int16_t *block /* align 16 */); | |||
| void (*clear_blocks)(int16_t *blocks /* align 16 */); | |||
| op_fill_func fill_block_tab[2]; | |||
| } BlockDSPContext; | |||
/* Generic initialiser, followed by the per-architecture initialisers it
 * calls. The x86 variant takes an additional AVCodecContext argument while
 * FF_API_XVMC is set. */
| void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx); | |||
| void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth); | |||
| void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth); | |||
| #if FF_API_XVMC | |||
| void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth, | |||
| AVCodecContext *avctx); | |||
| #else | |||
| void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth); | |||
| #endif /* FF_API_XVMC */ | |||
| #endif /* AVCODEC_BLOCKDSP_H */ | |||
| @@ -761,6 +761,7 @@ av_cold int ff_cavs_init(AVCodecContext *avctx) | |||
| { | |||
| AVSContext *h = avctx->priv_data; | |||
| ff_blockdsp_init(&h->bdsp, avctx); | |||
| ff_dsputil_init(&h->dsp, avctx); | |||
| ff_h264chroma_init(&h->h264chroma, 8); | |||
| ff_videodsp_init(&h->vdsp, 8); | |||
| @@ -23,6 +23,7 @@ | |||
| #define AVCODEC_CAVS_H | |||
| #include "cavsdsp.h" | |||
| #include "blockdsp.h" | |||
| #include "dsputil.h" | |||
| #include "h264chroma.h" | |||
| #include "get_bits.h" | |||
| @@ -162,6 +163,7 @@ typedef struct AVSFrame { | |||
| typedef struct AVSContext { | |||
| AVCodecContext *avctx; | |||
| DSPContext dsp; | |||
| BlockDSPContext bdsp; | |||
| H264ChromaContext h264chroma; | |||
| VideoDSPContext vdsp; | |||
| CAVSDSPContext cdsp; | |||
| @@ -589,7 +589,7 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb, | |||
| dequant_shift[qp], i)) < 0) | |||
| return ret; | |||
| h->cdsp.cavs_idct8_add(dst, block, stride); | |||
| h->dsp.clear_block(block); | |||
| h->bdsp.clear_block(block); | |||
| return 0; | |||
| } | |||
| @@ -25,6 +25,7 @@ | |||
| #include "libavutil/imgutils.h" | |||
| #include "libavutil/timer.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "get_bits.h" | |||
| #include "dnxhddata.h" | |||
| #include "dsputil.h" | |||
| @@ -34,6 +35,7 @@ | |||
| typedef struct DNXHDContext { | |||
| AVCodecContext *avctx; | |||
| GetBitContext gb; | |||
| BlockDSPContext bdsp; | |||
| int64_t cid; ///< compression id | |||
| unsigned int width, height; | |||
| unsigned int mb_width, mb_height; | |||
| @@ -142,6 +144,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||
| ctx->avctx->pix_fmt = AV_PIX_FMT_YUV444P10; | |||
| ctx->avctx->bits_per_raw_sample = 10; | |||
| if (ctx->bit_depth != 10) { | |||
| ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | |||
| ff_dsputil_init(&ctx->dsp, ctx->avctx); | |||
| ctx->bit_depth = 10; | |||
| ctx->decode_dct_block = dnxhd_decode_dct_block_10_444; | |||
| @@ -151,6 +154,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||
| ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P10; | |||
| ctx->avctx->bits_per_raw_sample = 10; | |||
| if (ctx->bit_depth != 10) { | |||
| ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | |||
| ff_dsputil_init(&ctx->dsp, ctx->avctx); | |||
| ctx->bit_depth = 10; | |||
| ctx->decode_dct_block = dnxhd_decode_dct_block_10; | |||
| @@ -159,6 +163,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||
| ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P; | |||
| ctx->avctx->bits_per_raw_sample = 8; | |||
| if (ctx->bit_depth != 8) { | |||
| ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | |||
| ff_dsputil_init(&ctx->dsp, ctx->avctx); | |||
| ctx->bit_depth = 8; | |||
| ctx->decode_dct_block = dnxhd_decode_dct_block_8; | |||
| @@ -338,12 +343,12 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame, | |||
| } | |||
| for (i = 0; i < 8; i++) { | |||
| ctx->dsp.clear_block(ctx->blocks[i]); | |||
| ctx->bdsp.clear_block(ctx->blocks[i]); | |||
| ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); | |||
| } | |||
| if (ctx->is_444) { | |||
| for (; i < 12; i++) { | |||
| ctx->dsp.clear_block(ctx->blocks[i]); | |||
| ctx->bdsp.clear_block(ctx->blocks[i]); | |||
| ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); | |||
| } | |||
| } | |||
| @@ -29,6 +29,7 @@ | |||
| #include "libavutil/timer.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "dsputil.h" | |||
| #include "internal.h" | |||
| #include "mpegvideo.h" | |||
| @@ -320,6 +321,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) | |||
| avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; | |||
| ff_blockdsp_init(&ctx->bdsp, avctx); | |||
| ff_dct_common_init(&ctx->m); | |||
| ff_dct_encode_init(&ctx->m); | |||
| @@ -577,10 +579,10 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) | |||
| ptr_v + ctx->dct_uv_offset, | |||
| ctx->m.uvlinesize); | |||
| } else { | |||
| dsp->clear_block(ctx->blocks[4]); | |||
| dsp->clear_block(ctx->blocks[5]); | |||
| dsp->clear_block(ctx->blocks[6]); | |||
| dsp->clear_block(ctx->blocks[7]); | |||
| ctx->bdsp.clear_block(ctx->blocks[4]); | |||
| ctx->bdsp.clear_block(ctx->blocks[5]); | |||
| ctx->bdsp.clear_block(ctx->blocks[6]); | |||
| ctx->bdsp.clear_block(ctx->blocks[7]); | |||
| } | |||
| } else { | |||
| dsp->get_pixels(ctx->blocks[4], | |||
| @@ -41,6 +41,7 @@ typedef struct RCEntry { | |||
| typedef struct DNXHDEncContext { | |||
| AVClass *class; | |||
| BlockDSPContext bdsp; | |||
| MpegEncContext m; ///< Used for quantization dsp functions | |||
| int cid; | |||
| @@ -436,26 +436,6 @@ static int sum_abs_dctelem_c(int16_t *block) | |||
| return sum; | |||
| } | |||
/* Fill h rows of 16 bytes each with value, advancing line_size bytes per row.
 * NOTE(review): the enclosing hunk header (-436,26 +436,6) indicates this
 * definition is on the removed side of the diff. */
| static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h) | |||
| { | |||
| int i; | |||
| for (i = 0; i < h; i++) { | |||
| memset(block, value, 16); | |||
| block += line_size; | |||
| } | |||
| } | |||
/* Fill h rows of 8 bytes each with value, advancing line_size bytes per row.
 * NOTE(review): the enclosing hunk header (-436,26 +436,6) indicates this
 * definition is on the removed side of the diff. */
| static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h) | |||
| { | |||
| int i; | |||
| for (i = 0; i < h; i++) { | |||
| memset(block, value, 8); | |||
| block += line_size; | |||
| } | |||
| } | |||
| #define avg2(a, b) ((a + b + 1) >> 1) | |||
| #define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2) | |||
| @@ -1517,16 +1497,6 @@ static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, | |||
| memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); | |||
| } | |||
/* Zero 64 int16_t values starting at block.
 * NOTE(review): the enclosing hunk header (-1517,16 +1497,6) indicates this
 * definition is on the removed side of the diff. */
| static void clear_block_8_c(int16_t *block) | |||
| { | |||
| memset(block, 0, sizeof(int16_t) * 64); | |||
| } | |||
/* Zero 6 * 64 contiguous int16_t values starting at blocks.
 * NOTE(review): the enclosing hunk header (-1517,16 +1497,6) indicates this
 * definition is on the removed side of the diff. */
| static void clear_blocks_8_c(int16_t *blocks) | |||
| { | |||
| memset(blocks, 0, sizeof(int16_t) * 6 * 64); | |||
| } | |||
| /* init static data */ | |||
| av_cold void ff_dsputil_static_init(void) | |||
| { | |||
| @@ -1641,9 +1611,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||
| c->pix_sum = pix_sum_c; | |||
| c->pix_norm1 = pix_norm1_c; | |||
| c->fill_block_tab[0] = fill_block16_c; | |||
| c->fill_block_tab[1] = fill_block8_c; | |||
| /* TODO [0] 16 [1] 8 */ | |||
| c->pix_abs[0][0] = pix_abs16_c; | |||
| c->pix_abs[0][1] = pix_abs16_x2_c; | |||
| @@ -1705,9 +1672,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||
| c->draw_edges = draw_edges_8_c; | |||
| c->clear_block = clear_block_8_c; | |||
| c->clear_blocks = clear_blocks_8_c; | |||
| switch (avctx->bits_per_raw_sample) { | |||
| case 9: | |||
| case 10: | |||
| @@ -51,13 +51,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | |||
| * !future video codecs might need functions with less strict alignment | |||
| */ | |||
| /* add and put pixel (decoding) | |||
| * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. | |||
| * h for op_pixels_func is limited to { width / 2, width }, | |||
| * but never larger than 16 and never smaller than 4. */ | |||
| typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */, | |||
| uint8_t value, int line_size, int h); | |||
| struct MpegEncContext; | |||
| /* Motion estimation: | |||
| * h is limited to { width / 2, width, 2 * width }, | |||
| @@ -116,8 +109,7 @@ typedef struct DSPContext { | |||
| int stride, int h, int ox, int oy, | |||
| int dxx, int dxy, int dyx, int dyy, | |||
| int shift, int r, int width, int height); | |||
| void (*clear_block)(int16_t *block /* align 16 */); | |||
| void (*clear_blocks)(int16_t *blocks /* align 16 */); | |||
| int (*pix_sum)(uint8_t *pix, int line_size); | |||
| int (*pix_norm1)(uint8_t *pix, int line_size); | |||
| @@ -236,8 +228,6 @@ typedef struct DSPContext { | |||
| */ | |||
| void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, | |||
| int32_t max, unsigned int len); | |||
| op_fill_func fill_block_tab[2]; | |||
| } DSPContext; | |||
| void ff_dsputil_static_init(void); | |||
| @@ -44,6 +44,7 @@ | |||
| typedef struct MadContext { | |||
| AVCodecContext *avctx; | |||
| BlockDSPContext bdsp; | |||
| DSPContext dsp; | |||
| AVFrame *last_frame; | |||
| GetBitContext gb; | |||
| @@ -61,6 +62,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||
| MadContext *s = avctx->priv_data; | |||
| s->avctx = avctx; | |||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | |||
| ff_blockdsp_init(&s->bdsp, avctx); | |||
| ff_dsputil_init(&s->dsp, avctx); | |||
| ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); | |||
| ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | |||
| @@ -213,7 +215,7 @@ static int decode_mb(MadContext *s, AVFrame *frame, int inter) | |||
| if (s->last_frame->data[0]) | |||
| comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add); | |||
| } else { | |||
| s->dsp.clear_block(s->block); | |||
| s->bdsp.clear_block(s->block); | |||
| if(decode_block_intra(s, s->block) < 0) | |||
| return -1; | |||
| idct_put(s, frame, s->block, s->mb_x, s->mb_y, j); | |||
| @@ -27,6 +27,7 @@ | |||
| */ | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "get_bits.h" | |||
| #include "aandcttab.h" | |||
| #include "eaidct.h" | |||
| @@ -46,6 +47,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx) | |||
| TqiContext *t = avctx->priv_data; | |||
| MpegEncContext *s = &t->s; | |||
| s->avctx = avctx; | |||
| ff_blockdsp_init(&s->bdsp, avctx); | |||
| ff_dsputil_init(&s->dsp, avctx); | |||
| ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); | |||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | |||
| @@ -59,7 +61,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx) | |||
| static int tqi_decode_mb(MpegEncContext *s, int16_t (*block)[64]) | |||
| { | |||
| int n; | |||
| s->dsp.clear_blocks(block[0]); | |||
| s->bdsp.clear_blocks(block[0]); | |||
| for (n=0; n<6; n++) | |||
| if (ff_mpeg1_decode_block_intra(s, block[n], n) < 0) | |||
| return -1; | |||
| @@ -29,6 +29,7 @@ | |||
| #include "libavutil/intreadwrite.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "bytestream.h" | |||
| #include "dsputil.h" | |||
| #include "get_bits.h" | |||
| @@ -72,6 +73,7 @@ static const uint8_t chroma_quant[64] = { | |||
| }; | |||
| typedef struct JPGContext { | |||
| BlockDSPContext bdsp; | |||
| DSPContext dsp; | |||
| ScanTable scantable; | |||
| @@ -150,6 +152,7 @@ static av_cold int jpg_init(AVCodecContext *avctx, JPGContext *c) | |||
| if (ret) | |||
| return ret; | |||
| ff_blockdsp_init(&c->bdsp, avctx); | |||
| ff_dsputil_init(&c->dsp, avctx); | |||
| ff_init_scantable(c->dsp.idct_permutation, &c->scantable, | |||
| ff_zigzag_direct); | |||
| @@ -193,7 +196,7 @@ static int jpg_decode_block(JPGContext *c, GetBitContext *gb, | |||
| const int is_chroma = !!plane; | |||
| const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant; | |||
| c->dsp.clear_block(block); | |||
| c->bdsp.clear_block(block); | |||
| dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3); | |||
| if (dc < 0) | |||
| return AVERROR_INVALIDDATA; | |||
| @@ -259,7 +262,7 @@ static int jpg_decode_data(JPGContext *c, int width, int height, | |||
| for (i = 0; i < 3; i++) | |||
| c->prev_dc[i] = 1024; | |||
| bx = by = 0; | |||
| c->dsp.clear_blocks(c->block[0]); | |||
| c->bdsp.clear_blocks(c->block[0]); | |||
| for (mb_y = 0; mb_y < mb_h; mb_y++) { | |||
| for (mb_x = 0; mb_x < mb_w; mb_x++) { | |||
| if (mask && !mask[mb_x * 2] && !mask[mb_x * 2 + 1] && | |||
| @@ -442,7 +442,7 @@ static int h261_decode_mb(H261Context *h) | |||
| intra: | |||
| /* decode each block */ | |||
| if (s->mb_intra || HAS_CBP(h->mtype)) { | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| for (i = 0; i < 6; i++) { | |||
| if (h261_decode_block(h, s->block[i], i, cbp & 32) < 0) | |||
| return SLICE_ERROR; | |||
| @@ -196,7 +196,7 @@ static inline int get_p_cbp(MpegEncContext * s, | |||
| for (i = 0; i < 6; i++) { | |||
| if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ | |||
| s->block_last_index[i]= -1; | |||
| s->dsp.clear_block(s->block[i]); | |||
| s->bdsp.clear_block(s->block[i]); | |||
| } | |||
| } | |||
| }else{ | |||
| @@ -535,7 +535,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){ | |||
| int sign; | |||
| av_assert2(w->orient<12); | |||
| s->dsp.clear_block(s->block[0]); | |||
| s->bdsp.clear_block(s->block[0]); | |||
| if(chroma){ | |||
| dc_mode=2; | |||
| @@ -517,7 +517,7 @@ retry: | |||
| rl = &ff_rl_intra_aic; | |||
| i = 0; | |||
| s->gb= gb; | |||
| s->dsp.clear_block(block); | |||
| s->bdsp.clear_block(block); | |||
| goto retry; | |||
| } | |||
| av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); | |||
| @@ -610,7 +610,7 @@ int ff_h263_decode_mb(MpegEncContext *s, | |||
| } | |||
| }while(cbpc == 20); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| dquant = cbpc & 8; | |||
| s->mb_intra = ((cbpc & 4) != 0); | |||
| @@ -705,7 +705,7 @@ int ff_h263_decode_mb(MpegEncContext *s, | |||
| s->mb_intra = IS_INTRA(mb_type); | |||
| if(HAS_CBP(mb_type)){ | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1); | |||
| if(s->mb_intra){ | |||
| dquant = IS_QUANT(mb_type); | |||
| @@ -777,7 +777,7 @@ int ff_h263_decode_mb(MpegEncContext *s, | |||
| } | |||
| }while(cbpc == 8); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| dquant = cbpc & 4; | |||
| s->mb_intra = 1; | |||
| @@ -28,12 +28,12 @@ | |||
| #include "libavutil/intreadwrite.h" | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| #include "blockdsp.h" | |||
| #include "get_bits.h" | |||
| #include "internal.h" | |||
| typedef struct JvContext { | |||
| DSPContext dsp; | |||
| BlockDSPContext bdsp; | |||
| AVFrame *frame; | |||
| uint32_t palette[AVPALETTE_COUNT]; | |||
| int palette_has_changed; | |||
| @@ -48,7 +48,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||
| return AVERROR(ENOMEM); | |||
| avctx->pix_fmt = AV_PIX_FMT_PAL8; | |||
| ff_dsputil_init(&s->dsp, avctx); | |||
| ff_blockdsp_init(&s->bdsp, avctx); | |||
| return 0; | |||
| } | |||
| @@ -113,14 +113,14 @@ static inline void decode4x4(GetBitContext *gb, uint8_t *dst, int linesize) | |||
| * Decode 8x8 block | |||
| */ | |||
| static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize, | |||
| DSPContext *dsp) | |||
| BlockDSPContext *bdsp) | |||
| { | |||
| int i, j, v[2]; | |||
| switch (get_bits(gb, 2)) { | |||
| case 1: | |||
| v[0] = get_bits(gb, 8); | |||
| dsp->fill_block_tab[1](dst, v[0], linesize, 8); | |||
| bdsp->fill_block_tab[1](dst, v[0], linesize, 8); | |||
| break; | |||
| case 2: | |||
| v[0] = get_bits(gb, 8); | |||
| @@ -167,7 +167,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| for (i = 0; i < avctx->width; i += 8) | |||
| decode8x8(&gb, | |||
| s->frame->data[0] + j * s->frame->linesize[0] + i, | |||
| s->frame->linesize[0], &s->dsp); | |||
| s->frame->linesize[0], &s->bdsp); | |||
| buf += video_size; | |||
| } else if (video_type == 2) { | |||
| @@ -28,12 +28,14 @@ | |||
| */ | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "mpegvideo.h" | |||
| #include "mpeg12.h" | |||
| #include "thread.h" | |||
| typedef struct MDECContext { | |||
| AVCodecContext *avctx; | |||
| BlockDSPContext bdsp; | |||
| DSPContext dsp; | |||
| ThreadFrame frame; | |||
| GetBitContext gb; | |||
| @@ -123,7 +125,7 @@ static inline int decode_mb(MDECContext *a, int16_t block[6][64]) | |||
| int i, ret; | |||
| static const int block_index[6] = { 5, 4, 0, 1, 2, 3 }; | |||
| a->dsp.clear_blocks(block[0]); | |||
| a->bdsp.clear_blocks(block[0]); | |||
| for (i = 0; i < 6; i++) { | |||
| if ((ret = mdec_decode_block_intra(a, block[block_index[i]], | |||
| @@ -208,6 +210,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||
| a->avctx = avctx; | |||
| ff_blockdsp_init(&a->bdsp, avctx); | |||
| ff_dsputil_init(&a->dsp, avctx); | |||
| ff_mpeg12_init_vlcs(); | |||
| ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct); | |||
| @@ -24,6 +24,7 @@ | |||
| #include <stdint.h> | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "internal.h" | |||
| #include "get_bits.h" | |||
| #include "bytestream.h" | |||
| @@ -52,6 +53,7 @@ typedef struct { | |||
| GetBitContext gb; | |||
| ScanTable scantable; | |||
| BlockDSPContext bdsp; | |||
| DSPContext dsp; | |||
| HpelDSPContext hdsp; | |||
| VLC vlc; | |||
| @@ -146,6 +148,7 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx) | |||
| av_log(avctx, AV_LOG_ERROR, "error initializing vlc table\n"); | |||
| return ret; | |||
| } | |||
| ff_blockdsp_init(&ctx->bdsp, avctx); | |||
| ff_dsputil_init(&ctx->dsp, avctx); | |||
| ff_hpeldsp_init(&ctx->hdsp, avctx->flags); | |||
| ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag); | |||
| @@ -228,7 +231,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale) | |||
| int16_t *block = ctx->dct_block; | |||
| unsigned int pos; | |||
| ctx->dsp.clear_block(block); | |||
| ctx->bdsp.clear_block(block); | |||
| block[0] = get_bits(&ctx->gb, 8) << 3; | |||
| @@ -34,6 +34,7 @@ | |||
| #include "libavutil/avassert.h" | |||
| #include "libavutil/opt.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "copy_block.h" | |||
| #include "internal.h" | |||
| #include "mjpeg.h" | |||
| @@ -106,6 +107,7 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx) | |||
| } | |||
| s->avctx = avctx; | |||
| ff_blockdsp_init(&s->bdsp, avctx); | |||
| ff_hpeldsp_init(&s->hdsp, avctx->flags); | |||
| ff_dsputil_init(&s->dsp, avctx); | |||
| ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | |||
| @@ -652,7 +654,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, int16_t *block, | |||
| int16_t *quant_matrix, int Al) | |||
| { | |||
| int val; | |||
| s->dsp.clear_block(block); | |||
| s->bdsp.clear_block(block); | |||
| val = mjpeg_decode_dc(s, dc_index); | |||
| if (val == 0xfffff) { | |||
| av_log(s->avctx, AV_LOG_ERROR, "error dc\n"); | |||
| @@ -1217,7 +1219,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, | |||
| linesize[c], s->avctx->lowres); | |||
| else { | |||
| s->dsp.clear_block(s->block); | |||
| s->bdsp.clear_block(s->block); | |||
| if (decode_block(s, s->block, i, | |||
| s->dc_index[i], s->ac_index[i], | |||
| s->quant_matrixes[s->quant_sindex[i]]) < 0) { | |||
| @@ -34,6 +34,7 @@ | |||
| #include "libavutil/stereo3d.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "get_bits.h" | |||
| #include "dsputil.h" | |||
| #include "hpeldsp.h" | |||
| @@ -104,6 +105,7 @@ typedef struct MJpegDecodeContext { | |||
| uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode) | |||
| int palette_index; | |||
| ScanTable scantable; | |||
| BlockDSPContext bdsp; | |||
| DSPContext dsp; | |||
| HpelDSPContext hdsp; | |||
| @@ -799,10 +799,10 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64]) | |||
| av_dlog(s->avctx, "mb_type=%x\n", mb_type); | |||
| // motion_type = 0; /* avoid warning */ | |||
| if (IS_INTRA(mb_type)) { | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| if (!s->chroma_y_shift) | |||
| s->dsp.clear_blocks(s->block[6]); | |||
| s->bdsp.clear_blocks(s->block[6]); | |||
| /* compute DCT type */ | |||
| // FIXME: add an interlaced_dct coded var? | |||
| @@ -1039,13 +1039,13 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64]) | |||
| s->mb_intra = 0; | |||
| if (HAS_CBP(mb_type)) { | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| cbp = get_vlc2(&s->gb, ff_mb_pat_vlc.table, MB_PAT_VLC_BITS, 1); | |||
| if (mb_block_count > 6) { | |||
| cbp <<= mb_block_count - 6; | |||
| cbp |= get_bits(&s->gb, mb_block_count - 6); | |||
| s->dsp.clear_blocks(s->block[6]); | |||
| s->bdsp.clear_blocks(s->block[6]); | |||
| } | |||
| if (cbp <= 0) { | |||
| av_log(s->avctx, AV_LOG_ERROR, | |||
| @@ -1261,7 +1261,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, int16_t block[6][64]) | |||
| if (!IS_SKIP(mb_type)) { | |||
| int i; | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| /* decode each block */ | |||
| for (i = 0; i < 6; i++) { | |||
| if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, s->mb_intra, ctx->rvlc) < 0) { | |||
| @@ -1339,7 +1339,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||
| } | |||
| } while (cbpc == 20); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| dquant = cbpc & 8; | |||
| s->mb_intra = ((cbpc & 4) != 0); | |||
| if (s->mb_intra) | |||
| @@ -1485,7 +1485,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||
| if (modb2) { | |||
| cbp = 0; | |||
| } else { | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| cbp = get_bits(&s->gb, 6); | |||
| } | |||
| @@ -1620,7 +1620,7 @@ intra: | |||
| if (!s->progressive_sequence) | |||
| s->interlaced_dct = get_bits1(&s->gb); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| /* decode each block */ | |||
| for (i = 0; i < 6; i++) { | |||
| if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 1, 0) < 0) | |||
| @@ -485,7 +485,7 @@ static inline int get_b_cbp(MpegEncContext *s, int16_t block[6][64], | |||
| for (i = 0; i < 6; i++) { | |||
| if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i)) & 1) == 0) { | |||
| s->block_last_index[i] = -1; | |||
| s->dsp.clear_block(s->block[i]); | |||
| s->bdsp.clear_block(s->block[i]); | |||
| } | |||
| } | |||
| } else { | |||
| @@ -33,6 +33,7 @@ | |||
| #include "libavutil/internal.h" | |||
| #include "libavutil/timer.h" | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "dsputil.h" | |||
| #include "h264chroma.h" | |||
| #include "internal.h" | |||
| @@ -352,7 +353,7 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type, | |||
| ff_init_block_index(s); | |||
| ff_update_block_index(s); | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| s->dest[0] = s->current_picture.f->data[0] + (s->mb_y * 16 * s->linesize) + s->mb_x * 16; | |||
| s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift); | |||
| @@ -378,6 +379,7 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h) | |||
| /* init common dct for both encoder and decoder */ | |||
| av_cold int ff_dct_common_init(MpegEncContext *s) | |||
| { | |||
| ff_blockdsp_init(&s->bdsp, s->avctx); | |||
| ff_dsputil_init(&s->dsp, s->avctx); | |||
| ff_h264chroma_init(&s->h264chroma, 8); //for lowres | |||
| ff_hpeldsp_init(&s->hdsp, s->avctx->flags); | |||
| @@ -29,6 +29,7 @@ | |||
| #define AVCODEC_MPEGVIDEO_H | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "dsputil.h" | |||
| #include "error_resilience.h" | |||
| #include "get_bits.h" | |||
| @@ -356,6 +357,7 @@ typedef struct MpegEncContext { | |||
| int unrestricted_mv; ///< mv can point outside of the coded picture | |||
| int h263_long_vectors; ///< use horrible h263v1 long vector mode | |||
| BlockDSPContext bdsp; | |||
| DSPContext dsp; ///< pointers for accelerated dsp functions | |||
| H264ChromaContext h264chroma; | |||
| HpelDSPContext hdsp; | |||
| @@ -179,7 +179,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||
| *mb_type_ptr = MB_TYPE_INTRA; | |||
| } | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| for (i = 0; i < 6; i++) { | |||
| if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | |||
| { | |||
| @@ -270,7 +270,7 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||
| } | |||
| } | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| for (i = 0; i < 6; i++) { | |||
| if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | |||
| { | |||
| @@ -1,5 +1,6 @@ | |||
| OBJS += ppc/fmtconvert_altivec.o \ | |||
| OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o | |||
| OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o | |||
| OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o | |||
| OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o | |||
| @@ -0,0 +1,169 @@ | |||
| /* | |||
| * Copyright (c) 2002 Brian Foley | |||
| * Copyright (c) 2002 Dieter Shirley | |||
| * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #if HAVE_ALTIVEC_H | |||
| #include <altivec.h> | |||
| #endif | |||
| #include <string.h> | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/mem.h" | |||
| #include "libavutil/ppc/cpu.h" | |||
| #include "libavutil/ppc/types_altivec.h" | |||
| #include "libavcodec/blockdsp.h" | |||
| /* ***** WARNING ***** WARNING ***** WARNING ***** */ | |||
| /* | |||
| * clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with | |||
| * a cache line size not equal to 32 bytes. Fortunately all processors used | |||
| * by Apple up to at least the 7450 (AKA second generation G4) use 32-byte | |||
| * cache lines. This is due to the use of the 'dcbz' instruction. It simply | |||
| * clears a single cache line to zero, so you need to know the cache line | |||
| * size to use it! It's absurd, but it's fast... | |||
| * | |||
| * update 24/06/2003: Apple released the G5 yesterday, with a PPC970. | |||
| * cache line size: 128 bytes. Oops. | |||
| * The semantics of dcbz was changed, it always clears 32 bytes. So the function | |||
| * below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl, | |||
| * which is defined to clear a cache line (as dcbz before). So we can still | |||
| * distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. | |||
| * | |||
| * see <http://developer.apple.com/technotes/tn/tn2087.html> | |||
| * and <http://developer.apple.com/technotes/tn/tn2086.html> | |||
| */ | |||
/**
 * Zero six consecutive 8x8 int16_t coefficient blocks (768 bytes) with dcbz,
 * for CPUs on which dcbz clears exactly 32 bytes.
 *
 * dcbz zeroes the whole 32-byte line that contains the target address, so it
 * is only issued for lines that lie entirely inside the buffer. Only bit 4 of
 * the address is examined, i.e. the buffer is expected to be at least 16-byte
 * aligned; when it is not also 32-byte aligned, the leading and trailing
 * 16 bytes are cleared separately.
 *
 * @param blocks start of the 6 * 64 coefficients to clear
 */
static void clear_blocks_dcbz32_ppc(int16_t *blocks)
{
    const size_t size = sizeof(int16_t) * 6 * 64;
    const int misal   = ((uintptr_t) blocks & 0x00000010) != 0;
    size_t i          = 0;

    if (misal) {
        /* Byte-exact clear of the partial first line. Storing through
         * unsigned long * here would cover 32 bytes on LP64 targets and
         * alias the int16_t storage through an incompatible type. */
        memset(blocks, 0, 16);
        i = 16;
    }

    /* Every offset visited here starts a 32-byte line fully inside the
     * buffer. */
    for (; i + 31 < size; i += 32)
        __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");

    if (misal) {
        /* Partial last line: the final 16 bytes of the buffer. Computed from
         * the buffer size instead of a hard-coded unsigned long index, which
         * pointed past the end of the buffer when long is 8 bytes. */
        memset((uint8_t *) blocks + size - 16, 0, 16);
    }
}
/**
 * Zero six consecutive 8x8 int16_t coefficient blocks (768 bytes) for CPUs
 * where dcbzl clears a whole 128-byte cache line, i.e. the PPC970 AKA G5.
 *
 * Falls back to memset() when the build has no dcbzl support or when the
 * buffer does not start on a 128-byte boundary.
 *
 * @param blocks start of the 6 * 64 coefficients to clear
 */
static void clear_blocks_dcbz128_ppc(int16_t *blocks)
{
    const size_t total = sizeof(int16_t) * 6 * 64;
#if HAVE_DCBZL
    int offset;

    if ((unsigned long) blocks & 0x0000007f) {
        /* The unaligned case could probably be optimized as well, but it
         * was not considered worthwhile when this was written (2003-06-26). */
        memset(blocks, 0, total);
        return;
    }

    /* One dcbzl per 128-byte line of the buffer. */
    for (offset = 0; offset < total; offset += 128)
        __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (offset) : "memory");
#else
    memset(blocks, 0, total);
#endif
}
/**
 * Report how many bytes a single dcbzl instruction sets to zero on this CPU.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, dcbzl is executed on its
 * middle, and the zero bytes are counted afterwards.
 *
 * update 24/06/2003: dcbz was replaced by dcbzl to get the intended effect
 * (Apple "fixed" dcbz). Unfortunately this cannot be used unless the
 * assembler knows about dcbzl, hence the HAVE_DCBZL guard.
 *
 * @return the number of zeroed bytes, or 0 when built without dcbzl support
 *         or when the scratch buffer cannot be allocated
 */
static long check_dcbzl_effect(void)
{
    long count = 0;
#if HAVE_DCBZL
    char *fakedata = av_malloc(1024);
    char *fakedata_middle;
    long zero = 0, i;

    if (!fakedata)
        return 0L;

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* The "b" constraint selects an address base register, which keeps the
     * compiler from picking r0. The "memory" clobber is required because the
     * instruction rewrites part of fakedata behind the compiler's back;
     * without it the compiler may assume the 0xFF fill is still intact and
     * reduce the counting loop below to a constant 0. */
    __asm__ volatile ("dcbzl %0, %1"
                      :: "b" (fakedata_middle), "r" (zero)
                      : "memory");

    for (i = 0; i < 1024; i++)
        if (fakedata[i] == (char) 0)
            count++;

    av_free(fakedata);
#endif

    return count;
}
#if HAVE_ALTIVEC
/**
 * Zero one 8x8 block of int16_t coefficients (128 bytes) with eight
 * 16-byte AltiVec stores.
 *
 * @param block the 64 coefficients to clear
 */
static void clear_block_altivec(int16_t *block)
{
    LOAD_ZERO;
    int offset;

    /* Byte offsets 0, 16, ..., 112 cover the whole block. */
    for (offset = 0; offset < 128; offset += 16)
        vec_st(zero_s16v, offset, block);
}
#endif /* HAVE_ALTIVEC */
| av_cold void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth) | |||
| { | |||
| // common optimizations whether AltiVec is available or not | |||
| if (!high_bit_depth) { | |||
| switch (check_dcbzl_effect()) { | |||
| case 32: | |||
| c->clear_blocks = clear_blocks_dcbz32_ppc; | |||
| break; | |||
| case 128: | |||
| c->clear_blocks = clear_blocks_dcbz128_ppc; | |||
| break; | |||
| default: | |||
| break; | |||
| } | |||
| } | |||
| #if HAVE_ALTIVEC | |||
| if (!PPC_ALTIVEC(av_get_cpu_flags())) | |||
| return; | |||
| if (!high_bit_depth) | |||
| c->clear_block = clear_block_altivec; | |||
| #endif /* HAVE_ALTIVEC */ | |||
| } | |||
| @@ -558,19 +558,6 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, | |||
| } | |||
| } | |||
| static void clear_block_altivec(int16_t *block) | |||
| { | |||
| LOAD_ZERO; | |||
| vec_st(zero_s16v, 0, block); | |||
| vec_st(zero_s16v, 16, block); | |||
| vec_st(zero_s16v, 32, block); | |||
| vec_st(zero_s16v, 48, block); | |||
| vec_st(zero_s16v, 64, block); | |||
| vec_st(zero_s16v, 80, block); | |||
| vec_st(zero_s16v, 96, block); | |||
| vec_st(zero_s16v, 112, block); | |||
| } | |||
| static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | |||
| uint8_t *src, int stride, int h) | |||
| { | |||
| @@ -931,7 +918,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | |||
| if (!high_bit_depth) { | |||
| c->get_pixels = get_pixels_altivec; | |||
| c->clear_block = clear_block_altivec; | |||
| } | |||
| c->hadamard8_diff[0] = hadamard8_diff16_altivec; | |||
| @@ -24,125 +24,15 @@ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/mem.h" | |||
| #include "libavutil/ppc/cpu.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/dsputil.h" | |||
| #include "dsputil_altivec.h" | |||
| /* ***** WARNING ***** WARNING ***** WARNING ***** */ | |||
| /* | |||
| * clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with | |||
| * a cache line size not equal to 32 bytes. Fortunately all processors used | |||
| * by Apple up to at least the 7450 (AKA second generation G4) use 32-byte | |||
| * cache lines. This is due to the use of the 'dcbz' instruction. It simply | |||
| * clears a single cache line to zero, so you need to know the cache line | |||
| * size to use it! It's absurd, but it's fast... | |||
| * | |||
| * update 24/06/2003: Apple released the G5 yesterday, with a PPC970. | |||
| * cache line size: 128 bytes. Oups. | |||
| * The semantics of dcbz was changed, it always clears 32 bytes. So the function | |||
| * below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl, | |||
| * which is defined to clear a cache line (as dcbz before). So we can still | |||
| * distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. | |||
| * | |||
| * see <http://developer.apple.com/technotes/tn/tn2087.html> | |||
| * and <http://developer.apple.com/technotes/tn/tn2086.html> | |||
| */ | |||
| static void clear_blocks_dcbz32_ppc(int16_t *blocks) | |||
| { | |||
| register int misal = (unsigned long) blocks & 0x00000010, i = 0; | |||
| if (misal) { | |||
| ((unsigned long *) blocks)[0] = 0L; | |||
| ((unsigned long *) blocks)[1] = 0L; | |||
| ((unsigned long *) blocks)[2] = 0L; | |||
| ((unsigned long *) blocks)[3] = 0L; | |||
| i += 16; | |||
| } | |||
| for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32) | |||
| __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory"); | |||
| if (misal) { | |||
| ((unsigned long *) blocks)[188] = 0L; | |||
| ((unsigned long *) blocks)[189] = 0L; | |||
| ((unsigned long *) blocks)[190] = 0L; | |||
| ((unsigned long *) blocks)[191] = 0L; | |||
| i += 16; | |||
| } | |||
| } | |||
| /* Same as above, when dcbzl clears a whole 128 bytes cache line | |||
| * i.e. the PPC970 AKA G5. */ | |||
| static void clear_blocks_dcbz128_ppc(int16_t *blocks) | |||
| { | |||
| #if HAVE_DCBZL | |||
| register int misal = (unsigned long) blocks & 0x0000007f, i = 0; | |||
| if (misal) { | |||
| /* We could probably also optimize this case, | |||
| * but there's not much point as the machines | |||
| * aren't available yet (2003-06-26). */ | |||
| memset(blocks, 0, sizeof(int16_t) * 6 * 64); | |||
| } else { | |||
| for (; i < sizeof(int16_t) * 6 * 64; i += 128) | |||
| __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (i) : "memory"); | |||
| } | |||
| #else | |||
| memset(blocks, 0, sizeof(int16_t) * 6 * 64); | |||
| #endif | |||
| } | |||
| /* Check dcbz report how many bytes are set to 0 by dcbz. */ | |||
| /* update 24/06/2003: Replace dcbz by dcbzl to get the intended effect | |||
| * (Apple "fixed" dcbz). Unfortunately this cannot be used unless the | |||
| * assembler knows about dcbzl ... */ | |||
| static long check_dcbzl_effect(void) | |||
| { | |||
| long count = 0; | |||
| #if HAVE_DCBZL | |||
| register char *fakedata = av_malloc(1024); | |||
| register char *fakedata_middle; | |||
| register long zero = 0, i = 0; | |||
| if (!fakedata) | |||
| return 0L; | |||
| fakedata_middle = fakedata + 512; | |||
| memset(fakedata, 0xFF, 1024); | |||
| /* Below the constraint "b" seems to mean "address base register" | |||
| * in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so.... */ | |||
| __asm__ volatile ("dcbzl %0, %1" :: "b" (fakedata_middle), "r" (zero)); | |||
| for (i = 0; i < 1024; i++) | |||
| if (fakedata[i] == (char) 0) | |||
| count++; | |||
| av_free(fakedata); | |||
| #endif | |||
| return count; | |||
| } | |||
| av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, | |||
| unsigned high_bit_depth) | |||
| { | |||
| int mm_flags = av_get_cpu_flags(); | |||
| // common optimizations whether AltiVec is available or not | |||
| if (!high_bit_depth) { | |||
| switch (check_dcbzl_effect()) { | |||
| case 32: | |||
| c->clear_blocks = clear_blocks_dcbz32_ppc; | |||
| break; | |||
| case 128: | |||
| c->clear_blocks = clear_blocks_dcbz128_ppc; | |||
| break; | |||
| default: | |||
| break; | |||
| } | |||
| } | |||
| if (PPC_ALTIVEC(mm_flags)) { | |||
| ff_dsputil_init_altivec(c, avctx, high_bit_depth); | |||
| ff_int_init_altivec(c, avctx); | |||
| @@ -23,6 +23,7 @@ | |||
| #define AVCODEC_PRORESDEC_H | |||
| #include "dsputil.h" | |||
| #include "blockdsp.h" | |||
| #include "proresdsp.h" | |||
| typedef struct { | |||
| @@ -36,6 +37,7 @@ typedef struct { | |||
| typedef struct { | |||
| DSPContext dsp; | |||
| BlockDSPContext bdsp; | |||
| ProresDSPContext prodsp; | |||
| AVFrame *frame; | |||
| int frame_type; ///< 0 = progressive, 1 = tff, 2 = bff | |||
| @@ -50,6 +50,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||
| avctx->bits_per_raw_sample = 10; | |||
| ff_dsputil_init(&ctx->dsp, avctx); | |||
| ff_blockdsp_init(&ctx->bdsp, avctx); | |||
| ff_proresdsp_init(&ctx->prodsp, avctx); | |||
| ff_init_scantable_permutation(idct_permutation, | |||
| @@ -366,7 +367,7 @@ static int decode_slice_luma(AVCodecContext *avctx, SliceContext *slice, | |||
| int ret; | |||
| for (i = 0; i < blocks_per_slice; i++) | |||
| ctx->dsp.clear_block(blocks+(i<<6)); | |||
| ctx->bdsp.clear_block(blocks+(i<<6)); | |||
| init_get_bits(&gb, buf, buf_size << 3); | |||
| @@ -399,7 +400,7 @@ static int decode_slice_chroma(AVCodecContext *avctx, SliceContext *slice, | |||
| int ret; | |||
| for (i = 0; i < blocks_per_slice; i++) | |||
| ctx->dsp.clear_block(blocks+(i<<6)); | |||
| ctx->bdsp.clear_block(blocks+(i<<6)); | |||
| init_get_bits(&gb, buf, buf_size << 3); | |||
| @@ -478,7 +479,7 @@ static void decode_slice_alpha(ProresContext *ctx, | |||
| int16_t *block; | |||
| for (i = 0; i < blocks_per_slice<<2; i++) | |||
| ctx->dsp.clear_block(blocks+(i<<6)); | |||
| ctx->bdsp.clear_block(blocks+(i<<6)); | |||
| init_get_bits(&gb, buf, buf_size << 3); | |||
| @@ -3016,7 +3016,7 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n, | |||
| int scale; | |||
| int q1, q2 = 0; | |||
| s->dsp.clear_block(block); | |||
| s->bdsp.clear_block(block); | |||
| /* XXX: Guard against dumb values of mquant */ | |||
| mquant = (mquant < 1) ? 0 : ((mquant > 31) ? 31 : mquant); | |||
| @@ -3223,7 +3223,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, | |||
| int ttblk = ttmb & 7; | |||
| int pat = 0; | |||
| s->dsp.clear_block(block); | |||
| s->bdsp.clear_block(block); | |||
| if (ttmb == -1) { | |||
| ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)]; | |||
| @@ -4801,7 +4801,7 @@ static void vc1_decode_i_blocks(VC1Context *v) | |||
| dst[3] = dst[2] + 8; | |||
| dst[4] = s->dest[1]; | |||
| dst[5] = s->dest[2]; | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| mb_pos = s->mb_x + s->mb_y * s->mb_width; | |||
| s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA; | |||
| s->current_picture.qscale_table[mb_pos] = v->pq; | |||
| @@ -4941,7 +4941,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v) | |||
| for (;s->mb_x < s->mb_width; s->mb_x++) { | |||
| int16_t (*block)[64] = v->block[v->cur_blk_idx]; | |||
| ff_update_block_index(s); | |||
| s->dsp.clear_blocks(block[0]); | |||
| s->bdsp.clear_blocks(block[0]); | |||
| mb_pos = s->mb_x + s->mb_y * s->mb_stride; | |||
| s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_INTRA; | |||
| s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0; | |||
| @@ -5626,6 +5626,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) | |||
| // That this is necessary might indicate a bug. | |||
| ff_vc1_decode_end(avctx); | |||
| ff_blockdsp_init(&s->bdsp, avctx); | |||
| ff_h264chroma_init(&v->h264chroma, 8); | |||
| ff_qpeldsp_init(&s->qdsp); | |||
| @@ -28,6 +28,7 @@ | |||
| av_cold void ff_wmv2_common_init(Wmv2Context * w){ | |||
| MpegEncContext * const s= &w->s; | |||
| ff_blockdsp_init(&s->bdsp, s->avctx); | |||
| ff_wmv2dsp_init(&w->wdsp); | |||
| s->dsp.idct_permutation_type = w->wdsp.idct_perm; | |||
| ff_init_scantable_permutation(s->dsp.idct_permutation, | |||
| @@ -60,12 +61,12 @@ static void wmv2_add_block(Wmv2Context *w, int16_t *block1, uint8_t *dst, int st | |||
| case 1: | |||
| ff_simple_idct84_add(dst , stride, block1); | |||
| ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]); | |||
| s->dsp.clear_block(w->abt_block2[n]); | |||
| s->bdsp.clear_block(w->abt_block2[n]); | |||
| break; | |||
| case 2: | |||
| ff_simple_idct48_add(dst , stride, block1); | |||
| ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]); | |||
| s->dsp.clear_block(w->abt_block2[n]); | |||
| s->bdsp.clear_block(w->abt_block2[n]); | |||
| break; | |||
| default: | |||
| av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n"); | |||
| @@ -385,7 +385,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||
| wmv2_pred_motion(w, &mx, &my); | |||
| if(cbp){ | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| if(s->per_mb_rl_table){ | |||
| s->rl_table_index = decode012(&s->gb); | |||
| s->rl_chroma_table_index = s->rl_table_index; | |||
| @@ -431,7 +431,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||
| s->rl_chroma_table_index = s->rl_table_index; | |||
| } | |||
| s->dsp.clear_blocks(s->block[0]); | |||
| s->bdsp.clear_blocks(s->block[0]); | |||
| for (i = 0; i < 6; i++) { | |||
| if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | |||
| { | |||
| @@ -53,6 +53,7 @@ OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o | |||
| OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | |||
| OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o | |||
| MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o | |||
| MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | |||
| x86/idct_mmx_xvid.o \ | |||
| x86/idct_sse2_xvid.o \ | |||
| @@ -0,0 +1,61 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include "config.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/internal.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/blockdsp.h" | |||
| #include "libavcodec/version.h" | |||
| void ff_clear_block_mmx(int16_t *block); | |||
| void ff_clear_block_sse(int16_t *block); | |||
| void ff_clear_blocks_mmx(int16_t *blocks); | |||
| void ff_clear_blocks_sse(int16_t *blocks); | |||
| #if FF_API_XVMC | |||
| av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth, | |||
| AVCodecContext *avctx) | |||
| #else | |||
| av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth) | |||
| #endif /* FF_API_XVMC */ | |||
| { | |||
| #if HAVE_YASM | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (!high_bit_depth) { | |||
| if (INLINE_MMX(cpu_flags)) { | |||
| c->clear_block = ff_clear_block_mmx; | |||
| c->clear_blocks = ff_clear_blocks_mmx; | |||
| } | |||
| /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ | |||
| if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb) | |||
| return; | |||
| if (INLINE_SSE(cpu_flags)) { | |||
| c->clear_block = ff_clear_block_sse; | |||
| c->clear_blocks = ff_clear_blocks_sse; | |||
| } | |||
| } | |||
| #endif /* HAVE_YASM */ | |||
| } | |||
| @@ -22,12 +22,10 @@ | |||
| #include "config.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/internal.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/dsputil.h" | |||
| #include "libavcodec/simple_idct.h" | |||
| #include "libavcodec/version.h" | |||
| #include "dsputil_x86.h" | |||
| #include "idct_xvid.h" | |||
| @@ -82,10 +80,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||
| #endif /* HAVE_MMX_INLINE */ | |||
| #if HAVE_MMX_EXTERNAL | |||
| if (!high_bit_depth) { | |||
| c->clear_block = ff_clear_block_mmx; | |||
| c->clear_blocks = ff_clear_blocks_mmx; | |||
| } | |||
| c->vector_clip_int32 = ff_vector_clip_int32_mmx; | |||
| c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; | |||
| #endif /* HAVE_MMX_EXTERNAL */ | |||
| @@ -113,15 +107,6 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, | |||
| #if HAVE_YASM | |||
| #if HAVE_SSE_EXTERNAL | |||
| c->vector_clipf = ff_vector_clipf_sse; | |||
| /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ | |||
| if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb) | |||
| return; | |||
| if (!high_bit_depth) { | |||
| c->clear_block = ff_clear_block_sse; | |||
| c->clear_blocks = ff_clear_blocks_sse; | |||
| } | |||
| #endif | |||
| #if HAVE_INLINE_ASM && CONFIG_VIDEODSP | |||
| c->gmc = ff_gmc_sse; | |||
| @@ -134,7 +134,6 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||
| } while (--i); | |||
| } | |||
| /* Draw the edges of width 'w' of an image of size width, height | |||
| * this MMX version can only handle w == 8 || w == 16. */ | |||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | |||
| @@ -40,11 +40,6 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||
| void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, | |||
| int line_size); | |||
| void ff_clear_block_mmx(int16_t *block); | |||
| void ff_clear_block_sse(int16_t *block); | |||
| void ff_clear_blocks_mmx(int16_t *blocks); | |||
| void ff_clear_blocks_sse(int16_t *blocks); | |||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides); | |||