@@ -1530,6 +1530,7 @@ CONFIG_EXTRA=" | |||||
aandcttables | aandcttables | ||||
ac3dsp | ac3dsp | ||||
audio_frame_queue | audio_frame_queue | ||||
blockdsp | |||||
cabac | cabac | ||||
dsputil | dsputil | ||||
gcrypt | gcrypt | ||||
@@ -1705,7 +1706,7 @@ mdct_select="fft" | |||||
rdft_select="fft" | rdft_select="fft" | ||||
mpegaudio_select="mpegaudiodsp" | mpegaudio_select="mpegaudiodsp" | ||||
mpegaudiodsp_select="dct" | mpegaudiodsp_select="dct" | ||||
mpegvideo_select="dsputil hpeldsp videodsp" | |||||
mpegvideo_select="blockdsp dsputil hpeldsp videodsp" | |||||
mpegvideoenc_select="dsputil mpegvideo qpeldsp" | mpegvideoenc_select="dsputil mpegvideo qpeldsp" | ||||
# decoders / encoders | # decoders / encoders | ||||
@@ -1722,33 +1723,33 @@ amrnb_decoder_select="lsp" | |||||
amrwb_decoder_select="lsp" | amrwb_decoder_select="lsp" | ||||
amv_decoder_select="sp5x_decoder" | amv_decoder_select="sp5x_decoder" | ||||
ape_decoder_select="dsputil" | ape_decoder_select="dsputil" | ||||
asv1_decoder_select="dsputil" | |||||
asv1_decoder_select="blockdsp dsputil" | |||||
asv1_encoder_select="dsputil" | asv1_encoder_select="dsputil" | ||||
asv2_decoder_select="dsputil" | |||||
asv2_decoder_select="blockdsp dsputil" | |||||
asv2_encoder_select="dsputil" | asv2_encoder_select="dsputil" | ||||
atrac1_decoder_select="mdct sinewin" | atrac1_decoder_select="mdct sinewin" | ||||
atrac3_decoder_select="mdct" | atrac3_decoder_select="mdct" | ||||
atrac3p_decoder_select="mdct sinewin" | atrac3p_decoder_select="mdct sinewin" | ||||
bink_decoder_select="dsputil hpeldsp" | |||||
bink_decoder_select="blockdsp hpeldsp" | |||||
binkaudio_dct_decoder_select="mdct rdft dct sinewin" | binkaudio_dct_decoder_select="mdct rdft dct sinewin" | ||||
binkaudio_rdft_decoder_select="mdct rdft sinewin" | binkaudio_rdft_decoder_select="mdct rdft sinewin" | ||||
cavs_decoder_select="dsputil golomb h264chroma qpeldsp videodsp" | |||||
cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp" | |||||
cllc_decoder_select="dsputil" | cllc_decoder_select="dsputil" | ||||
comfortnoise_encoder_select="lpc" | comfortnoise_encoder_select="lpc" | ||||
cook_decoder_select="dsputil mdct sinewin" | cook_decoder_select="dsputil mdct sinewin" | ||||
cscd_decoder_select="lzo" | cscd_decoder_select="lzo" | ||||
cscd_decoder_suggest="zlib" | cscd_decoder_suggest="zlib" | ||||
dca_decoder_select="mdct" | dca_decoder_select="mdct" | ||||
dnxhd_decoder_select="dsputil" | |||||
dnxhd_encoder_select="aandcttables dsputil mpegvideoenc" | |||||
dnxhd_decoder_select="blockdsp dsputil" | |||||
dnxhd_encoder_select="aandcttables blockdsp dsputil mpegvideoenc" | |||||
dvvideo_decoder_select="dsputil" | dvvideo_decoder_select="dsputil" | ||||
dvvideo_encoder_select="dsputil" | dvvideo_encoder_select="dsputil" | ||||
dxa_decoder_deps="zlib" | dxa_decoder_deps="zlib" | ||||
eac3_decoder_select="ac3_decoder" | eac3_decoder_select="ac3_decoder" | ||||
eac3_encoder_select="ac3_encoder" | eac3_encoder_select="ac3_encoder" | ||||
eamad_decoder_select="aandcttables dsputil mpegvideo" | |||||
eamad_decoder_select="aandcttables blockdsp dsputil mpegvideo" | |||||
eatgq_decoder_select="aandcttables dsputil" | eatgq_decoder_select="aandcttables dsputil" | ||||
eatqi_decoder_select="aandcttables dsputil error_resilience mpegvideo" | |||||
eatqi_decoder_select="aandcttables blockdsp dsputil error_resilience mpegvideo" | |||||
exr_decoder_deps="zlib" | exr_decoder_deps="zlib" | ||||
ffv1_decoder_select="golomb rangecoder" | ffv1_decoder_select="golomb rangecoder" | ||||
ffv1_encoder_select="rangecoder" | ffv1_encoder_select="rangecoder" | ||||
@@ -1762,10 +1763,10 @@ flashsv_encoder_deps="zlib" | |||||
flashsv2_decoder_deps="zlib" | flashsv2_decoder_deps="zlib" | ||||
flv_decoder_select="h263_decoder" | flv_decoder_select="h263_decoder" | ||||
flv_encoder_select="h263_encoder" | flv_encoder_select="h263_encoder" | ||||
fourxm_decoder_select="dsputil" | |||||
fourxm_decoder_select="blockdsp dsputil" | |||||
fraps_decoder_select="dsputil huffman" | fraps_decoder_select="dsputil huffman" | ||||
g2m_decoder_deps="zlib" | g2m_decoder_deps="zlib" | ||||
g2m_decoder_select="dsputil" | |||||
g2m_decoder_select="blockdsp dsputil" | |||||
h261_decoder_select="error_resilience mpegvideo" | h261_decoder_select="error_resilience mpegvideo" | ||||
h261_encoder_select="aandcttables mpegvideoenc" | h261_encoder_select="aandcttables mpegvideoenc" | ||||
h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp" | h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp" | ||||
@@ -1783,14 +1784,14 @@ indeo3_decoder_select="hpeldsp" | |||||
interplay_video_decoder_select="hpeldsp" | interplay_video_decoder_select="hpeldsp" | ||||
jpegls_decoder_select="golomb mjpeg_decoder" | jpegls_decoder_select="golomb mjpeg_decoder" | ||||
jpegls_encoder_select="golomb" | jpegls_encoder_select="golomb" | ||||
jv_decoder_select="dsputil" | |||||
jv_decoder_select="blockdsp" | |||||
lagarith_decoder_select="huffyuvdsp" | lagarith_decoder_select="huffyuvdsp" | ||||
ljpeg_encoder_select="aandcttables mpegvideoenc" | ljpeg_encoder_select="aandcttables mpegvideoenc" | ||||
loco_decoder_select="golomb" | loco_decoder_select="golomb" | ||||
mdec_decoder_select="dsputil error_resilience mpegvideo" | |||||
mdec_decoder_select="blockdsp dsputil error_resilience mpegvideo" | |||||
metasound_decoder_select="lsp mdct sinewin" | metasound_decoder_select="lsp mdct sinewin" | ||||
mimic_decoder_select="dsputil hpeldsp" | |||||
mjpeg_decoder_select="dsputil hpeldsp" | |||||
mimic_decoder_select="blockdsp dsputil hpeldsp" | |||||
mjpeg_decoder_select="blockdsp dsputil hpeldsp" | |||||
mjpeg_encoder_select="aandcttables mpegvideoenc" | mjpeg_encoder_select="aandcttables mpegvideoenc" | ||||
mjpegb_decoder_select="mjpeg_decoder" | mjpegb_decoder_select="mjpeg_decoder" | ||||
mlp_decoder_select="mlp_parser" | mlp_decoder_select="mlp_parser" | ||||
@@ -1862,7 +1863,7 @@ twinvq_decoder_select="mdct lsp sinewin" | |||||
utvideo_decoder_select="dsputil" | utvideo_decoder_select="dsputil" | ||||
utvideo_encoder_select="dsputil huffman huffyuvencdsp" | utvideo_encoder_select="dsputil huffman huffyuvencdsp" | ||||
vble_decoder_select="huffyuvdsp" | vble_decoder_select="huffyuvdsp" | ||||
vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp" | |||||
vc1_decoder_select="blockdsp error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp" | |||||
vc1image_decoder_select="vc1_decoder" | vc1image_decoder_select="vc1_decoder" | ||||
vorbis_decoder_select="mdct" | vorbis_decoder_select="mdct" | ||||
vorbis_encoder_select="mdct" | vorbis_encoder_select="mdct" | ||||
@@ -1883,7 +1884,7 @@ wmav2_encoder_select="mdct sinewin" | |||||
wmavoice_decoder_select="lsp rdft dct mdct sinewin" | wmavoice_decoder_select="lsp rdft dct mdct sinewin" | ||||
wmv1_decoder_select="h263_decoder" | wmv1_decoder_select="h263_decoder" | ||||
wmv1_encoder_select="h263_encoder" | wmv1_encoder_select="h263_encoder" | ||||
wmv2_decoder_select="h263_decoder intrax8 videodsp" | |||||
wmv2_decoder_select="blockdsp h263_decoder intrax8 videodsp" | |||||
wmv2_encoder_select="h263_encoder" | wmv2_encoder_select="h263_encoder" | ||||
wmv3_decoder_select="vc1_decoder" | wmv3_decoder_select="vc1_decoder" | ||||
wmv3image_decoder_select="wmv3_decoder" | wmv3image_decoder_select="wmv3_decoder" | ||||
@@ -30,6 +30,7 @@ | |||||
#include "libavutil/imgutils.h" | #include "libavutil/imgutils.h" | ||||
#include "libavutil/intreadwrite.h" | #include "libavutil/intreadwrite.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "bytestream.h" | #include "bytestream.h" | ||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
@@ -132,6 +133,7 @@ typedef struct CFrameBuffer { | |||||
typedef struct FourXContext { | typedef struct FourXContext { | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
DSPContext dsp; | DSPContext dsp; | ||||
BlockDSPContext bdsp; | |||||
uint16_t *frame_buffer; | uint16_t *frame_buffer; | ||||
uint16_t *last_frame_buffer; | uint16_t *last_frame_buffer; | ||||
GetBitContext pre_gb; ///< ac/dc prefix | GetBitContext pre_gb; ///< ac/dc prefix | ||||
@@ -564,7 +566,7 @@ static int decode_i_mb(FourXContext *f) | |||||
int ret; | int ret; | ||||
int i; | int i; | ||||
f->dsp.clear_blocks(f->block[0]); | |||||
f->bdsp.clear_blocks(f->block[0]); | |||||
for (i = 0; i < 6; i++) | for (i = 0; i < 6; i++) | ||||
if ((ret = decode_i_block(f, f->block[i])) < 0) | if ((ret = decode_i_block(f, f->block[i])) < 0) | ||||
@@ -953,6 +955,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
} | } | ||||
f->version = AV_RL32(avctx->extradata) >> 16; | f->version = AV_RL32(avctx->extradata) >> 16; | ||||
ff_blockdsp_init(&f->bdsp, avctx); | |||||
ff_dsputil_init(&f->dsp, avctx); | ff_dsputil_init(&f->dsp, avctx); | ||||
f->avctx = avctx; | f->avctx = avctx; | ||||
init_vlcs(f); | init_vlcs(f); | ||||
@@ -28,6 +28,7 @@ OBJS = allcodecs.o \ | |||||
OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o | OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o | ||||
OBJS-$(CONFIG_AC3DSP) += ac3dsp.o | OBJS-$(CONFIG_AC3DSP) += ac3dsp.o | ||||
OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o | OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o | ||||
OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o | |||||
OBJS-$(CONFIG_CABAC) += cabac.o | OBJS-$(CONFIG_CABAC) += cabac.o | ||||
OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o | OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o | ||||
OBJS-$(CONFIG_DXVA2) += dxva2.o | OBJS-$(CONFIG_DXVA2) += dxva2.o | ||||
@@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o | |||||
OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ | OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ | ||||
arm/ac3dsp_arm.o | arm/ac3dsp_arm.o | ||||
OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o | |||||
OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ | OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ | ||||
arm/dsputil_arm.o \ | arm/dsputil_arm.o \ | ||||
arm/jrevdct_arm.o \ | arm/jrevdct_arm.o \ | ||||
@@ -76,6 +77,8 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ | |||||
NEON-OBJS += arm/fmtconvert_neon.o | NEON-OBJS += arm/fmtconvert_neon.o | ||||
NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o | NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o | ||||
NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ | |||||
arm/blockdsp_neon.o | |||||
NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ | NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ | ||||
arm/dsputil_neon.o \ | arm/dsputil_neon.o \ | ||||
arm/int_neon.o \ | arm/int_neon.o \ | ||||
@@ -0,0 +1,26 @@ | |||||
/* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#ifndef AVCODEC_ARM_BLOCKDSP_ARM_H
#define AVCODEC_ARM_BLOCKDSP_ARM_H

#include "libavcodec/blockdsp.h"

/**
 * Install the NEON block-clearing routines into a BlockDSPContext.
 *
 * @param c              context whose function pointers may be overridden
 * @param high_bit_depth non-zero when samples are wider than 8 bits; the
 *                       NEON routines are only installed when this is zero
 */
void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth);

#endif /* AVCODEC_ARM_BLOCKDSP_ARM_H */
@@ -0,0 +1,33 @@ | |||||
/* | |||||
* ARM optimized block operations | |||||
* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "libavutil/attributes.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/arm/cpu.h" | |||||
#include "libavcodec/blockdsp.h" | |||||
#include "blockdsp_arm.h" | |||||
/**
 * ARM entry point for BlockDSPContext initialisation.
 *
 * Queries the runtime CPU flags and, when NEON is available, hands the
 * context to ff_blockdsp_init_neon(). Without NEON the context is left
 * untouched.
 *
 * @param c              context to (possibly) override
 * @param high_bit_depth forwarded unchanged to the NEON initialiser
 */
av_cold void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (have_neon(cpu_flags))
        ff_blockdsp_init_neon(c, high_bit_depth);
}
@@ -0,0 +1,37 @@ | |||||
/* | |||||
* ARM NEON optimised block operations | |||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||||
* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include <stdint.h> | |||||
#include "libavutil/attributes.h" | |||||
#include "libavcodec/blockdsp.h" | |||||
#include "blockdsp_arm.h" | |||||
void ff_clear_block_neon(int16_t *block); | |||||
void ff_clear_blocks_neon(int16_t *blocks); | |||||
av_cold void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth) | |||||
{ | |||||
if (!high_bit_depth) { | |||||
c->clear_block = ff_clear_block_neon; | |||||
c->clear_blocks = ff_clear_blocks_neon; | |||||
} | |||||
} |
@@ -0,0 +1,38 @@ | |||||
/* | |||||
* ARM NEON optimised block functions | |||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||||
* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "libavutil/arm/asm.S" | |||||
@ void ff_clear_block_neon(int16_t *block)
@ Zero one block of 64 16-bit coefficients (128 bytes).
@ r0 holds the block pointer; the :128 qualifier on each store requires
@ it to be 16-byte aligned.
function ff_clear_block_neon, export=1
        vmov.i16        q0,  #0                 @ q0 = 16 zero bytes
.rept 8                                         @ 8 stores x 16 bytes = 128 bytes
        vst1.16         {q0}, [r0,:128]!        @ store and post-increment r0
.endr
        bx              lr
endfunc
@ void ff_clear_blocks_neon(int16_t *blocks)
@ Zero six consecutive blocks of 64 16-bit coefficients (768 bytes).
@ r0 holds the pointer to the first block; the :128 qualifier on each
@ store requires it to be 16-byte aligned.
function ff_clear_blocks_neon, export=1
        vmov.i16        q0,  #0                 @ q0 = 16 zero bytes
.rept 8*6                                       @ 48 stores x 16 bytes = 768 bytes
        vst1.16         {q0}, [r0,:128]!        @ store and post-increment r0
.endr
        bx              lr
endfunc
@@ -30,9 +30,6 @@ void ff_simple_idct_neon(int16_t *data); | |||||
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); | void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); | ||||
void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); | void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); | ||||
void ff_clear_block_neon(int16_t *block); | |||||
void ff_clear_blocks_neon(int16_t *blocks); | |||||
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
@@ -61,11 +58,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, | |||||
c->put_pixels_clamped = ff_put_pixels_clamped_neon; | c->put_pixels_clamped = ff_put_pixels_clamped_neon; | ||||
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; | c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; | ||||
if (!high_bit_depth) { | |||||
c->clear_block = ff_clear_block_neon; | |||||
c->clear_blocks = ff_clear_blocks_neon; | |||||
} | |||||
c->vector_clipf = ff_vector_clipf_neon; | c->vector_clipf = ff_vector_clipf_neon; | ||||
c->vector_clip_int32 = ff_vector_clip_int32_neon; | c->vector_clip_int32 = ff_vector_clip_int32_neon; | ||||
@@ -21,22 +21,6 @@ | |||||
#include "libavutil/arm/asm.S" | #include "libavutil/arm/asm.S" | ||||
function ff_clear_block_neon, export=1 | |||||
vmov.i16 q0, #0 | |||||
.rept 8 | |||||
vst1.16 {q0}, [r0,:128]! | |||||
.endr | |||||
bx lr | |||||
endfunc | |||||
function ff_clear_blocks_neon, export=1 | |||||
vmov.i16 q0, #0 | |||||
.rept 8*6 | |||||
vst1.16 {q0}, [r0,:128]! | |||||
.endr | |||||
bx lr | |||||
endfunc | |||||
function ff_put_pixels_clamped_neon, export=1 | function ff_put_pixels_clamped_neon, export=1 | ||||
vld1.16 {d16-d19}, [r0,:128]! | vld1.16 {d16-d19}, [r0,:128]! | ||||
vqmovun.s16 d0, q8 | vqmovun.s16 d0, q8 | ||||
@@ -31,12 +31,14 @@ | |||||
#include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "put_bits.h" | #include "put_bits.h" | ||||
typedef struct ASV1Context{ | typedef struct ASV1Context{ | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
BlockDSPContext bdsp; | |||||
DSPContext dsp; | DSPContext dsp; | ||||
PutBitContext pb; | PutBitContext pb; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
@@ -28,6 +28,7 @@ | |||||
#include "asv.h" | #include "asv.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "put_bits.h" | #include "put_bits.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
#include "mathops.h" | #include "mathops.h" | ||||
@@ -164,7 +165,7 @@ static inline int decode_mb(ASV1Context *a, int16_t block[6][64]) | |||||
{ | { | ||||
int i; | int i; | ||||
a->dsp.clear_blocks(block[0]); | |||||
a->bdsp.clear_blocks(block[0]); | |||||
if (a->avctx->codec_id == AV_CODEC_ID_ASV1) { | if (a->avctx->codec_id == AV_CODEC_ID_ASV1) { | ||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
@@ -280,6 +281,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
} | } | ||||
ff_asv_common_init(avctx); | ff_asv_common_init(avctx); | ||||
ff_blockdsp_init(&a->bdsp, avctx); | |||||
init_vlcs(a); | init_vlcs(a); | ||||
ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab); | ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab); | ||||
avctx->pix_fmt = AV_PIX_FMT_YUV420P; | avctx->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
@@ -24,9 +24,9 @@ | |||||
#include "libavutil/imgutils.h" | #include "libavutil/imgutils.h" | ||||
#include "libavutil/internal.h" | #include "libavutil/internal.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "dsputil.h" | |||||
#include "binkdata.h" | #include "binkdata.h" | ||||
#include "binkdsp.h" | #include "binkdsp.h" | ||||
#include "blockdsp.h" | |||||
#include "hpeldsp.h" | #include "hpeldsp.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
#include "mathops.h" | #include "mathops.h" | ||||
@@ -113,7 +113,7 @@ typedef struct Bundle { | |||||
*/ | */ | ||||
typedef struct BinkContext { | typedef struct BinkContext { | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
DSPContext dsp; | |||||
BlockDSPContext bdsp; | |||||
HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
BinkDSPContext binkdsp; | BinkDSPContext binkdsp; | ||||
AVFrame *last; | AVFrame *last; | ||||
@@ -880,7 +880,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||||
} else { | } else { | ||||
put_pixels8x8_overlapped(dst, ref, stride); | put_pixels8x8_overlapped(dst, ref, stride); | ||||
} | } | ||||
c->dsp.clear_block(block); | |||||
c->bdsp.clear_block(block); | |||||
v = binkb_get_value(c, BINKB_SRC_INTER_COEFS); | v = binkb_get_value(c, BINKB_SRC_INTER_COEFS); | ||||
read_residue(gb, block, v); | read_residue(gb, block, v); | ||||
c->binkdsp.add_pixels8(dst, block, stride); | c->binkdsp.add_pixels8(dst, block, stride); | ||||
@@ -904,7 +904,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||||
break; | break; | ||||
case 5: | case 5: | ||||
v = binkb_get_value(c, BINKB_SRC_COLORS); | v = binkb_get_value(c, BINKB_SRC_COLORS); | ||||
c->dsp.fill_block_tab[1](dst, v, stride, 8); | |||||
c->bdsp.fill_block_tab[1](dst, v, stride, 8); | |||||
break; | break; | ||||
case 6: | case 6: | ||||
for (i = 0; i < 2; i++) | for (i = 0; i < 2; i++) | ||||
@@ -1047,7 +1047,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||||
break; | break; | ||||
case FILL_BLOCK: | case FILL_BLOCK: | ||||
v = get_value(c, BINK_SRC_COLORS); | v = get_value(c, BINK_SRC_COLORS); | ||||
c->dsp.fill_block_tab[0](dst, v, stride, 16); | |||||
c->bdsp.fill_block_tab[0](dst, v, stride, 16); | |||||
break; | break; | ||||
case PATTERN_BLOCK: | case PATTERN_BLOCK: | ||||
for (i = 0; i < 2; i++) | for (i = 0; i < 2; i++) | ||||
@@ -1117,7 +1117,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||||
return AVERROR_INVALIDDATA; | return AVERROR_INVALIDDATA; | ||||
} | } | ||||
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8); | c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8); | ||||
c->dsp.clear_block(block); | |||||
c->bdsp.clear_block(block); | |||||
v = get_bits(gb, 7); | v = get_bits(gb, 7); | ||||
read_residue(gb, block, v); | read_residue(gb, block, v); | ||||
c->binkdsp.add_pixels8(dst, block, stride); | c->binkdsp.add_pixels8(dst, block, stride); | ||||
@@ -1130,7 +1130,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb, | |||||
break; | break; | ||||
case FILL_BLOCK: | case FILL_BLOCK: | ||||
v = get_value(c, BINK_SRC_COLORS); | v = get_value(c, BINK_SRC_COLORS); | ||||
c->dsp.fill_block_tab[1](dst, v, stride, 8); | |||||
c->bdsp.fill_block_tab[1](dst, v, stride, 8); | |||||
break; | break; | ||||
case INTER_BLOCK: | case INTER_BLOCK: | ||||
xoff = get_value(c, BINK_SRC_X_OFF); | xoff = get_value(c, BINK_SRC_X_OFF); | ||||
@@ -1310,7 +1310,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
avctx->pix_fmt = c->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P; | avctx->pix_fmt = c->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P; | ||||
ff_dsputil_init(&c->dsp, avctx); | |||||
ff_blockdsp_init(&c->bdsp, avctx); | |||||
ff_hpeldsp_init(&c->hdsp, avctx->flags); | ff_hpeldsp_init(&c->hdsp, avctx->flags); | ||||
ff_binkdsp_init(&c->binkdsp); | ff_binkdsp_init(&c->binkdsp); | ||||
@@ -0,0 +1,78 @@ | |||||
/* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include <stdint.h> | |||||
#include <string.h> | |||||
#include "config.h" | |||||
#include "libavutil/attributes.h" | |||||
#include "avcodec.h" | |||||
#include "blockdsp.h" | |||||
#include "version.h" | |||||
/**
 * Zero a single block of 64 int16_t coefficients.
 *
 * @param block first coefficient of the block to clear
 */
static void clear_block_8_c(int16_t *block)
{
    memset(block, 0, sizeof(int16_t) * 64);
}
/**
 * Zero six consecutive blocks of 64 int16_t coefficients each.
 *
 * @param blocks first coefficient of the first block; 6 * 64 entries
 *               starting here are cleared
 */
static void clear_blocks_8_c(int16_t *blocks)
{
    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
}
/**
 * Fill a 16-pixel-wide block with a constant byte.
 *
 * @param block     first pixel of the top row
 * @param value     byte written to every pixel
 * @param line_size offset in bytes from one row to the next
 * @param h         number of rows to fill; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 16);
        block += line_size;
    }
}
/**
 * Fill an 8-pixel-wide block with a constant byte.
 *
 * @param block     first pixel of the top row
 * @param value     byte written to every pixel
 * @param line_size offset in bytes from one row to the next
 * @param h         number of rows to fill; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 8);
        block += line_size;
    }
}
/**
 * Initialise a BlockDSPContext.
 *
 * The portable C routines are installed first; the per-architecture
 * initialisers are then given the chance to replace them.
 *
 * @param c     context to fill in
 * @param avctx codec context; bits_per_raw_sample > 8 is reported to the
 *              architecture initialisers as high_bit_depth
 */
av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx)
{
    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    c->clear_block  = clear_block_8_c;
    c->clear_blocks = clear_blocks_8_c;

    /* Index 0 is the 16-pixel-wide fill, index 1 the 8-pixel-wide fill. */
    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;

    if (ARCH_ARM)
        ff_blockdsp_init_arm(c, high_bit_depth);
    if (ARCH_PPC)
        ff_blockdsp_init_ppc(c, high_bit_depth);
    if (ARCH_X86)
        /* The x86 initialiser additionally takes avctx while the XvMC
         * compatibility API is still compiled in. */
#if FF_API_XVMC
        ff_blockdsp_init_x86(c, high_bit_depth, avctx);
#else
        ff_blockdsp_init_x86(c, high_bit_depth);
#endif /* FF_API_XVMC */
}
@@ -0,0 +1,52 @@ | |||||
/* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#ifndef AVCODEC_BLOCKDSP_H | |||||
#define AVCODEC_BLOCKDSP_H | |||||
#include <stdint.h> | |||||
#include "avcodec.h" | |||||
#include "version.h" | |||||
/**
 * Fill a rectangular block of pixels with a constant byte.
 *
 * @param block     first pixel of the top row; annotated as aligned to the
 *                  block width (8 or 16)
 * @param value     byte written to every pixel of the block
 * @param line_size offset in bytes from one row to the next
 * @param h         number of rows to fill
 */
typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
                             uint8_t value, int line_size, int h);
typedef struct BlockDSPContext { | |||||
void (*clear_block)(int16_t *block /* align 16 */); | |||||
void (*clear_blocks)(int16_t *blocks /* align 16 */); | |||||
op_fill_func fill_block_tab[2]; | |||||
} BlockDSPContext; | |||||
/**
 * Initialise c with the C routines, then let the architecture-specific
 * initialisers below override them.
 */
void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx);

/* Architecture-specific initialisers, called from ff_blockdsp_init().
 * high_bit_depth is non-zero when avctx->bits_per_raw_sample exceeds 8. */
void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth);
void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth);
#if FF_API_XVMC
/* While the XvMC compatibility API is compiled in, the x86 initialiser
 * also receives the codec context. */
void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
                          AVCodecContext *avctx);
#else
void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth);
#endif /* FF_API_XVMC */
#endif /* AVCODEC_BLOCKDSP_H */ |
@@ -759,6 +759,7 @@ av_cold int ff_cavs_init(AVCodecContext *avctx) | |||||
{ | { | ||||
AVSContext *h = avctx->priv_data; | AVSContext *h = avctx->priv_data; | ||||
ff_blockdsp_init(&h->bdsp, avctx); | |||||
ff_dsputil_init(&h->dsp, avctx); | ff_dsputil_init(&h->dsp, avctx); | ||||
ff_h264chroma_init(&h->h264chroma, 8); | ff_h264chroma_init(&h->h264chroma, 8); | ||||
ff_videodsp_init(&h->vdsp, 8); | ff_videodsp_init(&h->vdsp, 8); | ||||
@@ -23,6 +23,7 @@ | |||||
#define AVCODEC_CAVS_H | #define AVCODEC_CAVS_H | ||||
#include "cavsdsp.h" | #include "cavsdsp.h" | ||||
#include "blockdsp.h" | |||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "h264chroma.h" | #include "h264chroma.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
@@ -162,6 +163,7 @@ typedef struct AVSFrame { | |||||
typedef struct AVSContext { | typedef struct AVSContext { | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
DSPContext dsp; | DSPContext dsp; | ||||
BlockDSPContext bdsp; | |||||
H264ChromaContext h264chroma; | H264ChromaContext h264chroma; | ||||
VideoDSPContext vdsp; | VideoDSPContext vdsp; | ||||
CAVSDSPContext cdsp; | CAVSDSPContext cdsp; | ||||
@@ -581,7 +581,7 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb, | |||||
dequant_shift[qp], i)) < 0) | dequant_shift[qp], i)) < 0) | ||||
return ret; | return ret; | ||||
h->cdsp.cavs_idct8_add(dst, block, stride); | h->cdsp.cavs_idct8_add(dst, block, stride); | ||||
h->dsp.clear_block(block); | |||||
h->bdsp.clear_block(block); | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -25,6 +25,7 @@ | |||||
#include "libavutil/imgutils.h" | #include "libavutil/imgutils.h" | ||||
#include "libavutil/timer.h" | #include "libavutil/timer.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "dnxhddata.h" | #include "dnxhddata.h" | ||||
#include "dsputil.h" | #include "dsputil.h" | ||||
@@ -33,6 +34,7 @@ | |||||
typedef struct DNXHDContext { | typedef struct DNXHDContext { | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
BlockDSPContext bdsp; | |||||
int cid; ///< compression id | int cid; ///< compression id | ||||
unsigned int width, height; | unsigned int width, height; | ||||
unsigned int mb_width, mb_height; | unsigned int mb_width, mb_height; | ||||
@@ -133,6 +135,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||||
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV444P10; | ctx->avctx->pix_fmt = AV_PIX_FMT_YUV444P10; | ||||
ctx->avctx->bits_per_raw_sample = 10; | ctx->avctx->bits_per_raw_sample = 10; | ||||
if (ctx->bit_depth != 10) { | if (ctx->bit_depth != 10) { | ||||
ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | |||||
ff_dsputil_init(&ctx->dsp, ctx->avctx); | ff_dsputil_init(&ctx->dsp, ctx->avctx); | ||||
ctx->bit_depth = 10; | ctx->bit_depth = 10; | ||||
ctx->decode_dct_block = dnxhd_decode_dct_block_10_444; | ctx->decode_dct_block = dnxhd_decode_dct_block_10_444; | ||||
@@ -142,6 +145,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||||
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P10; | ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P10; | ||||
ctx->avctx->bits_per_raw_sample = 10; | ctx->avctx->bits_per_raw_sample = 10; | ||||
if (ctx->bit_depth != 10) { | if (ctx->bit_depth != 10) { | ||||
ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | |||||
ff_dsputil_init(&ctx->dsp, ctx->avctx); | ff_dsputil_init(&ctx->dsp, ctx->avctx); | ||||
ctx->bit_depth = 10; | ctx->bit_depth = 10; | ||||
ctx->decode_dct_block = dnxhd_decode_dct_block_10; | ctx->decode_dct_block = dnxhd_decode_dct_block_10; | ||||
@@ -150,6 +154,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||||
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P; | ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P; | ||||
ctx->avctx->bits_per_raw_sample = 8; | ctx->avctx->bits_per_raw_sample = 8; | ||||
if (ctx->bit_depth != 8) { | if (ctx->bit_depth != 8) { | ||||
ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | |||||
ff_dsputil_init(&ctx->dsp, ctx->avctx); | ff_dsputil_init(&ctx->dsp, ctx->avctx); | ||||
ctx->bit_depth = 8; | ctx->bit_depth = 8; | ||||
ctx->decode_dct_block = dnxhd_decode_dct_block_8; | ctx->decode_dct_block = dnxhd_decode_dct_block_8; | ||||
@@ -307,12 +312,12 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame, | |||||
skip_bits1(&ctx->gb); | skip_bits1(&ctx->gb); | ||||
for (i = 0; i < 8; i++) { | for (i = 0; i < 8; i++) { | ||||
ctx->dsp.clear_block(ctx->blocks[i]); | |||||
ctx->bdsp.clear_block(ctx->blocks[i]); | |||||
ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); | ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); | ||||
} | } | ||||
if (ctx->is_444) { | if (ctx->is_444) { | ||||
for (; i < 12; i++) { | for (; i < 12; i++) { | ||||
ctx->dsp.clear_block(ctx->blocks[i]); | |||||
ctx->bdsp.clear_block(ctx->blocks[i]); | |||||
ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); | ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); | ||||
} | } | ||||
} | } | ||||
@@ -29,6 +29,7 @@ | |||||
#include "libavutil/timer.h" | #include "libavutil/timer.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
#include "mpegvideo.h" | #include "mpegvideo.h" | ||||
@@ -305,6 +306,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) | |||||
avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; | avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; | ||||
ff_blockdsp_init(&ctx->bdsp, avctx); | |||||
ff_dsputil_init(&ctx->m.dsp, avctx); | ff_dsputil_init(&ctx->m.dsp, avctx); | ||||
ff_dct_common_init(&ctx->m); | ff_dct_common_init(&ctx->m); | ||||
if (!ctx->m.dct_quantize) | if (!ctx->m.dct_quantize) | ||||
@@ -556,10 +558,10 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) | |||||
ptr_v + ctx->dct_uv_offset, | ptr_v + ctx->dct_uv_offset, | ||||
ctx->m.uvlinesize); | ctx->m.uvlinesize); | ||||
} else { | } else { | ||||
dsp->clear_block(ctx->blocks[4]); | |||||
dsp->clear_block(ctx->blocks[5]); | |||||
dsp->clear_block(ctx->blocks[6]); | |||||
dsp->clear_block(ctx->blocks[7]); | |||||
ctx->bdsp.clear_block(ctx->blocks[4]); | |||||
ctx->bdsp.clear_block(ctx->blocks[5]); | |||||
ctx->bdsp.clear_block(ctx->blocks[6]); | |||||
ctx->bdsp.clear_block(ctx->blocks[7]); | |||||
} | } | ||||
} else { | } else { | ||||
dsp->get_pixels(ctx->blocks[4], | dsp->get_pixels(ctx->blocks[4], | ||||
@@ -41,6 +41,7 @@ typedef struct RCEntry { | |||||
typedef struct DNXHDEncContext { | typedef struct DNXHDEncContext { | ||||
AVClass *class; | AVClass *class; | ||||
BlockDSPContext bdsp; | |||||
MpegEncContext m; ///< Used for quantization dsp functions | MpegEncContext m; ///< Used for quantization dsp functions | ||||
int cid; | int cid; | ||||
@@ -373,26 +373,6 @@ static int sum_abs_dctelem_c(int16_t *block) | |||||
return sum; | return sum; | ||||
} | } | ||||
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h) | |||||
{ | |||||
int i; | |||||
for (i = 0; i < h; i++) { | |||||
memset(block, value, 16); | |||||
block += line_size; | |||||
} | |||||
} | |||||
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h) | |||||
{ | |||||
int i; | |||||
for (i = 0; i < h; i++) { | |||||
memset(block, value, 8); | |||||
block += line_size; | |||||
} | |||||
} | |||||
#define avg2(a, b) ((a + b + 1) >> 1) | #define avg2(a, b) ((a + b + 1) >> 1) | ||||
#define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2) | #define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2) | ||||
@@ -1408,16 +1388,6 @@ static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, | |||||
memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); | memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); | ||||
} | } | ||||
static void clear_block_8_c(int16_t *block) | |||||
{ | |||||
memset(block, 0, sizeof(int16_t) * 64); | |||||
} | |||||
static void clear_blocks_8_c(int16_t *blocks) | |||||
{ | |||||
memset(blocks, 0, sizeof(int16_t) * 6 * 64); | |||||
} | |||||
/* init static data */ | /* init static data */ | ||||
av_cold void ff_dsputil_static_init(void) | av_cold void ff_dsputil_static_init(void) | ||||
{ | { | ||||
@@ -1487,9 +1457,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
c->pix_sum = pix_sum_c; | c->pix_sum = pix_sum_c; | ||||
c->pix_norm1 = pix_norm1_c; | c->pix_norm1 = pix_norm1_c; | ||||
c->fill_block_tab[0] = fill_block16_c; | |||||
c->fill_block_tab[1] = fill_block8_c; | |||||
/* TODO [0] 16 [1] 8 */ | /* TODO [0] 16 [1] 8 */ | ||||
c->pix_abs[0][0] = pix_abs16_c; | c->pix_abs[0][0] = pix_abs16_c; | ||||
c->pix_abs[0][1] = pix_abs16_x2_c; | c->pix_abs[0][1] = pix_abs16_x2_c; | ||||
@@ -1546,9 +1513,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
c->draw_edges = draw_edges_8_c; | c->draw_edges = draw_edges_8_c; | ||||
c->clear_block = clear_block_8_c; | |||||
c->clear_blocks = clear_blocks_8_c; | |||||
switch (avctx->bits_per_raw_sample) { | switch (avctx->bits_per_raw_sample) { | ||||
case 9: | case 9: | ||||
case 10: | case 10: | ||||
@@ -38,26 +38,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | |||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, | int dxx, int dxy, int dyx, int dyy, int shift, int r, | ||||
int width, int height); | int width, int height); | ||||
/* minimum alignment rules ;) | |||||
* If you notice errors in the align stuff, need more alignment for some ASM code | |||||
* for some CPU or need to use a function with less aligned data then send a mail | |||||
* to the libav-devel mailing list, ... | |||||
* | |||||
* !warning These alignments might not match reality, (missing attribute((align)) | |||||
* stuff somewhere possible). | |||||
* I (Michael) did not check them, these are just the alignments which I think | |||||
* could be reached easily ... | |||||
* | |||||
* !future video codecs might need functions with less strict alignment | |||||
*/ | |||||
/* add and put pixel (decoding) | |||||
* Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. | |||||
* h for op_pixels_func is limited to { width / 2, width }, | |||||
* but never larger than 16 and never smaller than 4. */ | |||||
typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */, | |||||
uint8_t value, int line_size, int h); | |||||
struct MpegEncContext; | struct MpegEncContext; | ||||
/* Motion estimation: | /* Motion estimation: | ||||
* h is limited to { width / 2, width, 2 * width }, | * h is limited to { width / 2, width, 2 * width }, | ||||
@@ -116,8 +96,7 @@ typedef struct DSPContext { | |||||
int stride, int h, int ox, int oy, | int stride, int h, int ox, int oy, | ||||
int dxx, int dxy, int dyx, int dyy, | int dxx, int dxy, int dyx, int dyy, | ||||
int shift, int r, int width, int height); | int shift, int r, int width, int height); | ||||
void (*clear_block)(int16_t *block /* align 16 */); | |||||
void (*clear_blocks)(int16_t *blocks /* align 16 */); | |||||
int (*pix_sum)(uint8_t *pix, int line_size); | int (*pix_sum)(uint8_t *pix, int line_size); | ||||
int (*pix_norm1)(uint8_t *pix, int line_size); | int (*pix_norm1)(uint8_t *pix, int line_size); | ||||
@@ -234,8 +213,6 @@ typedef struct DSPContext { | |||||
*/ | */ | ||||
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, | void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, | ||||
int32_t max, unsigned int len); | int32_t max, unsigned int len); | ||||
op_fill_func fill_block_tab[2]; | |||||
} DSPContext; | } DSPContext; | ||||
void ff_dsputil_static_init(void); | void ff_dsputil_static_init(void); | ||||
@@ -44,6 +44,7 @@ | |||||
typedef struct MadContext { | typedef struct MadContext { | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
BlockDSPContext bdsp; | |||||
DSPContext dsp; | DSPContext dsp; | ||||
AVFrame *last_frame; | AVFrame *last_frame; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
@@ -61,6 +62,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
MadContext *s = avctx->priv_data; | MadContext *s = avctx->priv_data; | ||||
s->avctx = avctx; | s->avctx = avctx; | ||||
avctx->pix_fmt = AV_PIX_FMT_YUV420P; | avctx->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
ff_blockdsp_init(&s->bdsp, avctx); | |||||
ff_dsputil_init(&s->dsp, avctx); | ff_dsputil_init(&s->dsp, avctx); | ||||
ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); | ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); | ||||
ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | ||||
@@ -207,7 +209,7 @@ static void decode_mb(MadContext *s, AVFrame *frame, int inter) | |||||
int add = 2*decode_motion(&s->gb); | int add = 2*decode_motion(&s->gb); | ||||
comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add); | comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add); | ||||
} else { | } else { | ||||
s->dsp.clear_block(s->block); | |||||
s->bdsp.clear_block(s->block); | |||||
decode_block_intra(s, s->block); | decode_block_intra(s, s->block); | ||||
idct_put(s, frame, s->block, s->mb_x, s->mb_y, j); | idct_put(s, frame, s->block, s->mb_x, s->mb_y, j); | ||||
} | } | ||||
@@ -27,6 +27,7 @@ | |||||
*/ | */ | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "aandcttab.h" | #include "aandcttab.h" | ||||
#include "eaidct.h" | #include "eaidct.h" | ||||
@@ -46,6 +47,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx) | |||||
TqiContext *t = avctx->priv_data; | TqiContext *t = avctx->priv_data; | ||||
MpegEncContext *s = &t->s; | MpegEncContext *s = &t->s; | ||||
s->avctx = avctx; | s->avctx = avctx; | ||||
ff_blockdsp_init(&s->bdsp, avctx); | |||||
ff_dsputil_init(&s->dsp, avctx); | ff_dsputil_init(&s->dsp, avctx); | ||||
ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); | ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); | ||||
ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | ||||
@@ -59,7 +61,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx) | |||||
static int tqi_decode_mb(MpegEncContext *s, int16_t (*block)[64]) | static int tqi_decode_mb(MpegEncContext *s, int16_t (*block)[64]) | ||||
{ | { | ||||
int n; | int n; | ||||
s->dsp.clear_blocks(block[0]); | |||||
s->bdsp.clear_blocks(block[0]); | |||||
for (n=0; n<6; n++) | for (n=0; n<6; n++) | ||||
if (ff_mpeg1_decode_block_intra(s, block[n], n) < 0) | if (ff_mpeg1_decode_block_intra(s, block[n], n) < 0) | ||||
return -1; | return -1; | ||||
@@ -29,6 +29,7 @@ | |||||
#include "libavutil/intreadwrite.h" | #include "libavutil/intreadwrite.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "bytestream.h" | #include "bytestream.h" | ||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
@@ -72,6 +73,7 @@ static const uint8_t chroma_quant[64] = { | |||||
}; | }; | ||||
typedef struct JPGContext { | typedef struct JPGContext { | ||||
BlockDSPContext bdsp; | |||||
DSPContext dsp; | DSPContext dsp; | ||||
ScanTable scantable; | ScanTable scantable; | ||||
@@ -150,6 +152,7 @@ static av_cold int jpg_init(AVCodecContext *avctx, JPGContext *c) | |||||
if (ret) | if (ret) | ||||
return ret; | return ret; | ||||
ff_blockdsp_init(&c->bdsp, avctx); | |||||
ff_dsputil_init(&c->dsp, avctx); | ff_dsputil_init(&c->dsp, avctx); | ||||
ff_init_scantable(c->dsp.idct_permutation, &c->scantable, | ff_init_scantable(c->dsp.idct_permutation, &c->scantable, | ||||
ff_zigzag_direct); | ff_zigzag_direct); | ||||
@@ -193,7 +196,7 @@ static int jpg_decode_block(JPGContext *c, GetBitContext *gb, | |||||
const int is_chroma = !!plane; | const int is_chroma = !!plane; | ||||
const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant; | const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant; | ||||
c->dsp.clear_block(block); | |||||
c->bdsp.clear_block(block); | |||||
dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3); | dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3); | ||||
if (dc < 0) | if (dc < 0) | ||||
return AVERROR_INVALIDDATA; | return AVERROR_INVALIDDATA; | ||||
@@ -259,7 +262,7 @@ static int jpg_decode_data(JPGContext *c, int width, int height, | |||||
for (i = 0; i < 3; i++) | for (i = 0; i < 3; i++) | ||||
c->prev_dc[i] = 1024; | c->prev_dc[i] = 1024; | ||||
bx = by = 0; | bx = by = 0; | ||||
c->dsp.clear_blocks(c->block[0]); | |||||
c->bdsp.clear_blocks(c->block[0]); | |||||
for (mb_y = 0; mb_y < mb_h; mb_y++) { | for (mb_y = 0; mb_y < mb_h; mb_y++) { | ||||
for (mb_x = 0; mb_x < mb_w; mb_x++) { | for (mb_x = 0; mb_x < mb_w; mb_x++) { | ||||
if (mask && !mask[mb_x * 2] && !mask[mb_x * 2 + 1] && | if (mask && !mask[mb_x * 2] && !mask[mb_x * 2 + 1] && | ||||
@@ -433,7 +433,7 @@ static int h261_decode_mb(H261Context *h) | |||||
intra: | intra: | ||||
/* decode each block */ | /* decode each block */ | ||||
if (s->mb_intra || HAS_CBP(h->mtype)) { | if (s->mb_intra || HAS_CBP(h->mtype)) { | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (h261_decode_block(h, s->block[i], i, cbp & 32) < 0) | if (h261_decode_block(h, s->block[i], i, cbp & 32) < 0) | ||||
return SLICE_ERROR; | return SLICE_ERROR; | ||||
@@ -197,7 +197,7 @@ static inline int get_p_cbp(MpegEncContext * s, | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ | if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ | ||||
s->block_last_index[i]= -1; | s->block_last_index[i]= -1; | ||||
s->dsp.clear_block(s->block[i]); | |||||
s->bdsp.clear_block(s->block[i]); | |||||
} | } | ||||
} | } | ||||
}else{ | }else{ | ||||
@@ -538,7 +538,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){ | |||||
int sign; | int sign; | ||||
assert(w->orient<12); | assert(w->orient<12); | ||||
s->dsp.clear_block(s->block[0]); | |||||
s->bdsp.clear_block(s->block[0]); | |||||
if(chroma){ | if(chroma){ | ||||
dc_mode=2; | dc_mode=2; | ||||
@@ -538,7 +538,7 @@ retry: | |||||
rl = &ff_rl_intra_aic; | rl = &ff_rl_intra_aic; | ||||
i = 0; | i = 0; | ||||
s->gb= gb; | s->gb= gb; | ||||
s->dsp.clear_block(block); | |||||
s->bdsp.clear_block(block); | |||||
goto retry; | goto retry; | ||||
} | } | ||||
av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); | av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); | ||||
@@ -628,7 +628,7 @@ int ff_h263_decode_mb(MpegEncContext *s, | |||||
} | } | ||||
}while(cbpc == 20); | }while(cbpc == 20); | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
dquant = cbpc & 8; | dquant = cbpc & 8; | ||||
s->mb_intra = ((cbpc & 4) != 0); | s->mb_intra = ((cbpc & 4) != 0); | ||||
@@ -723,7 +723,7 @@ int ff_h263_decode_mb(MpegEncContext *s, | |||||
s->mb_intra = IS_INTRA(mb_type); | s->mb_intra = IS_INTRA(mb_type); | ||||
if(HAS_CBP(mb_type)){ | if(HAS_CBP(mb_type)){ | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1); | cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1); | ||||
if(s->mb_intra){ | if(s->mb_intra){ | ||||
dquant = IS_QUANT(mb_type); | dquant = IS_QUANT(mb_type); | ||||
@@ -797,7 +797,7 @@ int ff_h263_decode_mb(MpegEncContext *s, | |||||
} | } | ||||
}while(cbpc == 8); | }while(cbpc == 8); | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
dquant = cbpc & 4; | dquant = cbpc & 4; | ||||
s->mb_intra = 1; | s->mb_intra = 1; | ||||
@@ -28,12 +28,12 @@ | |||||
#include "libavutil/intreadwrite.h" | #include "libavutil/intreadwrite.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "dsputil.h" | |||||
#include "blockdsp.h" | |||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
typedef struct JvContext { | typedef struct JvContext { | ||||
DSPContext dsp; | |||||
BlockDSPContext bdsp; | |||||
AVFrame *frame; | AVFrame *frame; | ||||
uint32_t palette[AVPALETTE_COUNT]; | uint32_t palette[AVPALETTE_COUNT]; | ||||
int palette_has_changed; | int palette_has_changed; | ||||
@@ -48,7 +48,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
avctx->pix_fmt = AV_PIX_FMT_PAL8; | avctx->pix_fmt = AV_PIX_FMT_PAL8; | ||||
ff_dsputil_init(&s->dsp, avctx); | |||||
ff_blockdsp_init(&s->bdsp, avctx); | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -113,14 +113,14 @@ static inline void decode4x4(GetBitContext *gb, uint8_t *dst, int linesize) | |||||
* Decode 8x8 block | * Decode 8x8 block | ||||
*/ | */ | ||||
static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize, | static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize, | ||||
DSPContext *dsp) | |||||
BlockDSPContext *bdsp) | |||||
{ | { | ||||
int i, j, v[2]; | int i, j, v[2]; | ||||
switch (get_bits(gb, 2)) { | switch (get_bits(gb, 2)) { | ||||
case 1: | case 1: | ||||
v[0] = get_bits(gb, 8); | v[0] = get_bits(gb, 8); | ||||
dsp->fill_block_tab[1](dst, v[0], linesize, 8); | |||||
bdsp->fill_block_tab[1](dst, v[0], linesize, 8); | |||||
break; | break; | ||||
case 2: | case 2: | ||||
v[0] = get_bits(gb, 8); | v[0] = get_bits(gb, 8); | ||||
@@ -163,7 +163,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
for (i = 0; i < avctx->width; i += 8) | for (i = 0; i < avctx->width; i += 8) | ||||
decode8x8(&gb, | decode8x8(&gb, | ||||
s->frame->data[0] + j * s->frame->linesize[0] + i, | s->frame->data[0] + j * s->frame->linesize[0] + i, | ||||
s->frame->linesize[0], &s->dsp); | |||||
s->frame->linesize[0], &s->bdsp); | |||||
buf += video_size; | buf += video_size; | ||||
} else if (video_type == 2) { | } else if (video_type == 2) { | ||||
@@ -28,12 +28,14 @@ | |||||
*/ | */ | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "mpegvideo.h" | #include "mpegvideo.h" | ||||
#include "mpeg12.h" | #include "mpeg12.h" | ||||
#include "thread.h" | #include "thread.h" | ||||
typedef struct MDECContext { | typedef struct MDECContext { | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
BlockDSPContext bdsp; | |||||
DSPContext dsp; | DSPContext dsp; | ||||
ThreadFrame frame; | ThreadFrame frame; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
@@ -123,7 +125,7 @@ static inline int decode_mb(MDECContext *a, int16_t block[6][64]) | |||||
int i, ret; | int i, ret; | ||||
const int block_index[6] = { 5, 4, 0, 1, 2, 3 }; | const int block_index[6] = { 5, 4, 0, 1, 2, 3 }; | ||||
a->dsp.clear_blocks(block[0]); | |||||
a->bdsp.clear_blocks(block[0]); | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if ((ret = mdec_decode_block_intra(a, block[block_index[i]], | if ((ret = mdec_decode_block_intra(a, block[block_index[i]], | ||||
@@ -212,6 +214,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
a->avctx = avctx; | a->avctx = avctx; | ||||
ff_blockdsp_init(&a->bdsp, avctx); | |||||
ff_dsputil_init(&a->dsp, avctx); | ff_dsputil_init(&a->dsp, avctx); | ||||
ff_mpeg12_init_vlcs(); | ff_mpeg12_init_vlcs(); | ||||
ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct); | ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct); | ||||
@@ -24,6 +24,7 @@ | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "internal.h" | #include "internal.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "bytestream.h" | #include "bytestream.h" | ||||
@@ -52,6 +53,7 @@ typedef struct { | |||||
GetBitContext gb; | GetBitContext gb; | ||||
ScanTable scantable; | ScanTable scantable; | ||||
BlockDSPContext bdsp; | |||||
DSPContext dsp; | DSPContext dsp; | ||||
HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
VLC vlc; | VLC vlc; | ||||
@@ -145,6 +147,7 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx) | |||||
av_log(avctx, AV_LOG_ERROR, "error initializing vlc table\n"); | av_log(avctx, AV_LOG_ERROR, "error initializing vlc table\n"); | ||||
return ret; | return ret; | ||||
} | } | ||||
ff_blockdsp_init(&ctx->bdsp, avctx); | |||||
ff_dsputil_init(&ctx->dsp, avctx); | ff_dsputil_init(&ctx->dsp, avctx); | ||||
ff_hpeldsp_init(&ctx->hdsp, avctx->flags); | ff_hpeldsp_init(&ctx->hdsp, avctx->flags); | ||||
ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag); | ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag); | ||||
@@ -227,7 +230,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale) | |||||
int16_t *block = ctx->dct_block; | int16_t *block = ctx->dct_block; | ||||
unsigned int pos; | unsigned int pos; | ||||
ctx->dsp.clear_block(block); | |||||
ctx->bdsp.clear_block(block); | |||||
block[0] = get_bits(&ctx->gb, 8) << 3; | block[0] = get_bits(&ctx->gb, 8) << 3; | ||||
@@ -35,6 +35,7 @@ | |||||
#include "libavutil/imgutils.h" | #include "libavutil/imgutils.h" | ||||
#include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "internal.h" | #include "internal.h" | ||||
#include "mjpeg.h" | #include "mjpeg.h" | ||||
#include "mjpegdec.h" | #include "mjpegdec.h" | ||||
@@ -92,6 +93,7 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx) | |||||
} | } | ||||
s->avctx = avctx; | s->avctx = avctx; | ||||
ff_blockdsp_init(&s->bdsp, avctx); | |||||
ff_hpeldsp_init(&s->hdsp, avctx->flags); | ff_hpeldsp_init(&s->hdsp, avctx->flags); | ||||
ff_dsputil_init(&s->dsp, avctx); | ff_dsputil_init(&s->dsp, avctx); | ||||
ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | ||||
@@ -486,7 +488,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, int16_t *block, | |||||
int16_t *quant_matrix, int Al) | int16_t *quant_matrix, int Al) | ||||
{ | { | ||||
int val; | int val; | ||||
s->dsp.clear_block(block); | |||||
s->bdsp.clear_block(block); | |||||
val = mjpeg_decode_dc(s, dc_index); | val = mjpeg_decode_dc(s, dc_index); | ||||
if (val == 0xffff) { | if (val == 0xffff) { | ||||
av_log(s->avctx, AV_LOG_ERROR, "error dc\n"); | av_log(s->avctx, AV_LOG_ERROR, "error dc\n"); | ||||
@@ -878,7 +880,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, | |||||
reference_data[c] + block_offset, | reference_data[c] + block_offset, | ||||
linesize[c], 8); | linesize[c], 8); | ||||
else { | else { | ||||
s->dsp.clear_block(s->block); | |||||
s->bdsp.clear_block(s->block); | |||||
if (decode_block(s, s->block, i, | if (decode_block(s, s->block, i, | ||||
s->dc_index[i], s->ac_index[i], | s->dc_index[i], s->ac_index[i], | ||||
s->quant_matrixes[s->quant_index[c]]) < 0) { | s->quant_matrixes[s->quant_index[c]]) < 0) { | ||||
@@ -33,6 +33,7 @@ | |||||
#include "libavutil/pixdesc.h" | #include "libavutil/pixdesc.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "hpeldsp.h" | #include "hpeldsp.h" | ||||
@@ -95,6 +96,7 @@ typedef struct MJpegDecodeContext { | |||||
uint8_t *last_nnz[MAX_COMPONENTS]; | uint8_t *last_nnz[MAX_COMPONENTS]; | ||||
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode) | uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode) | ||||
ScanTable scantable; | ScanTable scantable; | ||||
BlockDSPContext bdsp; | |||||
DSPContext dsp; | DSPContext dsp; | ||||
HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
@@ -776,10 +776,10 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64]) | |||||
av_dlog(s->avctx, "mb_type=%x\n", mb_type); | av_dlog(s->avctx, "mb_type=%x\n", mb_type); | ||||
// motion_type = 0; /* avoid warning */ | // motion_type = 0; /* avoid warning */ | ||||
if (IS_INTRA(mb_type)) { | if (IS_INTRA(mb_type)) { | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
if (!s->chroma_y_shift) | if (!s->chroma_y_shift) | ||||
s->dsp.clear_blocks(s->block[6]); | |||||
s->bdsp.clear_blocks(s->block[6]); | |||||
/* compute DCT type */ | /* compute DCT type */ | ||||
// FIXME: add an interlaced_dct coded var? | // FIXME: add an interlaced_dct coded var? | ||||
@@ -1014,13 +1014,13 @@ FF_ENABLE_DEPRECATION_WARNINGS | |||||
s->mb_intra = 0; | s->mb_intra = 0; | ||||
if (HAS_CBP(mb_type)) { | if (HAS_CBP(mb_type)) { | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
cbp = get_vlc2(&s->gb, ff_mb_pat_vlc.table, MB_PAT_VLC_BITS, 1); | cbp = get_vlc2(&s->gb, ff_mb_pat_vlc.table, MB_PAT_VLC_BITS, 1); | ||||
if (mb_block_count > 6) { | if (mb_block_count > 6) { | ||||
cbp <<= mb_block_count - 6; | cbp <<= mb_block_count - 6; | ||||
cbp |= get_bits(&s->gb, mb_block_count - 6); | cbp |= get_bits(&s->gb, mb_block_count - 6); | ||||
s->dsp.clear_blocks(s->block[6]); | |||||
s->bdsp.clear_blocks(s->block[6]); | |||||
} | } | ||||
if (cbp <= 0) { | if (cbp <= 0) { | ||||
av_log(s->avctx, AV_LOG_ERROR, | av_log(s->avctx, AV_LOG_ERROR, | ||||
@@ -1227,7 +1227,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, int16_t block[6][64]) | |||||
if (!IS_SKIP(mb_type)) { | if (!IS_SKIP(mb_type)) { | ||||
int i; | int i; | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
/* decode each block */ | /* decode each block */ | ||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, s->mb_intra, ctx->rvlc) < 0) { | if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, s->mb_intra, ctx->rvlc) < 0) { | ||||
@@ -1305,7 +1305,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||||
} | } | ||||
} while (cbpc == 20); | } while (cbpc == 20); | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
dquant = cbpc & 8; | dquant = cbpc & 8; | ||||
s->mb_intra = ((cbpc & 4) != 0); | s->mb_intra = ((cbpc & 4) != 0); | ||||
if (s->mb_intra) | if (s->mb_intra) | ||||
@@ -1451,7 +1451,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||||
if (modb2) { | if (modb2) { | ||||
cbp = 0; | cbp = 0; | ||||
} else { | } else { | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
cbp = get_bits(&s->gb, 6); | cbp = get_bits(&s->gb, 6); | ||||
} | } | ||||
@@ -1586,7 +1586,7 @@ intra: | |||||
if (!s->progressive_sequence) | if (!s->progressive_sequence) | ||||
s->interlaced_dct = get_bits1(&s->gb); | s->interlaced_dct = get_bits1(&s->gb); | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
/* decode each block */ | /* decode each block */ | ||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 1, 0) < 0) | if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 1, 0) < 0) | ||||
@@ -485,7 +485,7 @@ static inline int get_b_cbp(MpegEncContext *s, int16_t block[6][64], | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i)) & 1) == 0) { | if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i)) & 1) == 0) { | ||||
s->block_last_index[i] = -1; | s->block_last_index[i] = -1; | ||||
s->dsp.clear_block(s->block[i]); | |||||
s->bdsp.clear_block(s->block[i]); | |||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
@@ -33,6 +33,7 @@ | |||||
#include "libavutil/internal.h" | #include "libavutil/internal.h" | ||||
#include "libavutil/timer.h" | #include "libavutil/timer.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
#include "mathops.h" | #include "mathops.h" | ||||
@@ -363,7 +364,7 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type, | |||||
ff_init_block_index(s); | ff_init_block_index(s); | ||||
ff_update_block_index(s); | ff_update_block_index(s); | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
s->dest[0] = s->current_picture.f->data[0] + (s->mb_y * 16 * s->linesize) + s->mb_x * 16; | s->dest[0] = s->current_picture.f->data[0] + (s->mb_y * 16 * s->linesize) + s->mb_x * 16; | ||||
s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift); | s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift); | ||||
@@ -376,6 +377,7 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type, | |||||
/* init common dct for both encoder and decoder */ | /* init common dct for both encoder and decoder */ | ||||
av_cold int ff_dct_common_init(MpegEncContext *s) | av_cold int ff_dct_common_init(MpegEncContext *s) | ||||
{ | { | ||||
ff_blockdsp_init(&s->bdsp, s->avctx); | |||||
ff_dsputil_init(&s->dsp, s->avctx); | ff_dsputil_init(&s->dsp, s->avctx); | ||||
ff_hpeldsp_init(&s->hdsp, s->avctx->flags); | ff_hpeldsp_init(&s->hdsp, s->avctx->flags); | ||||
ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample); | ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample); | ||||
@@ -29,6 +29,7 @@ | |||||
#define AVCODEC_MPEGVIDEO_H | #define AVCODEC_MPEGVIDEO_H | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | |||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "error_resilience.h" | #include "error_resilience.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
@@ -347,6 +348,7 @@ typedef struct MpegEncContext { | |||||
int unrestricted_mv; ///< mv can point outside of the coded picture | int unrestricted_mv; ///< mv can point outside of the coded picture | ||||
int h263_long_vectors; ///< use horrible h263v1 long vector mode | int h263_long_vectors; ///< use horrible h263v1 long vector mode | ||||
BlockDSPContext bdsp; | |||||
DSPContext dsp; ///< pointers for accelerated dsp functions | DSPContext dsp; ///< pointers for accelerated dsp functions | ||||
HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
QpelDSPContext qdsp; | QpelDSPContext qdsp; | ||||
@@ -174,7 +174,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||||
} | } | ||||
} | } | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | ||||
{ | { | ||||
@@ -265,7 +265,7 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||||
} | } | ||||
} | } | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | ||||
{ | { | ||||
@@ -1,5 +1,6 @@ | |||||
OBJS += ppc/fmtconvert_altivec.o \ | OBJS += ppc/fmtconvert_altivec.o \ | ||||
OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o | |||||
OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o | OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o | ||||
OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o | OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o | ||||
OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o | OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o | ||||
@@ -0,0 +1,169 @@ | |||||
/* | |||||
* Copyright (c) 2002 Brian Foley | |||||
* Copyright (c) 2002 Dieter Shirley | |||||
* Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |||||
* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "config.h" | |||||
#if HAVE_ALTIVEC_H | |||||
#include <altivec.h> | |||||
#endif | |||||
#include <string.h> | |||||
#include "libavutil/attributes.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/mem.h" | |||||
#include "libavutil/ppc/cpu.h" | |||||
#include "libavutil/ppc/types_altivec.h" | |||||
#include "libavcodec/blockdsp.h" | |||||
/* ***** WARNING ***** WARNING ***** WARNING ***** */ | |||||
/* | |||||
* clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with | |||||
* a cache line size not equal to 32 bytes. Fortunately all processors used | |||||
* by Apple up to at least the 7450 (AKA second generation G4) use 32-byte | |||||
* cache lines. This is due to the use of the 'dcbz' instruction. It simply | |||||
* clears a single cache line to zero, so you need to know the cache line | |||||
* size to use it! It's absurd, but it's fast... | |||||
* | |||||
* Update 24/06/2003: Apple released the G5 yesterday, with a PPC970, whose
* cache line size is 128 bytes. Oops.
* The semantics of dcbz were changed: it now always clears 32 bytes, so the
* function below still works, but is slow. check_dcbzl_effect() therefore uses
* dcbzl, which is defined to clear a whole cache line (as dcbz did before), so
* we can still distinguish and use dcbz (32 bytes) or dcbzl (one cache line)
* as required.
* | |||||
* see <http://developer.apple.com/technotes/tn/tn2087.html> | |||||
* and <http://developer.apple.com/technotes/tn/tn2086.html> | |||||
*/ | |||||
static void clear_blocks_dcbz32_ppc(int16_t *blocks) | |||||
{ | |||||
register int misal = (unsigned long) blocks & 0x00000010, i = 0; | |||||
if (misal) { | |||||
((unsigned long *) blocks)[0] = 0L; | |||||
((unsigned long *) blocks)[1] = 0L; | |||||
((unsigned long *) blocks)[2] = 0L; | |||||
((unsigned long *) blocks)[3] = 0L; | |||||
i += 16; | |||||
} | |||||
for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32) | |||||
__asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory"); | |||||
if (misal) { | |||||
((unsigned long *) blocks)[188] = 0L; | |||||
((unsigned long *) blocks)[189] = 0L; | |||||
((unsigned long *) blocks)[190] = 0L; | |||||
((unsigned long *) blocks)[191] = 0L; | |||||
i += 16; | |||||
} | |||||
} | |||||
/* Counterpart of clear_blocks_dcbz32_ppc for CPUs on which dcbzl clears a
 * whole 128-byte cache line, i.e. the PPC970 AKA G5.
 *
 * The dcbzl path is taken only when the assembler supports the instruction
 * (HAVE_DCBZL) and blocks is 128-byte aligned; in every other case the
 * 6 * 64 coefficients are cleared with memset(). */
static void clear_blocks_dcbz128_ppc(int16_t *blocks)
{
#if HAVE_DCBZL
    if (!((unsigned long) blocks & 0x0000007f)) {
        int off;

        /* One dcbzl per 128-byte line of the buffer. */
        for (off = 0; off < sizeof(int16_t) * 6 * 64; off += 128)
            __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (off) : "memory");
        return;
    }
#endif
    /* Unaligned buffer or no dcbzl support: plain clear. */
    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
}
/* Report how many bytes a single dcbzl instruction sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, dcbzl is executed on its
 * middle, and the zero bytes found afterwards are counted.
 *
 * Update 24/06/2003: dcbzl is used instead of dcbz to get the intended
 * effect (Apple "fixed" dcbz). Unfortunately this cannot be used unless the
 * assembler knows about dcbzl, so without HAVE_DCBZL the result is 0.
 *
 * Returns the number of zeroed bytes, or 0 when dcbzl is unavailable or the
 * scratch buffer cannot be allocated. */
static long check_dcbzl_effect(void)
{
    long count = 0;
#if HAVE_DCBZL
    register char *fakedata = av_malloc(1024);
    register char *fakedata_middle;
    register long zero = 0, i = 0;

    if (!fakedata)
        return 0L;

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* Below the constraint "b" seems to mean "address base register"
     * in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so....
     * The "memory" clobber tells the compiler that the buffer contents
     * change here; without it the counting loop below may be evaluated
     * against the 0xFF fill and folded to a constant. */
    __asm__ volatile ("dcbzl %0, %1"
                      :: "b" (fakedata_middle), "r" (zero)
                      : "memory");

    for (i = 0; i < 1024; i++)
        if (fakedata[i] == (char) 0)
            count++;

    av_free(fakedata);
#endif

    return count;
}
#if HAVE_ALTIVEC
/* Clear one block of 64 int16_t coefficients (128 bytes) by storing
 * zero_s16v at each 16-byte offset; zero_s16v is presumably the all-zero
 * vector made available by LOAD_ZERO. */
static void clear_block_altivec(int16_t *block)
{
    int offset;
    LOAD_ZERO;

    for (offset = 0; offset < 128; offset += 16)
        vec_st(zero_s16v, offset, block);
}
#endif /* HAVE_ALTIVEC */
/* Install the PowerPC block-clearing routines into c.
 *
 * clear_blocks is chosen from the number of bytes check_dcbzl_effect()
 * reports (32 or 128; any other value leaves the existing pointer alone).
 * clear_block is replaced by the AltiVec version when the CPU flags report
 * AltiVec. Nothing is overridden when high_bit_depth is non-zero. */
av_cold void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth)
{
    // common optimizations whether AltiVec is available or not
    if (!high_bit_depth) {
        const long zeroed = check_dcbzl_effect();

        if (zeroed == 32)
            c->clear_blocks = clear_blocks_dcbz32_ppc;
        else if (zeroed == 128)
            c->clear_blocks = clear_blocks_dcbz128_ppc;
    }

#if HAVE_ALTIVEC
    if (PPC_ALTIVEC(av_get_cpu_flags()) && !high_bit_depth)
        c->clear_block = clear_block_altivec;
#endif /* HAVE_ALTIVEC */
}
@@ -558,19 +558,6 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, | |||||
} | } | ||||
} | } | ||||
static void clear_block_altivec(int16_t *block) | |||||
{ | |||||
LOAD_ZERO; | |||||
vec_st(zero_s16v, 0, block); | |||||
vec_st(zero_s16v, 16, block); | |||||
vec_st(zero_s16v, 32, block); | |||||
vec_st(zero_s16v, 48, block); | |||||
vec_st(zero_s16v, 64, block); | |||||
vec_st(zero_s16v, 80, block); | |||||
vec_st(zero_s16v, 96, block); | |||||
vec_st(zero_s16v, 112, block); | |||||
} | |||||
static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | ||||
uint8_t *src, int stride, int h) | uint8_t *src, int stride, int h) | ||||
{ | { | ||||
@@ -931,7 +918,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | |||||
if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
c->get_pixels = get_pixels_altivec; | c->get_pixels = get_pixels_altivec; | ||||
c->clear_block = clear_block_altivec; | |||||
} | } | ||||
c->hadamard8_diff[0] = hadamard8_diff16_altivec; | c->hadamard8_diff[0] = hadamard8_diff16_altivec; | ||||
@@ -24,124 +24,14 @@ | |||||
#include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/mem.h" | |||||
#include "libavutil/ppc/cpu.h" | #include "libavutil/ppc/cpu.h" | ||||
#include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "dsputil_altivec.h" | #include "dsputil_altivec.h" | ||||
/* ***** WARNING ***** WARNING ***** WARNING ***** */ | |||||
/* | |||||
* clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with | |||||
* a cache line size not equal to 32 bytes. Fortunately all processors used | |||||
* by Apple up to at least the 7450 (AKA second generation G4) use 32-byte | |||||
* cache lines. This is due to the use of the 'dcbz' instruction. It simply | |||||
* clears a single cache line to zero, so you need to know the cache line | |||||
* size to use it! It's absurd, but it's fast... | |||||
* | |||||
* update 24/06/2003: Apple released the G5 yesterday, with a PPC970. | |||||
* cache line size: 128 bytes. Oups. | |||||
* The semantics of dcbz was changed, it always clears 32 bytes. So the function | |||||
* below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl, | |||||
* which is defined to clear a cache line (as dcbz before). So we can still | |||||
* distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. | |||||
* | |||||
* see <http://developer.apple.com/technotes/tn/tn2087.html> | |||||
* and <http://developer.apple.com/technotes/tn/tn2086.html> | |||||
*/ | |||||
static void clear_blocks_dcbz32_ppc(int16_t *blocks) | |||||
{ | |||||
register int misal = (unsigned long) blocks & 0x00000010, i = 0; | |||||
if (misal) { | |||||
((unsigned long *) blocks)[0] = 0L; | |||||
((unsigned long *) blocks)[1] = 0L; | |||||
((unsigned long *) blocks)[2] = 0L; | |||||
((unsigned long *) blocks)[3] = 0L; | |||||
i += 16; | |||||
} | |||||
for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32) | |||||
__asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory"); | |||||
if (misal) { | |||||
((unsigned long *) blocks)[188] = 0L; | |||||
((unsigned long *) blocks)[189] = 0L; | |||||
((unsigned long *) blocks)[190] = 0L; | |||||
((unsigned long *) blocks)[191] = 0L; | |||||
i += 16; | |||||
} | |||||
} | |||||
/* Same as above, when dcbzl clears a whole 128 bytes cache line | |||||
* i.e. the PPC970 AKA G5. */ | |||||
static void clear_blocks_dcbz128_ppc(int16_t *blocks) | |||||
{ | |||||
#if HAVE_DCBZL | |||||
register int misal = (unsigned long) blocks & 0x0000007f, i = 0; | |||||
if (misal) { | |||||
/* We could probably also optimize this case, | |||||
* but there's not much point as the machines | |||||
* aren't available yet (2003-06-26). */ | |||||
memset(blocks, 0, sizeof(int16_t) * 6 * 64); | |||||
} else { | |||||
for (; i < sizeof(int16_t) * 6 * 64; i += 128) | |||||
__asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (i) : "memory"); | |||||
} | |||||
#else | |||||
memset(blocks, 0, sizeof(int16_t) * 6 * 64); | |||||
#endif | |||||
} | |||||
/* Check dcbz report how many bytes are set to 0 by dcbz. */ | |||||
/* update 24/06/2003: Replace dcbz by dcbzl to get the intended effect | |||||
* (Apple "fixed" dcbz). Unfortunately this cannot be used unless the | |||||
* assembler knows about dcbzl ... */ | |||||
static long check_dcbzl_effect(void) | |||||
{ | |||||
long count = 0; | |||||
#if HAVE_DCBZL | |||||
register char *fakedata = av_malloc(1024); | |||||
register char *fakedata_middle; | |||||
register long zero = 0, i = 0; | |||||
if (!fakedata) | |||||
return 0L; | |||||
fakedata_middle = fakedata + 512; | |||||
memset(fakedata, 0xFF, 1024); | |||||
/* Below the constraint "b" seems to mean "address base register" | |||||
* in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so.... */ | |||||
__asm__ volatile ("dcbzl %0, %1" :: "b" (fakedata_middle), "r" (zero)); | |||||
for (i = 0; i < 1024; i++) | |||||
if (fakedata[i] == (char) 0) | |||||
count++; | |||||
av_free(fakedata); | |||||
#endif | |||||
return count; | |||||
} | |||||
av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, | av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, | ||||
unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
{ | { | ||||
// common optimizations whether AltiVec is available or not | |||||
if (!high_bit_depth) { | |||||
switch (check_dcbzl_effect()) { | |||||
case 32: | |||||
c->clear_blocks = clear_blocks_dcbz32_ppc; | |||||
break; | |||||
case 128: | |||||
c->clear_blocks = clear_blocks_dcbz128_ppc; | |||||
break; | |||||
default: | |||||
break; | |||||
} | |||||
} | |||||
if (PPC_ALTIVEC(av_get_cpu_flags())) { | if (PPC_ALTIVEC(av_get_cpu_flags())) { | ||||
ff_dsputil_init_altivec(c, avctx, high_bit_depth); | ff_dsputil_init_altivec(c, avctx, high_bit_depth); | ||||
ff_int_init_altivec(c, avctx); | ff_int_init_altivec(c, avctx); | ||||
@@ -3019,7 +3019,7 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n, | |||||
int scale; | int scale; | ||||
int q1, q2 = 0; | int q1, q2 = 0; | ||||
s->dsp.clear_block(block); | |||||
s->bdsp.clear_block(block); | |||||
/* XXX: Guard against dumb values of mquant */ | /* XXX: Guard against dumb values of mquant */ | ||||
mquant = (mquant < 1) ? 0 : ((mquant > 31) ? 31 : mquant); | mquant = (mquant < 1) ? 0 : ((mquant > 31) ? 31 : mquant); | ||||
@@ -3226,7 +3226,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, | |||||
int ttblk = ttmb & 7; | int ttblk = ttmb & 7; | ||||
int pat = 0; | int pat = 0; | ||||
s->dsp.clear_block(block); | |||||
s->bdsp.clear_block(block); | |||||
if (ttmb == -1) { | if (ttmb == -1) { | ||||
ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)]; | ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)]; | ||||
@@ -4797,7 +4797,7 @@ static void vc1_decode_i_blocks(VC1Context *v) | |||||
dst[3] = dst[2] + 8; | dst[3] = dst[2] + 8; | ||||
dst[4] = s->dest[1]; | dst[4] = s->dest[1]; | ||||
dst[5] = s->dest[2]; | dst[5] = s->dest[2]; | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
mb_pos = s->mb_x + s->mb_y * s->mb_width; | mb_pos = s->mb_x + s->mb_y * s->mb_width; | ||||
s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA; | s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA; | ||||
s->current_picture.qscale_table[mb_pos] = v->pq; | s->current_picture.qscale_table[mb_pos] = v->pq; | ||||
@@ -4937,7 +4937,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v) | |||||
for (;s->mb_x < s->mb_width; s->mb_x++) { | for (;s->mb_x < s->mb_width; s->mb_x++) { | ||||
int16_t (*block)[64] = v->block[v->cur_blk_idx]; | int16_t (*block)[64] = v->block[v->cur_blk_idx]; | ||||
ff_update_block_index(s); | ff_update_block_index(s); | ||||
s->dsp.clear_blocks(block[0]); | |||||
s->bdsp.clear_blocks(block[0]); | |||||
mb_pos = s->mb_x + s->mb_y * s->mb_stride; | mb_pos = s->mb_x + s->mb_y * s->mb_stride; | ||||
s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_INTRA; | s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_INTRA; | ||||
s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0; | s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0; | ||||
@@ -5603,6 +5603,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) | |||||
if (ff_vc1_init_common(v) < 0) | if (ff_vc1_init_common(v) < 0) | ||||
return -1; | return -1; | ||||
ff_blockdsp_init(&s->bdsp, avctx); | |||||
ff_h264chroma_init(&v->h264chroma, 8); | ff_h264chroma_init(&v->h264chroma, 8); | ||||
ff_qpeldsp_init(&s->qdsp); | ff_qpeldsp_init(&s->qdsp); | ||||
ff_vc1dsp_init(&v->vc1dsp); | ff_vc1dsp_init(&v->vc1dsp); | ||||
@@ -28,6 +28,7 @@ | |||||
av_cold void ff_wmv2_common_init(Wmv2Context * w){ | av_cold void ff_wmv2_common_init(Wmv2Context * w){ | ||||
MpegEncContext * const s= &w->s; | MpegEncContext * const s= &w->s; | ||||
ff_blockdsp_init(&s->bdsp, s->avctx); | |||||
ff_wmv2dsp_init(&w->wdsp); | ff_wmv2dsp_init(&w->wdsp); | ||||
s->dsp.idct_permutation_type = w->wdsp.idct_perm; | s->dsp.idct_permutation_type = w->wdsp.idct_perm; | ||||
ff_init_scantable_permutation(s->dsp.idct_permutation, | ff_init_scantable_permutation(s->dsp.idct_permutation, | ||||
@@ -60,12 +61,12 @@ static void wmv2_add_block(Wmv2Context *w, int16_t *block1, uint8_t *dst, int st | |||||
case 1: | case 1: | ||||
ff_simple_idct84_add(dst , stride, block1); | ff_simple_idct84_add(dst , stride, block1); | ||||
ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]); | ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]); | ||||
s->dsp.clear_block(w->abt_block2[n]); | |||||
s->bdsp.clear_block(w->abt_block2[n]); | |||||
break; | break; | ||||
case 2: | case 2: | ||||
ff_simple_idct48_add(dst , stride, block1); | ff_simple_idct48_add(dst , stride, block1); | ||||
ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]); | ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]); | ||||
s->dsp.clear_block(w->abt_block2[n]); | |||||
s->bdsp.clear_block(w->abt_block2[n]); | |||||
break; | break; | ||||
default: | default: | ||||
av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n"); | av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n"); | ||||
@@ -385,7 +385,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||||
wmv2_pred_motion(w, &mx, &my); | wmv2_pred_motion(w, &mx, &my); | ||||
if(cbp){ | if(cbp){ | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
if(s->per_mb_rl_table){ | if(s->per_mb_rl_table){ | ||||
s->rl_table_index = decode012(&s->gb); | s->rl_table_index = decode012(&s->gb); | ||||
s->rl_chroma_table_index = s->rl_table_index; | s->rl_chroma_table_index = s->rl_table_index; | ||||
@@ -431,7 +431,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64]) | |||||
s->rl_chroma_table_index = s->rl_table_index; | s->rl_chroma_table_index = s->rl_table_index; | ||||
} | } | ||||
s->dsp.clear_blocks(s->block[0]); | |||||
s->bdsp.clear_blocks(s->block[0]); | |||||
for (i = 0; i < 6; i++) { | for (i = 0; i < 6; i++) { | ||||
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) | ||||
{ | { | ||||
@@ -44,6 +44,7 @@ OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp_init.o | |||||
OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o | OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o | ||||
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | ||||
MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o | |||||
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | ||||
x86/idct_mmx_xvid.o \ | x86/idct_mmx_xvid.o \ | ||||
x86/idct_sse2_xvid.o \ | x86/idct_sse2_xvid.o \ | ||||
@@ -0,0 +1,120 @@ | |||||
/* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include <stdint.h> | |||||
#include "config.h" | |||||
#include "libavutil/attributes.h" | |||||
#include "libavutil/internal.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/x86/asm.h" | |||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/blockdsp.h" | |||||
#include "libavcodec/version.h" | |||||
#if HAVE_INLINE_ASM | |||||
#define CLEAR_BLOCKS(name, n) \ | |||||
static void name(int16_t *blocks) \ | |||||
{ \ | |||||
__asm__ volatile ( \ | |||||
"pxor %%mm7, %%mm7 \n\t" \ | |||||
"mov %1, %%"REG_a" \n\t" \ | |||||
"1: \n\t" \ | |||||
"movq %%mm7, (%0, %%"REG_a") \n\t" \ | |||||
"movq %%mm7, 8(%0, %%"REG_a") \n\t" \ | |||||
"movq %%mm7, 16(%0, %%"REG_a") \n\t" \ | |||||
"movq %%mm7, 24(%0, %%"REG_a") \n\t" \ | |||||
"add $32, %%"REG_a" \n\t" \ | |||||
"js 1b \n\t" \ | |||||
:: "r"(((uint8_t *) blocks) + 128 * n), \ | |||||
"i"(-128 * n) \ | |||||
: "%"REG_a); \ | |||||
} | |||||
CLEAR_BLOCKS(clear_blocks_mmx, 6) | |||||
CLEAR_BLOCKS(clear_block_mmx, 1) | |||||
/* Zero one block of 64 int16_t coefficients (128 bytes) with eight aligned
 * 16-byte SSE stores. movaps requires block to be 16-byte aligned. */
static void clear_block_sse(int16_t *block)
{
    __asm__ volatile (
        "xorps  %%xmm0, %%xmm0          \n"
        "movaps %%xmm0,    (%[dst])     \n"
        "movaps %%xmm0,  16(%[dst])     \n"
        "movaps %%xmm0,  32(%[dst])     \n"
        "movaps %%xmm0,  48(%[dst])     \n"
        "movaps %%xmm0,  64(%[dst])     \n"
        "movaps %%xmm0,  80(%[dst])     \n"
        "movaps %%xmm0,  96(%[dst])     \n"
        "movaps %%xmm0, 112(%[dst])     \n"
        :
        : [dst] "r" (block)
        : "memory");
}
static void clear_blocks_sse(int16_t *blocks) | |||||
{ | |||||
__asm__ volatile ( | |||||
"xorps %%xmm0, %%xmm0 \n" | |||||
"mov %1, %%"REG_a" \n" | |||||
"1: \n" | |||||
"movaps %%xmm0, (%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 16(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 32(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 48(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 64(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 80(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 96(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 112(%0, %%"REG_a") \n" | |||||
"add $128, %%"REG_a" \n" | |||||
"js 1b \n" | |||||
:: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6) | |||||
: "%"REG_a); | |||||
} | |||||
#endif /* HAVE_INLINE_ASM */ | |||||
/* Install the x86 inline-asm block-clearing routines into c.
 *
 * Nothing is overridden when high_bit_depth is non-zero or when inline asm
 * is unavailable. The MMX versions are installed first and replaced by the
 * SSE versions when the CPU flags allow it. While FF_API_XVMC is enabled,
 * the SSE versions are skipped if avctx->xvmc_acceleration > 1, because the
 * SSE routines need 16-byte aligned blocks. */
#if FF_API_XVMC
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
                                  AVCodecContext *avctx)
#else
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
#endif /* FF_API_XVMC */
{
#if HAVE_INLINE_ASM
    const int cpu_flags = av_get_cpu_flags();

    if (high_bit_depth)
        return;

    if (INLINE_MMX(cpu_flags)) {
        c->clear_blocks = clear_blocks_mmx;
        c->clear_block  = clear_block_mmx;
    }

#if FF_API_XVMC
FF_DISABLE_DEPRECATION_WARNINGS
    /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
    if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
        return;
FF_ENABLE_DEPRECATION_WARNINGS
#endif /* FF_API_XVMC */

    if (INLINE_SSE(cpu_flags)) {
        c->clear_blocks = clear_blocks_sse;
        c->clear_block  = clear_block_sse;
    }
#endif /* HAVE_INLINE_ASM */
}
@@ -19,12 +19,10 @@ | |||||
#include "config.h" | #include "config.h" | ||||
#include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/internal.h" | |||||
#include "libavutil/x86/cpu.h" | #include "libavutil/x86/cpu.h" | ||||
#include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "libavcodec/simple_idct.h" | #include "libavcodec/simple_idct.h" | ||||
#include "libavcodec/version.h" | |||||
#include "dsputil_x86.h" | #include "dsputil_x86.h" | ||||
#include "idct_xvid.h" | #include "idct_xvid.h" | ||||
@@ -54,8 +52,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||
c->add_pixels_clamped = ff_add_pixels_clamped_mmx; | c->add_pixels_clamped = ff_add_pixels_clamped_mmx; | ||||
if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
c->clear_block = ff_clear_block_mmx; | |||||
c->clear_blocks = ff_clear_blocks_mmx; | |||||
c->draw_edges = ff_draw_edges_mmx; | c->draw_edges = ff_draw_edges_mmx; | ||||
switch (avctx->idct_algo) { | switch (avctx->idct_algo) { | ||||
@@ -103,19 +99,6 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, | |||||
{ | { | ||||
#if HAVE_SSE_INLINE | #if HAVE_SSE_INLINE | ||||
c->vector_clipf = ff_vector_clipf_sse; | c->vector_clipf = ff_vector_clipf_sse; | ||||
#if FF_API_XVMC | |||||
FF_DISABLE_DEPRECATION_WARNINGS | |||||
/* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ | |||||
if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1) | |||||
return; | |||||
FF_ENABLE_DEPRECATION_WARNINGS | |||||
#endif /* FF_API_XVMC */ | |||||
if (!high_bit_depth) { | |||||
c->clear_block = ff_clear_block_sse; | |||||
c->clear_blocks = ff_clear_blocks_sse; | |||||
} | |||||
#endif /* HAVE_SSE_INLINE */ | #endif /* HAVE_SSE_INLINE */ | ||||
} | } | ||||
@@ -166,62 +166,6 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
} while (--i); | } while (--i); | ||||
} | } | ||||
#define CLEAR_BLOCKS(name, n) \ | |||||
void name(int16_t *blocks) \ | |||||
{ \ | |||||
__asm__ volatile ( \ | |||||
"pxor %%mm7, %%mm7 \n\t" \ | |||||
"mov %1, %%"REG_a" \n\t" \ | |||||
"1: \n\t" \ | |||||
"movq %%mm7, (%0, %%"REG_a") \n\t" \ | |||||
"movq %%mm7, 8(%0, %%"REG_a") \n\t" \ | |||||
"movq %%mm7, 16(%0, %%"REG_a") \n\t" \ | |||||
"movq %%mm7, 24(%0, %%"REG_a") \n\t" \ | |||||
"add $32, %%"REG_a" \n\t" \ | |||||
"js 1b \n\t" \ | |||||
:: "r"(((uint8_t *) blocks) + 128 * n), \ | |||||
"i"(-128 * n) \ | |||||
: "%"REG_a); \ | |||||
} | |||||
CLEAR_BLOCKS(ff_clear_blocks_mmx, 6) | |||||
CLEAR_BLOCKS(ff_clear_block_mmx, 1) | |||||
void ff_clear_block_sse(int16_t *block) | |||||
{ | |||||
__asm__ volatile ( | |||||
"xorps %%xmm0, %%xmm0 \n" | |||||
"movaps %%xmm0, (%0) \n" | |||||
"movaps %%xmm0, 16(%0) \n" | |||||
"movaps %%xmm0, 32(%0) \n" | |||||
"movaps %%xmm0, 48(%0) \n" | |||||
"movaps %%xmm0, 64(%0) \n" | |||||
"movaps %%xmm0, 80(%0) \n" | |||||
"movaps %%xmm0, 96(%0) \n" | |||||
"movaps %%xmm0, 112(%0) \n" | |||||
:: "r" (block) | |||||
: "memory"); | |||||
} | |||||
void ff_clear_blocks_sse(int16_t *blocks) | |||||
{ | |||||
__asm__ volatile ( | |||||
"xorps %%xmm0, %%xmm0 \n" | |||||
"mov %1, %%"REG_a" \n" | |||||
"1: \n" | |||||
"movaps %%xmm0, (%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 16(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 32(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 48(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 64(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 80(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 96(%0, %%"REG_a") \n" | |||||
"movaps %%xmm0, 112(%0, %%"REG_a") \n" | |||||
"add $128, %%"REG_a" \n" | |||||
"js 1b \n" | |||||
:: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6) | |||||
: "%"REG_a); | |||||
} | |||||
/* Draw the edges of width 'w' of an image of size width, height | /* Draw the edges of width 'w' of an image of size width, height | ||||
* this MMX version can only handle w == 8 || w == 16. */ | * this MMX version can only handle w == 8 || w == 16. */ | ||||
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | ||||
@@ -38,11 +38,6 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | ||||
int line_size); | int line_size); | ||||
void ff_clear_block_mmx(int16_t *block); | |||||
void ff_clear_block_sse(int16_t *block); | |||||
void ff_clear_blocks_mmx(int16_t *blocks); | |||||
void ff_clear_blocks_sse(int16_t *blocks); | |||||
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | ||||
int w, int h, int sides); | int w, int h, int sides); | ||||