| @@ -1558,6 +1558,7 @@ CONFIG_EXTRA=" | |||
| mpegvideo | |||
| mpegvideoenc | |||
| nettle | |||
| pixblockdsp | |||
| qpeldsp | |||
| rangecoder | |||
| riffdec | |||
| @@ -1706,7 +1707,7 @@ threads_if_any="$THREADS_LIST" | |||
| # subsystems | |||
| dct_select="rdft" | |||
| dsputil_select="fdctdsp idctdsp" | |||
| dsputil_select="fdctdsp idctdsp pixblockdsp" | |||
| error_resilience_select="dsputil" | |||
| intrax8_select="error_resilience" | |||
| mdct_select="fft" | |||
| @@ -1715,7 +1716,7 @@ mpeg_er_select="error_resilience" | |||
| mpegaudio_select="mpegaudiodsp" | |||
| mpegaudiodsp_select="dct" | |||
| mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp" | |||
| mpegvideoenc_select="dsputil mpegvideo qpeldsp" | |||
| mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp" | |||
| # decoders / encoders | |||
| aac_decoder_select="mdct sinewin" | |||
| @@ -1732,9 +1733,9 @@ amrwb_decoder_select="lsp" | |||
| amv_decoder_select="sp5x_decoder" | |||
| ape_decoder_select="bswapdsp" | |||
| asv1_decoder_select="blockdsp bswapdsp idctdsp" | |||
| asv1_encoder_select="bswapdsp dsputil fdctdsp" | |||
| asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" | |||
| asv2_decoder_select="blockdsp bswapdsp idctdsp" | |||
| asv2_encoder_select="bswapdsp dsputil fdctdsp" | |||
| asv2_encoder_select="bswapdsp fdctdsp pixblockdsp" | |||
| atrac1_decoder_select="mdct sinewin" | |||
| atrac3_decoder_select="mdct" | |||
| atrac3p_decoder_select="mdct sinewin" | |||
| @@ -1749,9 +1750,9 @@ cscd_decoder_select="lzo" | |||
| cscd_decoder_suggest="zlib" | |||
| dca_decoder_select="mdct" | |||
| dnxhd_decoder_select="blockdsp idctdsp" | |||
| dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc" | |||
| dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp" | |||
| dvvideo_decoder_select="dvprofile idctdsp" | |||
| dvvideo_encoder_select="dsputil dvprofile fdctdsp" | |||
| dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp" | |||
| dxa_decoder_deps="zlib" | |||
| eac3_decoder_select="ac3_decoder" | |||
| eac3_encoder_select="ac3_encoder" | |||
| @@ -72,6 +72,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ | |||
| OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ | |||
| motion_est.o ratecontrol.o \ | |||
| mpegvideoencdsp.o | |||
| OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o | |||
| OBJS-$(CONFIG_QPELDSP) += qpeldsp.o | |||
| OBJS-$(CONFIG_RANGECODER) += rangecoder.o | |||
| RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | |||
| @@ -23,6 +23,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o | |||
| OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o | |||
| OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o | |||
| OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o | |||
| OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_init_arm.o | |||
| OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o | |||
| OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o | |||
| @@ -62,6 +63,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \ | |||
| arm/simple_idct_armv6.o | |||
| ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | |||
| ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o | |||
| ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_armv6.o | |||
| ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o | |||
| ARMV6-OBJS-$(CONFIG_VP7_DECODER) += arm/vp8_armv6.o \ | |||
| @@ -20,61 +20,6 @@ | |||
| #include "libavutil/arm/asm.S" | |||
| function ff_get_pixels_armv6, export=1 /* Widen 8 rows of 8 unsigned bytes into 64 consecutive 16-bit values. C prototype elsewhere in this patch: (int16_t *block, const uint8_t *pixels, int stride); presumably passed in r0, r1, r2 per the ARM procedure call standard -- confirm. */ | |||
| pld [r1, r2] /* prefetch hint for the address r1 + r2 */ | |||
| push {r4-r8, lr} /* save the scratch registers used below and the return address */ | |||
| mov lr, #8 /* lr is reused as the row counter: 8 iterations */ | |||
| 1: /* loop top: one 8-byte source row per iteration */ | |||
| ldrd_post r4, r5, r1, r2 /* ldrd_post is a macro from asm.S (not shown here); presumably loads r4, r5 from [r1] and then adds r2 to r1 -- confirm */ | |||
| subs lr, lr, #1 /* decrement the row counter; the flags set here are consumed by the bgt at the bottom */ | |||
| uxtb16 r6, r4 /* r6 = bytes 0 and 2 of r4 (counted from the least significant byte), each zero-extended to 16 bits */ | |||
| uxtb16 r4, r4, ror #8 /* r4 = bytes 1 and 3 of the same word, zero-extended */ | |||
| uxtb16 r12, r5 /* r12 = bytes 0 and 2 of the second word */ | |||
| uxtb16 r8, r5, ror #8 /* r8 = bytes 1 and 3 of the second word */ | |||
| pld [r1, r2] /* prefetch hint for r1 + r2 again, r1 having been advanced by the load above */ | |||
| pkhbt r5, r6, r4, lsl #16 /* r5 = byte 0 in the low halfword, byte 1 in the high halfword */ | |||
| pkhtb r6, r4, r6, asr #16 /* r6 = byte 2 in the low halfword, byte 3 in the high halfword */ | |||
| pkhbt r7, r12, r8, lsl #16 /* r7 = bytes 0 and 1 of the second word, widened */ | |||
| pkhtb r12, r8, r12, asr #16 /* r12 = bytes 2 and 3 of the second word, widened */ | |||
| stm r0!, {r5,r6,r7,r12} /* store the eight 16-bit results (16 bytes) and advance r0 past them */ | |||
| bgt 1b /* loop while the counter decremented by subs is still positive */ | |||
| pop {r4-r8, pc} /* restore saved registers and return by loading the saved lr into pc */ | |||
| endfunc | |||
| function ff_diff_pixels_armv6, export=1 /* For 8 rows of 8 unsigned bytes, store the 16-bit differences (first source minus second source) as 64 consecutive values. C prototype elsewhere in this patch: (int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride); presumably passed in r0, r1, r2, r3 per the ARM procedure call standard -- confirm. */ | |||
| pld [r1, r3] /* prefetch hint for r1 + r3 */ | |||
| pld [r2, r3] /* prefetch hint for r2 + r3 */ | |||
| push {r4-r9, lr} /* save the scratch registers used below and the return address */ | |||
| mov lr, #8 /* lr is reused as the row counter: 8 iterations */ | |||
| 1: /* loop top: one 8-byte row from each source per iteration */ | |||
| ldrd_post r4, r5, r1, r3 /* ldrd_post is a macro from asm.S (not shown here); presumably loads r4, r5 from [r1] and then adds r3 to r1 -- confirm */ | |||
| ldrd_post r6, r7, r2, r3 /* same for the second source: r6, r7 from [r2], then r2 advanced by r3 (presumed, see above) */ | |||
| uxtb16 r8, r4 /* r8 = bytes 0 and 2 of the first source word, zero-extended to 16 bits */ | |||
| uxtb16 r4, r4, ror #8 /* r4 = bytes 1 and 3 of the first source word */ | |||
| uxtb16 r9, r6 /* r9 = bytes 0 and 2 of the second source word */ | |||
| uxtb16 r6, r6, ror #8 /* r6 = bytes 1 and 3 of the second source word */ | |||
| pld [r1, r3] /* prefetch hint for r1 + r3, r1 having been advanced above */ | |||
| ssub16 r9, r8, r9 /* r9 = per-halfword differences for bytes 0 and 2 */ | |||
| ssub16 r6, r4, r6 /* r6 = per-halfword differences for bytes 1 and 3 */ | |||
| uxtb16 r8, r5 /* r8 = bytes 4 and 6 of the first source row */ | |||
| uxtb16 r5, r5, ror #8 /* r5 = bytes 5 and 7 of the first source row */ | |||
| pld [r2, r3] /* prefetch hint for r2 + r3, r2 having been advanced above */ | |||
| pkhbt r4, r9, r6, lsl #16 /* r4 = difference 0 in the low halfword, difference 1 in the high halfword */ | |||
| pkhtb r6, r6, r9, asr #16 /* r6 = difference 2 in the low halfword, difference 3 in the high halfword */ | |||
| uxtb16 r9, r7 /* r9 = bytes 4 and 6 of the second source row */ | |||
| uxtb16 r7, r7, ror #8 /* r7 = bytes 5 and 7 of the second source row */ | |||
| ssub16 r9, r8, r9 /* r9 = differences 4 and 6 */ | |||
| ssub16 r5, r5, r7 /* r5 = differences 5 and 7 */ | |||
| subs lr, lr, #1 /* decrement the row counter; the flags set here are consumed by the bgt below */ | |||
| pkhbt r8, r9, r5, lsl #16 /* r8 = difference 4 (low halfword) and difference 5 (high halfword) */ | |||
| pkhtb r9, r5, r9, asr #16 /* r9 = difference 6 (low halfword) and difference 7 (high halfword) */ | |||
| stm r0!, {r4,r6,r8,r9} /* store the eight 16-bit differences (16 bytes) and advance r0 past them */ | |||
| bgt 1b /* loop while the counter decremented by subs is still positive */ | |||
| pop {r4-r9, pc} /* restore saved registers and return by loading the saved lr into pc */ | |||
| endfunc | |||
| function ff_pix_abs16_armv6, export=1 | |||
| ldr r0, [sp] | |||
| push {r4-r9, lr} | |||
| @@ -26,10 +26,6 @@ | |||
| #include "libavcodec/mpegvideo.h" | |||
| #include "dsputil_arm.h" | |||
| void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); | |||
| void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, | |||
| const uint8_t *s2, int stride); | |||
| int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, | |||
| int line_size, int h); | |||
| int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, | |||
| @@ -46,10 +42,6 @@ int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, | |||
| av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, | |||
| unsigned high_bit_depth) | |||
| { | |||
| if (!high_bit_depth) | |||
| c->get_pixels = ff_get_pixels_armv6; | |||
| c->diff_pixels = ff_diff_pixels_armv6; | |||
| c->pix_abs[0][0] = ff_pix_abs16_armv6; | |||
| c->pix_abs[0][1] = ff_pix_abs16_x2_armv6; | |||
| c->pix_abs[0][2] = ff_pix_abs16_y2_armv6; | |||
| @@ -0,0 +1,76 @@ | |||
| /* | |||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/arm/asm.S" | |||
| function ff_get_pixels_armv6, export=1 /* Widen 8 rows of 8 unsigned bytes into 64 consecutive 16-bit values. C prototype elsewhere in this patch: (int16_t *block, const uint8_t *pixels, int stride); presumably passed in r0, r1, r2 per the ARM procedure call standard -- confirm. */ | |||
| pld [r1, r2] /* prefetch hint for the address r1 + r2 */ | |||
| push {r4-r8, lr} /* save the scratch registers used below and the return address */ | |||
| mov lr, #8 /* lr is reused as the row counter: 8 iterations */ | |||
| 1: /* loop top: one 8-byte source row per iteration */ | |||
| ldrd_post r4, r5, r1, r2 /* ldrd_post is a macro from asm.S (not shown here); presumably loads r4, r5 from [r1] and then adds r2 to r1 -- confirm */ | |||
| subs lr, lr, #1 /* decrement the row counter; the flags set here are consumed by the bgt at the bottom */ | |||
| uxtb16 r6, r4 /* r6 = bytes 0 and 2 of r4 (counted from the least significant byte), each zero-extended to 16 bits */ | |||
| uxtb16 r4, r4, ror #8 /* r4 = bytes 1 and 3 of the same word, zero-extended */ | |||
| uxtb16 r12, r5 /* r12 = bytes 0 and 2 of the second word */ | |||
| uxtb16 r8, r5, ror #8 /* r8 = bytes 1 and 3 of the second word */ | |||
| pld [r1, r2] /* prefetch hint for r1 + r2 again, r1 having been advanced by the load above */ | |||
| pkhbt r5, r6, r4, lsl #16 /* r5 = byte 0 in the low halfword, byte 1 in the high halfword */ | |||
| pkhtb r6, r4, r6, asr #16 /* r6 = byte 2 in the low halfword, byte 3 in the high halfword */ | |||
| pkhbt r7, r12, r8, lsl #16 /* r7 = bytes 0 and 1 of the second word, widened */ | |||
| pkhtb r12, r8, r12, asr #16 /* r12 = bytes 2 and 3 of the second word, widened */ | |||
| stm r0!, {r5,r6,r7,r12} /* store the eight 16-bit results (16 bytes) and advance r0 past them */ | |||
| bgt 1b /* loop while the counter decremented by subs is still positive */ | |||
| pop {r4-r8, pc} /* restore saved registers and return by loading the saved lr into pc */ | |||
| endfunc | |||
| function ff_diff_pixels_armv6, export=1 /* For 8 rows of 8 unsigned bytes, store the 16-bit differences (first source minus second source) as 64 consecutive values. C prototype elsewhere in this patch: (int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride); presumably passed in r0, r1, r2, r3 per the ARM procedure call standard -- confirm. */ | |||
| pld [r1, r3] /* prefetch hint for r1 + r3 */ | |||
| pld [r2, r3] /* prefetch hint for r2 + r3 */ | |||
| push {r4-r9, lr} /* save the scratch registers used below and the return address */ | |||
| mov lr, #8 /* lr is reused as the row counter: 8 iterations */ | |||
| 1: /* loop top: one 8-byte row from each source per iteration */ | |||
| ldrd_post r4, r5, r1, r3 /* ldrd_post is a macro from asm.S (not shown here); presumably loads r4, r5 from [r1] and then adds r3 to r1 -- confirm */ | |||
| ldrd_post r6, r7, r2, r3 /* same for the second source: r6, r7 from [r2], then r2 advanced by r3 (presumed, see above) */ | |||
| uxtb16 r8, r4 /* r8 = bytes 0 and 2 of the first source word, zero-extended to 16 bits */ | |||
| uxtb16 r4, r4, ror #8 /* r4 = bytes 1 and 3 of the first source word */ | |||
| uxtb16 r9, r6 /* r9 = bytes 0 and 2 of the second source word */ | |||
| uxtb16 r6, r6, ror #8 /* r6 = bytes 1 and 3 of the second source word */ | |||
| pld [r1, r3] /* prefetch hint for r1 + r3, r1 having been advanced above */ | |||
| ssub16 r9, r8, r9 /* r9 = per-halfword differences for bytes 0 and 2 */ | |||
| ssub16 r6, r4, r6 /* r6 = per-halfword differences for bytes 1 and 3 */ | |||
| uxtb16 r8, r5 /* r8 = bytes 4 and 6 of the first source row */ | |||
| uxtb16 r5, r5, ror #8 /* r5 = bytes 5 and 7 of the first source row */ | |||
| pld [r2, r3] /* prefetch hint for r2 + r3, r2 having been advanced above */ | |||
| pkhbt r4, r9, r6, lsl #16 /* r4 = difference 0 in the low halfword, difference 1 in the high halfword */ | |||
| pkhtb r6, r6, r9, asr #16 /* r6 = difference 2 in the low halfword, difference 3 in the high halfword */ | |||
| uxtb16 r9, r7 /* r9 = bytes 4 and 6 of the second source row */ | |||
| uxtb16 r7, r7, ror #8 /* r7 = bytes 5 and 7 of the second source row */ | |||
| ssub16 r9, r8, r9 /* r9 = differences 4 and 6 */ | |||
| ssub16 r5, r5, r7 /* r5 = differences 5 and 7 */ | |||
| subs lr, lr, #1 /* decrement the row counter; the flags set here are consumed by the bgt below */ | |||
| pkhbt r8, r9, r5, lsl #16 /* r8 = difference 4 (low halfword) and difference 5 (high halfword) */ | |||
| pkhtb r9, r5, r9, asr #16 /* r9 = difference 6 (low halfword) and difference 7 (high halfword) */ | |||
| stm r0!, {r4,r6,r8,r9} /* store the eight 16-bit differences (16 bytes) and advance r0 past them */ | |||
| bgt 1b /* loop while the counter decremented by subs is still positive */ | |||
| pop {r4-r9, pc} /* restore saved registers and return by loading the saved lr into pc */ | |||
| endfunc | |||
| @@ -0,0 +1,42 @@ | |||
| /* | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/arm/cpu.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/pixblockdsp.h" | |||
| void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); | |||
| void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, | |||
| const uint8_t *s2, int stride); | |||
| av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c, | |||
| AVCodecContext *avctx, | |||
| unsigned high_bit_depth) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (have_armv6(cpu_flags)) { | |||
| if (!high_bit_depth) | |||
| c->get_pixels = ff_get_pixels_armv6; | |||
| c->diff_pixels = ff_diff_pixels_armv6; | |||
| } | |||
| } | |||
| @@ -33,19 +33,19 @@ | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "bswapdsp.h" | |||
| #include "dsputil.h" | |||
| #include "fdctdsp.h" | |||
| #include "idctdsp.h" | |||
| #include "get_bits.h" | |||
| #include "pixblockdsp.h" | |||
| #include "put_bits.h" | |||
| typedef struct ASV1Context{ | |||
| AVCodecContext *avctx; | |||
| BlockDSPContext bdsp; | |||
| BswapDSPContext bbdsp; | |||
| DSPContext dsp; | |||
| FDCTDSPContext fdsp; | |||
| IDCTDSPContext idsp; | |||
| PixblockDSPContext pdsp; | |||
| PutBitContext pb; | |||
| GetBitContext gb; | |||
| ScanTable scantable; | |||
| @@ -159,16 +159,16 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame, | |||
| uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; | |||
| uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; | |||
| a->dsp.get_pixels(block[0], ptr_y , linesize); | |||
| a->dsp.get_pixels(block[1], ptr_y + 8, linesize); | |||
| a->dsp.get_pixels(block[2], ptr_y + 8*linesize , linesize); | |||
| a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize); | |||
| a->pdsp.get_pixels(block[0], ptr_y, linesize); | |||
| a->pdsp.get_pixels(block[1], ptr_y + 8, linesize); | |||
| a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize, linesize); | |||
| a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize); | |||
| for(i=0; i<4; i++) | |||
| a->fdsp.fdct(block[i]); | |||
| if(!(a->avctx->flags&CODEC_FLAG_GRAY)){ | |||
| a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); | |||
| a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); | |||
| a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); | |||
| a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); | |||
| for(i=4; i<6; i++) | |||
| a->fdsp.fdct(block[i]); | |||
| } | |||
| @@ -248,8 +248,8 @@ static av_cold int encode_init(AVCodecContext *avctx){ | |||
| avctx->coded_frame->key_frame = 1; | |||
| ff_asv_common_init(avctx); | |||
| ff_dsputil_init(&a->dsp, avctx); | |||
| ff_fdctdsp_init(&a->fdsp, avctx); | |||
| ff_pixblockdsp_init(&a->pdsp, avctx); | |||
| if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE; | |||
| @@ -30,10 +30,10 @@ | |||
| #include "avcodec.h" | |||
| #include "blockdsp.h" | |||
| #include "dsputil.h" | |||
| #include "fdctdsp.h" | |||
| #include "internal.h" | |||
| #include "mpegvideo.h" | |||
| #include "pixblockdsp.h" | |||
| #include "dnxhdenc.h" | |||
| // The largest value that will not lead to overflow for 10bit samples. | |||
| @@ -308,10 +308,10 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) | |||
| avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; | |||
| ff_blockdsp_init(&ctx->bdsp, avctx); | |||
| ff_dsputil_init(&ctx->m.dsp, avctx); | |||
| ff_fdctdsp_init(&ctx->m.fdsp, avctx); | |||
| ff_idctdsp_init(&ctx->m.idsp, avctx); | |||
| ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); | |||
| ff_pixblockdsp_init(&ctx->m.pdsp, avctx); | |||
| ff_dct_common_init(&ctx->m); | |||
| if (!ctx->m.dct_quantize) | |||
| ctx->m.dct_quantize = ff_dct_quantize_c; | |||
| @@ -540,12 +540,12 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) | |||
| ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); | |||
| const uint8_t *ptr_v = ctx->thread[0]->src[2] + | |||
| ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); | |||
| DSPContext *dsp = &ctx->m.dsp; | |||
| PixblockDSPContext *pdsp = &ctx->m.pdsp; | |||
| dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); | |||
| dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); | |||
| dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); | |||
| dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); | |||
| pdsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); | |||
| pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); | |||
| pdsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); | |||
| pdsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); | |||
| if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { | |||
| if (ctx->interlaced) { | |||
| @@ -568,14 +568,14 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) | |||
| ctx->bdsp.clear_block(ctx->blocks[7]); | |||
| } | |||
| } else { | |||
| dsp->get_pixels(ctx->blocks[4], | |||
| ptr_y + ctx->dct_y_offset, ctx->m.linesize); | |||
| dsp->get_pixels(ctx->blocks[5], | |||
| ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); | |||
| dsp->get_pixels(ctx->blocks[6], | |||
| ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); | |||
| dsp->get_pixels(ctx->blocks[7], | |||
| ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); | |||
| pdsp->get_pixels(ctx->blocks[4], | |||
| ptr_y + ctx->dct_y_offset, ctx->m.linesize); | |||
| pdsp->get_pixels(ctx->blocks[5], | |||
| ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); | |||
| pdsp->get_pixels(ctx->blocks[6], | |||
| ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); | |||
| pdsp->get_pixels(ctx->blocks[7], | |||
| ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); | |||
| } | |||
| } | |||
| @@ -35,13 +35,6 @@ | |||
| uint32_t ff_square_tab[512] = { 0, }; | |||
| #define BIT_DEPTH 16 | |||
| #include "dsputilenc_template.c" | |||
| #undef BIT_DEPTH | |||
| #define BIT_DEPTH 8 | |||
| #include "dsputilenc_template.c" | |||
| static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |||
| int line_size, int h) | |||
| { | |||
| @@ -110,27 +103,6 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |||
| return s; | |||
| } | |||
/**
 * Compute the element-wise difference of two 8x8 blocks of 8-bit samples.
 *
 * @param block  destination for the 64 differences, stored row after row
 *               (8 values per row, rows contiguous)
 * @param s1     top-left sample of the minuend block
 * @param s2     top-left sample of the subtrahend block
 * @param stride distance in bytes between consecutive rows of both sources
 */
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* one output row: eight differences s1[col] - s2[col] */
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];

        block += 8;
        s1    += stride;
        s2    += stride;
    }
}
| static int sum_abs_dctelem_c(int16_t *block) | |||
| { | |||
| int sum = 0, i; | |||
| @@ -577,7 +549,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, | |||
| assert(h == 8); | |||
| s->dsp.diff_pixels(temp, src1, src2, stride); | |||
| s->pdsp.diff_pixels(temp, src1, src2, stride); | |||
| s->fdsp.fdct(temp); | |||
| return s->dsp.sum_abs_dctelem(temp); | |||
| } | |||
| @@ -617,7 +589,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, | |||
| int16_t dct[8][8]; | |||
| int i, sum = 0; | |||
| s->dsp.diff_pixels(dct[0], src1, src2, stride); | |||
| s->pdsp.diff_pixels(dct[0], src1, src2, stride); | |||
| #define SRC(x) dct[i][x] | |||
| #define DST(x, v) dct[i][x] = v | |||
| @@ -644,7 +616,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, | |||
| assert(h == 8); | |||
| s->dsp.diff_pixels(temp, src1, src2, stride); | |||
| s->pdsp.diff_pixels(temp, src1, src2, stride); | |||
| s->fdsp.fdct(temp); | |||
| for (i = 0; i < 64; i++) | |||
| @@ -663,7 +635,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, | |||
| assert(h == 8); | |||
| s->mb_intra = 0; | |||
| s->dsp.diff_pixels(temp, src1, src2, stride); | |||
| s->pdsp.diff_pixels(temp, src1, src2, stride); | |||
| memcpy(bak, temp, 64 * sizeof(int16_t)); | |||
| @@ -694,7 +666,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, | |||
| copy_block8(lsrc1, src1, 8, stride, 8); | |||
| copy_block8(lsrc2, src2, 8, stride, 8); | |||
| s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8); | |||
| s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8); | |||
| s->block_last_index[0 /* FIXME */] = | |||
| last = | |||
| @@ -766,7 +738,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, | |||
| assert(h == 8); | |||
| s->dsp.diff_pixels(temp, src1, src2, stride); | |||
| s->pdsp.diff_pixels(temp, src1, src2, stride); | |||
| s->block_last_index[0 /* FIXME */] = | |||
| last = | |||
| @@ -932,8 +904,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||
| { | |||
| const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; | |||
| c->diff_pixels = diff_pixels_c; | |||
| c->sum_abs_dctelem = sum_abs_dctelem_c; | |||
| /* TODO [0] 16 [1] 8 */ | |||
| @@ -975,16 +945,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||
| c->nsse[0] = nsse16_c; | |||
| c->nsse[1] = nsse8_c; | |||
| switch (avctx->bits_per_raw_sample) { | |||
| case 9: | |||
| case 10: | |||
| c->get_pixels = get_pixels_16_c; | |||
| break; | |||
| default: | |||
| c->get_pixels = get_pixels_8_c; | |||
| break; | |||
| } | |||
| if (ARCH_ARM) | |||
| ff_dsputil_init_arm(c, avctx, high_bit_depth); | |||
| if (ARCH_PPC) | |||
| @@ -48,14 +48,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c, | |||
| * DSPContext. | |||
| */ | |||
| typedef struct DSPContext { | |||
| /* pixel ops : interface with DCT */ | |||
| void (*get_pixels)(int16_t *block /* align 16 */, | |||
| const uint8_t *pixels /* align 8 */, | |||
| int line_size); | |||
| void (*diff_pixels)(int16_t *block /* align 16 */, | |||
| const uint8_t *s1 /* align 8 */, | |||
| const uint8_t *s2 /* align 8 */, | |||
| int stride); | |||
| int (*sum_abs_dctelem)(int16_t *block /* align 16 */); | |||
| me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ | |||
| @@ -31,6 +31,7 @@ | |||
| #include "dsputil.h" | |||
| #include "fdctdsp.h" | |||
| #include "internal.h" | |||
| #include "pixblockdsp.h" | |||
| #include "put_bits.h" | |||
| #include "dv.h" | |||
| #include "dv_tablegen.h" | |||
| @@ -41,6 +42,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) | |||
| DVVideoContext *s = avctx->priv_data; | |||
| DSPContext dsp; | |||
| FDCTDSPContext fdsp; | |||
| PixblockDSPContext pdsp; | |||
| int ret; | |||
| s->sys = avpriv_dv_codec_profile(avctx); | |||
| @@ -65,9 +67,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) | |||
| ff_dsputil_init(&dsp, avctx); | |||
| ff_fdctdsp_init(&fdsp, avctx); | |||
| ff_pixblockdsp_init(&pdsp, avctx); | |||
| ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp); | |||
| s->get_pixels = dsp.get_pixels; | |||
| s->get_pixels = pdsp.get_pixels; | |||
| s->ildct_cmp = dsp.ildct_cmp[5]; | |||
| s->fdct[0] = fdsp.fdct; | |||
| @@ -39,6 +39,7 @@ | |||
| #include "idctdsp.h" | |||
| #include "mpegvideodsp.h" | |||
| #include "mpegvideoencdsp.h" | |||
| #include "pixblockdsp.h" | |||
| #include "put_bits.h" | |||
| #include "ratecontrol.h" | |||
| #include "parser.h" | |||
| @@ -361,6 +362,7 @@ typedef struct MpegEncContext { | |||
| IDCTDSPContext idsp; | |||
| MpegVideoDSPContext mdsp; | |||
| MpegvideoEncDSPContext mpvencdsp; | |||
| PixblockDSPContext pdsp; | |||
| QpelDSPContext qdsp; | |||
| VideoDSPContext vdsp; | |||
| H263DSPContext h263dsp; | |||
| @@ -37,7 +37,6 @@ | |||
| #include "libavutil/timer.h" | |||
| #include "avcodec.h" | |||
| #include "dct.h" | |||
| #include "dsputil.h" | |||
| #include "idctdsp.h" | |||
| #include "mpeg12.h" | |||
| #include "mpegvideo.h" | |||
| @@ -48,6 +47,7 @@ | |||
| #include "mpegutils.h" | |||
| #include "mjpegenc.h" | |||
| #include "msmpeg4.h" | |||
| #include "pixblockdsp.h" | |||
| #include "qpeldsp.h" | |||
| #include "faandct.h" | |||
| #include "thread.h" | |||
| @@ -703,6 +703,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) | |||
| ff_fdctdsp_init(&s->fdsp, avctx); | |||
| ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); | |||
| ff_pixblockdsp_init(&s->pdsp, avctx); | |||
| ff_qpeldsp_init(&s->qdsp); | |||
| s->avctx->coded_frame = s->current_picture.f; | |||
| @@ -1943,22 +1944,22 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, | |||
| } | |||
| } | |||
| s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); | |||
| s->dsp.get_pixels(s->block[1], ptr_y + 8 , wrap_y); | |||
| s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y); | |||
| s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y); | |||
| s->pdsp.get_pixels(s->block[0], ptr_y, wrap_y); | |||
| s->pdsp.get_pixels(s->block[1], ptr_y + 8, wrap_y); | |||
| s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset, wrap_y); | |||
| s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y); | |||
| if (s->flags & CODEC_FLAG_GRAY) { | |||
| skip_dct[4] = 1; | |||
| skip_dct[5] = 1; | |||
| } else { | |||
| s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); | |||
| s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); | |||
| s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c); | |||
| s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c); | |||
| if (!s->chroma_y_shift) { /* 422 */ | |||
| s->dsp.get_pixels(s->block[6], | |||
| ptr_cb + (dct_offset >> 1), wrap_c); | |||
| s->dsp.get_pixels(s->block[7], | |||
| ptr_cr + (dct_offset >> 1), wrap_c); | |||
| s->pdsp.get_pixels(s->block[6], | |||
| ptr_cb + (dct_offset >> 1), wrap_c); | |||
| s->pdsp.get_pixels(s->block[7], | |||
| ptr_cr + (dct_offset >> 1), wrap_c); | |||
| } | |||
| } | |||
| } else { | |||
| @@ -2024,24 +2025,24 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, | |||
| } | |||
| } | |||
| s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y); | |||
| s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | |||
| s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset, | |||
| dest_y + dct_offset, wrap_y); | |||
| s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, | |||
| dest_y + dct_offset + 8, wrap_y); | |||
| s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y); | |||
| s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | |||
| s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset, | |||
| dest_y + dct_offset, wrap_y); | |||
| s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, | |||
| dest_y + dct_offset + 8, wrap_y); | |||
| if (s->flags & CODEC_FLAG_GRAY) { | |||
| skip_dct[4] = 1; | |||
| skip_dct[5] = 1; | |||
| } else { | |||
| s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); | |||
| s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); | |||
| s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); | |||
| s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); | |||
| if (!s->chroma_y_shift) { /* 422 */ | |||
| s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1), | |||
| dest_cb + (dct_offset >> 1), wrap_c); | |||
| s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1), | |||
| dest_cr + (dct_offset >> 1), wrap_c); | |||
| s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1), | |||
| dest_cb + (dct_offset >> 1), wrap_c); | |||
| s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1), | |||
| dest_cr + (dct_offset >> 1), wrap_c); | |||
| } | |||
| } | |||
| /* pre quantization */ | |||
| @@ -0,0 +1,76 @@ | |||
| /* | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include "config.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "avcodec.h" | |||
| #include "pixblockdsp.h" | |||
| #define BIT_DEPTH 16 | |||
| #include "pixblockdsp_template.c" | |||
| #undef BIT_DEPTH | |||
| #define BIT_DEPTH 8 | |||
| #include "pixblockdsp_template.c" | |||
/**
 * Compute the element-wise difference of two 8x8 blocks of 8-bit samples.
 *
 * @param block  destination for the 64 differences, stored row after row
 *               (8 values per row, rows contiguous)
 * @param s1     top-left sample of the minuend block
 * @param s2     top-left sample of the subtrahend block
 * @param stride distance in bytes between consecutive rows of both sources
 */
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* one output row: eight differences s1[col] - s2[col] */
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];

        block += 8;
        s1    += stride;
        s2    += stride;
    }
}
| av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) | |||
| { | |||
| const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; | |||
| c->diff_pixels = diff_pixels_c; | |||
| switch (avctx->bits_per_raw_sample) { | |||
| case 9: | |||
| case 10: | |||
| c->get_pixels = get_pixels_16_c; | |||
| break; | |||
| default: | |||
| c->get_pixels = get_pixels_8_c; | |||
| break; | |||
| } | |||
| if (ARCH_ARM) | |||
| ff_pixblockdsp_init_arm(c, avctx, high_bit_depth); | |||
| if (ARCH_PPC) | |||
| ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth); | |||
| if (ARCH_X86) | |||
| ff_pixblockdsp_init_x86(c, avctx, high_bit_depth); | |||
| } | |||
| @@ -0,0 +1,44 @@ | |||
| /* | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_PIXBLOCKDSP_H | |||
| #define AVCODEC_PIXBLOCKDSP_H | |||
| #include <stdint.h> | |||
| #include "avcodec.h" | |||
/**
 * Function table for the pixel-block routines that feed the forward DCT.
 * It is filled in by ff_pixblockdsp_init().
 */
typedef struct PixblockDSPContext {
    /**
     * Read a block of pixels into 16-bit coefficients.
     * block must be 16-byte aligned and pixels 8-byte aligned;
     * line_size is the distance between source rows.
     */
    void (*get_pixels)(int16_t *block /* align 16 */,
                       const uint8_t *pixels /* align 8 */,
                       int line_size);

    /**
     * Store the difference s1 - s2 of two pixel blocks as 16-bit values.
     * block must be 16-byte aligned, s1 and s2 8-byte aligned;
     * stride is the distance between rows of both sources.
     */
    void (*diff_pixels)(int16_t *block /* align 16 */,
                        const uint8_t *s1 /* align 8 */,
                        const uint8_t *s2 /* align 8 */,
                        int stride);
} PixblockDSPContext;
| void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); | |||
| void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, | |||
| unsigned high_bit_depth); | |||
| void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, | |||
| unsigned high_bit_depth); | |||
| void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, | |||
| unsigned high_bit_depth); | |||
| #endif /* AVCODEC_PIXBLOCKDSP_H */ | |||
| @@ -1,10 +1,4 @@ | |||
| /* | |||
| * DSP utils | |||
| * Copyright (c) 2000, 2001 Fabrice Bellard | |||
| * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
| * | |||
| * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| @@ -22,11 +16,6 @@ | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| /** | |||
| * @file | |||
| * DSP utils | |||
| */ | |||
| #include "bit_depth_template.c" | |||
| static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels, | |||
| @@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o | |||
| OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ | |||
| ppc/mpegvideodsp.o | |||
| OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o | |||
| OBJS-$(CONFIG_PIXBLOCKDSP) += ppc/pixblockdsp.o | |||
| OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o | |||
| OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o | |||
| @@ -402,105 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |||
| return s; | |||
| } | |||
| static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, | |||
| int line_size) | |||
| { | |||
| int i; | |||
| vector unsigned char perm = vec_lvsl(0, pixels); | |||
| const vector unsigned char zero = | |||
| (const vector unsigned char) vec_splat_u8(0); | |||
| for (i = 0; i < 8; i++) { | |||
| /* Read potentially unaligned pixels. | |||
| * We're reading 16 pixels, and actually only want 8, | |||
| * but we simply ignore the extras. */ | |||
| vector unsigned char pixl = vec_ld(0, pixels); | |||
| vector unsigned char pixr = vec_ld(7, pixels); | |||
| vector unsigned char bytes = vec_perm(pixl, pixr, perm); | |||
| // Convert the bytes into shorts. | |||
| vector signed short shorts = (vector signed short) vec_mergeh(zero, | |||
| bytes); | |||
| // Save the data to the block, we assume the block is 16-byte aligned. | |||
| vec_st(shorts, i * 16, (vector signed short *) block); | |||
| pixels += line_size; | |||
| } | |||
| } | |||
| /* Compute s1 - s2 for an 8x8 block of 8-bit pixels (rows are stride bytes | |||
| * apart in both sources) and store the 16-bit differences row by row in block. | |||
| * The loop runs four times and handles two rows per iteration; both permute | |||
| * masks are computed once from the initial s1 / s2 pointers. */ | |||
| static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, | |||
| const uint8_t *s2, int stride) | |||
| { | |||
| int i; | |||
| vector unsigned char perm1 = vec_lvsl(0, s1); | |||
| vector unsigned char perm2 = vec_lvsl(0, s2); | |||
| const vector unsigned char zero = | |||
| (const vector unsigned char) vec_splat_u8(0); | |||
| vector signed short shorts1, shorts2; | |||
| for (i = 0; i < 4; i++) { | |||
| /* Read potentially unaligned pixels. | |||
| * We're reading 16 pixels, and actually only want 8, | |||
| * but we simply ignore the extras. */ | |||
| vector unsigned char pixl = vec_ld(0, s1); | |||
| vector unsigned char pixr = vec_ld(15, s1); | |||
| vector unsigned char bytes = vec_perm(pixl, pixr, perm1); | |||
| // Convert the bytes into shorts. | |||
| shorts1 = (vector signed short) vec_mergeh(zero, bytes); | |||
| // Do the same for the second block of pixels. | |||
| pixl = vec_ld(0, s2); | |||
| pixr = vec_ld(15, s2); | |||
| bytes = vec_perm(pixl, pixr, perm2); | |||
| // Convert the bytes into shorts. | |||
| shorts2 = (vector signed short) vec_mergeh(zero, bytes); | |||
| // Do the subtraction. | |||
| shorts1 = vec_sub(shorts1, shorts2); | |||
| // Save the data to the block, we assume the block is 16-byte aligned. | |||
| vec_st(shorts1, 0, (vector signed short *) block); | |||
| s1 += stride; | |||
| s2 += stride; | |||
| block += 8; | |||
| /* The code below is a copy of the code above... | |||
| * This is a manual unroll. */ | |||
| /* Read potentially unaligned pixels. | |||
| * We're reading 16 pixels, and actually only want 8, | |||
| * but we simply ignore the extras. */ | |||
| pixl = vec_ld(0, s1); | |||
| pixr = vec_ld(15, s1); | |||
| bytes = vec_perm(pixl, pixr, perm1); | |||
| // Convert the bytes into shorts. | |||
| shorts1 = (vector signed short) vec_mergeh(zero, bytes); | |||
| // Do the same for the second block of pixels. | |||
| pixl = vec_ld(0, s2); | |||
| pixr = vec_ld(15, s2); | |||
| bytes = vec_perm(pixl, pixr, perm2); | |||
| // Convert the bytes into shorts. | |||
| shorts2 = (vector signed short) vec_mergeh(zero, bytes); | |||
| // Do the subtraction. | |||
| shorts1 = vec_sub(shorts1, shorts2); | |||
| // Save the data to the block, we assume the block is 16-byte aligned. | |||
| vec_st(shorts1, 0, (vector signed short *) block); | |||
| s1 += stride; | |||
| s2 += stride; | |||
| block += 8; | |||
| } | |||
| } | |||
| static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | |||
| uint8_t *src, int stride, int h) | |||
| { | |||
| @@ -854,12 +755,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | |||
| c->sse[0] = sse16_altivec; | |||
| c->sse[1] = sse8_altivec; | |||
| c->diff_pixels = diff_pixels_altivec; | |||
| if (!high_bit_depth) { | |||
| c->get_pixels = get_pixels_altivec; | |||
| } | |||
| c->hadamard8_diff[0] = hadamard8_diff16_altivec; | |||
| c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | |||
| } | |||
| @@ -0,0 +1,153 @@ | |||
| /* | |||
| * Copyright (c) 2002 Brian Foley | |||
| * Copyright (c) 2002 Dieter Shirley | |||
| * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #if HAVE_ALTIVEC_H | |||
| #include <altivec.h> | |||
| #endif | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/ppc/cpu.h" | |||
| #include "libavutil/ppc/types_altivec.h" | |||
| #include "libavutil/ppc/util_altivec.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/pixblockdsp.h" | |||
| #if HAVE_ALTIVEC | |||
| /* Fetch an 8x8 block of 8-bit pixels and store it as 16-bit values. | |||
|  * | |||
|  * block     destination for 8 rows of 8 int16_t each; written with aligned | |||
|  *           vector stores, so it is expected to be 16-byte aligned | |||
|  * pixels    first source row; may be unaligned | |||
|  * line_size distance in bytes between consecutive source rows | |||
|  * | |||
|  * The realignment mask is derived once from the initial pixels pointer and | |||
|  * applied to all rows (presumably line_size keeps the misalignment constant | |||
|  * from row to row -- confirm against callers). */ | |||
| static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, | |||
|                                int line_size) | |||
| { | |||
|     const vector unsigned char zero = | |||
|         (const vector unsigned char) vec_splat_u8(0); | |||
|     vector unsigned char align_mask = vec_lvsl(0, pixels); | |||
|     int row; | |||
|  | |||
|     for (row = 0; row < 8; row++, pixels += line_size) { | |||
|         /* Two aligned loads cover the 8 wanted bytes wherever they start; | |||
|          * the surplus bytes that come along are simply dropped. */ | |||
|         vector unsigned char lo  = vec_ld(0, pixels); | |||
|         vector unsigned char hi  = vec_ld(7, pixels); | |||
|         vector unsigned char raw = vec_perm(lo, hi, align_mask); | |||
|  | |||
|         /* Pair every pixel byte with a zero byte to form 16-bit values. */ | |||
|         vector signed short widened = | |||
|             (vector signed short) vec_mergeh(zero, raw); | |||
|  | |||
|         /* Each output row occupies 16 bytes of the destination block. */ | |||
|         vec_st(widened, row * 16, (vector signed short *) block); | |||
|     } | |||
| } | |||
| /* Store the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit | |||
|  * pixels as 16-bit values. | |||
|  * | |||
|  * block  destination for 8 rows of 8 int16_t each; written with aligned | |||
|  *        vector stores, so it is expected to be 16-byte aligned | |||
|  * s1     first source block (minuend); may be unaligned | |||
|  * s2     second source block (subtrahend); may be unaligned | |||
|  * stride distance in bytes between consecutive rows, used for both sources | |||
|  * | |||
|  * Both realignment masks are derived once from the initial pointers and | |||
|  * reused for every row. */ | |||
| static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, | |||
|                                 const uint8_t *s2, int stride) | |||
| { | |||
|     const vector unsigned char zero = | |||
|         (const vector unsigned char) vec_splat_u8(0); | |||
|     vector unsigned char mask1 = vec_lvsl(0, s1); | |||
|     vector unsigned char mask2 = vec_lvsl(0, s2); | |||
|     int row; | |||
|  | |||
|     for (row = 0; row < 8; row++) { | |||
|         /* Gather the row of s1: two aligned loads, then realign. Sixteen | |||
|          * bytes come out of the permute but only the first 8 are used. */ | |||
|         vector unsigned char lo  = vec_ld(0, s1); | |||
|         vector unsigned char hi  = vec_ld(15, s1); | |||
|         vector unsigned char raw = vec_perm(lo, hi, mask1); | |||
|         vector signed short wide1 = | |||
|             (vector signed short) vec_mergeh(zero, raw); | |||
|         vector signed short wide2; | |||
|  | |||
|         /* Same procedure for the matching row of s2. */ | |||
|         lo    = vec_ld(0, s2); | |||
|         hi    = vec_ld(15, s2); | |||
|         raw   = vec_perm(lo, hi, mask2); | |||
|         wide2 = (vector signed short) vec_mergeh(zero, raw); | |||
|  | |||
|         /* Subtract and write one 16-byte output row. */ | |||
|         vec_st(vec_sub(wide1, wide2), 0, (vector signed short *) block); | |||
|  | |||
|         s1    += stride; | |||
|         s2    += stride; | |||
|         block += 8; | |||
|     } | |||
| } | |||
| #endif /* HAVE_ALTIVEC */ | |||
| /* Install the AltiVec pixel-block routines into c when the build has AltiVec | |||
|  * support and the running CPU reports it. | |||
|  * | |||
|  * c              context whose function pointers are overridden | |||
|  * avctx          not used by this function | |||
|  * high_bit_depth when non-zero, get_pixels is left untouched; diff_pixels | |||
|  *                is installed regardless of this flag | |||
|  * | |||
|  * Without HAVE_ALTIVEC the function body is empty. */ | |||
| av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, | |||
|                                      AVCodecContext *avctx, | |||
|                                      unsigned high_bit_depth) | |||
| { | |||
| #if HAVE_ALTIVEC | |||
|     if (PPC_ALTIVEC(av_get_cpu_flags())) { | |||
|         if (!high_bit_depth) | |||
|             c->get_pixels = get_pixels_altivec; | |||
|         c->diff_pixels = diff_pixels_altivec; | |||
|     } | |||
| #endif /* HAVE_ALTIVEC */ | |||
| } | |||
| @@ -26,6 +26,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ | |||
| x86/mpegvideodsp.o | |||
| OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \ | |||
| x86/mpegvideoencdsp_init.o | |||
| OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp_init.o | |||
| OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o | |||
| OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o | |||
| OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | |||
| @@ -93,6 +94,7 @@ YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \ | |||
| YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o | |||
| YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | |||
| YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o | |||
| YASM-OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp.o | |||
| YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \ | |||
| x86/fpel.o \ | |||
| x86/qpel.o | |||
| @@ -334,87 +334,3 @@ cglobal sse16, 5, 5, 8 | |||
| paddd m7, m1 | |||
| movd eax, m7 ; return value | |||
| RET | |||
| INIT_MMX mmx | |||
| ; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size) | |||
| ; Expands rows of 8 bytes into 16-bit words and stores them in block. | |||
| ; Presumably r0 = block, r1 = pixels, r2 = line_size (cglobal argument order). | |||
| ; Each loop pass reads two rows ([r1] and [r1+r2]) and writes 32 bytes; r3 runs | |||
| ; from -128 up to 0 in steps of 32, so the loop body executes four times. | |||
| cglobal get_pixels, 3,4 | |||
| movsxdifnidn r2, r2d | |||
| ; Point r0 at the end of the 128-byte output so that r0+r3 starts at block. | |||
| add r0, 128 | |||
| mov r3, -128 | |||
| ; m7 = 0, the second operand of the byte->word unpacks below. | |||
| pxor m7, m7 | |||
| .loop: | |||
| mova m0, [r1] | |||
| mova m2, [r1+r2] | |||
| mova m1, m0 | |||
| mova m3, m2 | |||
| ; Interleave with zero: low half of each row into m0/m2, high half into m1/m3. | |||
| punpcklbw m0, m7 | |||
| punpckhbw m1, m7 | |||
| punpcklbw m2, m7 | |||
| punpckhbw m3, m7 | |||
| mova [r0+r3+ 0], m0 | |||
| mova [r0+r3+ 8], m1 | |||
| mova [r0+r3+16], m2 | |||
| mova [r0+r3+24], m3 | |||
| ; Advance the source by two rows and the output index by 32 bytes. | |||
| lea r1, [r1+r2*2] | |||
| add r3, 32 | |||
| ; Continue while the index is still negative. | |||
| js .loop | |||
| REP_RET | |||
| INIT_XMM sse2 | |||
| ; SSE2 variant of get_pixels: fully unrolled, two groups of four rows. | |||
| ; Presumably r0 = block, r1 = pixels, r2 = line_size (cglobal argument order). | |||
| ; Each row is expanded with punpcklbw against zero and stored 0x10 bytes after | |||
| ; the previous one, covering offsets 0x00..0x70 of block. | |||
| cglobal get_pixels, 3, 4 | |||
| movsxdifnidn r2, r2d | |||
| ; r3 = 3 * line_size, used to address the fourth row of each group. | |||
| lea r3, [r2*3] | |||
| ; m4 = 0, the second operand of the byte->word unpacks below. | |||
| pxor m4, m4 | |||
| ; Rows 0-3 (movh is an x86inc macro; presumably a half-register load here). | |||
| movh m0, [r1] | |||
| movh m1, [r1+r2] | |||
| movh m2, [r1+r2*2] | |||
| movh m3, [r1+r3] | |||
| ; Move the source pointer down four rows for the second group. | |||
| lea r1, [r1+r2*4] | |||
| punpcklbw m0, m4 | |||
| punpcklbw m1, m4 | |||
| punpcklbw m2, m4 | |||
| punpcklbw m3, m4 | |||
| mova [r0], m0 | |||
| mova [r0+0x10], m1 | |||
| mova [r0+0x20], m2 | |||
| mova [r0+0x30], m3 | |||
| ; Rows 4-7. | |||
| movh m0, [r1] | |||
| movh m1, [r1+r2*1] | |||
| movh m2, [r1+r2*2] | |||
| movh m3, [r1+r3] | |||
| punpcklbw m0, m4 | |||
| punpcklbw m1, m4 | |||
| punpcklbw m2, m4 | |||
| punpcklbw m3, m4 | |||
| mova [r0+0x40], m0 | |||
| mova [r0+0x50], m1 | |||
| mova [r0+0x60], m2 | |||
| mova [r0+0x70], m3 | |||
| RET | |||
| INIT_MMX mmx | |||
| ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, | |||
| ; int stride); | |||
| ; Writes the 16-bit differences s1 - s2 of rows of 8 bytes into block. | |||
| ; Presumably r0 = block, r1 = s1, r2 = s2, r3 = stride (cglobal argument order). | |||
| ; One row is handled per loop pass; r4 runs from -128 to 0 in steps of 16, so | |||
| ; the loop body executes eight times. | |||
| cglobal diff_pixels, 4,5 | |||
| movsxdifnidn r3, r3d | |||
| ; m7 = 0, the second operand of the byte->word unpacks below. | |||
| pxor m7, m7 | |||
| ; Point r0 at the end of the 128-byte output so that r0+r4 starts at block. | |||
| add r0, 128 | |||
| mov r4, -128 | |||
| .loop: | |||
| mova m0, [r1] | |||
| mova m2, [r2] | |||
| mova m1, m0 | |||
| mova m3, m2 | |||
| ; Expand both rows to words: low halves in m0/m2, high halves in m1/m3. | |||
| punpcklbw m0, m7 | |||
| punpckhbw m1, m7 | |||
| punpcklbw m2, m7 | |||
| punpckhbw m3, m7 | |||
| ; Word-wise s1 - s2. | |||
| psubw m0, m2 | |||
| psubw m1, m3 | |||
| mova [r0+r4+0], m0 | |||
| mova [r0+r4+8], m1 | |||
| ; Next row in both sources, next 16 bytes of output. | |||
| add r1, r3 | |||
| add r2, r3 | |||
| add r4, 16 | |||
| ; Continue until the index reaches zero. | |||
| jne .loop | |||
| REP_RET | |||
| @@ -30,11 +30,6 @@ | |||
| #include "libavcodec/mpegvideo.h" | |||
| #include "dsputil_x86.h" | |||
| void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); | |||
| void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); | |||
| void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, | |||
| int stride); | |||
| #if HAVE_INLINE_ASM | |||
| static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |||
| @@ -823,16 +818,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (EXTERNAL_MMX(cpu_flags)) { | |||
| if (!high_bit_depth) | |||
| c->get_pixels = ff_get_pixels_mmx; | |||
| c->diff_pixels = ff_diff_pixels_mmx; | |||
| } | |||
| if (EXTERNAL_SSE2(cpu_flags)) | |||
| if (!high_bit_depth) | |||
| c->get_pixels = ff_get_pixels_sse2; | |||
| #if HAVE_INLINE_ASM | |||
| if (INLINE_MMX(cpu_flags)) { | |||
| c->sum_abs_dctelem = sum_abs_dctelem_mmx; | |||
| @@ -0,0 +1,110 @@ | |||
| ;***************************************************************************** | |||
| ;* SIMD-optimized pixel operations | |||
| ;***************************************************************************** | |||
| ;* Copyright (c) 2000, 2001 Fabrice Bellard | |||
| ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
| ;* | |||
| ;* This file is part of Libav. | |||
| ;* | |||
| ;* Libav is free software; you can redistribute it and/or | |||
| ;* modify it under the terms of the GNU Lesser General Public | |||
| ;* License as published by the Free Software Foundation; either | |||
| ;* version 2.1 of the License, or (at your option) any later version. | |||
| ;* | |||
| ;* Libav is distributed in the hope that it will be useful, | |||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| ;* Lesser General Public License for more details. | |||
| ;* | |||
| ;* You should have received a copy of the GNU Lesser General Public | |||
| ;* License along with Libav; if not, write to the Free Software | |||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| ;***************************************************************************** | |||
| %include "libavutil/x86/x86util.asm" | |||
| SECTION .text | |||
| INIT_MMX mmx | |||
| ; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size) | |||
| ; Expands rows of 8 bytes into 16-bit words and stores them in block. | |||
| ; Presumably r0 = block, r1 = pixels, r2 = line_size (cglobal argument order). | |||
| ; Each loop pass reads two rows ([r1] and [r1+r2]) and writes 32 bytes; r3 runs | |||
| ; from -128 up to 0 in steps of 32, so the loop body executes four times. | |||
| cglobal get_pixels, 3,4 | |||
| movsxdifnidn r2, r2d | |||
| ; Point r0 at the end of the 128-byte output so that r0+r3 starts at block. | |||
| add r0, 128 | |||
| mov r3, -128 | |||
| ; m7 = 0, the second operand of the byte->word unpacks below. | |||
| pxor m7, m7 | |||
| .loop: | |||
| mova m0, [r1] | |||
| mova m2, [r1+r2] | |||
| mova m1, m0 | |||
| mova m3, m2 | |||
| ; Interleave with zero: low half of each row into m0/m2, high half into m1/m3. | |||
| punpcklbw m0, m7 | |||
| punpckhbw m1, m7 | |||
| punpcklbw m2, m7 | |||
| punpckhbw m3, m7 | |||
| mova [r0+r3+ 0], m0 | |||
| mova [r0+r3+ 8], m1 | |||
| mova [r0+r3+16], m2 | |||
| mova [r0+r3+24], m3 | |||
| ; Advance the source by two rows and the output index by 32 bytes. | |||
| lea r1, [r1+r2*2] | |||
| add r3, 32 | |||
| ; Continue while the index is still negative. | |||
| js .loop | |||
| REP_RET | |||
| INIT_XMM sse2 | |||
| ; SSE2 variant of get_pixels: fully unrolled, two groups of four rows. | |||
| ; Presumably r0 = block, r1 = pixels, r2 = line_size (cglobal argument order). | |||
| ; Each row is expanded with punpcklbw against zero and stored 0x10 bytes after | |||
| ; the previous one, covering offsets 0x00..0x70 of block. | |||
| cglobal get_pixels, 3, 4 | |||
| movsxdifnidn r2, r2d | |||
| ; r3 = 3 * line_size, used to address the fourth row of each group. | |||
| lea r3, [r2*3] | |||
| ; m4 = 0, the second operand of the byte->word unpacks below. | |||
| pxor m4, m4 | |||
| ; Rows 0-3 (movh is an x86inc macro; presumably a half-register load here). | |||
| movh m0, [r1] | |||
| movh m1, [r1+r2] | |||
| movh m2, [r1+r2*2] | |||
| movh m3, [r1+r3] | |||
| ; Move the source pointer down four rows for the second group. | |||
| lea r1, [r1+r2*4] | |||
| punpcklbw m0, m4 | |||
| punpcklbw m1, m4 | |||
| punpcklbw m2, m4 | |||
| punpcklbw m3, m4 | |||
| mova [r0], m0 | |||
| mova [r0+0x10], m1 | |||
| mova [r0+0x20], m2 | |||
| mova [r0+0x30], m3 | |||
| ; Rows 4-7. | |||
| movh m0, [r1] | |||
| movh m1, [r1+r2*1] | |||
| movh m2, [r1+r2*2] | |||
| movh m3, [r1+r3] | |||
| punpcklbw m0, m4 | |||
| punpcklbw m1, m4 | |||
| punpcklbw m2, m4 | |||
| punpcklbw m3, m4 | |||
| mova [r0+0x40], m0 | |||
| mova [r0+0x50], m1 | |||
| mova [r0+0x60], m2 | |||
| mova [r0+0x70], m3 | |||
| RET | |||
| INIT_MMX mmx | |||
| ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, | |||
| ; int stride); | |||
| ; Writes the 16-bit differences s1 - s2 of rows of 8 bytes into block. | |||
| ; Presumably r0 = block, r1 = s1, r2 = s2, r3 = stride (cglobal argument order). | |||
| ; One row is handled per loop pass; r4 runs from -128 to 0 in steps of 16, so | |||
| ; the loop body executes eight times. | |||
| cglobal diff_pixels, 4,5 | |||
| movsxdifnidn r3, r3d | |||
| ; m7 = 0, the second operand of the byte->word unpacks below. | |||
| pxor m7, m7 | |||
| ; Point r0 at the end of the 128-byte output so that r0+r4 starts at block. | |||
| add r0, 128 | |||
| mov r4, -128 | |||
| .loop: | |||
| mova m0, [r1] | |||
| mova m2, [r2] | |||
| mova m1, m0 | |||
| mova m3, m2 | |||
| ; Expand both rows to words: low halves in m0/m2, high halves in m1/m3. | |||
| punpcklbw m0, m7 | |||
| punpckhbw m1, m7 | |||
| punpcklbw m2, m7 | |||
| punpckhbw m3, m7 | |||
| ; Word-wise s1 - s2. | |||
| psubw m0, m2 | |||
| psubw m1, m3 | |||
| mova [r0+r4+0], m0 | |||
| mova [r0+r4+8], m1 | |||
| ; Next row in both sources, next 16 bytes of output. | |||
| add r1, r3 | |||
| add r2, r3 | |||
| add r4, 16 | |||
| ; Continue until the index reaches zero. | |||
| jne .loop | |||
| REP_RET | |||
| @@ -0,0 +1,47 @@ | |||
| /* | |||
| * SIMD-optimized pixel operations | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/pixblockdsp.h" | |||
| /* Prototypes of the externally assembled pixel-block routines installed by | |||
| * ff_pixblockdsp_init_x86() below (presumably the cglobal get_pixels / | |||
| * diff_pixels entry points of x86/pixblockdsp.asm -- confirm symbol naming). */ | |||
| void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); | |||
| void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); | |||
| void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, | |||
| int stride); | |||
| /* Install the x86 assembly pixel-block routines that the running CPU supports. | |||
|  * | |||
|  * c              context whose function pointers are overridden | |||
|  * avctx          not used by this function | |||
|  * high_bit_depth when non-zero, get_pixels is left untouched; diff_pixels | |||
|  *                is still installed when MMX is available | |||
|  * | |||
|  * The SSE2 check runs after the MMX one, so the SSE2 get_pixels replaces the | |||
|  * MMX version when both are available. */ | |||
| av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, | |||
|                                      AVCodecContext *avctx, | |||
|                                      unsigned high_bit_depth) | |||
| { | |||
|     int cpu_flags = av_get_cpu_flags(); | |||
|     const int low_bit_depth = !high_bit_depth; | |||
|  | |||
|     if (EXTERNAL_MMX(cpu_flags)) { | |||
|         c->diff_pixels = ff_diff_pixels_mmx; | |||
|         if (low_bit_depth) | |||
|             c->get_pixels = ff_get_pixels_mmx; | |||
|     } | |||
|  | |||
|     if (low_bit_depth && EXTERNAL_SSE2(cpu_flags)) | |||
|         c->get_pixels = ff_get_pixels_sse2; | |||
| } | |||