* commit 'f46bb608d9d76c543e4929dc8cffe36b84bd789e': dsputil: Split off pixel block routines into their own context. Conflicts: configure, libavcodec/dsputil.c, libavcodec/mpegvideo_enc.c, libavcodec/pixblockdsp_template.c, libavcodec/x86/dsputilenc.asm, libavcodec/x86/dsputilenc_mmx.c. Merged-by: Michael Niedermayer <michaelni@gmx.at> (tags/n2.3)
@@ -1829,6 +1829,7 @@ CONFIG_EXTRA=" | |||||
mpegvideo | mpegvideo | ||||
mpegvideoenc | mpegvideoenc | ||||
nettle | nettle | ||||
pixblockdsp | |||||
qpeldsp | qpeldsp | ||||
rangecoder | rangecoder | ||||
riffdec | riffdec | ||||
@@ -1997,7 +1998,7 @@ threads_if_any="$THREADS_LIST" | |||||
# subsystems | # subsystems | ||||
dct_select="rdft" | dct_select="rdft" | ||||
dsputil_select="fdctdsp idctdsp" | |||||
dsputil_select="fdctdsp idctdsp pixblockdsp" | |||||
error_resilience_select="dsputil" | error_resilience_select="dsputil" | ||||
frame_thread_encoder_deps="encoders threads" | frame_thread_encoder_deps="encoders threads" | ||||
intrax8_select="error_resilience" | intrax8_select="error_resilience" | ||||
@@ -2007,7 +2008,7 @@ mpeg_er_select="error_resilience" | |||||
mpegaudio_select="mpegaudiodsp" | mpegaudio_select="mpegaudiodsp" | ||||
mpegaudiodsp_select="dct" | mpegaudiodsp_select="dct" | ||||
mpegvideo_select="blockdsp dsputil h264chroma hpeldsp idctdsp videodsp" | mpegvideo_select="blockdsp dsputil h264chroma hpeldsp idctdsp videodsp" | ||||
mpegvideoenc_select="dsputil mpegvideo qpeldsp" | |||||
mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp" | |||||
# decoders / encoders | # decoders / encoders | ||||
aac_decoder_select="mdct sinewin" | aac_decoder_select="mdct sinewin" | ||||
@@ -2026,9 +2027,9 @@ amv_decoder_select="sp5x_decoder exif" | |||||
amv_encoder_select="aandcttables mpegvideoenc" | amv_encoder_select="aandcttables mpegvideoenc" | ||||
ape_decoder_select="bswapdsp llauddsp" | ape_decoder_select="bswapdsp llauddsp" | ||||
asv1_decoder_select="blockdsp bswapdsp idctdsp" | asv1_decoder_select="blockdsp bswapdsp idctdsp" | ||||
asv1_encoder_select="bswapdsp dsputil fdctdsp" | |||||
asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" | |||||
asv2_decoder_select="blockdsp bswapdsp idctdsp" | asv2_decoder_select="blockdsp bswapdsp idctdsp" | ||||
asv2_encoder_select="bswapdsp dsputil fdctdsp" | |||||
asv2_encoder_select="bswapdsp fdctdsp pixblockdsp" | |||||
atrac1_decoder_select="mdct sinewin" | atrac1_decoder_select="mdct sinewin" | ||||
atrac3_decoder_select="mdct" | atrac3_decoder_select="mdct" | ||||
atrac3p_decoder_select="mdct sinewin" | atrac3p_decoder_select="mdct sinewin" | ||||
@@ -2045,9 +2046,9 @@ cscd_decoder_suggest="zlib" | |||||
dca_decoder_select="mdct" | dca_decoder_select="mdct" | ||||
dirac_decoder_select="dsputil dwt golomb videodsp" | dirac_decoder_select="dsputil dwt golomb videodsp" | ||||
dnxhd_decoder_select="blockdsp idctdsp" | dnxhd_decoder_select="blockdsp idctdsp" | ||||
dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc" | |||||
dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp" | |||||
dvvideo_decoder_select="dvprofile idctdsp" | dvvideo_decoder_select="dvprofile idctdsp" | ||||
dvvideo_encoder_select="dsputil dvprofile fdctdsp" | |||||
dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp" | |||||
dxa_decoder_select="zlib" | dxa_decoder_select="zlib" | ||||
eac3_decoder_select="ac3_decoder" | eac3_decoder_select="ac3_decoder" | ||||
eac3_encoder_select="ac3_encoder" | eac3_encoder_select="ac3_encoder" | ||||
@@ -82,6 +82,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ | |||||
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ | OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ | ||||
motion_est.o ratecontrol.o \ | motion_est.o ratecontrol.o \ | ||||
mpegvideoencdsp.o | mpegvideoencdsp.o | ||||
OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o | |||||
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o | OBJS-$(CONFIG_QPELDSP) += qpeldsp.o | ||||
OBJS-$(CONFIG_RANGECODER) += rangecoder.o | OBJS-$(CONFIG_RANGECODER) += rangecoder.o | ||||
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | ||||
@@ -24,6 +24,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o | |||||
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o | OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o | ||||
OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o | OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o | ||||
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o | OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o | ||||
OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_init_arm.o | |||||
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o | OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o | ||||
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o | OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o | ||||
@@ -63,6 +64,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \ | |||||
arm/simple_idct_armv6.o | arm/simple_idct_armv6.o | ||||
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | ||||
ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o | ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o | ||||
ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_armv6.o | |||||
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o | ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o | ||||
ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o | ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o | ||||
@@ -20,61 +20,6 @@ | |||||
#include "libavutil/arm/asm.S" | #include "libavutil/arm/asm.S" | ||||
/*
 * ff_get_pixels_armv6: widen an 8x8 block of bytes to 16-bit values.
 * Per the C prototype in this change:
 *   r0 = block (int16_t destination), r1 = pixels (uint8_t source), r2 = stride.
 * Eight rows are processed; each row reads 8 bytes and stores 8 halfwords.
 */
function ff_get_pixels_armv6, export=1 | |||||
/* Preload the following source row. */
pld [r1, r2] | |||||
push {r4-r8, lr} | |||||
/* lr is reused as the row counter (8 rows). */
mov lr, #8 | |||||
1: | |||||
/* ldrd_post is a macro defined outside this view; presumably it loads r4/r5
 * from [r1] and then advances r1 by r2 -- confirm in libavutil/arm/asm.S. */
ldrd_post r4, r5, r1, r2 | |||||
/* Decrement the counter early; nothing below touches the flags before bgt. */
subs lr, lr, #1 | |||||
/* r6 = bytes 0 and 2 of r4, each zero-extended to a halfword. */
uxtb16 r6, r4 | |||||
/* r4 = bytes 1 and 3 of r4, each zero-extended to a halfword. */
uxtb16 r4, r4, ror #8 | |||||
/* Same split for the second word: r12 = bytes 0/2, r8 = bytes 1/3 of r5. */
uxtb16 r12, r5 | |||||
uxtb16 r8, r5, ror #8 | |||||
pld [r1, r2] | |||||
/* Re-interleave into source order: r5 = {byte0, byte1}, r6 = {byte2, byte3}. */
pkhbt r5, r6, r4, lsl #16 | |||||
pkhtb r6, r4, r6, asr #16 | |||||
/* r7 = {byte4, byte5}, r12 = {byte6, byte7}. */
pkhbt r7, r12, r8, lsl #16 | |||||
pkhtb r12, r8, r12, asr #16 | |||||
/* Store the 8 halfwords of this row and advance r0 by 16 bytes. */
stm r0!, {r5,r6,r7,r12} | |||||
/* Loop while the row counter is still positive. */
bgt 1b | |||||
/* Restore saved registers and return by popping the saved lr into pc. */
pop {r4-r8, pc} | |||||
endfunc | |||||
/*
 * ff_diff_pixels_armv6: 8x8 block difference s1 - s2, stored as 16-bit values.
 * Per the C prototype in this change:
 *   r0 = block (int16_t destination), r1 = s1, r2 = s2, r3 = stride.
 * Eight rows are processed; each row reads 8 bytes from each source and
 * stores 8 halfword differences.
 */
function ff_diff_pixels_armv6, export=1 | |||||
/* Preload the following row of both sources. */
pld [r1, r3] | |||||
pld [r2, r3] | |||||
push {r4-r9, lr} | |||||
/* lr is reused as the row counter (8 rows). */
mov lr, #8 | |||||
1: | |||||
/* ldrd_post is a macro defined outside this view; presumably it loads two
 * words from the pointer and then advances it by r3 -- confirm in asm.S.
 * r4/r5 come from s1, r6/r7 from s2. */
ldrd_post r4, r5, r1, r3 | |||||
ldrd_post r6, r7, r2, r3 | |||||
/* Split the first word of each source into even bytes (0,2) and odd bytes
 * (1,3), zero-extended to halfwords: r8/r4 for s1, r9/r6 for s2. */
uxtb16 r8, r4 | |||||
uxtb16 r4, r4, ror #8 | |||||
uxtb16 r9, r6 | |||||
uxtb16 r6, r6, ror #8 | |||||
pld [r1, r3] | |||||
/* Per-halfword subtraction: r9 = even diffs (0,2), r6 = odd diffs (1,3). */
ssub16 r9, r8, r9 | |||||
ssub16 r6, r4, r6 | |||||
/* Start on the second s1 word: r8 = bytes 0/2, r5 = bytes 1/3 of r5. */
uxtb16 r8, r5 | |||||
uxtb16 r5, r5, ror #8 | |||||
pld [r2, r3] | |||||
/* Re-interleave: r4 = {diff0, diff1}, r6 = {diff2, diff3}. */
pkhbt r4, r9, r6, lsl #16 | |||||
pkhtb r6, r6, r9, asr #16 | |||||
/* Second s2 word: r9 = bytes 0/2, r7 = bytes 1/3 of r7. */
uxtb16 r9, r7 | |||||
uxtb16 r7, r7, ror #8 | |||||
/* r9 = even diffs (4,6), r5 = odd diffs (5,7). */
ssub16 r9, r8, r9 | |||||
ssub16 r5, r5, r7 | |||||
/* Decrement the row counter; the flags set here are consumed by bgt below. */
subs lr, lr, #1 | |||||
/* r8 = {diff4, diff5}, r9 = {diff6, diff7}. */
pkhbt r8, r9, r5, lsl #16 | |||||
pkhtb r9, r5, r9, asr #16 | |||||
/* Store the 8 halfword differences of this row and advance r0 by 16 bytes. */
stm r0!, {r4,r6,r8,r9} | |||||
bgt 1b | |||||
/* Restore saved registers and return by popping the saved lr into pc. */
pop {r4-r9, pc} | |||||
endfunc | |||||
function ff_pix_abs16_armv6, export=1 | function ff_pix_abs16_armv6, export=1 | ||||
ldr r0, [sp] | ldr r0, [sp] | ||||
push {r4-r9, lr} | push {r4-r9, lr} | ||||
@@ -26,10 +26,6 @@ | |||||
#include "libavcodec/mpegvideo.h" | #include "libavcodec/mpegvideo.h" | ||||
#include "dsputil_arm.h" | #include "dsputil_arm.h" | ||||
void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); | |||||
void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, | |||||
const uint8_t *s2, int stride); | |||||
int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, | int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, | ||||
int line_size, int h); | int line_size, int h); | ||||
int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, | int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, | ||||
@@ -46,10 +42,6 @@ int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, | |||||
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, | av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, | ||||
unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
{ | { | ||||
if (!high_bit_depth) | |||||
c->get_pixels = ff_get_pixels_armv6; | |||||
c->diff_pixels = ff_diff_pixels_armv6; | |||||
c->pix_abs[0][0] = ff_pix_abs16_armv6; | c->pix_abs[0][0] = ff_pix_abs16_armv6; | ||||
c->pix_abs[0][1] = ff_pix_abs16_x2_armv6; | c->pix_abs[0][1] = ff_pix_abs16_x2_armv6; | ||||
c->pix_abs[0][2] = ff_pix_abs16_y2_armv6; | c->pix_abs[0][2] = ff_pix_abs16_y2_armv6; | ||||
@@ -0,0 +1,76 @@ | |||||
/* | |||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||||
* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "libavutil/arm/asm.S" | |||||
/*
 * ff_get_pixels_armv6: widen an 8x8 block of bytes to 16-bit values.
 * Per the C prototype in this change:
 *   r0 = block (int16_t destination), r1 = pixels (uint8_t source), r2 = stride.
 * Eight rows are processed; each row reads 8 bytes and stores 8 halfwords.
 */
function ff_get_pixels_armv6, export=1 | |||||
/* Preload the following source row. */
pld [r1, r2] | |||||
push {r4-r8, lr} | |||||
/* lr is reused as the row counter (8 rows). */
mov lr, #8 | |||||
1: | |||||
/* ldrd_post is a macro defined outside this view; presumably it loads r4/r5
 * from [r1] and then advances r1 by r2 -- confirm in libavutil/arm/asm.S. */
ldrd_post r4, r5, r1, r2 | |||||
/* Decrement the counter early; nothing below touches the flags before bgt. */
subs lr, lr, #1 | |||||
/* r6 = bytes 0 and 2 of r4, each zero-extended to a halfword. */
uxtb16 r6, r4 | |||||
/* r4 = bytes 1 and 3 of r4, each zero-extended to a halfword. */
uxtb16 r4, r4, ror #8 | |||||
/* Same split for the second word: r12 = bytes 0/2, r8 = bytes 1/3 of r5. */
uxtb16 r12, r5 | |||||
uxtb16 r8, r5, ror #8 | |||||
pld [r1, r2] | |||||
/* Re-interleave into source order: r5 = {byte0, byte1}, r6 = {byte2, byte3}. */
pkhbt r5, r6, r4, lsl #16 | |||||
pkhtb r6, r4, r6, asr #16 | |||||
/* r7 = {byte4, byte5}, r12 = {byte6, byte7}. */
pkhbt r7, r12, r8, lsl #16 | |||||
pkhtb r12, r8, r12, asr #16 | |||||
/* Store the 8 halfwords of this row and advance r0 by 16 bytes. */
stm r0!, {r5,r6,r7,r12} | |||||
/* Loop while the row counter is still positive. */
bgt 1b | |||||
/* Restore saved registers and return by popping the saved lr into pc. */
pop {r4-r8, pc} | |||||
endfunc | |||||
/*
 * ff_diff_pixels_armv6: 8x8 block difference s1 - s2, stored as 16-bit values.
 * Per the C prototype in this change:
 *   r0 = block (int16_t destination), r1 = s1, r2 = s2, r3 = stride.
 * Eight rows are processed; each row reads 8 bytes from each source and
 * stores 8 halfword differences.
 */
function ff_diff_pixels_armv6, export=1 | |||||
/* Preload the following row of both sources. */
pld [r1, r3] | |||||
pld [r2, r3] | |||||
push {r4-r9, lr} | |||||
/* lr is reused as the row counter (8 rows). */
mov lr, #8 | |||||
1: | |||||
/* ldrd_post is a macro defined outside this view; presumably it loads two
 * words from the pointer and then advances it by r3 -- confirm in asm.S.
 * r4/r5 come from s1, r6/r7 from s2. */
ldrd_post r4, r5, r1, r3 | |||||
ldrd_post r6, r7, r2, r3 | |||||
/* Split the first word of each source into even bytes (0,2) and odd bytes
 * (1,3), zero-extended to halfwords: r8/r4 for s1, r9/r6 for s2. */
uxtb16 r8, r4 | |||||
uxtb16 r4, r4, ror #8 | |||||
uxtb16 r9, r6 | |||||
uxtb16 r6, r6, ror #8 | |||||
pld [r1, r3] | |||||
/* Per-halfword subtraction: r9 = even diffs (0,2), r6 = odd diffs (1,3). */
ssub16 r9, r8, r9 | |||||
ssub16 r6, r4, r6 | |||||
/* Start on the second s1 word: r8 = bytes 0/2, r5 = bytes 1/3 of r5. */
uxtb16 r8, r5 | |||||
uxtb16 r5, r5, ror #8 | |||||
pld [r2, r3] | |||||
/* Re-interleave: r4 = {diff0, diff1}, r6 = {diff2, diff3}. */
pkhbt r4, r9, r6, lsl #16 | |||||
pkhtb r6, r6, r9, asr #16 | |||||
/* Second s2 word: r9 = bytes 0/2, r7 = bytes 1/3 of r7. */
uxtb16 r9, r7 | |||||
uxtb16 r7, r7, ror #8 | |||||
/* r9 = even diffs (4,6), r5 = odd diffs (5,7). */
ssub16 r9, r8, r9 | |||||
ssub16 r5, r5, r7 | |||||
/* Decrement the row counter; the flags set here are consumed by bgt below. */
subs lr, lr, #1 | |||||
/* r8 = {diff4, diff5}, r9 = {diff6, diff7}. */
pkhbt r8, r9, r5, lsl #16 | |||||
pkhtb r9, r5, r9, asr #16 | |||||
/* Store the 8 halfword differences of this row and advance r0 by 16 bytes. */
stm r0!, {r4,r6,r8,r9} | |||||
bgt 1b | |||||
/* Restore saved registers and return by popping the saved lr into pc. */
pop {r4-r9, pc} | |||||
endfunc |
@@ -0,0 +1,42 @@ | |||||
/* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include <stdint.h> | |||||
#include "libavutil/attributes.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/arm/cpu.h" | |||||
#include "libavcodec/avcodec.h" | |||||
#include "libavcodec/pixblockdsp.h" | |||||
void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); | |||||
void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, | |||||
const uint8_t *s2, int stride); | |||||
av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c, | |||||
AVCodecContext *avctx, | |||||
unsigned high_bit_depth) | |||||
{ | |||||
int cpu_flags = av_get_cpu_flags(); | |||||
if (have_armv6(cpu_flags)) { | |||||
if (!high_bit_depth) | |||||
c->get_pixels = ff_get_pixels_armv6; | |||||
c->diff_pixels = ff_diff_pixels_armv6; | |||||
} | |||||
} |
@@ -33,19 +33,19 @@ | |||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | #include "blockdsp.h" | ||||
#include "bswapdsp.h" | #include "bswapdsp.h" | ||||
#include "dsputil.h" | |||||
#include "fdctdsp.h" | #include "fdctdsp.h" | ||||
#include "idctdsp.h" | #include "idctdsp.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "pixblockdsp.h" | |||||
#include "put_bits.h" | #include "put_bits.h" | ||||
typedef struct ASV1Context{ | typedef struct ASV1Context{ | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
BswapDSPContext bbdsp; | BswapDSPContext bbdsp; | ||||
DSPContext dsp; | |||||
FDCTDSPContext fdsp; | FDCTDSPContext fdsp; | ||||
IDCTDSPContext idsp; | IDCTDSPContext idsp; | ||||
PixblockDSPContext pdsp; | |||||
PutBitContext pb; | PutBitContext pb; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
ScanTable scantable; | ScanTable scantable; | ||||
@@ -160,16 +160,16 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame, | |||||
uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; | uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; | ||||
uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; | uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; | ||||
a->dsp.get_pixels(block[0], ptr_y , linesize); | |||||
a->dsp.get_pixels(block[1], ptr_y + 8, linesize); | |||||
a->dsp.get_pixels(block[2], ptr_y + 8*linesize , linesize); | |||||
a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize); | |||||
a->pdsp.get_pixels(block[0], ptr_y, linesize); | |||||
a->pdsp.get_pixels(block[1], ptr_y + 8, linesize); | |||||
a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize, linesize); | |||||
a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize); | |||||
for(i=0; i<4; i++) | for(i=0; i<4; i++) | ||||
a->fdsp.fdct(block[i]); | a->fdsp.fdct(block[i]); | ||||
if(!(a->avctx->flags&CODEC_FLAG_GRAY)){ | if(!(a->avctx->flags&CODEC_FLAG_GRAY)){ | ||||
a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); | |||||
a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); | |||||
a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); | |||||
a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); | |||||
for(i=4; i<6; i++) | for(i=4; i<6; i++) | ||||
a->fdsp.fdct(block[i]); | a->fdsp.fdct(block[i]); | ||||
} | } | ||||
@@ -282,8 +282,8 @@ static av_cold int encode_init(AVCodecContext *avctx){ | |||||
const int scale= avctx->codec_id == AV_CODEC_ID_ASV1 ? 1 : 2; | const int scale= avctx->codec_id == AV_CODEC_ID_ASV1 ? 1 : 2; | ||||
ff_asv_common_init(avctx); | ff_asv_common_init(avctx); | ||||
ff_dsputil_init(&a->dsp, avctx); | |||||
ff_fdctdsp_init(&a->fdsp, avctx); | ff_fdctdsp_init(&a->fdsp, avctx); | ||||
ff_pixblockdsp_init(&a->pdsp, avctx); | |||||
if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE; | if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE; | ||||
@@ -30,10 +30,10 @@ | |||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "blockdsp.h" | #include "blockdsp.h" | ||||
#include "dsputil.h" | |||||
#include "fdctdsp.h" | #include "fdctdsp.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
#include "mpegvideo.h" | #include "mpegvideo.h" | ||||
#include "pixblockdsp.h" | |||||
#include "dnxhdenc.h" | #include "dnxhdenc.h" | ||||
@@ -326,6 +326,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) | |||||
ff_fdctdsp_init(&ctx->m.fdsp, avctx); | ff_fdctdsp_init(&ctx->m.fdsp, avctx); | ||||
ff_idctdsp_init(&ctx->m.idsp, avctx); | ff_idctdsp_init(&ctx->m.idsp, avctx); | ||||
ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); | ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); | ||||
ff_pixblockdsp_init(&ctx->m.pdsp, avctx); | |||||
ff_dct_common_init(&ctx->m); | ff_dct_common_init(&ctx->m); | ||||
ff_dct_encode_init(&ctx->m); | ff_dct_encode_init(&ctx->m); | ||||
@@ -561,12 +562,12 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) | |||||
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); | ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); | ||||
const uint8_t *ptr_v = ctx->thread[0]->src[2] + | const uint8_t *ptr_v = ctx->thread[0]->src[2] + | ||||
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); | ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); | ||||
DSPContext *dsp = &ctx->m.dsp; | |||||
PixblockDSPContext *pdsp = &ctx->m.pdsp; | |||||
dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); | |||||
dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); | |||||
dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); | |||||
dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); | |||||
pdsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); | |||||
pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); | |||||
pdsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); | |||||
pdsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); | |||||
if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { | if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { | ||||
if (ctx->interlaced) { | if (ctx->interlaced) { | ||||
@@ -589,14 +590,14 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) | |||||
ctx->bdsp.clear_block(ctx->blocks[7]); | ctx->bdsp.clear_block(ctx->blocks[7]); | ||||
} | } | ||||
} else { | } else { | ||||
dsp->get_pixels(ctx->blocks[4], | |||||
ptr_y + ctx->dct_y_offset, ctx->m.linesize); | |||||
dsp->get_pixels(ctx->blocks[5], | |||||
ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); | |||||
dsp->get_pixels(ctx->blocks[6], | |||||
ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); | |||||
dsp->get_pixels(ctx->blocks[7], | |||||
ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); | |||||
pdsp->get_pixels(ctx->blocks[4], | |||||
ptr_y + ctx->dct_y_offset, ctx->m.linesize); | |||||
pdsp->get_pixels(ctx->blocks[5], | |||||
ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); | |||||
pdsp->get_pixels(ctx->blocks[6], | |||||
ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); | |||||
pdsp->get_pixels(ctx->blocks[7], | |||||
ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); | |||||
} | } | ||||
} | } | ||||
@@ -36,13 +36,6 @@ | |||||
uint32_t ff_square_tab[512] = { 0, }; | uint32_t ff_square_tab[512] = { 0, }; | ||||
#define BIT_DEPTH 16 | |||||
#include "dsputilenc_template.c" | |||||
#undef BIT_DEPTH | |||||
#define BIT_DEPTH 8 | |||||
#include "dsputilenc_template.c" | |||||
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | ||||
int line_size, int h) | int line_size, int h) | ||||
{ | { | ||||
@@ -111,27 +104,6 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |||||
return s; | return s; | ||||
} | } | ||||
/**
 * Compute the element-wise difference of two 8x8 blocks of 8-bit samples.
 *
 * For each of 8 rows, block[x] = s1[x] - s2[x] for x in 0..7. The output is
 * written as 64 consecutive int16_t values; both sources advance by stride
 * bytes between rows.
 *
 * @param block  destination for the 64 differences
 * @param s1     first source block (minuend)
 * @param s2     second source block (subtrahend)
 * @param stride distance in bytes between successive rows of s1 and s2
 */
static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* One row: eight byte differences widened to int16_t. */
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];

        block += 8;
        s1    += stride;
        s2    += stride;
    }
}
static int sum_abs_dctelem_c(int16_t *block) | static int sum_abs_dctelem_c(int16_t *block) | ||||
{ | { | ||||
int sum = 0, i; | int sum = 0, i; | ||||
@@ -586,7 +558,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, | |||||
av_assert2(h == 8); | av_assert2(h == 8); | ||||
s->dsp.diff_pixels(temp, src1, src2, stride); | |||||
s->pdsp.diff_pixels(temp, src1, src2, stride); | |||||
s->fdsp.fdct(temp); | s->fdsp.fdct(temp); | ||||
return s->dsp.sum_abs_dctelem(temp); | return s->dsp.sum_abs_dctelem(temp); | ||||
} | } | ||||
@@ -626,7 +598,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, | |||||
int16_t dct[8][8]; | int16_t dct[8][8]; | ||||
int i, sum = 0; | int i, sum = 0; | ||||
s->dsp.diff_pixels(dct[0], src1, src2, stride); | |||||
s->pdsp.diff_pixels(dct[0], src1, src2, stride); | |||||
#define SRC(x) dct[i][x] | #define SRC(x) dct[i][x] | ||||
#define DST(x, v) dct[i][x] = v | #define DST(x, v) dct[i][x] = v | ||||
@@ -653,7 +625,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, | |||||
av_assert2(h == 8); | av_assert2(h == 8); | ||||
s->dsp.diff_pixels(temp, src1, src2, stride); | |||||
s->pdsp.diff_pixels(temp, src1, src2, stride); | |||||
s->fdsp.fdct(temp); | s->fdsp.fdct(temp); | ||||
for (i = 0; i < 64; i++) | for (i = 0; i < 64; i++) | ||||
@@ -672,7 +644,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, | |||||
av_assert2(h == 8); | av_assert2(h == 8); | ||||
s->mb_intra = 0; | s->mb_intra = 0; | ||||
s->dsp.diff_pixels(temp, src1, src2, stride); | |||||
s->pdsp.diff_pixels(temp, src1, src2, stride); | |||||
memcpy(bak, temp, 64 * sizeof(int16_t)); | memcpy(bak, temp, 64 * sizeof(int16_t)); | ||||
@@ -703,7 +675,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, | |||||
copy_block8(lsrc1, src1, 8, stride, 8); | copy_block8(lsrc1, src1, 8, stride, 8); | ||||
copy_block8(lsrc2, src2, 8, stride, 8); | copy_block8(lsrc2, src2, 8, stride, 8); | ||||
s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8); | |||||
s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8); | |||||
s->block_last_index[0 /* FIXME */] = | s->block_last_index[0 /* FIXME */] = | ||||
last = | last = | ||||
@@ -775,7 +747,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, | |||||
av_assert2(h == 8); | av_assert2(h == 8); | ||||
s->dsp.diff_pixels(temp, src1, src2, stride); | |||||
s->pdsp.diff_pixels(temp, src1, src2, stride); | |||||
s->block_last_index[0 /* FIXME */] = | s->block_last_index[0 /* FIXME */] = | ||||
last = | last = | ||||
@@ -971,8 +943,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
ff_check_alignment(); | ff_check_alignment(); | ||||
c->diff_pixels = diff_pixels_c; | |||||
c->sum_abs_dctelem = sum_abs_dctelem_c; | c->sum_abs_dctelem = sum_abs_dctelem_c; | ||||
/* TODO [0] 16 [1] 8 */ | /* TODO [0] 16 [1] 8 */ | ||||
@@ -1019,21 +989,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
ff_dsputil_init_dwt(c); | ff_dsputil_init_dwt(c); | ||||
#endif | #endif | ||||
switch (avctx->bits_per_raw_sample) { | |||||
case 9: | |||||
case 10: | |||||
case 12: | |||||
case 14: | |||||
c->get_pixels = get_pixels_16_c; | |||||
break; | |||||
default: | |||||
if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { | |||||
c->get_pixels = get_pixels_8_c; | |||||
} | |||||
break; | |||||
} | |||||
if (ARCH_ALPHA) | if (ARCH_ALPHA) | ||||
ff_dsputil_init_alpha(c, avctx); | ff_dsputil_init_alpha(c, avctx); | ||||
if (ARCH_ARM) | if (ARCH_ARM) | ||||
@@ -62,14 +62,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c, | |||||
* DSPContext. | * DSPContext. | ||||
*/ | */ | ||||
typedef struct DSPContext { | typedef struct DSPContext { | ||||
/* pixel ops : interface with DCT */ | |||||
void (*get_pixels)(int16_t *block /* align 16 */, | |||||
const uint8_t *pixels /* align 8 */, | |||||
int line_size); | |||||
void (*diff_pixels)(int16_t *block /* align 16 */, | |||||
const uint8_t *s1 /* align 8 */, | |||||
const uint8_t *s2 /* align 8 */, | |||||
int stride); | |||||
int (*sum_abs_dctelem)(int16_t *block /* align 16 */); | int (*sum_abs_dctelem)(int16_t *block /* align 16 */); | ||||
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ | me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ | ||||
@@ -31,6 +31,7 @@ | |||||
#include "dsputil.h" | #include "dsputil.h" | ||||
#include "fdctdsp.h" | #include "fdctdsp.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
#include "pixblockdsp.h" | |||||
#include "put_bits.h" | #include "put_bits.h" | ||||
#include "dv.h" | #include "dv.h" | ||||
#include "dv_tablegen.h" | #include "dv_tablegen.h" | ||||
@@ -41,6 +42,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) | |||||
DVVideoContext *s = avctx->priv_data; | DVVideoContext *s = avctx->priv_data; | ||||
DSPContext dsp; | DSPContext dsp; | ||||
FDCTDSPContext fdsp; | FDCTDSPContext fdsp; | ||||
PixblockDSPContext pdsp; | |||||
int ret; | int ret; | ||||
s->sys = avpriv_dv_codec_profile(avctx); | s->sys = avpriv_dv_codec_profile(avctx); | ||||
@@ -70,9 +72,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) | |||||
memset(&dsp,0, sizeof(dsp)); | memset(&dsp,0, sizeof(dsp)); | ||||
ff_dsputil_init(&dsp, avctx); | ff_dsputil_init(&dsp, avctx); | ||||
ff_fdctdsp_init(&fdsp, avctx); | ff_fdctdsp_init(&fdsp, avctx); | ||||
ff_pixblockdsp_init(&pdsp, avctx); | |||||
ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp); | ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp); | ||||
s->get_pixels = dsp.get_pixels; | |||||
s->get_pixels = pdsp.get_pixels; | |||||
s->ildct_cmp = dsp.ildct_cmp[5]; | s->ildct_cmp = dsp.ildct_cmp[5]; | ||||
s->fdct[0] = fdsp.fdct; | s->fdct[0] = fdsp.fdct; | ||||
@@ -29,5 +29,6 @@ LIBAVCODEC_$MAJOR { | |||||
ff_dnxhd_cid_table; | ff_dnxhd_cid_table; | ||||
ff_idctdsp_init; | ff_idctdsp_init; | ||||
ff_fdctdsp_init; | ff_fdctdsp_init; | ||||
ff_pixblockdsp_init; | |||||
local: *; | local: *; | ||||
}; | }; |
@@ -40,6 +40,7 @@ | |||||
#include "idctdsp.h" | #include "idctdsp.h" | ||||
#include "mpegvideodsp.h" | #include "mpegvideodsp.h" | ||||
#include "mpegvideoencdsp.h" | #include "mpegvideoencdsp.h" | ||||
#include "pixblockdsp.h" | |||||
#include "put_bits.h" | #include "put_bits.h" | ||||
#include "ratecontrol.h" | #include "ratecontrol.h" | ||||
#include "parser.h" | #include "parser.h" | ||||
@@ -371,6 +372,7 @@ typedef struct MpegEncContext { | |||||
IDCTDSPContext idsp; | IDCTDSPContext idsp; | ||||
MpegVideoDSPContext mdsp; | MpegVideoDSPContext mdsp; | ||||
MpegvideoEncDSPContext mpvencdsp; | MpegvideoEncDSPContext mpvencdsp; | ||||
PixblockDSPContext pdsp; | |||||
QpelDSPContext qdsp; | QpelDSPContext qdsp; | ||||
VideoDSPContext vdsp; | VideoDSPContext vdsp; | ||||
H263DSPContext h263dsp; | H263DSPContext h263dsp; | ||||
@@ -37,7 +37,6 @@ | |||||
#include "libavutil/timer.h" | #include "libavutil/timer.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "dct.h" | #include "dct.h" | ||||
#include "dsputil.h" | |||||
#include "idctdsp.h" | #include "idctdsp.h" | ||||
#include "mpeg12.h" | #include "mpeg12.h" | ||||
#include "mpegvideo.h" | #include "mpegvideo.h" | ||||
@@ -48,6 +47,7 @@ | |||||
#include "mpegutils.h" | #include "mpegutils.h" | ||||
#include "mjpegenc.h" | #include "mjpegenc.h" | ||||
#include "msmpeg4.h" | #include "msmpeg4.h" | ||||
#include "pixblockdsp.h" | |||||
#include "qpeldsp.h" | #include "qpeldsp.h" | ||||
#include "faandct.h" | #include "faandct.h" | ||||
#include "thread.h" | #include "thread.h" | ||||
@@ -820,6 +820,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) | |||||
ff_fdctdsp_init(&s->fdsp, avctx); | ff_fdctdsp_init(&s->fdsp, avctx); | ||||
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); | ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); | ||||
ff_pixblockdsp_init(&s->pdsp, avctx); | |||||
ff_qpeldsp_init(&s->qdsp); | ff_qpeldsp_init(&s->qdsp); | ||||
s->avctx->coded_frame = s->current_picture.f; | s->avctx->coded_frame = s->current_picture.f; | ||||
@@ -2093,27 +2094,27 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, | |||||
} | } | ||||
} | } | ||||
s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); | |||||
s->dsp.get_pixels(s->block[1], ptr_y + 8 , wrap_y); | |||||
s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y); | |||||
s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y); | |||||
s->pdsp.get_pixels(s->block[0], ptr_y, wrap_y); | |||||
s->pdsp.get_pixels(s->block[1], ptr_y + 8, wrap_y); | |||||
s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset, wrap_y); | |||||
s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y); | |||||
if (s->flags & CODEC_FLAG_GRAY) { | if (s->flags & CODEC_FLAG_GRAY) { | ||||
skip_dct[4] = 1; | skip_dct[4] = 1; | ||||
skip_dct[5] = 1; | skip_dct[5] = 1; | ||||
} else { | } else { | ||||
s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); | |||||
s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); | |||||
s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c); | |||||
s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c); | |||||
if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */ | if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */ | ||||
s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c); | |||||
s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c); | |||||
s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c); | |||||
s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c); | |||||
} else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */ | } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */ | ||||
s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c); | |||||
s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c); | |||||
s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c); | |||||
s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c); | |||||
s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c); | |||||
s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c); | |||||
s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c); | |||||
s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c); | |||||
s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c); | |||||
s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c); | |||||
s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c); | |||||
s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c); | |||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
@@ -2180,24 +2181,24 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, | |||||
} | } | ||||
} | } | ||||
s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y); | |||||
s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | |||||
s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset, | |||||
dest_y + dct_offset, wrap_y); | |||||
s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, | |||||
dest_y + dct_offset + 8, wrap_y); | |||||
s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y); | |||||
s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); | |||||
s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset, | |||||
dest_y + dct_offset, wrap_y); | |||||
s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, | |||||
dest_y + dct_offset + 8, wrap_y); | |||||
if (s->flags & CODEC_FLAG_GRAY) { | if (s->flags & CODEC_FLAG_GRAY) { | ||||
skip_dct[4] = 1; | skip_dct[4] = 1; | ||||
skip_dct[5] = 1; | skip_dct[5] = 1; | ||||
} else { | } else { | ||||
s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); | |||||
s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); | |||||
s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); | |||||
s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); | |||||
if (!s->chroma_y_shift) { /* 422 */ | if (!s->chroma_y_shift) { /* 422 */ | ||||
s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset, | |||||
dest_cb + uv_dct_offset, wrap_c); | |||||
s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset, | |||||
dest_cr + uv_dct_offset, wrap_c); | |||||
s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset, | |||||
dest_cb + uv_dct_offset, wrap_c); | |||||
s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset, | |||||
dest_cr + uv_dct_offset, wrap_c); | |||||
} | } | ||||
} | } | ||||
/* pre quantization */ | /* pre quantization */ | ||||
@@ -0,0 +1,80 @@ | |||||
/* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include <stdint.h> | |||||
#include "config.h" | |||||
#include "libavutil/attributes.h" | |||||
#include "avcodec.h" | |||||
#include "pixblockdsp.h" | |||||
#define BIT_DEPTH 16 | |||||
#include "pixblockdsp_template.c" | |||||
#undef BIT_DEPTH | |||||
#define BIT_DEPTH 8 | |||||
#include "pixblockdsp_template.c" | |||||
/**
 * Compute the element-wise difference of two 8x8 blocks of 8-bit samples.
 *
 * @param block  destination for the 64 differences, written row by row
 *               (8 consecutive int16_t values per row)
 * @param s1     first sample of the block that is subtracted from
 * @param s2     first sample of the block that is subtracted
 * @param stride distance in bytes between consecutive rows, applied to both
 *               s1 and s2
 */
static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* uint8_t operands are promoted to int, so a difference can be
         * negative; every result fits in int16_t (-255..255). */
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
/**
 * Fill a PixblockDSPContext with the C implementations and then let the
 * architecture-specific initializers replace them.
 *
 * diff_pixels is always set to the C version. get_pixels is chosen from
 * avctx->bits_per_raw_sample: 9, 10, 12 and 14 select the 16-bit template
 * instance; any other value selects the 8-bit instance only when the depth
 * is at most 8 or the codec is not a video codec. For the remaining
 * combinations this function does not assign get_pixels at all, so the
 * field keeps whatever value it had on entry.
 *
 * @param c     context to initialize
 * @param avctx codec context; bits_per_raw_sample and codec_type are read
 */
av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
{
    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    c->diff_pixels = diff_pixels_c;

    switch (avctx->bits_per_raw_sample) {
    case 9:
    case 10:
    case 12:
    case 14:
        c->get_pixels = get_pixels_16_c;
        break;
    default:
        if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
            c->get_pixels = get_pixels_8_c;
        }
        break;
    }

    /* ARCH_* are compile-time constants from config.h; each initializer
     * receives high_bit_depth so it can decide what to override. */
    if (ARCH_ARM)
        ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
    if (ARCH_PPC)
        ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
    if (ARCH_X86)
        ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
}
@@ -0,0 +1,44 @@ | |||||
/* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#ifndef AVCODEC_PIXBLOCKDSP_H | |||||
#define AVCODEC_PIXBLOCKDSP_H | |||||
#include <stdint.h> | |||||
#include "avcodec.h" | |||||
/**
 * Function pointers for the pixel block routines.
 *
 * The alignment notes on the parameters are the requirements callers are
 * expected to meet for the pointers they pass in.
 */
typedef struct PixblockDSPContext {
    /** Load a block of samples from pixels (rows line_size bytes apart)
     *  into block as 16-bit values. */
    void (*get_pixels)(int16_t *block /* align 16 */,
                       const uint8_t *pixels /* align 8 */,
                       int line_size);
    /** Store the sample-wise difference s1 - s2 of two blocks (rows stride
     *  bytes apart in both sources) into block. */
    void (*diff_pixels)(int16_t *block /* align 16 */,
                        const uint8_t *s1 /* align 8 */,
                        const uint8_t *s2 /* align 8 */,
                        int stride);
} PixblockDSPContext;
/**
 * Initialize c for the codec described by avctx.
 */
void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);

/*
 * Architecture-specific initializers. high_bit_depth is nonzero when the
 * codec uses more than 8 bits per raw sample.
 */
void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
                             unsigned high_bit_depth);
void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
                             unsigned high_bit_depth);
void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
                             unsigned high_bit_depth);
#endif /* AVCODEC_PIXBLOCKDSP_H */ |
@@ -1,10 +1,4 @@ | |||||
/* | /* | ||||
* DSP utils | |||||
* Copyright (c) 2000, 2001 Fabrice Bellard | |||||
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||||
* | |||||
* gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | |||||
* | |||||
* This file is part of FFmpeg. | * This file is part of FFmpeg. | ||||
* | * | ||||
* FFmpeg is free software; you can redistribute it and/or | * FFmpeg is free software; you can redistribute it and/or | ||||
@@ -22,11 +16,6 @@ | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
*/ | */ | ||||
/** | |||||
* @file | |||||
* DSP utils | |||||
*/ | |||||
#include "bit_depth_template.c" | #include "bit_depth_template.c" | ||||
static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels, | static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels, |
@@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o | |||||
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ | OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ | ||||
ppc/mpegvideodsp.o | ppc/mpegvideodsp.o | ||||
OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o | OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o | ||||
OBJS-$(CONFIG_PIXBLOCKDSP) += ppc/pixblockdsp.o | |||||
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o | OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o | ||||
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o | OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o | ||||
@@ -402,105 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |||||
return s; | return s; | ||||
} | } | ||||
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, | |||||
int line_size) | |||||
{ | |||||
int i; | |||||
vector unsigned char perm = vec_lvsl(0, pixels); | |||||
const vector unsigned char zero = | |||||
(const vector unsigned char) vec_splat_u8(0); | |||||
for (i = 0; i < 8; i++) { | |||||
/* Read potentially unaligned pixels. | |||||
* We're reading 16 pixels, and actually only want 8, | |||||
* but we simply ignore the extras. */ | |||||
vector unsigned char pixl = vec_ld(0, pixels); | |||||
vector unsigned char pixr = vec_ld(7, pixels); | |||||
vector unsigned char bytes = vec_perm(pixl, pixr, perm); | |||||
// Convert the bytes into shorts. | |||||
vector signed short shorts = (vector signed short) vec_mergeh(zero, | |||||
bytes); | |||||
// Save the data to the block, we assume the block is 16-byte aligned. | |||||
vec_st(shorts, i * 16, (vector signed short *) block); | |||||
pixels += line_size; | |||||
} | |||||
} | |||||
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, | |||||
const uint8_t *s2, int stride) | |||||
{ | |||||
int i; | |||||
vector unsigned char perm1 = vec_lvsl(0, s1); | |||||
vector unsigned char perm2 = vec_lvsl(0, s2); | |||||
const vector unsigned char zero = | |||||
(const vector unsigned char) vec_splat_u8(0); | |||||
vector signed short shorts1, shorts2; | |||||
for (i = 0; i < 4; i++) { | |||||
/* Read potentially unaligned pixels. | |||||
* We're reading 16 pixels, and actually only want 8, | |||||
* but we simply ignore the extras. */ | |||||
vector unsigned char pixl = vec_ld(0, s1); | |||||
vector unsigned char pixr = vec_ld(15, s1); | |||||
vector unsigned char bytes = vec_perm(pixl, pixr, perm1); | |||||
// Convert the bytes into shorts. | |||||
shorts1 = (vector signed short) vec_mergeh(zero, bytes); | |||||
// Do the same for the second block of pixels. | |||||
pixl = vec_ld(0, s2); | |||||
pixr = vec_ld(15, s2); | |||||
bytes = vec_perm(pixl, pixr, perm2); | |||||
// Convert the bytes into shorts. | |||||
shorts2 = (vector signed short) vec_mergeh(zero, bytes); | |||||
// Do the subtraction. | |||||
shorts1 = vec_sub(shorts1, shorts2); | |||||
// Save the data to the block, we assume the block is 16-byte aligned. | |||||
vec_st(shorts1, 0, (vector signed short *) block); | |||||
s1 += stride; | |||||
s2 += stride; | |||||
block += 8; | |||||
/* The code below is a copy of the code above... | |||||
* This is a manual unroll. */ | |||||
/* Read potentially unaligned pixels. | |||||
* We're reading 16 pixels, and actually only want 8, | |||||
* but we simply ignore the extras. */ | |||||
pixl = vec_ld(0, s1); | |||||
pixr = vec_ld(15, s1); | |||||
bytes = vec_perm(pixl, pixr, perm1); | |||||
// Convert the bytes into shorts. | |||||
shorts1 = (vector signed short) vec_mergeh(zero, bytes); | |||||
// Do the same for the second block of pixels. | |||||
pixl = vec_ld(0, s2); | |||||
pixr = vec_ld(15, s2); | |||||
bytes = vec_perm(pixl, pixr, perm2); | |||||
// Convert the bytes into shorts. | |||||
shorts2 = (vector signed short) vec_mergeh(zero, bytes); | |||||
// Do the subtraction. | |||||
shorts1 = vec_sub(shorts1, shorts2); | |||||
// Save the data to the block, we assume the block is 16-byte aligned. | |||||
vec_st(shorts1, 0, (vector signed short *) block); | |||||
s1 += stride; | |||||
s2 += stride; | |||||
block += 8; | |||||
} | |||||
} | |||||
static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | ||||
uint8_t *src, int stride, int h) | uint8_t *src, int stride, int h) | ||||
{ | { | ||||
@@ -854,12 +755,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | |||||
c->sse[0] = sse16_altivec; | c->sse[0] = sse16_altivec; | ||||
c->sse[1] = sse8_altivec; | c->sse[1] = sse8_altivec; | ||||
c->diff_pixels = diff_pixels_altivec; | |||||
if (!high_bit_depth) { | |||||
c->get_pixels = get_pixels_altivec; | |||||
} | |||||
c->hadamard8_diff[0] = hadamard8_diff16_altivec; | c->hadamard8_diff[0] = hadamard8_diff16_altivec; | ||||
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | ||||
} | } |
@@ -0,0 +1,153 @@ | |||||
/* | |||||
* Copyright (c) 2002 Brian Foley | |||||
* Copyright (c) 2002 Dieter Shirley | |||||
* Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |||||
* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "config.h" | |||||
#if HAVE_ALTIVEC_H | |||||
#include <altivec.h> | |||||
#endif | |||||
#include "libavutil/attributes.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/ppc/cpu.h" | |||||
#include "libavutil/ppc/types_altivec.h" | |||||
#include "libavutil/ppc/util_altivec.h" | |||||
#include "libavcodec/avcodec.h" | |||||
#include "libavcodec/pixblockdsp.h" | |||||
#if HAVE_ALTIVEC | |||||
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, | |||||
int line_size) | |||||
{ | |||||
int i; | |||||
vector unsigned char perm = vec_lvsl(0, pixels); | |||||
const vector unsigned char zero = | |||||
(const vector unsigned char) vec_splat_u8(0); | |||||
for (i = 0; i < 8; i++) { | |||||
/* Read potentially unaligned pixels. | |||||
* We're reading 16 pixels, and actually only want 8, | |||||
* but we simply ignore the extras. */ | |||||
vector unsigned char pixl = vec_ld(0, pixels); | |||||
vector unsigned char pixr = vec_ld(7, pixels); | |||||
vector unsigned char bytes = vec_perm(pixl, pixr, perm); | |||||
// Convert the bytes into shorts. | |||||
vector signed short shorts = (vector signed short) vec_mergeh(zero, | |||||
bytes); | |||||
// Save the data to the block, we assume the block is 16-byte aligned. | |||||
vec_st(shorts, i * 16, (vector signed short *) block); | |||||
pixels += line_size; | |||||
} | |||||
} | |||||
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, | |||||
const uint8_t *s2, int stride) | |||||
{ | |||||
int i; | |||||
vector unsigned char perm1 = vec_lvsl(0, s1); | |||||
vector unsigned char perm2 = vec_lvsl(0, s2); | |||||
const vector unsigned char zero = | |||||
(const vector unsigned char) vec_splat_u8(0); | |||||
vector signed short shorts1, shorts2; | |||||
for (i = 0; i < 4; i++) { | |||||
/* Read potentially unaligned pixels. | |||||
* We're reading 16 pixels, and actually only want 8, | |||||
* but we simply ignore the extras. */ | |||||
vector unsigned char pixl = vec_ld(0, s1); | |||||
vector unsigned char pixr = vec_ld(15, s1); | |||||
vector unsigned char bytes = vec_perm(pixl, pixr, perm1); | |||||
// Convert the bytes into shorts. | |||||
shorts1 = (vector signed short) vec_mergeh(zero, bytes); | |||||
// Do the same for the second block of pixels. | |||||
pixl = vec_ld(0, s2); | |||||
pixr = vec_ld(15, s2); | |||||
bytes = vec_perm(pixl, pixr, perm2); | |||||
// Convert the bytes into shorts. | |||||
shorts2 = (vector signed short) vec_mergeh(zero, bytes); | |||||
// Do the subtraction. | |||||
shorts1 = vec_sub(shorts1, shorts2); | |||||
// Save the data to the block, we assume the block is 16-byte aligned. | |||||
vec_st(shorts1, 0, (vector signed short *) block); | |||||
s1 += stride; | |||||
s2 += stride; | |||||
block += 8; | |||||
/* The code below is a copy of the code above... | |||||
* This is a manual unroll. */ | |||||
/* Read potentially unaligned pixels. | |||||
* We're reading 16 pixels, and actually only want 8, | |||||
* but we simply ignore the extras. */ | |||||
pixl = vec_ld(0, s1); | |||||
pixr = vec_ld(15, s1); | |||||
bytes = vec_perm(pixl, pixr, perm1); | |||||
// Convert the bytes into shorts. | |||||
shorts1 = (vector signed short) vec_mergeh(zero, bytes); | |||||
// Do the same for the second block of pixels. | |||||
pixl = vec_ld(0, s2); | |||||
pixr = vec_ld(15, s2); | |||||
bytes = vec_perm(pixl, pixr, perm2); | |||||
// Convert the bytes into shorts. | |||||
shorts2 = (vector signed short) vec_mergeh(zero, bytes); | |||||
// Do the subtraction. | |||||
shorts1 = vec_sub(shorts1, shorts2); | |||||
// Save the data to the block, we assume the block is 16-byte aligned. | |||||
vec_st(shorts1, 0, (vector signed short *) block); | |||||
s1 += stride; | |||||
s2 += stride; | |||||
block += 8; | |||||
} | |||||
} | |||||
#endif /* HAVE_ALTIVEC */ | |||||
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, | |||||
AVCodecContext *avctx, | |||||
unsigned high_bit_depth) | |||||
{ | |||||
#if HAVE_ALTIVEC | |||||
if (!PPC_ALTIVEC(av_get_cpu_flags())) | |||||
return; | |||||
c->diff_pixels = diff_pixels_altivec; | |||||
if (!high_bit_depth) { | |||||
c->get_pixels = get_pixels_altivec; | |||||
} | |||||
#endif /* HAVE_ALTIVEC */ | |||||
} |
@@ -31,6 +31,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ | |||||
x86/mpegvideodsp.o | x86/mpegvideodsp.o | ||||
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \ | OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \ | ||||
x86/mpegvideoencdsp_init.o | x86/mpegvideoencdsp_init.o | ||||
OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp_init.o | |||||
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o | OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o | ||||
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o | OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o | ||||
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | ||||
@@ -110,6 +111,7 @@ YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o | |||||
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o | YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o | ||||
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | ||||
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o | YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o | ||||
YASM-OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp.o | |||||
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \ | YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \ | ||||
x86/fpel.o \ | x86/fpel.o \ | ||||
x86/qpel.o | x86/qpel.o | ||||
@@ -352,115 +352,6 @@ SUM_SQUARED_ERRORS 16 | |||||
INIT_XMM sse2 | INIT_XMM sse2 | ||||
SUM_SQUARED_ERRORS 16 | SUM_SQUARED_ERRORS 16 | ||||
INIT_MMX mmx | |||||
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size) | |||||
cglobal get_pixels, 3,4 | |||||
movsxdifnidn r2, r2d | |||||
add r0, 128 | |||||
mov r3, -128 | |||||
pxor m7, m7 | |||||
.loop: | |||||
mova m0, [r1] | |||||
mova m2, [r1+r2] | |||||
mova m1, m0 | |||||
mova m3, m2 | |||||
punpcklbw m0, m7 | |||||
punpckhbw m1, m7 | |||||
punpcklbw m2, m7 | |||||
punpckhbw m3, m7 | |||||
mova [r0+r3+ 0], m0 | |||||
mova [r0+r3+ 8], m1 | |||||
mova [r0+r3+16], m2 | |||||
mova [r0+r3+24], m3 | |||||
lea r1, [r1+r2*2] | |||||
add r3, 32 | |||||
js .loop | |||||
REP_RET | |||||
INIT_XMM sse2 | |||||
cglobal get_pixels, 3, 4, 5 | |||||
movsxdifnidn r2, r2d | |||||
lea r3, [r2*3] | |||||
pxor m4, m4 | |||||
movh m0, [r1] | |||||
movh m1, [r1+r2] | |||||
movh m2, [r1+r2*2] | |||||
movh m3, [r1+r3] | |||||
lea r1, [r1+r2*4] | |||||
punpcklbw m0, m4 | |||||
punpcklbw m1, m4 | |||||
punpcklbw m2, m4 | |||||
punpcklbw m3, m4 | |||||
mova [r0], m0 | |||||
mova [r0+0x10], m1 | |||||
mova [r0+0x20], m2 | |||||
mova [r0+0x30], m3 | |||||
movh m0, [r1] | |||||
movh m1, [r1+r2*1] | |||||
movh m2, [r1+r2*2] | |||||
movh m3, [r1+r3] | |||||
punpcklbw m0, m4 | |||||
punpcklbw m1, m4 | |||||
punpcklbw m2, m4 | |||||
punpcklbw m3, m4 | |||||
mova [r0+0x40], m0 | |||||
mova [r0+0x50], m1 | |||||
mova [r0+0x60], m2 | |||||
mova [r0+0x70], m3 | |||||
RET | |||||
INIT_MMX mmx | |||||
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, | |||||
; int stride); | |||||
cglobal diff_pixels, 4,5 | |||||
movsxdifnidn r3, r3d | |||||
pxor m7, m7 | |||||
add r0, 128 | |||||
mov r4, -128 | |||||
.loop: | |||||
mova m0, [r1] | |||||
mova m2, [r2] | |||||
mova m1, m0 | |||||
mova m3, m2 | |||||
punpcklbw m0, m7 | |||||
punpckhbw m1, m7 | |||||
punpcklbw m2, m7 | |||||
punpckhbw m3, m7 | |||||
psubw m0, m2 | |||||
psubw m1, m3 | |||||
mova [r0+r4+0], m0 | |||||
mova [r0+r4+8], m1 | |||||
add r1, r3 | |||||
add r2, r3 | |||||
add r4, 16 | |||||
jne .loop | |||||
REP_RET | |||||
INIT_XMM sse2 | |||||
cglobal diff_pixels, 4, 5, 5 | |||||
movsxdifnidn r3, r3d | |||||
pxor m4, m4 | |||||
add r0, 128 | |||||
mov r4, -128 | |||||
.loop: | |||||
movh m0, [r1] | |||||
movh m2, [r2] | |||||
movh m1, [r1+r3] | |||||
movh m3, [r2+r3] | |||||
punpcklbw m0, m4 | |||||
punpcklbw m1, m4 | |||||
punpcklbw m2, m4 | |||||
punpcklbw m3, m4 | |||||
psubw m0, m2 | |||||
psubw m1, m3 | |||||
mova [r0+r4+0 ], m0 | |||||
mova [r0+r4+16], m1 | |||||
lea r1, [r1+r3*2] | |||||
lea r2, [r2+r3*2] | |||||
add r4, 32 | |||||
jne .loop | |||||
RET | |||||
;----------------------------------------------- | ;----------------------------------------------- | ||||
;int ff_sum_abs_dctelem(int16_t *block) | ;int ff_sum_abs_dctelem(int16_t *block) | ||||
;----------------------------------------------- | ;----------------------------------------------- | ||||
@@ -30,12 +30,6 @@ | |||||
#include "libavcodec/mpegvideo.h" | #include "libavcodec/mpegvideo.h" | ||||
#include "dsputil_x86.h" | #include "dsputil_x86.h" | ||||
void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); | |||||
void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); | |||||
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, | |||||
int stride); | |||||
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, | |||||
int stride); | |||||
int ff_sum_abs_dctelem_mmx(int16_t *block); | int ff_sum_abs_dctelem_mmx(int16_t *block); | ||||
int ff_sum_abs_dctelem_mmxext(int16_t *block); | int ff_sum_abs_dctelem_mmxext(int16_t *block); | ||||
int ff_sum_abs_dctelem_sse2(int16_t *block); | int ff_sum_abs_dctelem_sse2(int16_t *block); | ||||
@@ -353,16 +347,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||
{ | { | ||||
int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
if (EXTERNAL_MMX(cpu_flags)) { | |||||
if (!high_bit_depth) | |||||
c->get_pixels = ff_get_pixels_mmx; | |||||
c->diff_pixels = ff_diff_pixels_mmx; | |||||
} | |||||
if (EXTERNAL_SSE2(cpu_flags)) | |||||
if (!high_bit_depth) | |||||
c->get_pixels = ff_get_pixels_sse2; | |||||
#if HAVE_INLINE_ASM | #if HAVE_INLINE_ASM | ||||
if (INLINE_MMX(cpu_flags)) { | if (INLINE_MMX(cpu_flags)) { | ||||
c->vsad[4] = vsad_intra16_mmx; | c->vsad[4] = vsad_intra16_mmx; | ||||
@@ -410,7 +394,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||
if (EXTERNAL_SSE2(cpu_flags)) { | if (EXTERNAL_SSE2(cpu_flags)) { | ||||
c->sse[0] = ff_sse16_sse2; | c->sse[0] = ff_sse16_sse2; | ||||
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2; | c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2; | ||||
c->diff_pixels = ff_diff_pixels_sse2; | |||||
#if HAVE_ALIGNED_STACK | #if HAVE_ALIGNED_STACK | ||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2; | c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2; | ||||
@@ -0,0 +1,135 @@ | |||||
;***************************************************************************** | |||||
;* SIMD-optimized pixel operations | |||||
;***************************************************************************** | |||||
;* Copyright (c) 2000, 2001 Fabrice Bellard | |||||
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||||
;* | |||||
;* This file is part of FFmpeg. | |||||
;* | |||||
;* FFmpeg is free software; you can redistribute it and/or | |||||
;* modify it under the terms of the GNU Lesser General Public | |||||
;* License as published by the Free Software Foundation; either | |||||
;* version 2.1 of the License, or (at your option) any later version. | |||||
;* | |||||
;* FFmpeg is distributed in the hope that it will be useful, | |||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
;* Lesser General Public License for more details. | |||||
;* | |||||
;* You should have received a copy of the GNU Lesser General Public | |||||
;* License along with FFmpeg; if not, write to the Free Software | |||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
;***************************************************************************** | |||||
%include "libavutil/x86/x86util.asm" | |||||
SECTION .text | |||||
INIT_MMX mmx
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
;
; Widens 8 rows of 8 bytes to 16-bit words and writes 128 bytes to block.
; r0 is moved to the end of the output and r3 counts from -128 up to 0 in
; steps of 32, so the loop runs 4 times and handles two source rows each time.
cglobal get_pixels, 3,4
    movsxdifnidn r2, r2d            ; line_size as a native-width register
    add          r0, 128
    mov          r3, -128
    pxor         m7, m7             ; zero, used to widen bytes to words
.loop:
    mova         m0, [r1]           ; row n
    mova         m2, [r1+r2]        ; row n+1
    mova         m1, m0
    mova         m3, m2
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    mova [r0+r3+ 0], m0
    mova [r0+r3+ 8], m1
    mova [r0+r3+16], m2
    mova [r0+r3+24], m3
    lea          r1, [r1+r2*2]      ; advance two rows
    add          r3, 32
    js .loop                        ; continue while the offset is negative
    REP_RET
INIT_XMM sse2
; void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size)
;
; Straight-line version: rows 0-3 are loaded, widened against zero (m4) and
; stored at block+0x00..0x30, then r1 is advanced four rows and rows 4-7 are
; stored at block+0x40..0x70. r3 holds 3*line_size.
cglobal get_pixels, 3, 4, 5
    movsxdifnidn r2, r2d            ; line_size as a native-width register
    lea          r3, [r2*3]
    pxor         m4, m4
    movh         m0, [r1]
    movh         m1, [r1+r2]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    lea          r1, [r1+r2*4]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova       [r0], m0
    mova  [r0+0x10], m1
    mova  [r0+0x20], m2
    mova  [r0+0x30], m3
    movh         m0, [r1]
    movh         m1, [r1+r2*1]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova  [r0+0x40], m0
    mova  [r0+0x50], m1
    mova  [r0+0x60], m2
    mova  [r0+0x70], m3
    RET
INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                         int stride);
;
; Writes s1 - s2 as 16-bit words for 8 rows of 8 bytes. r0 is moved to the end
; of the output and r4 counts from -128 up to 0 in steps of 16, i.e. one row
; (16 output bytes) per iteration, 8 iterations.
cglobal diff_pixels, 4,5
    movsxdifnidn r3, r3d            ; stride as a native-width register
    pxor         m7, m7             ; zero, used to widen bytes to words
    add          r0,  128
    mov          r4, -128
.loop:
    mova         m0, [r1]
    mova         m2, [r2]
    mova         m1, m0
    mova         m3, m2
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    psubw        m0, m2
    psubw        m1, m3
    mova  [r0+r4+0], m0
    mova  [r0+r4+8], m1
    add          r1, r3
    add          r2, r3
    add          r4, 16
    jne .loop                       ; until the offset reaches zero
    REP_RET
INIT_XMM sse2
; void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                          int stride);
;
; Same result as the MMX version, two rows per iteration: r4 counts from -128
; up to 0 in steps of 32, so the loop runs 4 times.
cglobal diff_pixels, 4, 5, 5
    movsxdifnidn r3, r3d            ; stride as a native-width register
    pxor         m4, m4             ; zero, used to widen bytes to words
    add          r0,  128
    mov          r4, -128
.loop:
    movh         m0, [r1]           ; s1 row n
    movh         m2, [r2]           ; s2 row n
    movh         m1, [r1+r3]        ; s1 row n+1
    movh         m3, [r2+r3]        ; s2 row n+1
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    psubw        m0, m2
    psubw        m1, m3
    mova [r0+r4+0 ], m0
    mova [r0+r4+16], m1
    lea          r1, [r1+r3*2]
    lea          r2, [r2+r3*2]
    add          r4, 32
    jne .loop                       ; until the offset reaches zero
    RET
@@ -0,0 +1,50 @@ | |||||
/* | |||||
* SIMD-optimized pixel operations | |||||
* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "libavutil/attributes.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/pixblockdsp.h" | |||||
/* Prototypes for the external assembly implementations; the signatures match
 * the get_pixels / diff_pixels members of PixblockDSPContext. */
void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                        int stride);
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                         int stride);
av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, | |||||
AVCodecContext *avctx, | |||||
unsigned high_bit_depth) | |||||
{ | |||||
int cpu_flags = av_get_cpu_flags(); | |||||
if (EXTERNAL_MMX(cpu_flags)) { | |||||
if (!high_bit_depth) | |||||
c->get_pixels = ff_get_pixels_mmx; | |||||
c->diff_pixels = ff_diff_pixels_mmx; | |||||
} | |||||
if (EXTERNAL_SSE2(cpu_flags)) { | |||||
if (!high_bit_depth) | |||||
c->get_pixels = ff_get_pixels_sse2; | |||||
c->diff_pixels = ff_diff_pixels_sse2; | |||||
} | |||||
} |
@@ -28,6 +28,7 @@ | |||||
#include "libavutil/pixdesc.h" | #include "libavutil/pixdesc.h" | ||||
#include "libavutil/timestamp.h" | #include "libavutil/timestamp.h" | ||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "libavcodec/pixblockdsp.h" | |||||
#include "avfilter.h" | #include "avfilter.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
#include "formats.h" | #include "formats.h" | ||||
@@ -49,6 +50,7 @@ typedef struct { | |||||
int hsub, vsub; ///< chroma subsampling values | int hsub, vsub; ///< chroma subsampling values | ||||
AVFrame *ref; ///< reference picture | AVFrame *ref; ///< reference picture | ||||
DSPContext dspctx; ///< context providing optimized diff routines | DSPContext dspctx; ///< context providing optimized diff routines | ||||
PixblockDSPContext pdsp; | |||||
AVCodecContext *avctx; ///< codec context required for the DSPContext | AVCodecContext *avctx; ///< codec context required for the DSPContext | ||||
} DecimateContext; | } DecimateContext; | ||||
@@ -75,6 +77,7 @@ static int diff_planes(AVFilterContext *ctx, | |||||
{ | { | ||||
DecimateContext *decimate = ctx->priv; | DecimateContext *decimate = ctx->priv; | ||||
DSPContext *dspctx = &decimate->dspctx; | DSPContext *dspctx = &decimate->dspctx; | ||||
PixblockDSPContext *pdsp = &decimate->pdsp; | |||||
int x, y; | int x, y; | ||||
int d, c = 0; | int d, c = 0; | ||||
@@ -84,7 +87,7 @@ static int diff_planes(AVFilterContext *ctx, | |||||
/* compute difference for blocks of 8x8 bytes */ | /* compute difference for blocks of 8x8 bytes */ | ||||
for (y = 0; y < h-7; y += 4) { | for (y = 0; y < h-7; y += 4) { | ||||
for (x = 8; x < w-7; x += 4) { | for (x = 8; x < w-7; x += 4) { | ||||
dspctx->diff_pixels(block, | |||||
pdsp->diff_pixels(block, | |||||
cur+x+y*linesize, | cur+x+y*linesize, | ||||
ref+x+y*linesize, linesize); | ref+x+y*linesize, linesize); | ||||
d = dspctx->sum_abs_dctelem(block); | d = dspctx->sum_abs_dctelem(block); | ||||
@@ -141,6 +144,7 @@ static av_cold int init(AVFilterContext *ctx) | |||||
if (!decimate->avctx) | if (!decimate->avctx) | ||||
return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
avpriv_dsputil_init(&decimate->dspctx, decimate->avctx); | avpriv_dsputil_init(&decimate->dspctx, decimate->avctx); | ||||
ff_pixblockdsp_init(&decimate->pdsp, decimate->avctx); | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -232,7 +232,7 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, | |||||
const int x1 = x + offset[i + count - 1][0]; | const int x1 = x + offset[i + count - 1][0]; | ||||
const int y1 = y + offset[i + count - 1][1]; | const int y1 = y + offset[i + count - 1][1]; | ||||
const int index = x1 + y1*linesize; | const int index = x1 + y1*linesize; | ||||
p->dsp.get_pixels(block, p->src + index, linesize); | |||||
p->pdsp.get_pixels(block, p->src + index, linesize); | |||||
p->fdsp.fdct(block); | p->fdsp.fdct(block); | ||||
p->requantize(block2, block, qp, p->idsp.idct_permutation); | p->requantize(block2, block, qp, p->idsp.idct_permutation); | ||||
p->idsp.idct(block2); | p->idsp.idct(block2); | ||||
@@ -380,9 +380,9 @@ static av_cold int init(AVFilterContext *ctx) | |||||
spp->avctx = avcodec_alloc_context3(NULL); | spp->avctx = avcodec_alloc_context3(NULL); | ||||
if (!spp->avctx) | if (!spp->avctx) | ||||
return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
avpriv_dsputil_init(&spp->dsp, spp->avctx); | |||||
ff_idctdsp_init(&spp->idsp, spp->avctx); | ff_idctdsp_init(&spp->idsp, spp->avctx); | ||||
ff_fdctdsp_init(&spp->fdsp, spp->avctx); | ff_fdctdsp_init(&spp->fdsp, spp->avctx); | ||||
ff_pixblockdsp_init(&spp->pdsp, spp->avctx); | |||||
spp->store_slice = store_slice_c; | spp->store_slice = store_slice_c; | ||||
switch (spp->mode) { | switch (spp->mode) { | ||||
case MODE_HARD: spp->requantize = hardthresh_c; break; | case MODE_HARD: spp->requantize = hardthresh_c; break; | ||||
@@ -23,7 +23,7 @@ | |||||
#define AVFILTER_SPP_H | #define AVFILTER_SPP_H | ||||
#include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
#include "libavcodec/dsputil.h" | |||||
#include "libavcodec/pixblockdsp.h" | |||||
#include "libavcodec/idctdsp.h" | #include "libavcodec/idctdsp.h" | ||||
#include "libavcodec/fdctdsp.h" | #include "libavcodec/fdctdsp.h" | ||||
#include "avfilter.h" | #include "avfilter.h" | ||||
@@ -41,9 +41,9 @@ typedef struct { | |||||
uint8_t *src; | uint8_t *src; | ||||
int16_t *temp; | int16_t *temp; | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
DSPContext dsp; | |||||
IDCTDSPContext idsp; | IDCTDSPContext idsp; | ||||
FDCTDSPContext fdsp; | FDCTDSPContext fdsp; | ||||
PixblockDSPContext pdsp; | |||||
int8_t *non_b_qp_table; | int8_t *non_b_qp_table; | ||||
int non_b_qp_alloc_size; | int non_b_qp_alloc_size; | ||||
int use_bframe_qp; | int use_bframe_qp; | ||||