| @@ -1546,6 +1546,7 @@ CONFIG_EXTRA=" | |||||
| huffman | huffman | ||||
| huffyuvdsp | huffyuvdsp | ||||
| huffyuvencdsp | huffyuvencdsp | ||||
| idctdsp | |||||
| intrax8 | intrax8 | ||||
| lgplv3 | lgplv3 | ||||
| lpc | lpc | ||||
| @@ -1703,6 +1704,7 @@ threads_if_any="$THREADS_LIST" | |||||
| # subsystems | # subsystems | ||||
| dct_select="rdft" | dct_select="rdft" | ||||
| dsputil_select="idctdsp" | |||||
| error_resilience_select="dsputil" | error_resilience_select="dsputil" | ||||
| intrax8_select="error_resilience" | intrax8_select="error_resilience" | ||||
| mdct_select="fft" | mdct_select="fft" | ||||
| @@ -1710,7 +1712,7 @@ rdft_select="fft" | |||||
| mpeg_er_select="error_resilience" | mpeg_er_select="error_resilience" | ||||
| mpegaudio_select="mpegaudiodsp" | mpegaudio_select="mpegaudiodsp" | ||||
| mpegaudiodsp_select="dct" | mpegaudiodsp_select="dct" | ||||
| mpegvideo_select="blockdsp dsputil hpeldsp videodsp" | |||||
| mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp" | |||||
| mpegvideoenc_select="dsputil mpegvideo qpeldsp" | mpegvideoenc_select="dsputil mpegvideo qpeldsp" | ||||
| # decoders / encoders | # decoders / encoders | ||||
| @@ -1720,16 +1722,16 @@ aac_latm_decoder_select="aac_decoder aac_latm_parser" | |||||
| ac3_decoder_select="ac3_parser ac3dsp bswapdsp mdct" | ac3_decoder_select="ac3_parser ac3dsp bswapdsp mdct" | ||||
| ac3_encoder_select="ac3dsp audiodsp dsputil mdct" | ac3_encoder_select="ac3dsp audiodsp dsputil mdct" | ||||
| ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct" | ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct" | ||||
| aic_decoder_select="dsputil golomb" | |||||
| aic_decoder_select="golomb idctdsp" | |||||
| alac_encoder_select="lpc" | alac_encoder_select="lpc" | ||||
| als_decoder_select="bswapdsp" | als_decoder_select="bswapdsp" | ||||
| amrnb_decoder_select="lsp" | amrnb_decoder_select="lsp" | ||||
| amrwb_decoder_select="lsp" | amrwb_decoder_select="lsp" | ||||
| amv_decoder_select="sp5x_decoder" | amv_decoder_select="sp5x_decoder" | ||||
| ape_decoder_select="bswapdsp" | ape_decoder_select="bswapdsp" | ||||
| asv1_decoder_select="blockdsp bswapdsp dsputil" | |||||
| asv1_decoder_select="blockdsp bswapdsp idctdsp" | |||||
| asv1_encoder_select="bswapdsp dsputil" | asv1_encoder_select="bswapdsp dsputil" | ||||
| asv2_decoder_select="blockdsp bswapdsp dsputil" | |||||
| asv2_decoder_select="blockdsp bswapdsp idctdsp" | |||||
| asv2_encoder_select="bswapdsp dsputil" | asv2_encoder_select="bswapdsp dsputil" | ||||
| atrac1_decoder_select="mdct sinewin" | atrac1_decoder_select="mdct sinewin" | ||||
| atrac3_decoder_select="mdct" | atrac3_decoder_select="mdct" | ||||
| @@ -1737,23 +1739,23 @@ atrac3p_decoder_select="mdct sinewin" | |||||
| bink_decoder_select="blockdsp hpeldsp" | bink_decoder_select="blockdsp hpeldsp" | ||||
| binkaudio_dct_decoder_select="mdct rdft dct sinewin" | binkaudio_dct_decoder_select="mdct rdft dct sinewin" | ||||
| binkaudio_rdft_decoder_select="mdct rdft sinewin" | binkaudio_rdft_decoder_select="mdct rdft sinewin" | ||||
| cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp" | |||||
| cavs_decoder_select="blockdsp golomb h264chroma idctdsp qpeldsp videodsp" | |||||
| cllc_decoder_select="bswapdsp" | cllc_decoder_select="bswapdsp" | ||||
| comfortnoise_encoder_select="lpc" | comfortnoise_encoder_select="lpc" | ||||
| cook_decoder_select="audiodsp mdct sinewin" | cook_decoder_select="audiodsp mdct sinewin" | ||||
| cscd_decoder_select="lzo" | cscd_decoder_select="lzo" | ||||
| cscd_decoder_suggest="zlib" | cscd_decoder_suggest="zlib" | ||||
| dca_decoder_select="mdct" | dca_decoder_select="mdct" | ||||
| dnxhd_decoder_select="blockdsp dsputil" | |||||
| dnxhd_encoder_select="aandcttables blockdsp dsputil mpegvideoenc" | |||||
| dvvideo_decoder_select="dsputil" | |||||
| dnxhd_decoder_select="blockdsp idctdsp" | |||||
| dnxhd_encoder_select="aandcttables blockdsp dsputil idctdsp mpegvideoenc" | |||||
| dvvideo_decoder_select="idctdsp" | |||||
| dvvideo_encoder_select="dsputil" | dvvideo_encoder_select="dsputil" | ||||
| dxa_decoder_deps="zlib" | dxa_decoder_deps="zlib" | ||||
| eac3_decoder_select="ac3_decoder" | eac3_decoder_select="ac3_decoder" | ||||
| eac3_encoder_select="ac3_encoder" | eac3_encoder_select="ac3_encoder" | ||||
| eamad_decoder_select="aandcttables blockdsp bswapdsp dsputil mpegvideo" | |||||
| eatgq_decoder_select="aandcttables dsputil" | |||||
| eatqi_decoder_select="aandcttables blockdsp bswapdsp dsputil mpeg1video_decoder" | |||||
| eamad_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpegvideo" | |||||
| eatgq_decoder_select="aandcttables idctdsp" | |||||
| eatqi_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpeg1video_decoder" | |||||
| exr_decoder_deps="zlib" | exr_decoder_deps="zlib" | ||||
| ffv1_decoder_select="golomb rangecoder" | ffv1_decoder_select="golomb rangecoder" | ||||
| ffv1_encoder_select="rangecoder" | ffv1_encoder_select="rangecoder" | ||||
| @@ -1770,7 +1772,7 @@ flv_encoder_select="h263_encoder" | |||||
| fourxm_decoder_select="blockdsp bswapdsp" | fourxm_decoder_select="blockdsp bswapdsp" | ||||
| fraps_decoder_select="bswapdsp huffman" | fraps_decoder_select="bswapdsp huffman" | ||||
| g2m_decoder_deps="zlib" | g2m_decoder_deps="zlib" | ||||
| g2m_decoder_select="blockdsp dsputil" | |||||
| g2m_decoder_select="blockdsp idctdsp" | |||||
| h261_decoder_select="mpeg_er mpegvideo" | h261_decoder_select="mpeg_er mpegvideo" | ||||
| h261_encoder_select="aandcttables mpegvideoenc" | h261_encoder_select="aandcttables mpegvideoenc" | ||||
| h263_decoder_select="error_resilience h263_parser h263dsp mpeg_er mpegvideo qpeldsp" | h263_decoder_select="error_resilience h263_parser h263dsp mpeg_er mpegvideo qpeldsp" | ||||
| @@ -1790,12 +1792,12 @@ jpegls_decoder_select="golomb mjpeg_decoder" | |||||
| jpegls_encoder_select="golomb" | jpegls_encoder_select="golomb" | ||||
| jv_decoder_select="blockdsp" | jv_decoder_select="blockdsp" | ||||
| lagarith_decoder_select="huffyuvdsp" | lagarith_decoder_select="huffyuvdsp" | ||||
| ljpeg_encoder_select="aandcttables dsputil" | |||||
| ljpeg_encoder_select="aandcttables idctdsp" | |||||
| loco_decoder_select="golomb" | loco_decoder_select="golomb" | ||||
| mdec_decoder_select="blockdsp dsputil mpegvideo" | |||||
| mdec_decoder_select="blockdsp idctdsp mpegvideo" | |||||
| metasound_decoder_select="lsp mdct sinewin" | metasound_decoder_select="lsp mdct sinewin" | ||||
| mimic_decoder_select="blockdsp bswapdsp dsputil hpeldsp" | |||||
| mjpeg_decoder_select="blockdsp dsputil hpeldsp" | |||||
| mimic_decoder_select="blockdsp bswapdsp hpeldsp idctdsp" | |||||
| mjpeg_decoder_select="blockdsp hpeldsp idctdsp" | |||||
| mjpeg_encoder_select="aandcttables mpegvideoenc" | mjpeg_encoder_select="aandcttables mpegvideoenc" | ||||
| mjpegb_decoder_select="mjpeg_decoder" | mjpegb_decoder_select="mjpeg_decoder" | ||||
| mlp_decoder_select="mlp_parser" | mlp_decoder_select="mlp_parser" | ||||
| @@ -1829,13 +1831,13 @@ mss2_decoder_select="error_resilience mpeg_er qpeldsp vc1_decoder" | |||||
| mxpeg_decoder_select="mjpeg_decoder" | mxpeg_decoder_select="mjpeg_decoder" | ||||
| nellymoser_decoder_select="mdct sinewin" | nellymoser_decoder_select="mdct sinewin" | ||||
| nellymoser_encoder_select="audio_frame_queue mdct sinewin" | nellymoser_encoder_select="audio_frame_queue mdct sinewin" | ||||
| nuv_decoder_select="dsputil lzo" | |||||
| nuv_decoder_select="idctdsp lzo" | |||||
| on2avc_decoder_select="mdct" | on2avc_decoder_select="mdct" | ||||
| opus_decoder_deps="avresample" | opus_decoder_deps="avresample" | ||||
| png_decoder_deps="zlib" | png_decoder_deps="zlib" | ||||
| png_encoder_deps="zlib" | png_encoder_deps="zlib" | ||||
| png_encoder_select="huffyuvencdsp" | png_encoder_select="huffyuvencdsp" | ||||
| prores_decoder_select="dsputil" | |||||
| prores_decoder_select="idctdsp" | |||||
| prores_encoder_select="dsputil" | prores_encoder_select="dsputil" | ||||
| qcelp_decoder_select="lsp" | qcelp_decoder_select="lsp" | ||||
| qdm2_decoder_select="mdct rdft mpegaudiodsp" | qdm2_decoder_select="mdct rdft mpegaudiodsp" | ||||
| @@ -1888,7 +1890,7 @@ wmav2_encoder_select="mdct sinewin" | |||||
| wmavoice_decoder_select="lsp rdft dct mdct sinewin" | wmavoice_decoder_select="lsp rdft dct mdct sinewin" | ||||
| wmv1_decoder_select="h263_decoder" | wmv1_decoder_select="h263_decoder" | ||||
| wmv1_encoder_select="h263_encoder" | wmv1_encoder_select="h263_encoder" | ||||
| wmv2_decoder_select="blockdsp h263_decoder intrax8 videodsp" | |||||
| wmv2_decoder_select="blockdsp h263_decoder idctdsp intrax8 videodsp" | |||||
| wmv2_encoder_select="h263_encoder" | wmv2_encoder_select="h263_encoder" | ||||
| wmv3_decoder_select="vc1_decoder" | wmv3_decoder_select="vc1_decoder" | ||||
| wmv3image_decoder_select="wmv3_decoder" | wmv3image_decoder_select="wmv3_decoder" | ||||
| @@ -136,9 +136,6 @@ dct_unquantize_mpeg2 | |||||
| dct_unquantize_h263 | dct_unquantize_h263 | ||||
| Used in MPEG-4/H.263 en/decoding. | Used in MPEG-4/H.263 en/decoding. | ||||
| FIXME remaining functions? | |||||
| BTW, most of these functions are in dsputil.c/.h, some are in mpegvideo.c/.h. | |||||
| Alignment: | Alignment: | ||||
| @@ -33,9 +33,8 @@ OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o | |||||
| OBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o | OBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o | ||||
| OBJS-$(CONFIG_CABAC) += cabac.o | OBJS-$(CONFIG_CABAC) += cabac.o | ||||
| OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o | OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o | ||||
| OBJS-$(CONFIG_DSPUTIL) += dsputil.o | |||||
| OBJS-$(CONFIG_DXVA2) += dxva2.o | OBJS-$(CONFIG_DXVA2) += dxva2.o | ||||
| OBJS-$(CONFIG_DSPUTIL) += dsputil.o faanidct.o \ | |||||
| simple_idct.o jrevdct.o | |||||
| OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o | OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o | ||||
| OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o | OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o | ||||
| FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o | FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o | ||||
| @@ -51,6 +50,8 @@ OBJS-$(CONFIG_HPELDSP) += hpeldsp.o | |||||
| OBJS-$(CONFIG_HUFFMAN) += huffman.o | OBJS-$(CONFIG_HUFFMAN) += huffman.o | ||||
| OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o | OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o | ||||
| OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o | OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o | ||||
| OBJS-$(CONFIG_IDCTDSP) += idctdsp.o faanidct.o \ | |||||
| simple_idct.o jrevdct.o | |||||
| OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o | OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o | ||||
| OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o | OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o | ||||
| OBJS-$(CONFIG_LPC) += lpc.o | OBJS-$(CONFIG_LPC) += lpc.o | ||||
| @@ -24,10 +24,10 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "bytestream.h" | #include "bytestream.h" | ||||
| #include "dsputil.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "golomb.h" | #include "golomb.h" | ||||
| #include "idctdsp.h" | |||||
| #include "unary.h" | #include "unary.h" | ||||
| #define AIC_HDR_SIZE 24 | #define AIC_HDR_SIZE 24 | ||||
| @@ -139,7 +139,7 @@ static const uint8_t *aic_scan[NUM_BANDS] = { | |||||
| typedef struct AICContext { | typedef struct AICContext { | ||||
| AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
| AVFrame *frame; | AVFrame *frame; | ||||
| DSPContext dsp; | |||||
| IDCTDSPContext idsp; | |||||
| ScanTable scantable; | ScanTable scantable; | ||||
| int num_x_slices; | int num_x_slices; | ||||
| @@ -336,16 +336,15 @@ static int aic_decode_slice(AICContext *ctx, int mb_x, int mb_y, | |||||
| recombine_block_il(ctx->block, ctx->scantable.permutated, | recombine_block_il(ctx->block, ctx->scantable.permutated, | ||||
| &base_y, &ext_y, blk); | &base_y, &ext_y, blk); | ||||
| unquant_block(ctx->block, ctx->quant); | unquant_block(ctx->block, ctx->quant); | ||||
| ctx->dsp.idct(ctx->block); | |||||
| ctx->idsp.idct(ctx->block); | |||||
| if (!ctx->interlaced) { | if (!ctx->interlaced) { | ||||
| dst = Y + (blk >> 1) * 8 * ystride + (blk & 1) * 8; | dst = Y + (blk >> 1) * 8 * ystride + (blk & 1) * 8; | ||||
| ctx->dsp.put_signed_pixels_clamped(ctx->block, dst, | |||||
| ystride); | |||||
| ctx->idsp.put_signed_pixels_clamped(ctx->block, dst, ystride); | |||||
| } else { | } else { | ||||
| dst = Y + (blk & 1) * 8 + (blk >> 1) * ystride; | dst = Y + (blk & 1) * 8 + (blk >> 1) * ystride; | ||||
| ctx->dsp.put_signed_pixels_clamped(ctx->block, dst, | |||||
| ystride * 2); | |||||
| ctx->idsp.put_signed_pixels_clamped(ctx->block, dst, | |||||
| ystride * 2); | |||||
| } | } | ||||
| } | } | ||||
| Y += 16; | Y += 16; | ||||
| @@ -354,9 +353,9 @@ static int aic_decode_slice(AICContext *ctx, int mb_x, int mb_y, | |||||
| recombine_block(ctx->block, ctx->scantable.permutated, | recombine_block(ctx->block, ctx->scantable.permutated, | ||||
| &base_c, &ext_c); | &base_c, &ext_c); | ||||
| unquant_block(ctx->block, ctx->quant); | unquant_block(ctx->block, ctx->quant); | ||||
| ctx->dsp.idct(ctx->block); | |||||
| ctx->dsp.put_signed_pixels_clamped(ctx->block, C[blk], | |||||
| ctx->frame->linesize[blk + 1]); | |||||
| ctx->idsp.idct(ctx->block); | |||||
| ctx->idsp.put_signed_pixels_clamped(ctx->block, C[blk], | |||||
| ctx->frame->linesize[blk + 1]); | |||||
| C[blk] += 8; | C[blk] += 8; | ||||
| } | } | ||||
| } | } | ||||
| @@ -426,11 +425,11 @@ static av_cold int aic_decode_init(AVCodecContext *avctx) | |||||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | avctx->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
| ff_dsputil_init(&ctx->dsp, avctx); | |||||
| ff_idctdsp_init(&ctx->idsp, avctx); | |||||
| for (i = 0; i < 64; i++) | for (i = 0; i < 64; i++) | ||||
| scan[i] = i; | scan[i] = i; | ||||
| ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, scan); | |||||
| ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, scan); | |||||
| ctx->mb_width = FFALIGN(avctx->width, 16) >> 4; | ctx->mb_width = FFALIGN(avctx->width, 16) >> 4; | ||||
| ctx->mb_height = FFALIGN(avctx->height, 16) >> 4; | ctx->mb_height = FFALIGN(avctx->height, 16) >> 4; | ||||
| @@ -6,10 +6,7 @@ OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ | |||||
| arm/ac3dsp_arm.o | arm/ac3dsp_arm.o | ||||
| OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o | OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o | ||||
| OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o | OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o | ||||
| OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ | |||||
| arm/dsputil_arm.o \ | |||||
| arm/jrevdct_arm.o \ | |||||
| arm/simple_idct_arm.o | |||||
| OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o | |||||
| OBJS-$(CONFIG_FFT) += arm/fft_init_arm.o \ | OBJS-$(CONFIG_FFT) += arm/fft_init_arm.o \ | ||||
| arm/fft_fixed_init_arm.o | arm/fft_fixed_init_arm.o | ||||
| OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o | OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o | ||||
| @@ -18,6 +15,10 @@ OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o | |||||
| OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o | OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o | ||||
| OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \ | OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \ | ||||
| arm/hpeldsp_arm.o | arm/hpeldsp_arm.o | ||||
| OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_arm.o \ | |||||
| arm/idctdsp_arm.o \ | |||||
| arm/jrevdct_arm.o \ | |||||
| arm/simple_idct_arm.o | |||||
| OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o | OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o | ||||
| OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o | OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o | ||||
| OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o | OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o | ||||
| @@ -40,7 +41,7 @@ OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o | |||||
| OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \ | OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \ | ||||
| arm/rv40dsp_init_arm.o | arm/rv40dsp_init_arm.o | ||||
| ARMV5TE-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv5te.o \ | |||||
| ARMV5TE-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv5te.o \ | |||||
| arm/simple_idct_armv5te.o | arm/simple_idct_armv5te.o | ||||
| ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_armv5te.o \ | ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_armv5te.o \ | ||||
| arm/mpegvideo_armv5te_s.o | arm/mpegvideo_armv5te_s.o | ||||
| @@ -51,11 +52,13 @@ ARMV5TE-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv5te.o | |||||
| ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o | ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o | ||||
| ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \ | ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \ | ||||
| arm/dsputil_armv6.o \ | |||||
| arm/simple_idct_armv6.o | |||||
| arm/dsputil_armv6.o | |||||
| ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o | ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o | ||||
| ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ | ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ | ||||
| arm/hpeldsp_armv6.o | arm/hpeldsp_armv6.o | ||||
| ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \ | |||||
| arm/idctdsp_armv6.o \ | |||||
| arm/simple_idct_armv6.o | |||||
| ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | ||||
| ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o | ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o | ||||
| @@ -83,9 +86,6 @@ NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \ | |||||
| arm/int_neon.o | arm/int_neon.o | ||||
| NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ | NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ | ||||
| arm/blockdsp_neon.o | arm/blockdsp_neon.o | ||||
| NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ | |||||
| arm/dsputil_neon.o \ | |||||
| arm/simple_idct_neon.o | |||||
| NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ | NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ | ||||
| arm/fft_fixed_neon.o | arm/fft_fixed_neon.o | ||||
| NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o | NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o | ||||
| @@ -96,6 +96,9 @@ NEON-OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_neon.o \ | |||||
| arm/hpeldsp_neon.o | arm/hpeldsp_neon.o | ||||
| NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_neon.o \ | NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_neon.o \ | ||||
| arm/hpeldsp_neon.o | arm/hpeldsp_neon.o | ||||
| NEON-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_neon.o \ | |||||
| arm/idctdsp_neon.o \ | |||||
| arm/simple_idct_neon.o | |||||
| NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \ | NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \ | ||||
| arm/mdct_fixed_neon.o | arm/mdct_fixed_neon.o | ||||
| NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o | NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o | ||||
| @@ -24,11 +24,7 @@ | |||||
| #include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
| void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | |||||
| void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, | void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, | ||||
| unsigned high_bit_depth); | unsigned high_bit_depth); | ||||
| void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | |||||
| #endif /* AVCODEC_ARM_DSPUTIL_ARM_H */ | #endif /* AVCODEC_ARM_DSPUTIL_ARM_H */ | ||||
| @@ -20,33 +20,6 @@ | |||||
| #include "libavutil/arm/asm.S" | #include "libavutil/arm/asm.S" | ||||
| function ff_add_pixels_clamped_armv6, export=1 | |||||
| push {r4-r8,lr} | |||||
| mov r3, #8 | |||||
| 1: | |||||
| ldm r0!, {r4,r5,r12,lr} | |||||
| ldrd r6, r7, [r1] | |||||
| pkhbt r8, r4, r5, lsl #16 | |||||
| pkhtb r5, r5, r4, asr #16 | |||||
| pkhbt r4, r12, lr, lsl #16 | |||||
| pkhtb lr, lr, r12, asr #16 | |||||
| pld [r1, r2] | |||||
| uxtab16 r8, r8, r6 | |||||
| uxtab16 r5, r5, r6, ror #8 | |||||
| uxtab16 r4, r4, r7 | |||||
| uxtab16 lr, lr, r7, ror #8 | |||||
| usat16 r8, #8, r8 | |||||
| usat16 r5, #8, r5 | |||||
| usat16 r4, #8, r4 | |||||
| usat16 lr, #8, lr | |||||
| orr r6, r8, r5, lsl #8 | |||||
| orr r7, r4, lr, lsl #8 | |||||
| subs r3, r3, #1 | |||||
| strd_post r6, r7, r1, r2 | |||||
| bgt 1b | |||||
| pop {r4-r8,pc} | |||||
| endfunc | |||||
| function ff_get_pixels_armv6, export=1 | function ff_get_pixels_armv6, export=1 | ||||
| pld [r1, r2] | pld [r1, r2] | ||||
| push {r4-r8, lr} | push {r4-r8, lr} | ||||
| @@ -28,71 +28,11 @@ | |||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
| #include "dsputil_arm.h" | #include "dsputil_arm.h" | ||||
| void ff_j_rev_dct_arm(int16_t *data); | |||||
| void ff_simple_idct_arm(int16_t *data); | |||||
| /* XXX: local hack */ | |||||
| static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); | |||||
| static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); | |||||
| void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest, | |||||
| int line_size); | |||||
| /* XXX: those functions should be suppressed ASAP when all IDCTs are | |||||
| * converted */ | |||||
| static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_j_rev_dct_arm(block); | |||||
| ff_put_pixels_clamped(block, dest, line_size); | |||||
| } | |||||
| static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_j_rev_dct_arm(block); | |||||
| ff_add_pixels_clamped(block, dest, line_size); | |||||
| } | |||||
| static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_simple_idct_arm(block); | |||||
| ff_put_pixels_clamped(block, dest, line_size); | |||||
| } | |||||
| static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_simple_idct_arm(block); | |||||
| ff_add_pixels_clamped(block, dest, line_size); | |||||
| } | |||||
| av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx, | av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx, | ||||
| unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
| { | { | ||||
| int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
| ff_put_pixels_clamped = c->put_pixels_clamped; | |||||
| ff_add_pixels_clamped = c->add_pixels_clamped; | |||||
| if (!high_bit_depth) { | |||||
| if (avctx->idct_algo == FF_IDCT_AUTO || | |||||
| avctx->idct_algo == FF_IDCT_ARM) { | |||||
| c->idct_put = j_rev_dct_arm_put; | |||||
| c->idct_add = j_rev_dct_arm_add; | |||||
| c->idct = ff_j_rev_dct_arm; | |||||
| c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | |||||
| } else if (avctx->idct_algo == FF_IDCT_SIMPLEARM) { | |||||
| c->idct_put = simple_idct_arm_put; | |||||
| c->idct_add = simple_idct_arm_add; | |||||
| c->idct = ff_simple_idct_arm; | |||||
| c->idct_permutation_type = FF_NO_IDCT_PERM; | |||||
| } | |||||
| } | |||||
| c->add_pixels_clamped = ff_add_pixels_clamped_arm; | |||||
| if (have_armv5te(cpu_flags)) | |||||
| ff_dsputil_init_armv5te(c, avctx, high_bit_depth); | |||||
| if (have_armv6(cpu_flags)) | if (have_armv6(cpu_flags)) | ||||
| ff_dsputil_init_armv6(c, avctx, high_bit_depth); | ff_dsputil_init_armv6(c, avctx, high_bit_depth); | ||||
| if (have_neon(cpu_flags)) | |||||
| ff_dsputil_init_neon(c, avctx, high_bit_depth); | |||||
| } | } | ||||
| @@ -26,13 +26,6 @@ | |||||
| #include "libavcodec/mpegvideo.h" | #include "libavcodec/mpegvideo.h" | ||||
| #include "dsputil_arm.h" | #include "dsputil_arm.h" | ||||
| void ff_simple_idct_armv6(int16_t *data); | |||||
| void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); | |||||
| void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); | |||||
| void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels, | |||||
| int line_size); | |||||
| void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); | void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); | ||||
| void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, | void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, | ||||
| const uint8_t *s2, int stride); | const uint8_t *s2, int stride); | ||||
| @@ -56,17 +49,6 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size); | |||||
| av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, | av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, | ||||
| unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
| { | { | ||||
| if (!high_bit_depth) { | |||||
| if (avctx->idct_algo == FF_IDCT_AUTO || | |||||
| avctx->idct_algo == FF_IDCT_SIMPLEARMV6) { | |||||
| c->idct_put = ff_simple_idct_put_armv6; | |||||
| c->idct_add = ff_simple_idct_add_armv6; | |||||
| c->idct = ff_simple_idct_armv6; | |||||
| c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | |||||
| } | |||||
| } | |||||
| c->add_pixels_clamped = ff_add_pixels_clamped_armv6; | |||||
| if (!high_bit_depth) | if (!high_bit_depth) | ||||
| c->get_pixels = ff_get_pixels_armv6; | c->get_pixels = ff_get_pixels_armv6; | ||||
| c->diff_pixels = ff_diff_pixels_armv6; | c->diff_pixels = ff_diff_pixels_armv6; | ||||
| @@ -1,5 +1,5 @@ | |||||
| @ | @ | ||||
| @ ARMv4 optimized DSP utils | |||||
| @ ARMv4-optimized IDCT functions | |||||
| @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp> | @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp> | ||||
| @ | @ | ||||
| @ This file is part of Libav. | @ This file is part of Libav. | ||||
| @@ -0,0 +1,34 @@ | |||||
| /* | |||||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_ARM_IDCTDSP_ARM_H | |||||
| #define AVCODEC_ARM_IDCTDSP_ARM_H | |||||
| #include "libavcodec/avcodec.h" | |||||
| #include "libavcodec/idctdsp.h" | |||||
| void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | |||||
| void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | |||||
| void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | |||||
| #endif /* AVCODEC_ARM_IDCTDSP_ARM_H */ | |||||
| @@ -0,0 +1,48 @@ | |||||
| /* | |||||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/arm/asm.S" | |||||
| function ff_add_pixels_clamped_armv6, export=1 | |||||
| push {r4-r8,lr} | |||||
| mov r3, #8 | |||||
| 1: | |||||
| ldm r0!, {r4,r5,r12,lr} | |||||
| ldrd r6, r7, [r1] | |||||
| pkhbt r8, r4, r5, lsl #16 | |||||
| pkhtb r5, r5, r4, asr #16 | |||||
| pkhbt r4, r12, lr, lsl #16 | |||||
| pkhtb lr, lr, r12, asr #16 | |||||
| pld [r1, r2] | |||||
| uxtab16 r8, r8, r6 | |||||
| uxtab16 r5, r5, r6, ror #8 | |||||
| uxtab16 r4, r4, r7 | |||||
| uxtab16 lr, lr, r7, ror #8 | |||||
| usat16 r8, #8, r8 | |||||
| usat16 r5, #8, r5 | |||||
| usat16 r4, #8, r4 | |||||
| usat16 lr, #8, lr | |||||
| orr r6, r8, r5, lsl #8 | |||||
| orr r7, r4, lr, lsl #8 | |||||
| subs r3, r3, #1 | |||||
| strd_post r6, r7, r1, r2 | |||||
| bgt 1b | |||||
| pop {r4-r8,pc} | |||||
| endfunc | |||||
| @@ -0,0 +1,98 @@ | |||||
| /* | |||||
| * ARM-optimized IDCT functions | |||||
| * Copyright (c) 2001 Lionel Ulmer | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/arm/cpu.h" | |||||
| #include "libavcodec/avcodec.h" | |||||
| #include "libavcodec/idctdsp.h" | |||||
| #include "idctdsp_arm.h" | |||||
| void ff_j_rev_dct_arm(int16_t *data); | |||||
| void ff_simple_idct_arm(int16_t *data); | |||||
| /* XXX: local hack */ | |||||
| static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); | |||||
| static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size); | |||||
| void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest, | |||||
| int line_size); | |||||
| /* XXX: those functions should be suppressed ASAP when all IDCTs are | |||||
| * converted */ | |||||
| static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_j_rev_dct_arm(block); | |||||
| ff_put_pixels_clamped(block, dest, line_size); | |||||
| } | |||||
| static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_j_rev_dct_arm(block); | |||||
| ff_add_pixels_clamped(block, dest, line_size); | |||||
| } | |||||
/* Run ff_simple_idct_arm() on block, then hand the same buffer, dest and
 * line_size to the saved ff_put_pixels_clamped callback. */
| static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_simple_idct_arm(block); | |||||
| ff_put_pixels_clamped(block, dest, line_size); | |||||
| } | |||||
/* Run ff_simple_idct_arm() on block, then hand the same buffer, dest and
 * line_size to the saved ff_add_pixels_clamped callback. */
| static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_simple_idct_arm(block); | |||||
| ff_add_pixels_clamped(block, dest, line_size); | |||||
| } | |||||
/**
 * Install the ARM IDCT callbacks into an IDCTDSPContext.
 *
 * Steps, in order:
 *  - the current c->put_pixels_clamped and c->add_pixels_clamped are saved in
 *    the file-scope pointers used by the wrapper functions above;
 *  - when high_bit_depth is zero, avctx->idct_algo selects either the
 *    ff_j_rev_dct_arm wrappers (FF_IDCT_AUTO or FF_IDCT_ARM) or the
 *    ff_simple_idct_arm wrappers (FF_IDCT_SIMPLEARM); any other value leaves
 *    the idct fields untouched in this function;
 *  - c->add_pixels_clamped is always replaced by ff_add_pixels_clamped_arm;
 *  - the ARMv5TE, ARMv6 and NEON initialisers are called, in that order, for
 *    each feature the CPU flags report.
 *
 * @param c              context whose function pointers are updated
 * @param avctx          codec context; idct_algo is read here and avctx is
 *                       forwarded to the per-extension initialisers
 * @param high_bit_depth non-zero skips the IDCT selection done in this function
 */
| av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth) | |||||
| { | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
/* Captured before add_pixels_clamped is overridden further down, so the
 * _add wrappers keep calling whatever the caller had installed rather than
 * ff_add_pixels_clamped_arm. */
| ff_put_pixels_clamped = c->put_pixels_clamped; | |||||
| ff_add_pixels_clamped = c->add_pixels_clamped; | |||||
| if (!high_bit_depth) { | |||||
| if (avctx->idct_algo == FF_IDCT_AUTO || | |||||
| avctx->idct_algo == FF_IDCT_ARM) { | |||||
| c->idct_put = j_rev_dct_arm_put; | |||||
| c->idct_add = j_rev_dct_arm_add; | |||||
| c->idct = ff_j_rev_dct_arm; | |||||
| c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | |||||
| } else if (avctx->idct_algo == FF_IDCT_SIMPLEARM) { | |||||
| c->idct_put = simple_idct_arm_put; | |||||
| c->idct_add = simple_idct_arm_add; | |||||
| c->idct = ff_simple_idct_arm; | |||||
| c->idct_permutation_type = FF_NO_IDCT_PERM; | |||||
| } | |||||
| } | |||||
/* Not guarded by high_bit_depth. */
| c->add_pixels_clamped = ff_add_pixels_clamped_arm; | |||||
/* These run after the assignments above on the same context.
 * NOTE(review): presumably each one overrides the generic choices when its
 * own conditions match -- confirm against the individual initialisers. */
| if (have_armv5te(cpu_flags)) | |||||
| ff_idctdsp_init_armv5te(c, avctx, high_bit_depth); | |||||
| if (have_armv6(cpu_flags)) | |||||
| ff_idctdsp_init_armv6(c, avctx, high_bit_depth); | |||||
| if (have_neon(cpu_flags)) | |||||
| ff_idctdsp_init_neon(c, avctx, high_bit_depth); | |||||
| } | |||||
| @@ -22,14 +22,14 @@ | |||||
| #include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
| #include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
| #include "libavcodec/dsputil.h" | |||||
| #include "dsputil_arm.h" | |||||
| #include "libavcodec/idctdsp.h" | |||||
| #include "idctdsp_arm.h" | |||||
| void ff_simple_idct_armv5te(int16_t *data); | void ff_simple_idct_armv5te(int16_t *data); | ||||
| void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data); | void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data); | ||||
| void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data); | void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data); | ||||
| av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx, | |||||
| av_cold void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
| { | { | ||||
| if (!high_bit_depth && | if (!high_bit_depth && | ||||
| @@ -0,0 +1,48 @@ | |||||
| /* | |||||
| * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavcodec/avcodec.h" | |||||
| #include "libavcodec/idctdsp.h" | |||||
| #include "idctdsp_arm.h" | |||||
/* ARMv6 routines installed by ff_idctdsp_init_armv6(); declared here, defined
 * outside this file. */
| void ff_simple_idct_armv6(int16_t *data); | |||||
| void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); | |||||
| void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); | |||||
| void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels, | |||||
| int line_size); | |||||
/**
 * Install the ARMv6 IDCT callbacks into an IDCTDSPContext.
 *
 * When high_bit_depth is zero and avctx->idct_algo is FF_IDCT_AUTO or
 * FF_IDCT_SIMPLEARMV6, idct_put, idct_add and idct are pointed at the
 * corresponding ff_simple_idct armv6 routines and the permutation type is set
 * to FF_LIBMPEG2_IDCT_PERM. c->add_pixels_clamped is replaced in every case.
 *
 * @param c              context whose function pointers are updated
 * @param avctx          codec context; only idct_algo is read
 * @param high_bit_depth non-zero leaves the idct fields untouched
 */
| av_cold void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth) | |||||
| { | |||||
| if (!high_bit_depth) { | |||||
| if (avctx->idct_algo == FF_IDCT_AUTO || | |||||
| avctx->idct_algo == FF_IDCT_SIMPLEARMV6) { | |||||
| c->idct_put = ff_simple_idct_put_armv6; | |||||
| c->idct_add = ff_simple_idct_add_armv6; | |||||
| c->idct = ff_simple_idct_armv6; | |||||
| c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | |||||
| } | |||||
| } | |||||
/* Not guarded by high_bit_depth. */
| c->add_pixels_clamped = ff_add_pixels_clamped_armv6; | |||||
| } | |||||
| @@ -1,5 +1,5 @@ | |||||
| /* | /* | ||||
| * ARM NEON optimised DSP functions | |||||
| * ARM-NEON-optimized IDCT functions | |||||
| * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | ||||
| * | * | ||||
| * This file is part of Libav. | * This file is part of Libav. | ||||
| @@ -23,8 +23,8 @@ | |||||
| #include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
| #include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
| #include "libavcodec/dsputil.h" | |||||
| #include "dsputil_arm.h" | |||||
| #include "libavcodec/idctdsp.h" | |||||
| #include "idctdsp_arm.h" | |||||
| void ff_simple_idct_neon(int16_t *data); | void ff_simple_idct_neon(int16_t *data); | ||||
| void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); | void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); | ||||
| @@ -34,7 +34,7 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); | |||||
| void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
| void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
| av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, | |||||
| av_cold void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
| { | { | ||||
| if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
| @@ -1,5 +1,5 @@ | |||||
| /* | /* | ||||
| * ARM NEON optimised DSP functions | |||||
| * ARM-NEON-optimized IDCT functions | |||||
| * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | ||||
| * | * | ||||
| * This file is part of Libav. | * This file is part of Libav. | ||||
| @@ -84,7 +84,6 @@ av_cold void ff_asv_common_init(AVCodecContext *avctx) { | |||||
| ASV1Context * const a = avctx->priv_data; | ASV1Context * const a = avctx->priv_data; | ||||
| ff_bswapdsp_init(&a->bbdsp); | ff_bswapdsp_init(&a->bbdsp); | ||||
| ff_dsputil_init(&a->dsp, avctx); | |||||
| a->mb_width = (avctx->width + 15) / 16; | a->mb_width = (avctx->width + 15) / 16; | ||||
| a->mb_height = (avctx->height + 15) / 16; | a->mb_height = (avctx->height + 15) / 16; | ||||
| @@ -34,6 +34,7 @@ | |||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "bswapdsp.h" | #include "bswapdsp.h" | ||||
| #include "dsputil.h" | #include "dsputil.h" | ||||
| #include "idctdsp.h" | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| @@ -42,6 +43,7 @@ typedef struct ASV1Context{ | |||||
| BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
| BswapDSPContext bbdsp; | BswapDSPContext bbdsp; | ||||
| DSPContext dsp; | DSPContext dsp; | ||||
| IDCTDSPContext idsp; | |||||
| PutBitContext pb; | PutBitContext pb; | ||||
| GetBitContext gb; | GetBitContext gb; | ||||
| ScanTable scantable; | ScanTable scantable; | ||||
| @@ -30,6 +30,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mathops.h" | #include "mathops.h" | ||||
| #include "mpeg12data.h" | #include "mpeg12data.h" | ||||
| @@ -190,14 +191,14 @@ static inline void idct_put(ASV1Context *a, AVFrame *frame, int mb_x, int mb_y) | |||||
| uint8_t *dest_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; | uint8_t *dest_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; | ||||
| uint8_t *dest_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; | uint8_t *dest_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; | ||||
| a->dsp.idct_put(dest_y , linesize, block[0]); | |||||
| a->dsp.idct_put(dest_y + 8, linesize, block[1]); | |||||
| a->dsp.idct_put(dest_y + 8*linesize , linesize, block[2]); | |||||
| a->dsp.idct_put(dest_y + 8*linesize + 8, linesize, block[3]); | |||||
| a->idsp.idct_put(dest_y, linesize, block[0]); | |||||
| a->idsp.idct_put(dest_y + 8, linesize, block[1]); | |||||
| a->idsp.idct_put(dest_y + 8 * linesize, linesize, block[2]); | |||||
| a->idsp.idct_put(dest_y + 8 * linesize + 8, linesize, block[3]); | |||||
| if (!(a->avctx->flags&CODEC_FLAG_GRAY)) { | if (!(a->avctx->flags&CODEC_FLAG_GRAY)) { | ||||
| a->dsp.idct_put(dest_cb, frame->linesize[1], block[4]); | |||||
| a->dsp.idct_put(dest_cr, frame->linesize[2], block[5]); | |||||
| a->idsp.idct_put(dest_cb, frame->linesize[1], block[4]); | |||||
| a->idsp.idct_put(dest_cr, frame->linesize[2], block[5]); | |||||
| } | } | ||||
| } | } | ||||
| @@ -283,8 +284,9 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
| ff_asv_common_init(avctx); | ff_asv_common_init(avctx); | ||||
| ff_blockdsp_init(&a->bdsp, avctx); | ff_blockdsp_init(&a->bdsp, avctx); | ||||
| ff_idctdsp_init(&a->idsp, avctx); | |||||
| init_vlcs(a); | init_vlcs(a); | ||||
| ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab); | |||||
| ff_init_scantable(a->idsp.idct_permutation, &a->scantable, ff_asv_scantab); | |||||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | avctx->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
| a->inv_qscale = avctx->extradata[0]; | a->inv_qscale = avctx->extradata[0]; | ||||
| @@ -247,6 +247,7 @@ static av_cold int encode_init(AVCodecContext *avctx){ | |||||
| avctx->coded_frame->key_frame = 1; | avctx->coded_frame->key_frame = 1; | ||||
| ff_asv_common_init(avctx); | ff_asv_common_init(avctx); | ||||
| ff_dsputil_init(&a->dsp, avctx); | |||||
| if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE; | if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE; | ||||
| @@ -29,6 +29,7 @@ | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "golomb.h" | #include "golomb.h" | ||||
| #include "h264chroma.h" | #include "h264chroma.h" | ||||
| #include "idctdsp.h" | |||||
| #include "mathops.h" | #include "mathops.h" | ||||
| #include "qpeldsp.h" | #include "qpeldsp.h" | ||||
| #include "cavs.h" | #include "cavs.h" | ||||
| @@ -760,13 +761,13 @@ av_cold int ff_cavs_init(AVCodecContext *avctx) | |||||
| AVSContext *h = avctx->priv_data; | AVSContext *h = avctx->priv_data; | ||||
| ff_blockdsp_init(&h->bdsp, avctx); | ff_blockdsp_init(&h->bdsp, avctx); | ||||
| ff_dsputil_init(&h->dsp, avctx); | |||||
| ff_h264chroma_init(&h->h264chroma, 8); | ff_h264chroma_init(&h->h264chroma, 8); | ||||
| ff_idctdsp_init(&h->idsp, avctx); | |||||
| ff_videodsp_init(&h->vdsp, 8); | ff_videodsp_init(&h->vdsp, 8); | ||||
| ff_cavsdsp_init(&h->cdsp, avctx); | ff_cavsdsp_init(&h->cdsp, avctx); | ||||
| ff_init_scantable_permutation(h->dsp.idct_permutation, | |||||
| ff_init_scantable_permutation(h->idsp.idct_permutation, | |||||
| h->cdsp.idct_perm); | h->cdsp.idct_perm); | ||||
| ff_init_scantable(h->dsp.idct_permutation, &h->scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(h->idsp.idct_permutation, &h->scantable, ff_zigzag_direct); | |||||
| h->avctx = avctx; | h->avctx = avctx; | ||||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | avctx->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
| @@ -24,8 +24,8 @@ | |||||
| #include "cavsdsp.h" | #include "cavsdsp.h" | ||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "dsputil.h" | |||||
| #include "h264chroma.h" | #include "h264chroma.h" | ||||
| #include "idctdsp.h" | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "videodsp.h" | #include "videodsp.h" | ||||
| @@ -162,9 +162,9 @@ typedef struct AVSFrame { | |||||
| typedef struct AVSContext { | typedef struct AVSContext { | ||||
| AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
| DSPContext dsp; | |||||
| BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
| H264ChromaContext h264chroma; | H264ChromaContext h264chroma; | ||||
| IDCTDSPContext idsp; | |||||
| VideoDSPContext vdsp; | VideoDSPContext vdsp; | ||||
| CAVSDSPContext cdsp; | CAVSDSPContext cdsp; | ||||
| GetBitContext gb; | GetBitContext gb; | ||||
| @@ -24,7 +24,7 @@ | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "mathops.h" | #include "mathops.h" | ||||
| #include "cavsdsp.h" | #include "cavsdsp.h" | ||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| @@ -28,7 +28,7 @@ | |||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "dnxhddata.h" | #include "dnxhddata.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| typedef struct DNXHDContext { | typedef struct DNXHDContext { | ||||
| @@ -42,7 +42,7 @@ typedef struct DNXHDContext { | |||||
| int cur_field; ///< current interlaced field | int cur_field; ///< current interlaced field | ||||
| VLC ac_vlc, dc_vlc, run_vlc; | VLC ac_vlc, dc_vlc, run_vlc; | ||||
| int last_dc[3]; | int last_dc[3]; | ||||
| DSPContext dsp; | |||||
| IDCTDSPContext idsp; | |||||
| DECLARE_ALIGNED(16, int16_t, blocks)[12][64]; | DECLARE_ALIGNED(16, int16_t, blocks)[12][64]; | ||||
| ScanTable scantable; | ScanTable scantable; | ||||
| const CIDEntry *cid_table; | const CIDEntry *cid_table; | ||||
| @@ -95,7 +95,7 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, int cid) | |||||
| ctx->cid_table->run_bits, 1, 1, | ctx->cid_table->run_bits, 1, 1, | ||||
| ctx->cid_table->run_codes, 2, 2, 0); | ctx->cid_table->run_codes, 2, 2, 0); | ||||
| ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, | |||||
| ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, | |||||
| ff_zigzag_direct); | ff_zigzag_direct); | ||||
| ctx->cid = cid; | ctx->cid = cid; | ||||
| } | } | ||||
| @@ -136,7 +136,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||||
| ctx->avctx->bits_per_raw_sample = 10; | ctx->avctx->bits_per_raw_sample = 10; | ||||
| if (ctx->bit_depth != 10) { | if (ctx->bit_depth != 10) { | ||||
| ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | ||||
| ff_dsputil_init(&ctx->dsp, ctx->avctx); | |||||
| ff_idctdsp_init(&ctx->idsp, ctx->avctx); | |||||
| ctx->bit_depth = 10; | ctx->bit_depth = 10; | ||||
| ctx->decode_dct_block = dnxhd_decode_dct_block_10_444; | ctx->decode_dct_block = dnxhd_decode_dct_block_10_444; | ||||
| } | } | ||||
| @@ -146,7 +146,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||||
| ctx->avctx->bits_per_raw_sample = 10; | ctx->avctx->bits_per_raw_sample = 10; | ||||
| if (ctx->bit_depth != 10) { | if (ctx->bit_depth != 10) { | ||||
| ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | ||||
| ff_dsputil_init(&ctx->dsp, ctx->avctx); | |||||
| ff_idctdsp_init(&ctx->idsp, ctx->avctx); | |||||
| ctx->bit_depth = 10; | ctx->bit_depth = 10; | ||||
| ctx->decode_dct_block = dnxhd_decode_dct_block_10; | ctx->decode_dct_block = dnxhd_decode_dct_block_10; | ||||
| } | } | ||||
| @@ -155,7 +155,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame, | |||||
| ctx->avctx->bits_per_raw_sample = 8; | ctx->avctx->bits_per_raw_sample = 8; | ||||
| if (ctx->bit_depth != 8) { | if (ctx->bit_depth != 8) { | ||||
| ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | ff_blockdsp_init(&ctx->bdsp, ctx->avctx); | ||||
| ff_dsputil_init(&ctx->dsp, ctx->avctx); | |||||
| ff_idctdsp_init(&ctx->idsp, ctx->avctx); | |||||
| ctx->bit_depth = 8; | ctx->bit_depth = 8; | ||||
| ctx->decode_dct_block = dnxhd_decode_dct_block_8; | ctx->decode_dct_block = dnxhd_decode_dct_block_8; | ||||
| } | } | ||||
| @@ -340,34 +340,34 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame, | |||||
| dct_y_offset = dct_linesize_luma << 3; | dct_y_offset = dct_linesize_luma << 3; | ||||
| dct_x_offset = 8 << shift1; | dct_x_offset = 8 << shift1; | ||||
| if (!ctx->is_444) { | if (!ctx->is_444) { | ||||
| ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]); | |||||
| ctx->dsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]); | |||||
| ctx->dsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[4]); | |||||
| ctx->dsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]); | |||||
| ctx->idsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]); | |||||
| ctx->idsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]); | |||||
| ctx->idsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[4]); | |||||
| ctx->idsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]); | |||||
| if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) { | if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) { | ||||
| dct_y_offset = dct_linesize_chroma << 3; | dct_y_offset = dct_linesize_chroma << 3; | ||||
| ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]); | |||||
| ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]); | |||||
| ctx->dsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]); | |||||
| ctx->dsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]); | |||||
| ctx->idsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]); | |||||
| ctx->idsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]); | |||||
| ctx->idsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]); | |||||
| ctx->idsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]); | |||||
| } | } | ||||
| } else { | } else { | ||||
| ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]); | |||||
| ctx->dsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]); | |||||
| ctx->dsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[6]); | |||||
| ctx->dsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[7]); | |||||
| ctx->idsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]); | |||||
| ctx->idsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]); | |||||
| ctx->idsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[6]); | |||||
| ctx->idsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[7]); | |||||
| if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) { | if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) { | ||||
| dct_y_offset = dct_linesize_chroma << 3; | dct_y_offset = dct_linesize_chroma << 3; | ||||
| ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]); | |||||
| ctx->dsp.idct_put(dest_u + dct_x_offset, dct_linesize_chroma, ctx->blocks[3]); | |||||
| ctx->dsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[8]); | |||||
| ctx->dsp.idct_put(dest_u + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[9]); | |||||
| ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[4]); | |||||
| ctx->dsp.idct_put(dest_v + dct_x_offset, dct_linesize_chroma, ctx->blocks[5]); | |||||
| ctx->dsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[10]); | |||||
| ctx->dsp.idct_put(dest_v + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[11]); | |||||
| ctx->idsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]); | |||||
| ctx->idsp.idct_put(dest_u + dct_x_offset, dct_linesize_chroma, ctx->blocks[3]); | |||||
| ctx->idsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[8]); | |||||
| ctx->idsp.idct_put(dest_u + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[9]); | |||||
| ctx->idsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[4]); | |||||
| ctx->idsp.idct_put(dest_v + dct_x_offset, dct_linesize_chroma, ctx->blocks[5]); | |||||
| ctx->idsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[10]); | |||||
| ctx->idsp.idct_put(dest_v + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[11]); | |||||
| } | } | ||||
| } | } | ||||
| @@ -200,14 +200,14 @@ static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias) | |||||
| if (ctx->cid_table->bit_depth == 8) { | if (ctx->cid_table->bit_depth == 8) { | ||||
| for (i = 1; i < 64; i++) { | for (i = 1; i < 64; i++) { | ||||
| int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| weight_matrix[j] = ctx->cid_table->luma_weight[i]; | weight_matrix[j] = ctx->cid_table->luma_weight[i]; | ||||
| } | } | ||||
| ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16, | ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16, | ||||
| weight_matrix, ctx->m.intra_quant_bias, 1, | weight_matrix, ctx->m.intra_quant_bias, 1, | ||||
| ctx->m.avctx->qmax, 1); | ctx->m.avctx->qmax, 1); | ||||
| for (i = 1; i < 64; i++) { | for (i = 1; i < 64; i++) { | ||||
| int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| weight_matrix[j] = ctx->cid_table->chroma_weight[i]; | weight_matrix[j] = ctx->cid_table->chroma_weight[i]; | ||||
| } | } | ||||
| ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16, | ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16, | ||||
| @@ -228,7 +228,7 @@ static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias) | |||||
| // 10-bit | // 10-bit | ||||
| for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { | for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { | ||||
| for (i = 1; i < 64; i++) { | for (i = 1; i < 64; i++) { | ||||
| int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| /* The quantization formula from the VC-3 standard is: | /* The quantization formula from the VC-3 standard is: | ||||
| * quantized = sign(block[i]) * floor(abs(block[i]/s) * p / | * quantized = sign(block[i]) * floor(abs(block[i]/s) * p / | ||||
| @@ -308,6 +308,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) | |||||
| ff_blockdsp_init(&ctx->bdsp, avctx); | ff_blockdsp_init(&ctx->bdsp, avctx); | ||||
| ff_dsputil_init(&ctx->m.dsp, avctx); | ff_dsputil_init(&ctx->m.dsp, avctx); | ||||
| ff_idctdsp_init(&ctx->m.idsp, avctx); | |||||
| ff_dct_common_init(&ctx->m); | ff_dct_common_init(&ctx->m); | ||||
| if (!ctx->m.dct_quantize) | if (!ctx->m.dct_quantize) | ||||
| ctx->m.dct_quantize = ff_dct_quantize_c; | ctx->m.dct_quantize = ff_dct_quantize_c; | ||||
| @@ -634,7 +635,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, | |||||
| if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) { | if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) { | ||||
| dnxhd_unquantize_c(ctx, block, i, qscale, last_index); | dnxhd_unquantize_c(ctx, block, i, qscale, last_index); | ||||
| ctx->m.dsp.idct(block); | |||||
| ctx->m.idsp.idct(block); | |||||
| ssd += dnxhd_ssd_block(block, src_block); | ssd += dnxhd_ssd_block(block, src_block); | ||||
| } | } | ||||
| } | } | ||||
| @@ -33,7 +33,6 @@ | |||||
| #include "dsputil.h" | #include "dsputil.h" | ||||
| #include "simple_idct.h" | #include "simple_idct.h" | ||||
| #include "faandct.h" | #include "faandct.h" | ||||
| #include "faanidct.h" | |||||
| #include "imgconvert.h" | #include "imgconvert.h" | ||||
| #include "mathops.h" | #include "mathops.h" | ||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| @@ -48,60 +47,6 @@ uint32_t ff_square_tab[512] = { 0, }; | |||||
| #define BIT_DEPTH 8 | #define BIT_DEPTH 8 | ||||
| #include "dsputilenc_template.c" | #include "dsputilenc_template.c" | ||||
/**
 * Fill a ScanTable from a source scan order and an IDCT permutation.
 *
 * @param permutation   table indexed by the values found in src_scantable
 * @param st            table to fill
 * @param src_scantable scan order with 64 entries; the pointer itself is
 *                      stored in st->scantable (no copy is made)
 */
| av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st, | |||||
| const uint8_t *src_scantable) | |||||
| { | |||||
| int i, end; | |||||
| st->scantable = src_scantable; | |||||
/* permutated[i] is the scan position i mapped through the permutation. */
| for (i = 0; i < 64; i++) { | |||||
| int j = src_scantable[i]; | |||||
| st->permutated[i] = permutation[j]; | |||||
| } | |||||
/* raster_end[i] is the largest permutated[] value among entries 0..i. */
| end = -1; | |||||
| for (i = 0; i < 64; i++) { | |||||
| int j = st->permutated[i]; | |||||
| if (j > end) | |||||
| end = j; | |||||
| st->raster_end[i] = end; | |||||
| } | |||||
| } | |||||
/**
 * Fill a 64-entry IDCT permutation table for the given permutation type.
 *
 * With ARCH_X86 set, ff_init_scantable_permutation_x86() is tried first and
 * this function returns immediately if that call returns non-zero.
 *
 * Index mapping per type (i is the 6-bit input index):
 *  - FF_NO_IDCT_PERM:        identity;
 *  - FF_LIBMPEG2_IDCT_PERM:  bits 3-5 kept, bits 1-2 moved down to bits 0-1,
 *                            bit 0 moved up to bit 2;
 *  - FF_TRANSPOSE_IDCT_PERM: low three bits and high three bits swapped;
 *  - FF_PARTTRANS_IDCT_PERM: bits 2 and 5 kept, bits 0-1 and bits 3-4 swapped.
 * Any other type logs an error and leaves idct_permutation unwritten.
 *
 * @param idct_permutation      output table, 64 entries written
 * @param idct_permutation_type one of the FF_*_IDCT_PERM values above
 */
| av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation, | |||||
| int idct_permutation_type) | |||||
| { | |||||
| int i; | |||||
| if (ARCH_X86) | |||||
| if (ff_init_scantable_permutation_x86(idct_permutation, | |||||
| idct_permutation_type)) | |||||
| return; | |||||
| switch (idct_permutation_type) { | |||||
| case FF_NO_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = i; | |||||
| break; | |||||
| case FF_LIBMPEG2_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||||
| break; | |||||
| case FF_TRANSPOSE_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = ((i & 7) << 3) | (i >> 3); | |||||
| break; | |||||
| case FF_PARTTRANS_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3); | |||||
| break; | |||||
| default: | |||||
| av_log(NULL, AV_LOG_ERROR, | |||||
| "Internal error, IDCT permutation not set\n"); | |||||
| } | |||||
| } | |||||
| static int pix_sum_c(uint8_t *pix, int line_size) | static int pix_sum_c(uint8_t *pix, int line_size) | ||||
| { | { | ||||
| int s = 0, i, j; | int s = 0, i, j; | ||||
| @@ -259,68 +204,6 @@ static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1, | |||||
| } | } | ||||
| } | } | ||||
/* Store an 8x8 block of int16_t values as bytes: each value goes through
 * av_clip_uint8() and is written to the destination. block is read as 64
 * contiguous entries (8 per row); pixels advances by line_size per row. */
| static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels, | |||||
| int line_size) | |||||
| { | |||||
| int i; | |||||
| /* write one row of eight clipped values per iteration */ | |||||
| for (i = 0; i < 8; i++) { | |||||
| pixels[0] = av_clip_uint8(block[0]); | |||||
| pixels[1] = av_clip_uint8(block[1]); | |||||
| pixels[2] = av_clip_uint8(block[2]); | |||||
| pixels[3] = av_clip_uint8(block[3]); | |||||
| pixels[4] = av_clip_uint8(block[4]); | |||||
| pixels[5] = av_clip_uint8(block[5]); | |||||
| pixels[6] = av_clip_uint8(block[6]); | |||||
| pixels[7] = av_clip_uint8(block[7]); | |||||
| pixels += line_size; | |||||
| block += 8; | |||||
| } | |||||
| } | |||||
/* Store an 8x8 block of signed int16_t values as bytes with a +128 offset:
 * values below -128 become 0, values above 127 become 255, anything else is
 * written as value + 128. block is read as 64 contiguous entries. */
| static void put_signed_pixels_clamped_c(const int16_t *block, | |||||
| uint8_t *restrict pixels, | |||||
| int line_size) | |||||
| { | |||||
| int i, j; | |||||
| for (i = 0; i < 8; i++) { | |||||
| for (j = 0; j < 8; j++) { | |||||
| if (*block < -128) | |||||
| *pixels = 0; | |||||
| else if (*block > 127) | |||||
| *pixels = 255; | |||||
| else | |||||
| *pixels = (uint8_t) (*block + 128); | |||||
| block++; | |||||
| pixels++; | |||||
| } | |||||
/* pixels already moved 8 bytes within the row; step to the next row. */
| pixels += (line_size - 8); | |||||
| } | |||||
| } | |||||
/* Add an 8x8 block of int16_t values to the bytes already in the destination:
 * each sum goes through av_clip_uint8() and is written back in place. block is
 * read as 64 contiguous entries; pixels advances by line_size per row. */
| static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels, | |||||
| int line_size) | |||||
| { | |||||
| int i; | |||||
| /* update one row of eight pixels per iteration */ | |||||
| for (i = 0; i < 8; i++) { | |||||
| pixels[0] = av_clip_uint8(pixels[0] + block[0]); | |||||
| pixels[1] = av_clip_uint8(pixels[1] + block[1]); | |||||
| pixels[2] = av_clip_uint8(pixels[2] + block[2]); | |||||
| pixels[3] = av_clip_uint8(pixels[3] + block[3]); | |||||
| pixels[4] = av_clip_uint8(pixels[4] + block[4]); | |||||
| pixels[5] = av_clip_uint8(pixels[5] + block[5]); | |||||
| pixels[6] = av_clip_uint8(pixels[6] + block[6]); | |||||
| pixels[7] = av_clip_uint8(pixels[7] + block[7]); | |||||
| pixels += line_size; | |||||
| block += 8; | |||||
| } | |||||
| } | |||||
| static int sum_abs_dctelem_c(int16_t *block) | static int sum_abs_dctelem_c(int16_t *block) | ||||
| { | { | ||||
| int sum = 0, i; | int sum = 0, i; | ||||
| @@ -967,7 +850,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, | |||||
| s->dct_unquantize_inter(s, temp, 0, s->qscale); | s->dct_unquantize_inter(s, temp, 0, s->qscale); | ||||
| } | } | ||||
| s->dsp.idct_add(lsrc2, 8, temp); | |||||
| s->idsp.idct_add(lsrc2, 8, temp); | |||||
| distortion = s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); | distortion = s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); | ||||
| @@ -1138,18 +1021,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | |||||
| WRAPPER8_16_SQ(rd8x8_c, rd16_c) | WRAPPER8_16_SQ(rd8x8_c, rd16_c) | ||||
| WRAPPER8_16_SQ(bit8x8_c, bit16_c) | WRAPPER8_16_SQ(bit8x8_c, bit16_c) | ||||
/* Run ff_j_rev_dct() on block, then store the same buffer to dest with
 * put_pixels_clamped_c(). */
| static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_j_rev_dct(block); | |||||
| put_pixels_clamped_c(block, dest, line_size); | |||||
| } | |||||
/* Run ff_j_rev_dct() on block, then add the same buffer onto dest with
 * add_pixels_clamped_c(). */
| static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block) | |||||
| { | |||||
| ff_j_rev_dct(block); | |||||
| add_pixels_clamped_c(block, dest, line_size); | |||||
| } | |||||
| /* draw the edges of width 'w' of an image of size width, height */ | /* draw the edges of width 'w' of an image of size width, height */ | ||||
| // FIXME: Check that this is OK for MPEG-4 interlaced. | // FIXME: Check that this is OK for MPEG-4 interlaced. | ||||
| static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, | static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, | ||||
| @@ -1209,36 +1080,8 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
| } | } | ||||
| #endif /* CONFIG_ENCODERS */ | #endif /* CONFIG_ENCODERS */ | ||||
| if (avctx->bits_per_raw_sample == 10) { | |||||
| c->idct_put = ff_simple_idct_put_10; | |||||
| c->idct_add = ff_simple_idct_add_10; | |||||
| c->idct = ff_simple_idct_10; | |||||
| c->idct_permutation_type = FF_NO_IDCT_PERM; | |||||
| } else { | |||||
| if (avctx->idct_algo == FF_IDCT_INT) { | |||||
| c->idct_put = jref_idct_put; | |||||
| c->idct_add = jref_idct_add; | |||||
| c->idct = ff_j_rev_dct; | |||||
| c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | |||||
| } else if (avctx->idct_algo == FF_IDCT_FAAN) { | |||||
| c->idct_put = ff_faanidct_put; | |||||
| c->idct_add = ff_faanidct_add; | |||||
| c->idct = ff_faanidct; | |||||
| c->idct_permutation_type = FF_NO_IDCT_PERM; | |||||
| } else { // accurate/default | |||||
| c->idct_put = ff_simple_idct_put_8; | |||||
| c->idct_add = ff_simple_idct_add_8; | |||||
| c->idct = ff_simple_idct_8; | |||||
| c->idct_permutation_type = FF_NO_IDCT_PERM; | |||||
| } | |||||
| } | |||||
| c->diff_pixels = diff_pixels_c; | c->diff_pixels = diff_pixels_c; | ||||
| c->put_pixels_clamped = put_pixels_clamped_c; | |||||
| c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; | |||||
| c->add_pixels_clamped = add_pixels_clamped_c; | |||||
| c->sum_abs_dctelem = sum_abs_dctelem_c; | c->sum_abs_dctelem = sum_abs_dctelem_c; | ||||
| c->pix_sum = pix_sum_c; | c->pix_sum = pix_sum_c; | ||||
| @@ -1309,7 +1152,4 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
| ff_dsputil_init_ppc(c, avctx, high_bit_depth); | ff_dsputil_init_ppc(c, avctx, high_bit_depth); | ||||
| if (ARCH_X86) | if (ARCH_X86) | ||||
| ff_dsputil_init_x86(c, avctx, high_bit_depth); | ff_dsputil_init_x86(c, avctx, high_bit_depth); | ||||
| ff_init_scantable_permutation(c->idct_permutation, | |||||
| c->idct_permutation_type); | |||||
| } | } | ||||
| @@ -44,22 +44,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c, | |||||
| uint8_t *blk1 /* align width (8 or 16) */, | uint8_t *blk1 /* align width (8 or 16) */, | ||||
| uint8_t *blk2 /* align 1 */, int line_size, int h); | uint8_t *blk2 /* align 1 */, int line_size, int h); | ||||
| /** | |||||
| * Scantable. | |||||
| */ | |||||
| typedef struct ScanTable { | |||||
| const uint8_t *scantable; | |||||
| uint8_t permutated[64]; | |||||
| uint8_t raster_end[64]; | |||||
| } ScanTable; | |||||
| void ff_init_scantable(uint8_t *permutation, ScanTable *st, | |||||
| const uint8_t *src_scantable); | |||||
| void ff_init_scantable_permutation(uint8_t *idct_permutation, | |||||
| int idct_permutation_type); | |||||
| int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, | |||||
| int idct_permutation_type); | |||||
| /** | /** | ||||
| * DSPContext. | * DSPContext. | ||||
| */ | */ | ||||
| @@ -72,15 +56,6 @@ typedef struct DSPContext { | |||||
| const uint8_t *s1 /* align 8 */, | const uint8_t *s1 /* align 8 */, | ||||
| const uint8_t *s2 /* align 8 */, | const uint8_t *s2 /* align 8 */, | ||||
| int stride); | int stride); | ||||
| void (*put_pixels_clamped)(const int16_t *block /* align 16 */, | |||||
| uint8_t *pixels /* align 8 */, | |||||
| int line_size); | |||||
| void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */, | |||||
| uint8_t *pixels /* align 8 */, | |||||
| int line_size); | |||||
| void (*add_pixels_clamped)(const int16_t *block /* align 16 */, | |||||
| uint8_t *pixels /* align 8 */, | |||||
| int line_size); | |||||
| int (*sum_abs_dctelem)(int16_t *block /* align 16 */); | int (*sum_abs_dctelem)(int16_t *block /* align 16 */); | ||||
| int (*pix_sum)(uint8_t *pix, int line_size); | int (*pix_sum)(uint8_t *pix, int line_size); | ||||
| @@ -112,47 +87,6 @@ typedef struct DSPContext { | |||||
| void (*fdct)(int16_t *block /* align 16 */); | void (*fdct)(int16_t *block /* align 16 */); | ||||
| void (*fdct248)(int16_t *block /* align 16 */); | void (*fdct248)(int16_t *block /* align 16 */); | ||||
| /* IDCT really */ | |||||
| void (*idct)(int16_t *block /* align 16 */); | |||||
| /** | |||||
| * block -> idct -> clip to unsigned 8 bit -> dest. | |||||
| * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) | |||||
| * @param line_size size in bytes of a horizontal line of dest | |||||
| */ | |||||
| void (*idct_put)(uint8_t *dest /* align 8 */, | |||||
| int line_size, int16_t *block /* align 16 */); | |||||
| /** | |||||
| * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. | |||||
| * @param line_size size in bytes of a horizontal line of dest | |||||
| */ | |||||
| void (*idct_add)(uint8_t *dest /* align 8 */, | |||||
| int line_size, int16_t *block /* align 16 */); | |||||
| /** | |||||
| * IDCT input permutation. | |||||
| * Several optimized IDCTs need a permutated input (relative to the | |||||
| * normal order of the reference IDCT). | |||||
| * This permutation must be performed before the idct_put/add. | |||||
| * Note, normally this can be merged with the zigzag/alternate scan<br> | |||||
| * An example to avoid confusion: | |||||
| * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...) | |||||
| * - (x -> reference DCT -> reference IDCT -> x) | |||||
| * - (x -> reference DCT -> simple_mmx_perm = idct_permutation | |||||
| * -> simple_idct_mmx -> x) | |||||
| * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant | |||||
| * -> simple_idct_mmx -> ...) | |||||
| */ | |||||
| uint8_t idct_permutation[64]; | |||||
| int idct_permutation_type; | |||||
| #define FF_NO_IDCT_PERM 1 | |||||
| #define FF_LIBMPEG2_IDCT_PERM 2 | |||||
| #define FF_SIMPLE_IDCT_PERM 3 | |||||
| #define FF_TRANSPOSE_IDCT_PERM 4 | |||||
| #define FF_PARTTRANS_IDCT_PERM 5 | |||||
| #define FF_SSE2_IDCT_PERM 6 | |||||
| int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], | int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], | ||||
| int16_t basis[64], int scale); | int16_t basis[64], int scale); | ||||
| void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | ||||
| @@ -39,6 +39,7 @@ | |||||
| #include "libavutil/imgutils.h" | #include "libavutil/imgutils.h" | ||||
| #include "libavutil/pixdesc.h" | #include "libavutil/pixdesc.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| @@ -61,17 +62,17 @@ static const int dv_iweight_bits = 14; | |||||
| static av_cold int dvvideo_decode_init(AVCodecContext *avctx) | static av_cold int dvvideo_decode_init(AVCodecContext *avctx) | ||||
| { | { | ||||
| DVVideoContext *s = avctx->priv_data; | DVVideoContext *s = avctx->priv_data; | ||||
| DSPContext dsp; | |||||
| IDCTDSPContext idsp; | |||||
| int i; | int i; | ||||
| ff_dsputil_init(&dsp, avctx); | |||||
| ff_idctdsp_init(&idsp, avctx); | |||||
| for (i = 0; i < 64; i++) | for (i = 0; i < 64; i++) | ||||
| s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| s->dv_zigzag[0][i] = idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| memcpy(s->dv_zigzag[1], ff_dv_zigzag248_direct, sizeof(s->dv_zigzag[1])); | memcpy(s->dv_zigzag[1], ff_dv_zigzag248_direct, sizeof(s->dv_zigzag[1])); | ||||
| s->idct_put[0] = dsp.idct_put; | |||||
| s->idct_put[0] = idsp.idct_put; | |||||
| s->idct_put[1] = ff_simple_idct248_put; | s->idct_put[1] = ff_simple_idct248_put; | ||||
| return ff_dvvideo_init(avctx); | return ff_dvvideo_init(avctx); | ||||
| @@ -110,7 +110,7 @@ static void fill_quantization_matrices(AVCodecContext *avctx, | |||||
| for (i = 0; i < 4; i++) | for (i = 0; i < 4; i++) | ||||
| qm->bNewQmatrix[i] = 1; | qm->bNewQmatrix[i] = 1; | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| int n = s->dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| int n = s->idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| qm->Qmatrix[0][i] = s->intra_matrix[n];; | qm->Qmatrix[0][i] = s->intra_matrix[n];; | ||||
| qm->Qmatrix[1][i] = s->inter_matrix[n];; | qm->Qmatrix[1][i] = s->inter_matrix[n];; | ||||
| qm->Qmatrix[2][i] = s->chroma_intra_matrix[n];; | qm->Qmatrix[2][i] = s->chroma_intra_matrix[n];; | ||||
| @@ -33,6 +33,7 @@ | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "aandcttab.h" | #include "aandcttab.h" | ||||
| #include "eaidct.h" | #include "eaidct.h" | ||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mpeg12.h" | #include "mpeg12.h" | ||||
| #include "mpeg12data.h" | #include "mpeg12data.h" | ||||
| @@ -47,7 +48,7 @@ typedef struct MadContext { | |||||
| AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
| BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
| BswapDSPContext bbdsp; | BswapDSPContext bbdsp; | ||||
| DSPContext dsp; | |||||
| IDCTDSPContext idsp; | |||||
| AVFrame *last_frame; | AVFrame *last_frame; | ||||
| GetBitContext gb; | GetBitContext gb; | ||||
| void *bitstream_buf; | void *bitstream_buf; | ||||
| @@ -66,9 +67,9 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | avctx->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
| ff_blockdsp_init(&s->bdsp, avctx); | ff_blockdsp_init(&s->bdsp, avctx); | ||||
| ff_bswapdsp_init(&s->bbdsp); | ff_bswapdsp_init(&s->bbdsp); | ||||
| ff_dsputil_init(&s->dsp, avctx); | |||||
| ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | |||||
| ff_idctdsp_init(&s->idsp, avctx); | |||||
| ff_init_scantable_permutation(s->idsp.idct_permutation, FF_NO_IDCT_PERM); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->scantable, ff_zigzag_direct); | |||||
| ff_mpeg12_init_vlcs(); | ff_mpeg12_init_vlcs(); | ||||
| s->last_frame = av_frame_alloc(); | s->last_frame = av_frame_alloc(); | ||||
| @@ -32,7 +32,7 @@ | |||||
| #define BITSTREAM_READER_LE | #define BITSTREAM_READER_LE | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "bytestream.h" | #include "bytestream.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "aandcttab.h" | #include "aandcttab.h" | ||||
| #include "eaidct.h" | #include "eaidct.h" | ||||
| #include "internal.h" | #include "internal.h" | ||||
| @@ -32,6 +32,7 @@ | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "aandcttab.h" | #include "aandcttab.h" | ||||
| #include "eaidct.h" | #include "eaidct.h" | ||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mpeg12.h" | #include "mpeg12.h" | ||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| @@ -51,9 +52,9 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx) | |||||
| s->avctx = avctx; | s->avctx = avctx; | ||||
| ff_blockdsp_init(&s->bdsp, avctx); | ff_blockdsp_init(&s->bdsp, avctx); | ||||
| ff_bswapdsp_init(&t->bsdsp); | ff_bswapdsp_init(&t->bsdsp); | ||||
| ff_dsputil_init(&s->dsp, avctx); | |||||
| ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | |||||
| ff_idctdsp_init(&s->idsp, avctx); | |||||
| ff_init_scantable_permutation(s->idsp.idct_permutation, FF_NO_IDCT_PERM); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | |||||
| s->qscale = 1; | s->qscale = 1; | ||||
| avctx->time_base = (AVRational){1, 15}; | avctx->time_base = (AVRational){1, 15}; | ||||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | avctx->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
| @@ -31,7 +31,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "bytestream.h" | #include "bytestream.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mjpeg.h" | #include "mjpeg.h" | ||||
| @@ -74,7 +74,7 @@ static const uint8_t chroma_quant[64] = { | |||||
| typedef struct JPGContext { | typedef struct JPGContext { | ||||
| BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
| DSPContext dsp; | |||||
| IDCTDSPContext idsp; | |||||
| ScanTable scantable; | ScanTable scantable; | ||||
| VLC dc_vlc[2], ac_vlc[2]; | VLC dc_vlc[2], ac_vlc[2]; | ||||
| @@ -153,8 +153,8 @@ static av_cold int jpg_init(AVCodecContext *avctx, JPGContext *c) | |||||
| return ret; | return ret; | ||||
| ff_blockdsp_init(&c->bdsp, avctx); | ff_blockdsp_init(&c->bdsp, avctx); | ||||
| ff_dsputil_init(&c->dsp, avctx); | |||||
| ff_init_scantable(c->dsp.idct_permutation, &c->scantable, | |||||
| ff_idctdsp_init(&c->idsp, avctx); | |||||
| ff_init_scantable(c->idsp.idct_permutation, &c->scantable, | |||||
| ff_zigzag_direct); | ff_zigzag_direct); | ||||
| return 0; | return 0; | ||||
| @@ -279,13 +279,13 @@ static int jpg_decode_data(JPGContext *c, int width, int height, | |||||
| if ((ret = jpg_decode_block(c, &gb, 0, | if ((ret = jpg_decode_block(c, &gb, 0, | ||||
| c->block[i + j * 2])) != 0) | c->block[i + j * 2])) != 0) | ||||
| return ret; | return ret; | ||||
| c->dsp.idct(c->block[i + j * 2]); | |||||
| c->idsp.idct(c->block[i + j * 2]); | |||||
| } | } | ||||
| } | } | ||||
| for (i = 1; i < 3; i++) { | for (i = 1; i < 3; i++) { | ||||
| if ((ret = jpg_decode_block(c, &gb, i, c->block[i + 3])) != 0) | if ((ret = jpg_decode_block(c, &gb, i, c->block[i + 3])) != 0) | ||||
| return ret; | return ret; | ||||
| c->dsp.idct(c->block[i + 3]); | |||||
| c->idsp.idct(c->block[i + 3]); | |||||
| } | } | ||||
| for (j = 0; j < 16; j++) { | for (j = 0; j < 16; j++) { | ||||
| @@ -267,7 +267,7 @@ void ff_h263_pred_acdc(MpegEncContext * s, int16_t *block, int n) | |||||
| if (a != 1024) { | if (a != 1024) { | ||||
| ac_val -= 16; | ac_val -= 16; | ||||
| for(i=1;i<8;i++) { | for(i=1;i<8;i++) { | ||||
| block[s->dsp.idct_permutation[i<<3]] += ac_val[i]; | |||||
| block[s->idsp.idct_permutation[i << 3]] += ac_val[i]; | |||||
| } | } | ||||
| pred_dc = a; | pred_dc = a; | ||||
| } | } | ||||
| @@ -276,7 +276,7 @@ void ff_h263_pred_acdc(MpegEncContext * s, int16_t *block, int n) | |||||
| if (c != 1024) { | if (c != 1024) { | ||||
| ac_val -= 16 * wrap; | ac_val -= 16 * wrap; | ||||
| for(i=1;i<8;i++) { | for(i=1;i<8;i++) { | ||||
| block[s->dsp.idct_permutation[i ]] += ac_val[i + 8]; | |||||
| block[s->idsp.idct_permutation[i]] += ac_val[i + 8]; | |||||
| } | } | ||||
| pred_dc = c; | pred_dc = c; | ||||
| } | } | ||||
| @@ -304,10 +304,10 @@ void ff_h263_pred_acdc(MpegEncContext * s, int16_t *block, int n) | |||||
| /* left copy */ | /* left copy */ | ||||
| for(i=1;i<8;i++) | for(i=1;i<8;i++) | ||||
| ac_val1[i ] = block[s->dsp.idct_permutation[i<<3]]; | |||||
| ac_val1[i] = block[s->idsp.idct_permutation[i << 3]]; | |||||
| /* top copy */ | /* top copy */ | ||||
| for(i=1;i<8;i++) | for(i=1;i<8;i++) | ||||
| ac_val1[8 + i] = block[s->dsp.idct_permutation[i ]]; | |||||
| ac_val1[8 + i] = block[s->idsp.idct_permutation[i]]; | |||||
| } | } | ||||
| int16_t *ff_h263_pred_motion(MpegEncContext * s, int block, int dir, | int16_t *ff_h263_pred_motion(MpegEncContext * s, int block, int dir, | ||||
| @@ -0,0 +1,197 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/common.h" | |||||
| #include "avcodec.h" | |||||
| #include "dct.h" | |||||
| #include "faanidct.h" | |||||
| #include "idctdsp.h" | |||||
| #include "simple_idct.h" | |||||
| av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st, | |||||
| const uint8_t *src_scantable) | |||||
| { | |||||
| int i, end; | |||||
| st->scantable = src_scantable; | |||||
| for (i = 0; i < 64; i++) { | |||||
| int j = src_scantable[i]; | |||||
| st->permutated[i] = permutation[j]; | |||||
| } | |||||
| end = -1; | |||||
| for (i = 0; i < 64; i++) { | |||||
| int j = st->permutated[i]; | |||||
| if (j > end) | |||||
| end = j; | |||||
| st->raster_end[i] = end; | |||||
| } | |||||
| } | |||||
| av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation, | |||||
| int idct_permutation_type) | |||||
| { | |||||
| int i; | |||||
| if (ARCH_X86) | |||||
| if (ff_init_scantable_permutation_x86(idct_permutation, | |||||
| idct_permutation_type)) | |||||
| return; | |||||
| switch (idct_permutation_type) { | |||||
| case FF_NO_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = i; | |||||
| break; | |||||
| case FF_LIBMPEG2_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |||||
| break; | |||||
| case FF_TRANSPOSE_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = ((i & 7) << 3) | (i >> 3); | |||||
| break; | |||||
| case FF_PARTTRANS_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3); | |||||
| break; | |||||
| default: | |||||
| av_log(NULL, AV_LOG_ERROR, | |||||
| "Internal error, IDCT permutation not set\n"); | |||||
| } | |||||
| } | |||||
/**
 * Store an 8x8 block of coefficients as pixels, clipping each value to
 * the unsigned 8-bit range.
 *
 * @param block     64 coefficients, 8 per row
 * @param pixels    destination; rows are line_size bytes apart
 * @param line_size distance in bytes between two destination rows
 */
static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(block[col]);

        block  += 8;
        pixels += line_size;
    }
}
/**
 * Store an 8x8 block of signed coefficients as pixels: each value is
 * clamped to [-128, 127] and then biased by +128 into the 0..255 range.
 *
 * @param block     64 coefficients, 8 per row
 * @param pixels    destination; rows are line_size bytes apart
 * @param line_size distance in bytes between two destination rows
 */
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        const int16_t *src = block  + row * 8;
        uint8_t       *dst = pixels + row * line_size;

        for (col = 0; col < 8; col++) {
            int v = src[col];

            if (v < -128)
                v = -128;
            else if (v > 127)
                v = 127;
            dst[col] = (uint8_t) (v + 128);
        }
    }
}
/**
 * Add an 8x8 block of coefficients to the pixels already in the
 * destination, clipping each sum to the unsigned 8-bit range.
 *
 * @param block     64 coefficients, 8 per row
 * @param pixels    destination, read and written; rows are line_size
 *                  bytes apart
 * @param line_size distance in bytes between two destination rows
 */
static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);

        block  += 8;
        pixels += line_size;
    }
}
/**
 * Run ff_j_rev_dct() on the block in place, then store the result to
 * dest with clipping to unsigned 8 bit.
 *
 * @param dest      destination pixels
 * @param line_size distance in bytes between two destination rows
 * @param block     64 coefficients; overwritten by the transform
 */
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct(block);
    put_pixels_clamped_c(block, dest, line_size);
}
/**
 * Run ff_j_rev_dct() on the block in place, then add the result to the
 * pixels in dest with clipping to unsigned 8 bit.
 *
 * @param dest      destination pixels, read and written
 * @param line_size distance in bytes between two destination rows
 * @param block     64 coefficients; overwritten by the transform
 */
static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct(block);
    add_pixels_clamped_c(block, dest, line_size);
}
| av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) | |||||
| { | |||||
| const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; | |||||
| if (avctx->bits_per_raw_sample == 10) { | |||||
| c->idct_put = ff_simple_idct_put_10; | |||||
| c->idct_add = ff_simple_idct_add_10; | |||||
| c->idct = ff_simple_idct_10; | |||||
| c->idct_permutation_type = FF_NO_IDCT_PERM; | |||||
| } else { | |||||
| if (avctx->idct_algo == FF_IDCT_INT) { | |||||
| c->idct_put = jref_idct_put; | |||||
| c->idct_add = jref_idct_add; | |||||
| c->idct = ff_j_rev_dct; | |||||
| c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | |||||
| } else if (avctx->idct_algo == FF_IDCT_FAAN) { | |||||
| c->idct_put = ff_faanidct_put; | |||||
| c->idct_add = ff_faanidct_add; | |||||
| c->idct = ff_faanidct; | |||||
| c->idct_permutation_type = FF_NO_IDCT_PERM; | |||||
| } else { // accurate/default | |||||
| c->idct_put = ff_simple_idct_put_8; | |||||
| c->idct_add = ff_simple_idct_add_8; | |||||
| c->idct = ff_simple_idct_8; | |||||
| c->idct_permutation_type = FF_NO_IDCT_PERM; | |||||
| } | |||||
| } | |||||
| c->put_pixels_clamped = put_pixels_clamped_c; | |||||
| c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; | |||||
| c->add_pixels_clamped = add_pixels_clamped_c; | |||||
| if (ARCH_ARM) | |||||
| ff_idctdsp_init_arm(c, avctx, high_bit_depth); | |||||
| if (ARCH_PPC) | |||||
| ff_idctdsp_init_ppc(c, avctx, high_bit_depth); | |||||
| if (ARCH_X86) | |||||
| ff_idctdsp_init_x86(c, avctx, high_bit_depth); | |||||
| ff_init_scantable_permutation(c->idct_permutation, | |||||
| c->idct_permutation_type); | |||||
| } | |||||
| @@ -0,0 +1,104 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_IDCTDSP_H | |||||
| #define AVCODEC_IDCTDSP_H | |||||
| #include <stdint.h> | |||||
| #include "avcodec.h" | |||||
/**
 * Coefficient scan order combined with an IDCT input permutation.
 * Filled in by ff_init_scantable().
 */
typedef struct ScanTable {
    /** Source scan order as passed to ff_init_scantable(); not copied. */
    const uint8_t *scantable;
    /** permutated[i] = permutation[scantable[i]]. */
    uint8_t permutated[64];
    /** raster_end[i] = largest value among permutated[0..i]. */
    uint8_t raster_end[64];
} ScanTable;
| void ff_init_scantable(uint8_t *permutation, ScanTable *st, | |||||
| const uint8_t *src_scantable); | |||||
| void ff_init_scantable_permutation(uint8_t *idct_permutation, | |||||
| int idct_permutation_type); | |||||
| int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, | |||||
| int idct_permutation_type); | |||||
/**
 * Function pointers and input permutation for the 8x8 inverse DCT and
 * its pixel output stages. Filled in by ff_idctdsp_init().
 */
typedef struct IDCTDSPContext {
    /* pixel ops: interface with the DCT */

    /** Store a block to pixels, clipping each value to unsigned 8 bit. */
    void (*put_pixels_clamped)(const int16_t *block /* align 16 */,
                               uint8_t *pixels /* align 8 */,
                               int line_size);
    /** Store a signed block to pixels: clamp to [-128, 127], then add 128. */
    void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */,
                                      uint8_t *pixels /* align 8 */,
                                      int line_size);
    /** Add a block to the existing pixels, clipping to unsigned 8 bit. */
    void (*add_pixels_clamped)(const int16_t *block /* align 16 */,
                               uint8_t *pixels /* align 8 */,
                               int line_size);

    /** In-place inverse DCT of one block. */
    void (*idct)(int16_t *block /* align 16 */);

    /**
     * block -> idct -> clip to unsigned 8 bit -> dest.
     * Example: (-1392, 0, 0, ...) -> idct -> (-174, -174, ...)
     *          -> put -> (0, 0, ...)
     * @param line_size size in bytes of a horizontal line of dest
     */
    void (*idct_put)(uint8_t *dest /* align 8 */,
                     int line_size, int16_t *block /* align 16 */);

    /**
     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
     * @param line_size size in bytes of a horizontal line of dest
     */
    void (*idct_add)(uint8_t *dest /* align 8 */,
                     int line_size, int16_t *block /* align 16 */);

    /**
     * IDCT input permutation.
     * Several optimized IDCTs expect their input in a permuted order
     * relative to the normal order of the reference IDCT. The permutation
     * has to be applied before calling idct_put/idct_add; normally it can
     * be merged with the zigzag/alternate scan.
     *
     * An example to avoid confusion:
     * - (-> decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
     * - (x -> reference DCT -> reference IDCT -> x)
     * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
     *    -> simple_idct_mmx -> x)
     * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
     *    -> simple_idct_mmx -> ...)
     */
    uint8_t idct_permutation[64];
    /** One of the FF_*_IDCT_PERM values below. */
    int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
#define FF_PARTTRANS_IDCT_PERM 5
#define FF_SSE2_IDCT_PERM 6
} IDCTDSPContext;
| void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx); | |||||
| void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | |||||
| void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | |||||
| void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | |||||
| #endif /* AVCODEC_IDCTDSP_H */ | |||||
| @@ -24,6 +24,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "error_resilience.h" | #include "error_resilience.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "idctdsp.h" | |||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| #include "msmpeg4data.h" | #include "msmpeg4data.h" | ||||
| #include "intrax8huf.h" | #include "intrax8huf.h" | ||||
| @@ -440,7 +441,7 @@ lut2[q>12][c]={ | |||||
| static void x8_ac_compensation(IntraX8Context * const w, int const direction, int const dc_level){ | static void x8_ac_compensation(IntraX8Context * const w, int const direction, int const dc_level){ | ||||
| MpegEncContext * const s= w->s; | MpegEncContext * const s= w->s; | ||||
| int t; | int t; | ||||
| #define B(x,y) s->block[0][s->dsp.idct_permutation[(x)+(y)*8]] | |||||
| #define B(x, y) s->block[0][s->idsp.idct_permutation[(x) + (y) * 8]] | |||||
| #define T(x) ((x) * dc_level + 0x8000) >> 16; | #define T(x) ((x) * dc_level + 0x8000) >> 16; | ||||
| switch(direction){ | switch(direction){ | ||||
| case 0: | case 0: | ||||
| @@ -646,9 +647,9 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){ | |||||
| s->current_picture.f->linesize[!!chroma] ); | s->current_picture.f->linesize[!!chroma] ); | ||||
| } | } | ||||
| if(!zeros_only) | if(!zeros_only) | ||||
| s->dsp.idct_add ( s->dest[chroma], | |||||
| s->current_picture.f->linesize[!!chroma], | |||||
| s->block[0] ); | |||||
| s->idsp.idct_add(s->dest[chroma], | |||||
| s->current_picture.f->linesize[!!chroma], | |||||
| s->block[0]); | |||||
| block_placed: | block_placed: | ||||
| @@ -698,9 +699,9 @@ av_cold void ff_intrax8_common_init(IntraX8Context * w, MpegEncContext * const s | |||||
| assert(s->mb_width>0); | assert(s->mb_width>0); | ||||
| w->prediction_table=av_mallocz(s->mb_width*2*2);//two rows, 2 blocks per cannon mb | w->prediction_table=av_mallocz(s->mb_width*2*2);//two rows, 2 blocks per cannon mb | ||||
| ff_init_scantable(s->dsp.idct_permutation, &w->scantable[0], ff_wmv1_scantable[0]); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &w->scantable[1], ff_wmv1_scantable[2]); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &w->scantable[2], ff_wmv1_scantable[3]); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &w->scantable[0], ff_wmv1_scantable[0]); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &w->scantable[1], ff_wmv1_scantable[2]); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &w->scantable[2], ff_wmv1_scantable[3]); | |||||
| ff_intrax8dsp_init(&w->dsp); | ff_intrax8dsp_init(&w->dsp); | ||||
| } | } | ||||
| @@ -35,7 +35,7 @@ | |||||
| #include "libavutil/pixdesc.h" | #include "libavutil/pixdesc.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mjpegenc_common.h" | #include "mjpegenc_common.h" | ||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| @@ -43,7 +43,7 @@ | |||||
| #include "mjpegenc.h" | #include "mjpegenc.h" | ||||
| typedef struct LJpegEncContext { | typedef struct LJpegEncContext { | ||||
| DSPContext dsp; | |||||
| IDCTDSPContext idsp; | |||||
| ScanTable scantable; | ScanTable scantable; | ||||
| uint16_t matrix[64]; | uint16_t matrix[64]; | ||||
| @@ -285,8 +285,9 @@ static av_cold int ljpeg_encode_init(AVCodecContext *avctx) | |||||
| s->scratch = av_malloc_array(avctx->width + 1, sizeof(*s->scratch)); | s->scratch = av_malloc_array(avctx->width + 1, sizeof(*s->scratch)); | ||||
| ff_dsputil_init(&s->dsp, avctx); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | |||||
| ff_idctdsp_init(&s->idsp, avctx); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->scantable, | |||||
| ff_zigzag_direct); | |||||
| av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, | av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, | ||||
| &chroma_v_shift); | &chroma_v_shift); | ||||
| @@ -29,6 +29,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "idctdsp.h" | |||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| #include "mpeg12.h" | #include "mpeg12.h" | ||||
| #include "thread.h" | #include "thread.h" | ||||
| @@ -36,7 +37,7 @@ | |||||
| typedef struct MDECContext { | typedef struct MDECContext { | ||||
| AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
| BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
| DSPContext dsp; | |||||
| IDCTDSPContext idsp; | |||||
| ThreadFrame frame; | ThreadFrame frame; | ||||
| GetBitContext gb; | GetBitContext gb; | ||||
| ScanTable scantable; | ScanTable scantable; | ||||
| @@ -146,14 +147,14 @@ static inline void idct_put(MDECContext *a, AVFrame *frame, int mb_x, int mb_y) | |||||
| uint8_t *dest_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; | uint8_t *dest_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; | ||||
| uint8_t *dest_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; | uint8_t *dest_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; | ||||
| a->dsp.idct_put(dest_y, linesize, block[0]); | |||||
| a->dsp.idct_put(dest_y + 8, linesize, block[1]); | |||||
| a->dsp.idct_put(dest_y + 8 * linesize, linesize, block[2]); | |||||
| a->dsp.idct_put(dest_y + 8 * linesize + 8, linesize, block[3]); | |||||
| a->idsp.idct_put(dest_y, linesize, block[0]); | |||||
| a->idsp.idct_put(dest_y + 8, linesize, block[1]); | |||||
| a->idsp.idct_put(dest_y + 8 * linesize, linesize, block[2]); | |||||
| a->idsp.idct_put(dest_y + 8 * linesize + 8, linesize, block[3]); | |||||
| if (!(a->avctx->flags & CODEC_FLAG_GRAY)) { | if (!(a->avctx->flags & CODEC_FLAG_GRAY)) { | ||||
| a->dsp.idct_put(dest_cb, frame->linesize[1], block[4]); | |||||
| a->dsp.idct_put(dest_cr, frame->linesize[2], block[5]); | |||||
| a->idsp.idct_put(dest_cb, frame->linesize[1], block[4]); | |||||
| a->idsp.idct_put(dest_cr, frame->linesize[2], block[5]); | |||||
| } | } | ||||
| } | } | ||||
| @@ -215,9 +216,10 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
| a->avctx = avctx; | a->avctx = avctx; | ||||
| ff_blockdsp_init(&a->bdsp, avctx); | ff_blockdsp_init(&a->bdsp, avctx); | ||||
| ff_dsputil_init(&a->dsp, avctx); | |||||
| ff_idctdsp_init(&a->idsp, avctx); | |||||
| ff_mpeg12_init_vlcs(); | ff_mpeg12_init_vlcs(); | ||||
| ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(a->idsp.idct_permutation, &a->scantable, | |||||
| ff_zigzag_direct); | |||||
| if (avctx->idct_algo == FF_IDCT_AUTO) | if (avctx->idct_algo == FF_IDCT_AUTO) | ||||
| avctx->idct_algo = FF_IDCT_SIMPLE; | avctx->idct_algo = FF_IDCT_SIMPLE; | ||||
| @@ -29,8 +29,8 @@ | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "bytestream.h" | #include "bytestream.h" | ||||
| #include "bswapdsp.h" | #include "bswapdsp.h" | ||||
| #include "dsputil.h" | |||||
| #include "hpeldsp.h" | #include "hpeldsp.h" | ||||
| #include "idctdsp.h" | |||||
| #include "thread.h" | #include "thread.h" | ||||
| #define MIMIC_HEADER_SIZE 20 | #define MIMIC_HEADER_SIZE 20 | ||||
| @@ -56,8 +56,8 @@ typedef struct { | |||||
| ScanTable scantable; | ScanTable scantable; | ||||
| BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
| BswapDSPContext bbdsp; | BswapDSPContext bbdsp; | ||||
| DSPContext dsp; | |||||
| HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
| IDCTDSPContext idsp; | |||||
| VLC vlc; | VLC vlc; | ||||
| /* Kept in the context so multithreading can have a constant to read from */ | /* Kept in the context so multithreading can have a constant to read from */ | ||||
| @@ -151,9 +151,9 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx) | |||||
| } | } | ||||
| ff_blockdsp_init(&ctx->bdsp, avctx); | ff_blockdsp_init(&ctx->bdsp, avctx); | ||||
| ff_bswapdsp_init(&ctx->bbdsp); | ff_bswapdsp_init(&ctx->bbdsp); | ||||
| ff_dsputil_init(&ctx->dsp, avctx); | |||||
| ff_hpeldsp_init(&ctx->hdsp, avctx->flags); | ff_hpeldsp_init(&ctx->hdsp, avctx->flags); | ||||
| ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag); | |||||
| ff_idctdsp_init(&ctx->idsp, avctx); | |||||
| ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, col_zag); | |||||
| for (i = 0; i < FF_ARRAY_ELEMS(ctx->frames); i++) { | for (i = 0; i < FF_ARRAY_ELEMS(ctx->frames); i++) { | ||||
| ctx->frames[i].f = av_frame_alloc(); | ctx->frames[i].f = av_frame_alloc(); | ||||
| @@ -302,7 +302,7 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs, | |||||
| "block.\n"); | "block.\n"); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ctx->dsp.idct_put(dst, stride, ctx->dct_block); | |||||
| ctx->idsp.idct_put(dst, stride, ctx->dct_block); | |||||
| } else { | } else { | ||||
| unsigned int backref = get_bits(&ctx->gb, 4); | unsigned int backref = get_bits(&ctx->gb, 4); | ||||
| int index = (ctx->cur_index + backref) & 15; | int index = (ctx->cur_index + backref) & 15; | ||||
| @@ -36,6 +36,7 @@ | |||||
| #include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mjpeg.h" | #include "mjpeg.h" | ||||
| #include "mjpegdec.h" | #include "mjpegdec.h" | ||||
| @@ -95,8 +96,9 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx) | |||||
| s->avctx = avctx; | s->avctx = avctx; | ||||
| ff_blockdsp_init(&s->bdsp, avctx); | ff_blockdsp_init(&s->bdsp, avctx); | ||||
| ff_hpeldsp_init(&s->hdsp, avctx->flags); | ff_hpeldsp_init(&s->hdsp, avctx->flags); | ||||
| ff_dsputil_init(&s->dsp, avctx); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); | |||||
| ff_idctdsp_init(&s->idsp, avctx); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->scantable, | |||||
| ff_zigzag_direct); | |||||
| s->buffer_size = 0; | s->buffer_size = 0; | ||||
| s->buffer = NULL; | s->buffer = NULL; | ||||
| s->start_code = -1; | s->start_code = -1; | ||||
| @@ -889,7 +891,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, | |||||
| "error y=%d x=%d\n", mb_y, mb_x); | "error y=%d x=%d\n", mb_y, mb_x); | ||||
| return AVERROR_INVALIDDATA; | return AVERROR_INVALIDDATA; | ||||
| } | } | ||||
| s->dsp.idct_put(ptr, linesize[c], s->block); | |||||
| s->idsp.idct_put(ptr, linesize[c], s->block); | |||||
| } | } | ||||
| } else { | } else { | ||||
| int block_idx = s->block_stride[c] * (v * mb_y + y) + | int block_idx = s->block_stride[c] * (v * mb_y + y) + | ||||
| @@ -1002,7 +1004,7 @@ static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss, | |||||
| reference_data + block_offset, | reference_data + block_offset, | ||||
| linesize, 8); | linesize, 8); | ||||
| } else { | } else { | ||||
| s->dsp.idct_put(ptr, linesize, *block); | |||||
| s->idsp.idct_put(ptr, linesize, *block); | |||||
| ptr += 8; | ptr += 8; | ||||
| } | } | ||||
| } | } | ||||
| @@ -35,8 +35,8 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "dsputil.h" | |||||
| #include "hpeldsp.h" | #include "hpeldsp.h" | ||||
| #include "idctdsp.h" | |||||
| #define MAX_COMPONENTS 4 | #define MAX_COMPONENTS 4 | ||||
| @@ -97,8 +97,8 @@ typedef struct MJpegDecodeContext { | |||||
| uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode) | uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode) | ||||
| ScanTable scantable; | ScanTable scantable; | ||||
| BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
| DSPContext dsp; | |||||
| HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
| IDCTDSPContext idsp; | |||||
| int restart_interval; | int restart_interval; | ||||
| int restart_count; | int restart_count; | ||||
| @@ -26,7 +26,7 @@ | |||||
| #include "libavutil/pixfmt.h" | #include "libavutil/pixfmt.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| #include "mjpegenc_common.h" | #include "mjpegenc_common.h" | ||||
| #include "mjpeg.h" | #include "mjpeg.h" | ||||
| @@ -24,7 +24,7 @@ | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb, | void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb, | ||||
| @@ -33,8 +33,8 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "bytestream.h" | #include "bytestream.h" | ||||
| #include "dsputil.h" | |||||
| #include "error_resilience.h" | #include "error_resilience.h" | ||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mpeg_er.h" | #include "mpeg_er.h" | ||||
| #include "mpeg12.h" | #include "mpeg12.h" | ||||
| @@ -1100,7 +1100,7 @@ static av_cold int mpeg_decode_init(AVCodecContext *avctx) | |||||
| /* we need some permutation to store matrices, | /* we need some permutation to store matrices, | ||||
| * until MPV_common_init() sets the real permutation. */ | * until MPV_common_init() sets the real permutation. */ | ||||
| for (i = 0; i < 64; i++) | for (i = 0; i < 64; i++) | ||||
| s2->dsp.idct_permutation[i] = i; | |||||
| s2->idsp.idct_permutation[i] = i; | |||||
| ff_MPV_decode_defaults(s2); | ff_MPV_decode_defaults(s2); | ||||
| @@ -1309,15 +1309,15 @@ static int mpeg_decode_postinit(AVCodecContext *avctx) | |||||
| /* Quantization matrices may need reordering | /* Quantization matrices may need reordering | ||||
| * if DCT permutation is changed. */ | * if DCT permutation is changed. */ | ||||
| memcpy(old_permutation, s->dsp.idct_permutation, 64 * sizeof(uint8_t)); | |||||
| memcpy(old_permutation, s->idsp.idct_permutation, 64 * sizeof(uint8_t)); | |||||
| if (ff_MPV_common_init(s) < 0) | if (ff_MPV_common_init(s) < 0) | ||||
| return -2; | return -2; | ||||
| quant_matrix_rebuild(s->intra_matrix, old_permutation, s->dsp.idct_permutation); | |||||
| quant_matrix_rebuild(s->inter_matrix, old_permutation, s->dsp.idct_permutation); | |||||
| quant_matrix_rebuild(s->chroma_intra_matrix, old_permutation, s->dsp.idct_permutation); | |||||
| quant_matrix_rebuild(s->chroma_inter_matrix, old_permutation, s->dsp.idct_permutation); | |||||
| quant_matrix_rebuild(s->intra_matrix, old_permutation, s->idsp.idct_permutation); | |||||
| quant_matrix_rebuild(s->inter_matrix, old_permutation, s->idsp.idct_permutation); | |||||
| quant_matrix_rebuild(s->chroma_intra_matrix, old_permutation, s->idsp.idct_permutation); | |||||
| quant_matrix_rebuild(s->chroma_inter_matrix, old_permutation, s->idsp.idct_permutation); | |||||
| s1->mpeg_enc_ctx_allocated = 1; | s1->mpeg_enc_ctx_allocated = 1; | ||||
| } | } | ||||
| @@ -1469,7 +1469,7 @@ static int load_matrix(MpegEncContext *s, uint16_t matrix0[64], | |||||
| int i; | int i; | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| int j = s->dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| int j = s->idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| int v = get_bits(&s->gb, 8); | int v = get_bits(&s->gb, 8); | ||||
| if (v == 0) { | if (v == 0) { | ||||
| av_log(s->avctx, AV_LOG_ERROR, "matrix damaged\n"); | av_log(s->avctx, AV_LOG_ERROR, "matrix damaged\n"); | ||||
| @@ -1561,11 +1561,11 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1) | |||||
| } | } | ||||
| if (s->alternate_scan) { | if (s->alternate_scan) { | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan); | |||||
| } else { | } else { | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | |||||
| } | } | ||||
| /* composite display not parsed */ | /* composite display not parsed */ | ||||
| @@ -2070,7 +2070,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, | |||||
| load_matrix(s, s->chroma_intra_matrix, s->intra_matrix, 1); | load_matrix(s, s->chroma_intra_matrix, s->intra_matrix, 1); | ||||
| } else { | } else { | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| j = s->dsp.idct_permutation[i]; | |||||
| j = s->idsp.idct_permutation[i]; | |||||
| v = ff_mpeg1_default_intra_matrix[i]; | v = ff_mpeg1_default_intra_matrix[i]; | ||||
| s->intra_matrix[j] = v; | s->intra_matrix[j] = v; | ||||
| s->chroma_intra_matrix[j] = v; | s->chroma_intra_matrix[j] = v; | ||||
| @@ -2080,7 +2080,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, | |||||
| load_matrix(s, s->chroma_inter_matrix, s->inter_matrix, 0); | load_matrix(s, s->chroma_inter_matrix, s->inter_matrix, 0); | ||||
| } else { | } else { | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| int j = s->dsp.idct_permutation[i]; | |||||
| int j = s->idsp.idct_permutation[i]; | |||||
| v = ff_mpeg1_default_non_intra_matrix[i]; | v = ff_mpeg1_default_non_intra_matrix[i]; | ||||
| s->inter_matrix[j] = v; | s->inter_matrix[j] = v; | ||||
| s->chroma_inter_matrix[j] = v; | s->chroma_inter_matrix[j] = v; | ||||
| @@ -2142,7 +2142,7 @@ static int vcr2_init_sequence(AVCodecContext *avctx) | |||||
| s1->mpeg_enc_ctx_allocated = 1; | s1->mpeg_enc_ctx_allocated = 1; | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| int j = s->dsp.idct_permutation[i]; | |||||
| int j = s->idsp.idct_permutation[i]; | |||||
| v = ff_mpeg1_default_intra_matrix[i]; | v = ff_mpeg1_default_intra_matrix[i]; | ||||
| s->intra_matrix[j] = v; | s->intra_matrix[j] = v; | ||||
| s->chroma_intra_matrix[j] = v; | s->chroma_intra_matrix[j] = v; | ||||
| @@ -21,6 +21,7 @@ | |||||
| */ | */ | ||||
| #include "error_resilience.h" | #include "error_resilience.h" | ||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mpegutils.h" | #include "mpegutils.h" | ||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| @@ -71,11 +72,11 @@ void ff_mpeg4_pred_ac(MpegEncContext *s, int16_t *block, int n, int dir) | |||||
| n == 1 || n == 3) { | n == 1 || n == 3) { | ||||
| /* same qscale */ | /* same qscale */ | ||||
| for (i = 1; i < 8; i++) | for (i = 1; i < 8; i++) | ||||
| block[s->dsp.idct_permutation[i << 3]] += ac_val[i]; | |||||
| block[s->idsp.idct_permutation[i << 3]] += ac_val[i]; | |||||
| } else { | } else { | ||||
| /* different qscale, we must rescale */ | /* different qscale, we must rescale */ | ||||
| for (i = 1; i < 8; i++) | for (i = 1; i < 8; i++) | ||||
| block[s->dsp.idct_permutation[i << 3]] += ROUNDED_DIV(ac_val[i] * qscale_table[xy], s->qscale); | |||||
| block[s->idsp.idct_permutation[i << 3]] += ROUNDED_DIV(ac_val[i] * qscale_table[xy], s->qscale); | |||||
| } | } | ||||
| } else { | } else { | ||||
| const int xy = s->mb_x + s->mb_y * s->mb_stride - s->mb_stride; | const int xy = s->mb_x + s->mb_y * s->mb_stride - s->mb_stride; | ||||
| @@ -86,21 +87,21 @@ void ff_mpeg4_pred_ac(MpegEncContext *s, int16_t *block, int n, int dir) | |||||
| n == 2 || n == 3) { | n == 2 || n == 3) { | ||||
| /* same qscale */ | /* same qscale */ | ||||
| for (i = 1; i < 8; i++) | for (i = 1; i < 8; i++) | ||||
| block[s->dsp.idct_permutation[i]] += ac_val[i + 8]; | |||||
| block[s->idsp.idct_permutation[i]] += ac_val[i + 8]; | |||||
| } else { | } else { | ||||
| /* different qscale, we must rescale */ | /* different qscale, we must rescale */ | ||||
| for (i = 1; i < 8; i++) | for (i = 1; i < 8; i++) | ||||
| block[s->dsp.idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8] * qscale_table[xy], s->qscale); | |||||
| block[s->idsp.idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8] * qscale_table[xy], s->qscale); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| /* left copy */ | /* left copy */ | ||||
| for (i = 1; i < 8; i++) | for (i = 1; i < 8; i++) | ||||
| ac_val1[i] = block[s->dsp.idct_permutation[i << 3]]; | |||||
| ac_val1[i] = block[s->idsp.idct_permutation[i << 3]]; | |||||
| /* top copy */ | /* top copy */ | ||||
| for (i = 1; i < 8; i++) | for (i = 1; i < 8; i++) | ||||
| ac_val1[8 + i] = block[s->dsp.idct_permutation[i]]; | |||||
| ac_val1[8 + i] = block[s->idsp.idct_permutation[i]]; | |||||
| } | } | ||||
| /** | /** | ||||
| @@ -1815,7 +1816,7 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb) | |||||
| /* load default matrixes */ | /* load default matrixes */ | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| int j = s->dsp.idct_permutation[i]; | |||||
| int j = s->idsp.idct_permutation[i]; | |||||
| v = ff_mpeg4_default_intra_matrix[i]; | v = ff_mpeg4_default_intra_matrix[i]; | ||||
| s->intra_matrix[j] = v; | s->intra_matrix[j] = v; | ||||
| s->chroma_intra_matrix[j] = v; | s->chroma_intra_matrix[j] = v; | ||||
| @@ -1835,14 +1836,14 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb) | |||||
| break; | break; | ||||
| last = v; | last = v; | ||||
| j = s->dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| j = s->idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| s->intra_matrix[j] = last; | s->intra_matrix[j] = last; | ||||
| s->chroma_intra_matrix[j] = last; | s->chroma_intra_matrix[j] = last; | ||||
| } | } | ||||
| /* replicate last value */ | /* replicate last value */ | ||||
| for (; i < 64; i++) { | for (; i < 64; i++) { | ||||
| int j = s->dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| int j = s->idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| s->intra_matrix[j] = last; | s->intra_matrix[j] = last; | ||||
| s->chroma_intra_matrix[j] = last; | s->chroma_intra_matrix[j] = last; | ||||
| } | } | ||||
| @@ -1858,14 +1859,14 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb) | |||||
| break; | break; | ||||
| last = v; | last = v; | ||||
| j = s->dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| j = s->idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| s->inter_matrix[j] = v; | s->inter_matrix[j] = v; | ||||
| s->chroma_inter_matrix[j] = v; | s->chroma_inter_matrix[j] = v; | ||||
| } | } | ||||
| /* replicate last value */ | /* replicate last value */ | ||||
| for (; i < 64; i++) { | for (; i < 64; i++) { | ||||
| int j = s->dsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| int j = s->idsp.idct_permutation[ff_zigzag_direct[i]]; | |||||
| s->inter_matrix[j] = last; | s->inter_matrix[j] = last; | ||||
| s->chroma_inter_matrix[j] = last; | s->chroma_inter_matrix[j] = last; | ||||
| } | } | ||||
| @@ -2219,15 +2220,15 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb) | |||||
| } | } | ||||
| if (s->alternate_scan) { | if (s->alternate_scan) { | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); | |||||
| } else { | } else { | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); | |||||
| } | } | ||||
| if (s->pict_type == AV_PICTURE_TYPE_S && | if (s->pict_type == AV_PICTURE_TYPE_S && | ||||
| @@ -110,11 +110,11 @@ static inline void restore_ac_coeffs(MpegEncContext *s, int16_t block[6][64], | |||||
| if (dir[n]) { | if (dir[n]) { | ||||
| /* top prediction */ | /* top prediction */ | ||||
| for (i = 1; i < 8; i++) | for (i = 1; i < 8; i++) | ||||
| block[n][s->dsp.idct_permutation[i]] = ac_val[i + 8]; | |||||
| block[n][s->idsp.idct_permutation[i]] = ac_val[i + 8]; | |||||
| } else { | } else { | ||||
| /* left prediction */ | /* left prediction */ | ||||
| for (i = 1; i < 8; i++) | for (i = 1; i < 8; i++) | ||||
| block[n][s->dsp.idct_permutation[i << 3]] = ac_val[i]; | |||||
| block[n][s->idsp.idct_permutation[i << 3]] = ac_val[i]; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -152,17 +152,17 @@ static inline int decide_ac_pred(MpegEncContext *s, int16_t block[6][64], | |||||
| if (s->mb_y == 0 || s->qscale == qscale_table[xy] || n == 2 || n == 3) { | if (s->mb_y == 0 || s->qscale == qscale_table[xy] || n == 2 || n == 3) { | ||||
| /* same qscale */ | /* same qscale */ | ||||
| for (i = 1; i < 8; i++) { | for (i = 1; i < 8; i++) { | ||||
| const int level = block[n][s->dsp.idct_permutation[i]]; | |||||
| block[n][s->dsp.idct_permutation[i]] = level - ac_val[i + 8]; | |||||
| ac_val1[i] = block[n][s->dsp.idct_permutation[i << 3]]; | |||||
| const int level = block[n][s->idsp.idct_permutation[i]]; | |||||
| block[n][s->idsp.idct_permutation[i]] = level - ac_val[i + 8]; | |||||
| ac_val1[i] = block[n][s->idsp.idct_permutation[i << 3]]; | |||||
| ac_val1[i + 8] = level; | ac_val1[i + 8] = level; | ||||
| } | } | ||||
| } else { | } else { | ||||
| /* different qscale, we must rescale */ | /* different qscale, we must rescale */ | ||||
| for (i = 1; i < 8; i++) { | for (i = 1; i < 8; i++) { | ||||
| const int level = block[n][s->dsp.idct_permutation[i]]; | |||||
| block[n][s->dsp.idct_permutation[i]] = level - ROUNDED_DIV(ac_val[i + 8] * qscale_table[xy], s->qscale); | |||||
| ac_val1[i] = block[n][s->dsp.idct_permutation[i << 3]]; | |||||
| const int level = block[n][s->idsp.idct_permutation[i]]; | |||||
| block[n][s->idsp.idct_permutation[i]] = level - ROUNDED_DIV(ac_val[i + 8] * qscale_table[xy], s->qscale); | |||||
| ac_val1[i] = block[n][s->idsp.idct_permutation[i << 3]]; | |||||
| ac_val1[i + 8] = level; | ac_val1[i + 8] = level; | ||||
| } | } | ||||
| } | } | ||||
| @@ -174,18 +174,18 @@ static inline int decide_ac_pred(MpegEncContext *s, int16_t block[6][64], | |||||
| if (s->mb_x == 0 || s->qscale == qscale_table[xy] || n == 1 || n == 3) { | if (s->mb_x == 0 || s->qscale == qscale_table[xy] || n == 1 || n == 3) { | ||||
| /* same qscale */ | /* same qscale */ | ||||
| for (i = 1; i < 8; i++) { | for (i = 1; i < 8; i++) { | ||||
| const int level = block[n][s->dsp.idct_permutation[i << 3]]; | |||||
| block[n][s->dsp.idct_permutation[i << 3]] = level - ac_val[i]; | |||||
| const int level = block[n][s->idsp.idct_permutation[i << 3]]; | |||||
| block[n][s->idsp.idct_permutation[i << 3]] = level - ac_val[i]; | |||||
| ac_val1[i] = level; | ac_val1[i] = level; | ||||
| ac_val1[i + 8] = block[n][s->dsp.idct_permutation[i]]; | |||||
| ac_val1[i + 8] = block[n][s->idsp.idct_permutation[i]]; | |||||
| } | } | ||||
| } else { | } else { | ||||
| /* different qscale, we must rescale */ | /* different qscale, we must rescale */ | ||||
| for (i = 1; i < 8; i++) { | for (i = 1; i < 8; i++) { | ||||
| const int level = block[n][s->dsp.idct_permutation[i << 3]]; | |||||
| block[n][s->dsp.idct_permutation[i << 3]] = level - ROUNDED_DIV(ac_val[i] * qscale_table[xy], s->qscale); | |||||
| const int level = block[n][s->idsp.idct_permutation[i << 3]]; | |||||
| block[n][s->idsp.idct_permutation[i << 3]] = level - ROUNDED_DIV(ac_val[i] * qscale_table[xy], s->qscale); | |||||
| ac_val1[i] = level; | ac_val1[i] = level; | ||||
| ac_val1[i + 8] = block[n][s->dsp.idct_permutation[i]]; | |||||
| ac_val1[i + 8] = block[n][s->idsp.idct_permutation[i]]; | |||||
| } | } | ||||
| } | } | ||||
| st[n] = s->intra_v_scantable.permutated; | st[n] = s->intra_v_scantable.permutated; | ||||
| @@ -34,7 +34,7 @@ | |||||
| #include "libavutil/timer.h" | #include "libavutil/timer.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "blockdsp.h" | #include "blockdsp.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mathops.h" | #include "mathops.h" | ||||
| #include "mpegutils.h" | #include "mpegutils.h" | ||||
| @@ -380,6 +380,7 @@ av_cold int ff_dct_common_init(MpegEncContext *s) | |||||
| ff_blockdsp_init(&s->bdsp, s->avctx); | ff_blockdsp_init(&s->bdsp, s->avctx); | ||||
| ff_dsputil_init(&s->dsp, s->avctx); | ff_dsputil_init(&s->dsp, s->avctx); | ||||
| ff_hpeldsp_init(&s->hdsp, s->avctx->flags); | ff_hpeldsp_init(&s->hdsp, s->avctx->flags); | ||||
| ff_idctdsp_init(&s->idsp, s->avctx); | |||||
| ff_mpegvideodsp_init(&s->mdsp); | ff_mpegvideodsp_init(&s->mdsp); | ||||
| ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample); | ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample); | ||||
| @@ -403,14 +404,14 @@ av_cold int ff_dct_common_init(MpegEncContext *s) | |||||
| * note: only wmv uses different ones | * note: only wmv uses different ones | ||||
| */ | */ | ||||
| if (s->alternate_scan) { | if (s->alternate_scan) { | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan); | |||||
| } else { | } else { | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); | |||||
| } | } | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -2041,7 +2042,7 @@ static inline void put_dct(MpegEncContext *s, | |||||
| int16_t *block, int i, uint8_t *dest, int line_size, int qscale) | int16_t *block, int i, uint8_t *dest, int line_size, int qscale) | ||||
| { | { | ||||
| s->dct_unquantize_intra(s, block, i, qscale); | s->dct_unquantize_intra(s, block, i, qscale); | ||||
| s->dsp.idct_put (dest, line_size, block); | |||||
| s->idsp.idct_put(dest, line_size, block); | |||||
| } | } | ||||
| /* add block[] to dest[] */ | /* add block[] to dest[] */ | ||||
| @@ -2049,7 +2050,7 @@ static inline void add_dct(MpegEncContext *s, | |||||
| int16_t *block, int i, uint8_t *dest, int line_size) | int16_t *block, int i, uint8_t *dest, int line_size) | ||||
| { | { | ||||
| if (s->block_last_index[i] >= 0) { | if (s->block_last_index[i] >= 0) { | ||||
| s->dsp.idct_add (dest, line_size, block); | |||||
| s->idsp.idct_add(dest, line_size, block); | |||||
| } | } | ||||
| } | } | ||||
| @@ -2059,7 +2060,7 @@ static inline void add_dequant_dct(MpegEncContext *s, | |||||
| if (s->block_last_index[i] >= 0) { | if (s->block_last_index[i] >= 0) { | ||||
| s->dct_unquantize_inter(s, block, i, qscale); | s->dct_unquantize_inter(s, block, i, qscale); | ||||
| s->dsp.idct_add (dest, line_size, block); | |||||
| s->idsp.idct_add(dest, line_size, block); | |||||
| } | } | ||||
| } | } | ||||
| @@ -2127,7 +2128,8 @@ FF_ENABLE_DEPRECATION_WARNINGS | |||||
| av_log(s->avctx, AV_LOG_DEBUG, "DCT coeffs of MB at %dx%d:\n", s->mb_x, s->mb_y); | av_log(s->avctx, AV_LOG_DEBUG, "DCT coeffs of MB at %dx%d:\n", s->mb_x, s->mb_y); | ||||
| for(i=0; i<6; i++){ | for(i=0; i<6; i++){ | ||||
| for(j=0; j<64; j++){ | for(j=0; j<64; j++){ | ||||
| av_log(s->avctx, AV_LOG_DEBUG, "%5d", block[i][s->dsp.idct_permutation[j]]); | |||||
| av_log(s->avctx, AV_LOG_DEBUG, "%5d", | |||||
| block[i][s->idsp.idct_permutation[j]]); | |||||
| } | } | ||||
| av_log(s->avctx, AV_LOG_DEBUG, "\n"); | av_log(s->avctx, AV_LOG_DEBUG, "\n"); | ||||
| } | } | ||||
| @@ -2304,29 +2306,29 @@ FF_ENABLE_DEPRECATION_WARNINGS | |||||
| } | } | ||||
| } | } | ||||
| }else{ | }else{ | ||||
| s->dsp.idct_put(dest_y , dct_linesize, block[0]); | |||||
| s->dsp.idct_put(dest_y + block_size, dct_linesize, block[1]); | |||||
| s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]); | |||||
| s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]); | |||||
| s->idsp.idct_put(dest_y, dct_linesize, block[0]); | |||||
| s->idsp.idct_put(dest_y + block_size, dct_linesize, block[1]); | |||||
| s->idsp.idct_put(dest_y + dct_offset, dct_linesize, block[2]); | |||||
| s->idsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]); | |||||
| if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | ||||
| if(s->chroma_y_shift){ | if(s->chroma_y_shift){ | ||||
| s->dsp.idct_put(dest_cb, uvlinesize, block[4]); | |||||
| s->dsp.idct_put(dest_cr, uvlinesize, block[5]); | |||||
| s->idsp.idct_put(dest_cb, uvlinesize, block[4]); | |||||
| s->idsp.idct_put(dest_cr, uvlinesize, block[5]); | |||||
| }else{ | }else{ | ||||
| dct_linesize = uvlinesize << s->interlaced_dct; | dct_linesize = uvlinesize << s->interlaced_dct; | ||||
| dct_offset = s->interlaced_dct ? uvlinesize : uvlinesize * 8; | dct_offset = s->interlaced_dct ? uvlinesize : uvlinesize * 8; | ||||
| s->dsp.idct_put(dest_cb, dct_linesize, block[4]); | |||||
| s->dsp.idct_put(dest_cr, dct_linesize, block[5]); | |||||
| s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]); | |||||
| s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]); | |||||
| s->idsp.idct_put(dest_cb, dct_linesize, block[4]); | |||||
| s->idsp.idct_put(dest_cr, dct_linesize, block[5]); | |||||
| s->idsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]); | |||||
| s->idsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]); | |||||
| if(!s->chroma_x_shift){//Chroma444 | if(!s->chroma_x_shift){//Chroma444 | ||||
| s->dsp.idct_put(dest_cb + 8, dct_linesize, block[8]); | |||||
| s->dsp.idct_put(dest_cr + 8, dct_linesize, block[9]); | |||||
| s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]); | |||||
| s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]); | |||||
| s->idsp.idct_put(dest_cb + 8, dct_linesize, block[8]); | |||||
| s->idsp.idct_put(dest_cr + 8, dct_linesize, block[9]); | |||||
| s->idsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]); | |||||
| s->idsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]); | |||||
| } | } | ||||
| } | } | ||||
| }//gray | }//gray | ||||
| @@ -35,6 +35,7 @@ | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "h263dsp.h" | #include "h263dsp.h" | ||||
| #include "hpeldsp.h" | #include "hpeldsp.h" | ||||
| #include "idctdsp.h" | |||||
| #include "mpegvideodsp.h" | #include "mpegvideodsp.h" | ||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| #include "ratecontrol.h" | #include "ratecontrol.h" | ||||
| @@ -352,6 +353,7 @@ typedef struct MpegEncContext { | |||||
| BlockDSPContext bdsp; | BlockDSPContext bdsp; | ||||
| DSPContext dsp; ///< pointers for accelerated dsp functions | DSPContext dsp; ///< pointers for accelerated dsp functions | ||||
| HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
| IDCTDSPContext idsp; | |||||
| MpegVideoDSPContext mdsp; | MpegVideoDSPContext mdsp; | ||||
| QpelDSPContext qdsp; | QpelDSPContext qdsp; | ||||
| VideoDSPContext vdsp; | VideoDSPContext vdsp; | ||||
| @@ -38,6 +38,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dct.h" | #include "dct.h" | ||||
| #include "dsputil.h" | #include "dsputil.h" | ||||
| #include "idctdsp.h" | |||||
| #include "mpeg12.h" | #include "mpeg12.h" | ||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| #include "h261.h" | #include "h261.h" | ||||
| @@ -86,7 +87,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], | |||||
| dsp->fdct == ff_jpeg_fdct_islow_10 || | dsp->fdct == ff_jpeg_fdct_islow_10 || | ||||
| dsp->fdct == ff_faandct) { | dsp->fdct == ff_faandct) { | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| const int j = dsp->idct_permutation[i]; | |||||
| const int j = s->idsp.idct_permutation[i]; | |||||
| /* 16 <= qscale * quant_matrix[i] <= 7905 | /* 16 <= qscale * quant_matrix[i] <= 7905 | ||||
| * Assume x = ff_aanscales[i] * qscale * quant_matrix[i] | * Assume x = ff_aanscales[i] * qscale * quant_matrix[i] | ||||
| * 19952 <= x <= 249205026 | * 19952 <= x <= 249205026 | ||||
| @@ -98,7 +99,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], | |||||
| } | } | ||||
| } else if (dsp->fdct == ff_fdct_ifast) { | } else if (dsp->fdct == ff_fdct_ifast) { | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| const int j = dsp->idct_permutation[i]; | |||||
| const int j = s->idsp.idct_permutation[i]; | |||||
| /* 16 <= qscale * quant_matrix[i] <= 7905 | /* 16 <= qscale * quant_matrix[i] <= 7905 | ||||
| * Assume x = ff_aanscales[i] * qscale * quant_matrix[i] | * Assume x = ff_aanscales[i] * qscale * quant_matrix[i] | ||||
| * 19952 <= x <= 249205026 | * 19952 <= x <= 249205026 | ||||
| @@ -111,7 +112,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], | |||||
| } | } | ||||
| } else { | } else { | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| const int j = dsp->idct_permutation[i]; | |||||
| const int j = s->idsp.idct_permutation[i]; | |||||
| /* We can safely suppose that 16 <= quant_matrix[i] <= 255 | /* We can safely suppose that 16 <= quant_matrix[i] <= 255 | ||||
| * Assume x = qscale * quant_matrix[i] | * Assume x = qscale * quant_matrix[i] | ||||
| * So 16 <= x <= 7905 | * So 16 <= x <= 7905 | ||||
| @@ -755,7 +756,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) | |||||
| /* init q matrix */ | /* init q matrix */ | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| int j = s->dsp.idct_permutation[i]; | |||||
| int j = s->idsp.idct_permutation[i]; | |||||
| if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 && | if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 && | ||||
| s->mpeg_quant) { | s->mpeg_quant) { | ||||
| s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; | s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; | ||||
| @@ -3360,7 +3361,7 @@ static int encode_picture(MpegEncContext *s, int picture_number) | |||||
| if (s->out_format == FMT_MJPEG) { | if (s->out_format == FMT_MJPEG) { | ||||
| /* for mjpeg, we do include qscale in the matrix */ | /* for mjpeg, we do include qscale in the matrix */ | ||||
| for(i=1;i<64;i++){ | for(i=1;i<64;i++){ | ||||
| int j= s->dsp.idct_permutation[i]; | |||||
| int j = s->idsp.idct_permutation[i]; | |||||
| s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); | s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); | ||||
| } | } | ||||
| @@ -3589,7 +3590,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s, | |||||
| if(s->out_format == FMT_H263){ | if(s->out_format == FMT_H263){ | ||||
| unquant_coeff= alevel*qmul + qadd; | unquant_coeff= alevel*qmul + qadd; | ||||
| }else{ //MPEG1 | }else{ //MPEG1 | ||||
| j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize | |||||
| j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize | |||||
| if(s->mb_intra){ | if(s->mb_intra){ | ||||
| unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3; | unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3; | ||||
| unquant_coeff = (unquant_coeff - 1) | 1; | unquant_coeff = (unquant_coeff - 1) | 1; | ||||
| @@ -3795,7 +3796,7 @@ static int messed_sign=0; | |||||
| #endif | #endif | ||||
| if(basis[0][0] == 0) | if(basis[0][0] == 0) | ||||
| build_basis(s->dsp.idct_permutation); | |||||
| build_basis(s->idsp.idct_permutation); | |||||
| qmul= qscale*2; | qmul= qscale*2; | ||||
| qadd= (qscale-1)|1; | qadd= (qscale-1)|1; | ||||
| @@ -4214,8 +4215,9 @@ int ff_dct_quantize_c(MpegEncContext *s, | |||||
| *overflow= s->max_qcoeff < max; //overflow might have happened | *overflow= s->max_qcoeff < max; //overflow might have happened | ||||
| /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ | /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ | ||||
| if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM) | |||||
| ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero); | |||||
| if (s->idsp.idct_permutation_type != FF_NO_IDCT_PERM) | |||||
| ff_block_permute(block, s->idsp.idct_permutation, | |||||
| scantable, last_non_zero); | |||||
| return last_non_zero; | return last_non_zero; | ||||
| } | } | ||||
| @@ -307,7 +307,7 @@ void ff_xvmc_decode_mb(MpegEncContext *s) | |||||
| if (s->mb_intra && (render->idct || !render->unsigned_intra)) | if (s->mb_intra && (render->idct || !render->unsigned_intra)) | ||||
| *s->pblocks[i][0] -= 1 << 10; | *s->pblocks[i][0] -= 1 << 10; | ||||
| if (!render->idct) { | if (!render->idct) { | ||||
| s->dsp.idct(*s->pblocks[i]); | |||||
| s->idsp.idct(*s->pblocks[i]); | |||||
| /* It is unclear if MC hardware requires pixel diff values to be | /* It is unclear if MC hardware requires pixel diff values to be | ||||
| * in the range [-255;255]. TODO: Clipping if such hardware is | * in the range [-255;255]. TODO: Clipping if such hardware is | ||||
| * ever found. As of now it would only be an unnecessary | * ever found. As of now it would only be an unnecessary | ||||
| @@ -28,7 +28,7 @@ | |||||
| */ | */ | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| #include "msmpeg4.h" | #include "msmpeg4.h" | ||||
| #include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
| @@ -136,10 +136,10 @@ av_cold void ff_msmpeg4_common_init(MpegEncContext *s) | |||||
| if(s->msmpeg4_version>=4){ | if(s->msmpeg4_version>=4){ | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_wmv1_scantable[1]); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_wmv1_scantable[2]); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_wmv1_scantable[3]); | |||||
| ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_wmv1_scantable[0]); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_wmv1_scantable[1]); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_wmv1_scantable[2]); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_wmv1_scantable[3]); | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_wmv1_scantable[0]); | |||||
| } | } | ||||
| //Note the default tables are set in common_init in mpegvideo.c | //Note the default tables are set in common_init in mpegvideo.c | ||||
| @@ -28,6 +28,7 @@ | |||||
| #include "libavutil/lzo.h" | #include "libavutil/lzo.h" | ||||
| #include "libavutil/imgutils.h" | #include "libavutil/imgutils.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "rtjpeg.h" | #include "rtjpeg.h" | ||||
| @@ -9,6 +9,7 @@ OBJS-$(CONFIG_H264DSP) += ppc/h264dsp.o | |||||
| OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o | OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o | ||||
| OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o | OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o | ||||
| OBJS-$(CONFIG_HUFFYUVDSP) += ppc/huffyuvdsp_altivec.o | OBJS-$(CONFIG_HUFFYUVDSP) += ppc/huffyuvdsp_altivec.o | ||||
| OBJS-$(CONFIG_IDCTDSP) += ppc/idctdsp.o | |||||
| OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o | OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o | ||||
| OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ | OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ | ||||
| ppc/mpegvideodsp.o | ppc/mpegvideodsp.o | ||||
| @@ -24,7 +25,6 @@ OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o | |||||
| ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ | ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ | ||||
| ppc/fdct_altivec.o \ | ppc/fdct_altivec.o \ | ||||
| ppc/idct_altivec.o \ | |||||
| FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o | FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o | ||||
| ALTIVEC-OBJS-$(CONFIG_FFT) += $(FFT-OBJS-yes) | ALTIVEC-OBJS-$(CONFIG_FFT) += $(FFT-OBJS-yes) | ||||
| @@ -29,9 +29,6 @@ | |||||
| void ff_fdct_altivec(int16_t *block); | void ff_fdct_altivec(int16_t *block); | ||||
| void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | |||||
| void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |||||
| void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | ||||
| unsigned high_bit_depth); | unsigned high_bit_depth); | ||||
| @@ -42,12 +42,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, | |||||
| c->fdct = ff_fdct_altivec; | c->fdct = ff_fdct_altivec; | ||||
| } | } | ||||
| #endif //CONFIG_ENCODERS | #endif //CONFIG_ENCODERS | ||||
| if ((avctx->idct_algo == FF_IDCT_AUTO) || | |||||
| (avctx->idct_algo == FF_IDCT_ALTIVEC)) { | |||||
| c->idct_put = ff_idct_put_altivec; | |||||
| c->idct_add = ff_idct_add_altivec; | |||||
| c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -37,8 +37,13 @@ | |||||
| #include <altivec.h> | #include <altivec.h> | ||||
| #endif | #endif | ||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/ppc/cpu.h" | |||||
| #include "libavutil/ppc/types_altivec.h" | #include "libavutil/ppc/types_altivec.h" | ||||
| #include "dsputil_altivec.h" | |||||
| #include "libavcodec/idctdsp.h" | |||||
| #if HAVE_ALTIVEC | |||||
| #define IDCT_HALF \ | #define IDCT_HALF \ | ||||
| /* 1st stage */ \ | /* 1st stage */ \ | ||||
| @@ -148,7 +153,7 @@ static const vec_s16 constants[5] = { | |||||
| { 19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722 } | { 19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722 } | ||||
| }; | }; | ||||
| void ff_idct_put_altivec(uint8_t *dest, int stride, int16_t *blk) | |||||
| static void idct_put_altivec(uint8_t *dest, int stride, int16_t *blk) | |||||
| { | { | ||||
| vec_s16 *block = (vec_s16 *) blk; | vec_s16 *block = (vec_s16 *) blk; | ||||
| vec_u8 tmp; | vec_u8 tmp; | ||||
| @@ -177,7 +182,7 @@ void ff_idct_put_altivec(uint8_t *dest, int stride, int16_t *blk) | |||||
| COPY(dest, vx7); | COPY(dest, vx7); | ||||
| } | } | ||||
| void ff_idct_add_altivec(uint8_t *dest, int stride, int16_t *blk) | |||||
| static void idct_add_altivec(uint8_t *dest, int stride, int16_t *blk) | |||||
| { | { | ||||
| vec_s16 *block = (vec_s16 *) blk; | vec_s16 *block = (vec_s16 *) blk; | ||||
| vec_u8 tmp; | vec_u8 tmp; | ||||
| @@ -219,3 +224,22 @@ void ff_idct_add_altivec(uint8_t *dest, int stride, int16_t *blk) | |||||
| dest += stride; | dest += stride; | ||||
| ADD(dest, vx7, perm1); | ADD(dest, vx7, perm1); | ||||
| } | } | ||||
| #endif /* HAVE_ALTIVEC */ | |||||
| av_cold void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth) | |||||
| { | |||||
| #if HAVE_ALTIVEC | |||||
| if (PPC_ALTIVEC(av_get_cpu_flags())) { | |||||
| if (!high_bit_depth) { | |||||
| if ((avctx->idct_algo == FF_IDCT_AUTO) || | |||||
| (avctx->idct_algo == FF_IDCT_ALTIVEC)) { | |||||
| c->idct_add = idct_add_altivec; | |||||
| c->idct_put = idct_put_altivec; | |||||
| c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | |||||
| } | |||||
| } | |||||
| } | |||||
| #endif /* HAVE_ALTIVEC */ | |||||
| } | |||||
| @@ -34,7 +34,7 @@ | |||||
| #include "libavutil/intmath.h" | #include "libavutil/intmath.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "proresdata.h" | #include "proresdata.h" | ||||
| #include "proresdsp.h" | #include "proresdsp.h" | ||||
| @@ -23,7 +23,7 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| #include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "proresdsp.h" | #include "proresdsp.h" | ||||
| #include "simple_idct.h" | #include "simple_idct.h" | ||||
| @@ -121,7 +121,7 @@ int ff_rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f, | |||||
| if (res < 0) \ | if (res < 0) \ | ||||
| return res; \ | return res; \ | ||||
| if (res > 0) \ | if (res > 0) \ | ||||
| c->dsp.idct_put(dst, stride, block); \ | |||||
| c->idsp.idct_put(dst, stride, block); \ | |||||
| } while (0) | } while (0) | ||||
| int16_t *block = c->block; | int16_t *block = c->block; | ||||
| BLOCK(c->lquant, y1, f->linesize[0]); | BLOCK(c->lquant, y1, f->linesize[0]); | ||||
| @@ -159,7 +159,7 @@ void ff_rtjpeg_decode_init(RTJpegContext *c, int width, int height, | |||||
| const uint32_t *lquant, const uint32_t *cquant) { | const uint32_t *lquant, const uint32_t *cquant) { | ||||
| int i; | int i; | ||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| int p = c->dsp.idct_permutation[i]; | |||||
| int p = c->idsp.idct_permutation[i]; | |||||
| c->lquant[p] = lquant[i]; | c->lquant[p] = lquant[i]; | ||||
| c->cquant[p] = cquant[i]; | c->cquant[p] = cquant[i]; | ||||
| } | } | ||||
| @@ -171,13 +171,13 @@ void ff_rtjpeg_init(RTJpegContext *c, AVCodecContext *avctx) | |||||
| { | { | ||||
| int i; | int i; | ||||
| ff_dsputil_init(&c->dsp, avctx); | |||||
| ff_idctdsp_init(&c->idsp, avctx); | |||||
| for (i = 0; i < 64; i++) { | for (i = 0; i < 64; i++) { | ||||
| int z = ff_zigzag_direct[i]; | int z = ff_zigzag_direct[i]; | ||||
| z = ((z << 3) | (z >> 3)) & 63; // rtjpeg uses a transposed variant | z = ((z << 3) | (z >> 3)) & 63; // rtjpeg uses a transposed variant | ||||
| // permute the scan and quantization tables for the chosen idct | // permute the scan and quantization tables for the chosen idct | ||||
| c->scan[i] = c->dsp.idct_permutation[z]; | |||||
| c->scan[i] = c->idsp.idct_permutation[z]; | |||||
| } | } | ||||
| } | } | ||||
| @@ -23,15 +23,16 @@ | |||||
| #define AVCODEC_RTJPEG_H | #define AVCODEC_RTJPEG_H | ||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include "dsputil.h" | |||||
| #include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
| #include "idctdsp.h" | |||||
| #define RTJPEG_FILE_VERSION 0 | #define RTJPEG_FILE_VERSION 0 | ||||
| #define RTJPEG_HEADER_SIZE 12 | #define RTJPEG_HEADER_SIZE 12 | ||||
| typedef struct RTJpegContext { | typedef struct RTJpegContext { | ||||
| int w, h; | int w, h; | ||||
| DSPContext dsp; | |||||
| IDCTDSPContext idsp; | |||||
| uint8_t scan[64]; | uint8_t scan[64]; | ||||
| uint32_t lquant[64]; | uint32_t lquant[64]; | ||||
| uint32_t cquant[64]; | uint32_t cquant[64]; | ||||
| @@ -109,24 +109,24 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v) | |||||
| fieldtx = v->fieldtx_plane[topleft_mb_pos]; | fieldtx = v->fieldtx_plane[topleft_mb_pos]; | ||||
| stride_y = s->linesize << fieldtx; | stride_y = s->linesize << fieldtx; | ||||
| v_dist = (16 - fieldtx) >> (fieldtx == 0); | v_dist = (16 - fieldtx) >> (fieldtx == 0); | ||||
| s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0], | |||||
| s->dest[0] - 16 * s->linesize - 16, | |||||
| stride_y); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1], | |||||
| s->dest[0] - 16 * s->linesize - 8, | |||||
| stride_y); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2], | |||||
| s->dest[0] - v_dist * s->linesize - 16, | |||||
| stride_y); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3], | |||||
| s->dest[0] - v_dist * s->linesize - 8, | |||||
| stride_y); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4], | |||||
| s->dest[1] - 8 * s->uvlinesize - 8, | |||||
| s->uvlinesize); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5], | |||||
| s->dest[2] - 8 * s->uvlinesize - 8, | |||||
| s->uvlinesize); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0], | |||||
| s->dest[0] - 16 * s->linesize - 16, | |||||
| stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1], | |||||
| s->dest[0] - 16 * s->linesize - 8, | |||||
| stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2], | |||||
| s->dest[0] - v_dist * s->linesize - 16, | |||||
| stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3], | |||||
| s->dest[0] - v_dist * s->linesize - 8, | |||||
| stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4], | |||||
| s->dest[1] - 8 * s->uvlinesize - 8, | |||||
| s->uvlinesize); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5], | |||||
| s->dest[2] - 8 * s->uvlinesize - 8, | |||||
| s->uvlinesize); | |||||
| } | } | ||||
| if (s->mb_x == s->mb_width - 1) { | if (s->mb_x == s->mb_width - 1) { | ||||
| top_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x; | top_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x; | ||||
| @@ -134,24 +134,24 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v) | |||||
| fieldtx = v->fieldtx_plane[top_mb_pos]; | fieldtx = v->fieldtx_plane[top_mb_pos]; | ||||
| stride_y = s->linesize << fieldtx; | stride_y = s->linesize << fieldtx; | ||||
| v_dist = fieldtx ? 15 : 8; | v_dist = fieldtx ? 15 : 8; | ||||
| s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0], | |||||
| s->dest[0] - 16 * s->linesize, | |||||
| stride_y); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1], | |||||
| s->dest[0] - 16 * s->linesize + 8, | |||||
| stride_y); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2], | |||||
| s->dest[0] - v_dist * s->linesize, | |||||
| stride_y); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3], | |||||
| s->dest[0] - v_dist * s->linesize + 8, | |||||
| stride_y); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4], | |||||
| s->dest[1] - 8 * s->uvlinesize, | |||||
| s->uvlinesize); | |||||
| s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5], | |||||
| s->dest[2] - 8 * s->uvlinesize, | |||||
| s->uvlinesize); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0], | |||||
| s->dest[0] - 16 * s->linesize, | |||||
| stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1], | |||||
| s->dest[0] - 16 * s->linesize + 8, | |||||
| stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2], | |||||
| s->dest[0] - v_dist * s->linesize, | |||||
| stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3], | |||||
| s->dest[0] - v_dist * s->linesize + 8, | |||||
| stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4], | |||||
| s->dest[1] - 8 * s->uvlinesize, | |||||
| s->uvlinesize); | |||||
| s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5], | |||||
| s->dest[2] - 8 * s->uvlinesize, | |||||
| s->uvlinesize); | |||||
| } | } | ||||
| } | } | ||||
| @@ -3280,7 +3280,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n, | |||||
| v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block); | v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block); | ||||
| else { | else { | ||||
| v->vc1dsp.vc1_inv_trans_8x8(block); | v->vc1dsp.vc1_inv_trans_8x8(block); | ||||
| s->dsp.add_pixels_clamped(block, dst, linesize); | |||||
| s->idsp.add_pixels_clamped(block, dst, linesize); | |||||
| } | } | ||||
| } | } | ||||
| break; | break; | ||||
| @@ -3611,7 +3611,10 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| if (v->rangeredfrm) | if (v->rangeredfrm) | ||||
| for (j = 0; j < 64; j++) | for (j = 0; j < 64; j++) | ||||
| s->block[i][j] <<= 1; | s->block[i][j] <<= 1; | ||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | |||||
| s->idsp.put_signed_pixels_clamped(s->block[i], | |||||
| s->dest[dst_idx] + off, | |||||
| i & 4 ? s->uvlinesize | |||||
| : s->linesize); | |||||
| if (v->pq >= 9 && v->overlap) { | if (v->pq >= 9 && v->overlap) { | ||||
| if (v->c_avail) | if (v->c_avail) | ||||
| v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | ||||
| @@ -3719,8 +3722,10 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
| if (v->rangeredfrm) | if (v->rangeredfrm) | ||||
| for (j = 0; j < 64; j++) | for (j = 0; j < 64; j++) | ||||
| s->block[i][j] <<= 1; | s->block[i][j] <<= 1; | ||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, | |||||
| (i & 4) ? s->uvlinesize : s->linesize); | |||||
| s->idsp.put_signed_pixels_clamped(s->block[i], | |||||
| s->dest[dst_idx] + off, | |||||
| (i & 4) ? s->uvlinesize | |||||
| : s->linesize); | |||||
| if (v->pq >= 9 && v->overlap) { | if (v->pq >= 9 && v->overlap) { | ||||
| if (v->c_avail) | if (v->c_avail) | ||||
| v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | ||||
| @@ -3869,7 +3874,9 @@ static int vc1_decode_p_mb_intfr(VC1Context *v) | |||||
| stride_y = s->uvlinesize; | stride_y = s->uvlinesize; | ||||
| off = 0; | off = 0; | ||||
| } | } | ||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(s->block[i], | |||||
| s->dest[dst_idx] + off, | |||||
| stride_y); | |||||
| //TODO: loop filter | //TODO: loop filter | ||||
| } | } | ||||
| @@ -4031,7 +4038,10 @@ static int vc1_decode_p_mb_intfi(VC1Context *v) | |||||
| continue; | continue; | ||||
| v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); | v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); | ||||
| off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize); | off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize); | ||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i & 4) ? s->uvlinesize : s->linesize); | |||||
| s->idsp.put_signed_pixels_clamped(s->block[i], | |||||
| s->dest[dst_idx] + off, | |||||
| (i & 4) ? s->uvlinesize | |||||
| : s->linesize); | |||||
| // TODO: loop filter | // TODO: loop filter | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -4233,7 +4243,10 @@ static void vc1_decode_b_mb(VC1Context *v) | |||||
| if (v->rangeredfrm) | if (v->rangeredfrm) | ||||
| for (j = 0; j < 64; j++) | for (j = 0; j < 64; j++) | ||||
| s->block[i][j] <<= 1; | s->block[i][j] <<= 1; | ||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); | |||||
| s->idsp.put_signed_pixels_clamped(s->block[i], | |||||
| s->dest[dst_idx] + off, | |||||
| i & 4 ? s->uvlinesize | |||||
| : s->linesize); | |||||
| } else if (val) { | } else if (val) { | ||||
| vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, | vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, | ||||
| first_block, s->dest[dst_idx] + off, | first_block, s->dest[dst_idx] + off, | ||||
| @@ -4305,7 +4318,10 @@ static void vc1_decode_b_mb_intfi(VC1Context *v) | |||||
| for (j = 0; j < 64; j++) | for (j = 0; j < 64; j++) | ||||
| s->block[i][j] <<= 1; | s->block[i][j] <<= 1; | ||||
| off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize); | off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize); | ||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i & 4) ? s->uvlinesize : s->linesize); | |||||
| s->idsp.put_signed_pixels_clamped(s->block[i], | |||||
| s->dest[dst_idx] + off, | |||||
| (i & 4) ? s->uvlinesize | |||||
| : s->linesize); | |||||
| // TODO: yet to perform loop filter | // TODO: yet to perform loop filter | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -4524,7 +4540,9 @@ static int vc1_decode_b_mb_intfr(VC1Context *v) | |||||
| stride_y = s->uvlinesize; | stride_y = s->uvlinesize; | ||||
| off = 0; | off = 0; | ||||
| } | } | ||||
| s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, stride_y); | |||||
| s->idsp.put_signed_pixels_clamped(s->block[i], | |||||
| s->dest[dst_idx] + off, | |||||
| stride_y); | |||||
| } | } | ||||
| } else { | } else { | ||||
| s->mb_intra = v->is_intra[s->mb_x] = 0; | s->mb_intra = v->is_intra[s->mb_x] = 0; | ||||
| @@ -4828,12 +4846,16 @@ static void vc1_decode_i_blocks(VC1Context *v) | |||||
| if (v->rangeredfrm) | if (v->rangeredfrm) | ||||
| for (j = 0; j < 64; j++) | for (j = 0; j < 64; j++) | ||||
| s->block[k][j] <<= 1; | s->block[k][j] <<= 1; | ||||
| s->dsp.put_signed_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize); | |||||
| s->idsp.put_signed_pixels_clamped(s->block[k], dst[k], | |||||
| k & 4 ? s->uvlinesize | |||||
| : s->linesize); | |||||
| } else { | } else { | ||||
| if (v->rangeredfrm) | if (v->rangeredfrm) | ||||
| for (j = 0; j < 64; j++) | for (j = 0; j < 64; j++) | ||||
| s->block[k][j] = (s->block[k][j] - 64) << 1; | s->block[k][j] = (s->block[k][j] - 64) << 1; | ||||
| s->dsp.put_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize); | |||||
| s->idsp.put_pixels_clamped(s->block[k], dst[k], | |||||
| k & 4 ? s->uvlinesize | |||||
| : s->linesize); | |||||
| } | } | ||||
| } | } | ||||
| @@ -19,6 +19,7 @@ | |||||
| */ | */ | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "idctdsp.h" | |||||
| #include "mpegvideo.h" | #include "mpegvideo.h" | ||||
| #include "msmpeg4data.h" | #include "msmpeg4data.h" | ||||
| #include "simple_idct.h" | #include "simple_idct.h" | ||||
| @@ -30,24 +31,24 @@ av_cold void ff_wmv2_common_init(Wmv2Context * w){ | |||||
| ff_blockdsp_init(&s->bdsp, s->avctx); | ff_blockdsp_init(&s->bdsp, s->avctx); | ||||
| ff_wmv2dsp_init(&w->wdsp); | ff_wmv2dsp_init(&w->wdsp); | ||||
| s->dsp.idct_permutation_type = w->wdsp.idct_perm; | |||||
| ff_init_scantable_permutation(s->dsp.idct_permutation, | |||||
| s->idsp.idct_permutation_type = w->wdsp.idct_perm; | |||||
| ff_init_scantable_permutation(s->idsp.idct_permutation, | |||||
| w->wdsp.idct_perm); | w->wdsp.idct_perm); | ||||
| ff_init_scantable(s->dsp.idct_permutation, &w->abt_scantable[0], | |||||
| ff_init_scantable(s->idsp.idct_permutation, &w->abt_scantable[0], | |||||
| ff_wmv2_scantableA); | ff_wmv2_scantableA); | ||||
| ff_init_scantable(s->dsp.idct_permutation, &w->abt_scantable[1], | |||||
| ff_init_scantable(s->idsp.idct_permutation, &w->abt_scantable[1], | |||||
| ff_wmv2_scantableB); | ff_wmv2_scantableB); | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, | |||||
| ff_wmv1_scantable[1]); | ff_wmv1_scantable[1]); | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, | |||||
| ff_wmv1_scantable[2]); | ff_wmv1_scantable[2]); | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, | |||||
| ff_wmv1_scantable[3]); | ff_wmv1_scantable[3]); | ||||
| ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, | |||||
| ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, | |||||
| ff_wmv1_scantable[0]); | ff_wmv1_scantable[0]); | ||||
| s->dsp.idct_put = w->wdsp.idct_put; | |||||
| s->dsp.idct_add = w->wdsp.idct_add; | |||||
| s->dsp.idct = NULL; | |||||
| s->idsp.idct_put = w->wdsp.idct_put; | |||||
| s->idsp.idct_add = w->wdsp.idct_add; | |||||
| s->idsp.idct = NULL; | |||||
| } | } | ||||
| static void wmv2_add_block(Wmv2Context *w, int16_t *block1, uint8_t *dst, int stride, int n){ | static void wmv2_add_block(Wmv2Context *w, int16_t *block1, uint8_t *dst, int stride, int n){ | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "idctdsp.h" | |||||
| #include "mathops.h" | #include "mathops.h" | ||||
| #include "wmv2dsp.h" | #include "wmv2dsp.h" | ||||
| @@ -18,6 +18,7 @@ OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o | |||||
| OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o | OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o | ||||
| OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o | OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o | ||||
| OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o | OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o | ||||
| OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_init.o | |||||
| OBJS-$(CONFIG_LPC) += x86/lpc.o | OBJS-$(CONFIG_LPC) += x86/lpc.o | ||||
| OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o | OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o | ||||
| OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ | OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ | ||||
| @@ -49,13 +50,14 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | |||||
| MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o | MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o | ||||
| MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o | MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o | ||||
| MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | |||||
| x86/idct_mmx_xvid.o \ | |||||
| x86/idct_sse2_xvid.o \ | |||||
| x86/simple_idct.o | |||||
| MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o | |||||
| MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ | MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ | ||||
| x86/hpeldsp_mmx.o | x86/hpeldsp_mmx.o | ||||
| MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o | MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o | ||||
| MMX-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_mmx.o \ | |||||
| x86/idct_mmx_xvid.o \ | |||||
| x86/idct_sse2_xvid.o \ | |||||
| x86/simple_idct.o | |||||
| MMX-OBJS-$(CONFIG_QPELDSP) += x86/fpel_mmx.o | MMX-OBJS-$(CONFIG_QPELDSP) += x86/fpel_mmx.o | ||||
| MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o | MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o | ||||
| @@ -28,9 +28,10 @@ | |||||
| #include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
| #include "libavutil/x86/cpu.h" | #include "libavutil/x86/cpu.h" | ||||
| #include "libavcodec/cavsdsp.h" | #include "libavcodec/cavsdsp.h" | ||||
| #include "libavcodec/idctdsp.h" | |||||
| #include "constants.h" | #include "constants.h" | ||||
| #include "dsputil_x86.h" | |||||
| #include "fpel.h" | #include "fpel.h" | ||||
| #include "idctdsp.h" | |||||
| #include "config.h" | #include "config.h" | ||||
| #if HAVE_MMX_INLINE | #if HAVE_MMX_INLINE | ||||
| @@ -22,97 +22,18 @@ | |||||
| #include "libavutil/x86/cpu.h" | #include "libavutil/x86/cpu.h" | ||||
| #include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
| #include "libavcodec/simple_idct.h" | |||||
| #include "dsputil_x86.h" | #include "dsputil_x86.h" | ||||
| #include "idct_xvid.h" | |||||
| /* Input permutation for the simple_idct_mmx */ | |||||
| static const uint8_t simple_mmx_permutation[64] = { | |||||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||||
| }; | |||||
| static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 }; | |||||
| av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, | |||||
| int idct_permutation_type) | |||||
| { | |||||
| int i; | |||||
| switch (idct_permutation_type) { | |||||
| case FF_SIMPLE_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = simple_mmx_permutation[i]; | |||||
| return 1; | |||||
| case FF_SSE2_IDCT_PERM: | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7]; | |||||
| return 1; | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | ||||
| int cpu_flags, unsigned high_bit_depth) | int cpu_flags, unsigned high_bit_depth) | ||||
| { | { | ||||
| #if HAVE_MMX_INLINE | #if HAVE_MMX_INLINE | ||||
| c->put_pixels_clamped = ff_put_pixels_clamped_mmx; | |||||
| c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; | |||||
| c->add_pixels_clamped = ff_add_pixels_clamped_mmx; | |||||
| if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
| c->draw_edges = ff_draw_edges_mmx; | c->draw_edges = ff_draw_edges_mmx; | ||||
| switch (avctx->idct_algo) { | |||||
| case FF_IDCT_AUTO: | |||||
| case FF_IDCT_SIMPLEMMX: | |||||
| c->idct_put = ff_simple_idct_put_mmx; | |||||
| c->idct_add = ff_simple_idct_add_mmx; | |||||
| c->idct = ff_simple_idct_mmx; | |||||
| c->idct_permutation_type = FF_SIMPLE_IDCT_PERM; | |||||
| break; | |||||
| case FF_IDCT_XVIDMMX: | |||||
| c->idct_put = ff_idct_xvid_mmx_put; | |||||
| c->idct_add = ff_idct_xvid_mmx_add; | |||||
| c->idct = ff_idct_xvid_mmx; | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| #endif /* HAVE_MMX_INLINE */ | #endif /* HAVE_MMX_INLINE */ | ||||
| } | } | ||||
| /* Point the three IDCT entries of c at the MMXEXT xvid IDCT, but only when high_bit_depth is zero and avctx->idct_algo is FF_IDCT_XVIDMMX. cpu_flags is not read in the body. */ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, | |||||
| int cpu_flags, unsigned high_bit_depth) | |||||
| { | |||||
| #if HAVE_MMXEXT_INLINE | |||||
| if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { | |||||
| c->idct_put = ff_idct_xvid_mmxext_put; | |||||
| c->idct_add = ff_idct_xvid_mmxext_add; | |||||
| c->idct = ff_idct_xvid_mmxext; /* idct_permutation_type is not changed here */ | |||||
| } | |||||
| #endif /* HAVE_MMXEXT_INLINE */ | |||||
| } | |||||
| /* Point the three IDCT entries of c at the SSE2 xvid IDCT and select FF_SSE2_IDCT_PERM, but only when high_bit_depth is zero and avctx->idct_algo is FF_IDCT_XVIDMMX. cpu_flags is not read in the body. */ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, | |||||
| int cpu_flags, unsigned high_bit_depth) | |||||
| { | |||||
| #if HAVE_SSE2_INLINE | |||||
| if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { | |||||
| c->idct_put = ff_idct_xvid_sse2_put; | |||||
| c->idct_add = ff_idct_xvid_sse2_add; | |||||
| c->idct = ff_idct_xvid_sse2; | |||||
| c->idct_permutation_type = FF_SSE2_IDCT_PERM; /* set together with the SSE2 function pointers */ | |||||
| } | |||||
| #endif /* HAVE_SSE2_INLINE */ | |||||
| } | |||||
| av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | ||||
| unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
| { | { | ||||
| @@ -121,12 +42,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | |||||
| if (X86_MMX(cpu_flags)) | if (X86_MMX(cpu_flags)) | ||||
| dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); | dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); | ||||
| if (X86_MMXEXT(cpu_flags)) | |||||
| dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth); | |||||
| if (X86_SSE2(cpu_flags)) | |||||
| dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth); | |||||
| if (CONFIG_ENCODERS) | if (CONFIG_ENCODERS) | ||||
| ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); | ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); | ||||
| } | } | ||||
| @@ -30,141 +30,6 @@ | |||||
| #if HAVE_INLINE_ASM | #if HAVE_INLINE_ASM | ||||
| /* Store 64 int16 coefficients from block as 8 rows of 8 bytes at pixels, rows line_size bytes apart, narrowing each value with packuswb (unsigned saturation). Done as two asm statements of four rows each. */ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size) | |||||
| { | |||||
| const int16_t *p; | |||||
| uint8_t *pix; | |||||
| /* set up the working pointers */ | |||||
| p = block; | |||||
| pix = pixels; | |||||
| /* unrolled loop */ | |||||
| __asm__ volatile ( | |||||
| "movq (%3), %%mm0 \n\t" /* mm0..mm7 = 32 coefficients (64 bytes) read from p */ | |||||
| "movq 8(%3), %%mm1 \n\t" | |||||
| "movq 16(%3), %%mm2 \n\t" | |||||
| "movq 24(%3), %%mm3 \n\t" | |||||
| "movq 32(%3), %%mm4 \n\t" | |||||
| "movq 40(%3), %%mm5 \n\t" | |||||
| "movq 48(%3), %%mm6 \n\t" | |||||
| "movq 56(%3), %%mm7 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" /* each register pair is narrowed into one row of 8 bytes */ | |||||
| "packuswb %%mm3, %%mm2 \n\t" | |||||
| "packuswb %%mm5, %%mm4 \n\t" | |||||
| "packuswb %%mm7, %%mm6 \n\t" | |||||
| "movq %%mm0, (%0) \n\t" /* row 0 */ | |||||
| "movq %%mm2, (%0, %1) \n\t" /* row 1 */ | |||||
| "movq %%mm4, (%0, %1, 2) \n\t" /* row 2 */ | |||||
| "movq %%mm6, (%0, %2) \n\t" /* row 3, operand 2 is line_size * 3 */ | |||||
| :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3), | |||||
| "r" (p) | |||||
| : "memory"); | |||||
| pix += line_size * 4; /* advance to rows 4-7 */ | |||||
| p += 32; /* and to the second half of the coefficients */ | |||||
| // if here would be an exact copy of the code above | |||||
| // compiler would generate some very strange code | |||||
| // thus using "r" | |||||
| __asm__ volatile ( | |||||
| "movq (%3), %%mm0 \n\t" /* same sequence as above, now for rows 4-7 */ | |||||
| "movq 8(%3), %%mm1 \n\t" | |||||
| "movq 16(%3), %%mm2 \n\t" | |||||
| "movq 24(%3), %%mm3 \n\t" | |||||
| "movq 32(%3), %%mm4 \n\t" | |||||
| "movq 40(%3), %%mm5 \n\t" | |||||
| "movq 48(%3), %%mm6 \n\t" | |||||
| "movq 56(%3), %%mm7 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" | |||||
| "packuswb %%mm3, %%mm2 \n\t" | |||||
| "packuswb %%mm5, %%mm4 \n\t" | |||||
| "packuswb %%mm7, %%mm6 \n\t" | |||||
| "movq %%mm0, (%0) \n\t" | |||||
| "movq %%mm2, (%0, %1) \n\t" | |||||
| "movq %%mm4, (%0, %1, 2) \n\t" | |||||
| "movq %%mm6, (%0, %2) \n\t" | |||||
| :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3), | |||||
| "r" (p) | |||||
| : "memory"); | |||||
| } | |||||
| /* Asm fragment that converts the 32 int16 values at byte offset off from operand 2 into four rows of 8 bytes. As used by ff_put_signed_pixels_clamped_mmx: operand 0 = destination, 1 = 3 * stride, 2 = coefficient block, 3 = stride; mm0 must already hold the value added to every byte. */ #define put_signed_pixels_clamped_mmx_half(off) \ | |||||
| "movq "#off"(%2), %%mm1 \n\t" \ | |||||
| "movq 16 + "#off"(%2), %%mm2 \n\t" \ | |||||
| "movq 32 + "#off"(%2), %%mm3 \n\t" \ | |||||
| "movq 48 + "#off"(%2), %%mm4 \n\t" \ | |||||
| "packsswb 8 + "#off"(%2), %%mm1 \n\t" /* narrow with signed saturation, second half of the row taken straight from memory */ \ | |||||
| "packsswb 24 + "#off"(%2), %%mm2 \n\t" \ | |||||
| "packsswb 40 + "#off"(%2), %%mm3 \n\t" \ | |||||
| "packsswb 56 + "#off"(%2), %%mm4 \n\t" \ | |||||
| "paddb %%mm0, %%mm1 \n\t" /* add mm0 to every byte */ \ | |||||
| "paddb %%mm0, %%mm2 \n\t" \ | |||||
| "paddb %%mm0, %%mm3 \n\t" \ | |||||
| "paddb %%mm0, %%mm4 \n\t" \ | |||||
| "movq %%mm1, (%0) \n\t" /* four row stores at stride multiples 0, 1, 2 and 3 */ \ | |||||
| "movq %%mm2, (%0, %3) \n\t" \ | |||||
| "movq %%mm3, (%0, %3, 2) \n\t" \ | |||||
| "movq %%mm4, (%0, %1) \n\t" | |||||
| /* Store 64 int16 coefficients from block as 8 rows of 8 bytes at pixels: each value is narrowed with signed saturation and then has the ff_pb_80 constant added to it. Rows are line_size bytes apart. */ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size) | |||||
| { | |||||
| x86_reg line_skip = line_size; | |||||
| x86_reg line_skip3; /* written inside the asm (3 * line_skip) */ | |||||
| __asm__ volatile ( | |||||
| "movq "MANGLE(ff_pb_80)", %%mm0 \n\t" /* mm0 = ff_pb_80 (presumably 0x80 in every byte; defined outside this view) */ | |||||
| "lea (%3, %3, 2), %1 \n\t" /* line_skip3 = 3 * line_skip */ | |||||
| put_signed_pixels_clamped_mmx_half(0) /* rows 0-3 */ | |||||
| "lea (%0, %3, 4), %0 \n\t" /* advance the destination by four rows */ | |||||
| put_signed_pixels_clamped_mmx_half(64) /* rows 4-7, 64 bytes into block */ | |||||
| : "+&r" (pixels), "=&r" (line_skip3) /* both are modified before all inputs are consumed, hence early-clobber */ | |||||
| : "r" (block), "r" (line_skip) | |||||
| : "memory"); | |||||
| } | |||||
| /* Add an 8x8 block of int16 values to the 8-bit pixels already stored at pixels and write the result back: bytes are widened to words, added with signed saturation, then narrowed with unsigned saturation. Two rows are handled per loop iteration, four iterations in total. */ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size) | |||||
| { | |||||
| const int16_t *p; | |||||
| uint8_t *pix; | |||||
| int i; | |||||
| /* set up the working pointers */ | |||||
| p = block; | |||||
| pix = pixels; | |||||
| MOVQ_ZERO(mm7); /* presumably clears mm7 (macro defined outside this view); NOTE(review): the loop below relies on mm7 keeping this value across separate asm statements */ | |||||
| i = 4; | |||||
| do { | |||||
| __asm__ volatile ( | |||||
| "movq (%2), %%mm0 \n\t" /* mm0..mm3 = 16 coefficients for two rows */ | |||||
| "movq 8(%2), %%mm1 \n\t" | |||||
| "movq 16(%2), %%mm2 \n\t" | |||||
| "movq 24(%2), %%mm3 \n\t" | |||||
| "movq %0, %%mm4 \n\t" /* current pixels of the first row */ | |||||
| "movq %1, %%mm6 \n\t" /* current pixels of the second row */ | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" /* interleave with mm7 to widen bytes to words */ | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddsw %%mm4, %%mm0 \n\t" /* saturating 16-bit add of pixels and coefficients */ | |||||
| "paddsw %%mm5, %%mm1 \n\t" | |||||
| "movq %%mm6, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm6 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddsw %%mm6, %%mm2 \n\t" | |||||
| "paddsw %%mm5, %%mm3 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" /* narrow back to bytes with unsigned saturation */ | |||||
| "packuswb %%mm3, %%mm2 \n\t" | |||||
| "movq %%mm0, %0 \n\t" /* write both rows back in place */ | |||||
| "movq %%mm2, %1 \n\t" | |||||
| : "+m" (*pix), "+m" (*(pix + line_size)) | |||||
| : "r" (p) | |||||
| : "memory"); | |||||
| pix += line_size * 2; /* next pair of rows */ | |||||
| p += 16; | |||||
| } while (--i); | |||||
| } | |||||
| /* Draw the edges of width 'w' of an image of size width, height | /* Draw the edges of width 'w' of an image of size width, height | ||||
| * this MMX version can only handle w == 8 || w == 16. */ | * this MMX version can only handle w == 8 || w == 16. */ | ||||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | ||||
| @@ -31,13 +31,6 @@ void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth); | unsigned high_bit_depth); | ||||
| void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx); | void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx); | ||||
| /* Adds the 8x8 int16 block to the pixels already at pixels and writes back the clamped sum; rows are line_size bytes apart */ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size); | |||||
| /* Writes the 8x8 int16 block to pixels, narrowed with unsigned saturation */ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size); | |||||
| /* Writes the 8x8 int16 block to pixels, narrowed with signed saturation and then offset by the ff_pb_80 constant */ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size); | |||||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | ||||
| int w, int h, int sides); | int w, int h, int sides); | ||||
| @@ -44,8 +44,8 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| #include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
| #include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
| #include "dsputil_x86.h" | |||||
| #include "idct_xvid.h" | #include "idct_xvid.h" | ||||
| #include "idctdsp.h" | |||||
| #if HAVE_MMX_INLINE | #if HAVE_MMX_INLINE | ||||
| @@ -42,7 +42,7 @@ | |||||
| #include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
| #include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
| #include "idct_xvid.h" | #include "idct_xvid.h" | ||||
| #include "dsputil_x86.h" | |||||
| #include "idctdsp.h" | |||||
| #if HAVE_SSE2_INLINE | #if HAVE_SSE2_INLINE | ||||
| @@ -0,0 +1,31 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_X86_IDCTDSP_H | |||||
| #define AVCODEC_X86_IDCTDSP_H | |||||
| #include <stdint.h> | |||||
| /* Adds the 8x8 int16 block to the pixels already at pixels and writes back the clamped sum; rows are line_size bytes apart */ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size); | |||||
| /* Writes the 8x8 int16 block to pixels, narrowed with unsigned saturation */ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size); | |||||
| /* Writes the 8x8 int16 block to pixels, narrowed with signed saturation and then offset by the ff_pb_80 constant */ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size); | |||||
| #endif /* AVCODEC_X86_IDCTDSP_H */ | |||||
| @@ -0,0 +1,106 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/x86/cpu.h" | |||||
| #include "libavcodec/avcodec.h" | |||||
| #include "libavcodec/idctdsp.h" | |||||
| #include "libavcodec/simple_idct.h" | |||||
| #include "idct_xvid.h" | |||||
| #include "idctdsp.h" | |||||
| /* Input permutation for the simple_idct_mmx; copied entry by entry into idct_permutation[] when the type is FF_SIMPLE_IDCT_PERM */ | |||||
| static const uint8_t simple_mmx_permutation[64] = { | |||||
| 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |||||
| 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |||||
| 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |||||
| 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |||||
| 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |||||
| 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |||||
| 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |||||
| 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |||||
| }; | |||||
| /* Remaps the low three bits of an index (i & 7) when the FF_SSE2_IDCT_PERM permutation is built */ static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 }; | |||||
| /* Fill idct_permutation[0..63] for the two permutation types handled here. Returns 1 when idct_permutation_type is FF_SIMPLE_IDCT_PERM or FF_SSE2_IDCT_PERM, otherwise returns 0 and leaves the array untouched. */ av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, | |||||
| int idct_permutation_type) | |||||
| { | |||||
| int i; | |||||
| switch (idct_permutation_type) { | |||||
| case FF_SIMPLE_IDCT_PERM: /* straight copy of the fixed 64-entry table */ | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = simple_mmx_permutation[i]; | |||||
| return 1; | |||||
| case FF_SSE2_IDCT_PERM: /* keep bits 3-5 of i, remap bits 0-2 through idct_sse2_row_perm */ | |||||
| for (i = 0; i < 64; i++) | |||||
| idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7]; | |||||
| return 1; | |||||
| } | |||||
| return 0; /* type not handled by this function */ | |||||
| } | |||||
| /* Install x86 SIMD implementations into c according to the runtime CPU flags. The MMX, MMXEXT and SSE2 checks run in that order, so a later match overwrites the pointers set by an earlier one. */ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, | |||||
| unsigned high_bit_depth) | |||||
| { | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| if (INLINE_MMX(cpu_flags)) { | |||||
| c->put_pixels_clamped = ff_put_pixels_clamped_mmx; /* the three clamp helpers are set regardless of high_bit_depth */ | |||||
| c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; | |||||
| c->add_pixels_clamped = ff_add_pixels_clamped_mmx; | |||||
| if (!high_bit_depth) { /* the IDCT pointers are only replaced when high_bit_depth is zero */ | |||||
| switch (avctx->idct_algo) { /* any idct_algo value without a case below leaves the IDCT pointers as they were */ | |||||
| case FF_IDCT_AUTO: /* AUTO shares the simple MMX IDCT selection below */ | |||||
| case FF_IDCT_SIMPLEMMX: | |||||
| c->idct_put = ff_simple_idct_put_mmx; | |||||
| c->idct_add = ff_simple_idct_add_mmx; | |||||
| c->idct = ff_simple_idct_mmx; | |||||
| c->idct_permutation_type = FF_SIMPLE_IDCT_PERM; | |||||
| break; | |||||
| case FF_IDCT_XVIDMMX: /* idct_permutation_type is not touched for the MMX xvid IDCT */ | |||||
| c->idct_put = ff_idct_xvid_mmx_put; | |||||
| c->idct_add = ff_idct_xvid_mmx_add; | |||||
| c->idct = ff_idct_xvid_mmx; | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (INLINE_MMXEXT(cpu_flags)) { | |||||
| if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { /* only the xvid IDCT has an MMXEXT variant here */ | |||||
| c->idct_put = ff_idct_xvid_mmxext_put; | |||||
| c->idct_add = ff_idct_xvid_mmxext_add; | |||||
| c->idct = ff_idct_xvid_mmxext; | |||||
| } | |||||
| } | |||||
| if (INLINE_SSE2(cpu_flags)) { | |||||
| if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { | |||||
| c->idct_put = ff_idct_xvid_sse2_put; | |||||
| c->idct_add = ff_idct_xvid_sse2_add; | |||||
| c->idct = ff_idct_xvid_sse2; | |||||
| c->idct_permutation_type = FF_SSE2_IDCT_PERM; /* set together with the SSE2 function pointers */ | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,168 @@ | |||||
| /* | |||||
| * SIMD-optimized IDCT-related routines | |||||
| * Copyright (c) 2000, 2001 Fabrice Bellard | |||||
| * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||||
| * | |||||
| * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/x86/asm.h" | |||||
| #include "idctdsp.h" | |||||
| #include "inline_asm.h" | |||||
| #if HAVE_INLINE_ASM | |||||
| /* Store 64 int16 coefficients from block as 8 rows of 8 bytes at pixels, rows line_size bytes apart, narrowing each value with packuswb (unsigned saturation). Done as two asm statements of four rows each. */ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size) | |||||
| { | |||||
| const int16_t *p; | |||||
| uint8_t *pix; | |||||
| /* set up the working pointers */ | |||||
| p = block; | |||||
| pix = pixels; | |||||
| /* unrolled loop */ | |||||
| __asm__ volatile ( | |||||
| "movq (%3), %%mm0 \n\t" /* mm0..mm7 = 32 coefficients (64 bytes) read from p */ | |||||
| "movq 8(%3), %%mm1 \n\t" | |||||
| "movq 16(%3), %%mm2 \n\t" | |||||
| "movq 24(%3), %%mm3 \n\t" | |||||
| "movq 32(%3), %%mm4 \n\t" | |||||
| "movq 40(%3), %%mm5 \n\t" | |||||
| "movq 48(%3), %%mm6 \n\t" | |||||
| "movq 56(%3), %%mm7 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" /* each register pair is narrowed into one row of 8 bytes */ | |||||
| "packuswb %%mm3, %%mm2 \n\t" | |||||
| "packuswb %%mm5, %%mm4 \n\t" | |||||
| "packuswb %%mm7, %%mm6 \n\t" | |||||
| "movq %%mm0, (%0) \n\t" /* row 0 */ | |||||
| "movq %%mm2, (%0, %1) \n\t" /* row 1 */ | |||||
| "movq %%mm4, (%0, %1, 2) \n\t" /* row 2 */ | |||||
| "movq %%mm6, (%0, %2) \n\t" /* row 3, operand 2 is line_size * 3 */ | |||||
| :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3), | |||||
| "r" (p) | |||||
| : "memory"); | |||||
| pix += line_size * 4; /* advance to rows 4-7 */ | |||||
| p += 32; /* and to the second half of the coefficients */ | |||||
| // if here would be an exact copy of the code above | |||||
| // compiler would generate some very strange code | |||||
| // thus using "r" | |||||
| __asm__ volatile ( | |||||
| "movq (%3), %%mm0 \n\t" /* same sequence as above, now for rows 4-7 */ | |||||
| "movq 8(%3), %%mm1 \n\t" | |||||
| "movq 16(%3), %%mm2 \n\t" | |||||
| "movq 24(%3), %%mm3 \n\t" | |||||
| "movq 32(%3), %%mm4 \n\t" | |||||
| "movq 40(%3), %%mm5 \n\t" | |||||
| "movq 48(%3), %%mm6 \n\t" | |||||
| "movq 56(%3), %%mm7 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" | |||||
| "packuswb %%mm3, %%mm2 \n\t" | |||||
| "packuswb %%mm5, %%mm4 \n\t" | |||||
| "packuswb %%mm7, %%mm6 \n\t" | |||||
| "movq %%mm0, (%0) \n\t" | |||||
| "movq %%mm2, (%0, %1) \n\t" | |||||
| "movq %%mm4, (%0, %1, 2) \n\t" | |||||
| "movq %%mm6, (%0, %2) \n\t" | |||||
| :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3), | |||||
| "r" (p) | |||||
| : "memory"); | |||||
| } | |||||
| /* Asm fragment that converts the 32 int16 values at byte offset off from operand 2 into four rows of 8 bytes. As used by ff_put_signed_pixels_clamped_mmx: operand 0 = destination, 1 = 3 * stride, 2 = coefficient block, 3 = stride; mm0 must already hold the value added to every byte. */ #define put_signed_pixels_clamped_mmx_half(off) \ | |||||
| "movq "#off"(%2), %%mm1 \n\t" \ | |||||
| "movq 16 + "#off"(%2), %%mm2 \n\t" \ | |||||
| "movq 32 + "#off"(%2), %%mm3 \n\t" \ | |||||
| "movq 48 + "#off"(%2), %%mm4 \n\t" \ | |||||
| "packsswb 8 + "#off"(%2), %%mm1 \n\t" /* narrow with signed saturation, second half of the row taken straight from memory */ \ | |||||
| "packsswb 24 + "#off"(%2), %%mm2 \n\t" \ | |||||
| "packsswb 40 + "#off"(%2), %%mm3 \n\t" \ | |||||
| "packsswb 56 + "#off"(%2), %%mm4 \n\t" \ | |||||
| "paddb %%mm0, %%mm1 \n\t" /* add mm0 to every byte */ \ | |||||
| "paddb %%mm0, %%mm2 \n\t" \ | |||||
| "paddb %%mm0, %%mm3 \n\t" \ | |||||
| "paddb %%mm0, %%mm4 \n\t" \ | |||||
| "movq %%mm1, (%0) \n\t" /* four row stores at stride multiples 0, 1, 2 and 3 */ \ | |||||
| "movq %%mm2, (%0, %3) \n\t" \ | |||||
| "movq %%mm3, (%0, %3, 2) \n\t" \ | |||||
| "movq %%mm4, (%0, %1) \n\t" | |||||
| /* Store 64 int16 coefficients from block as 8 rows of 8 bytes at pixels: each value is narrowed with signed saturation and then has the ff_pb_80 constant added to it. Rows are line_size bytes apart. */ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size) | |||||
| { | |||||
| x86_reg line_skip = line_size; | |||||
| x86_reg line_skip3; /* written inside the asm (3 * line_skip) */ | |||||
| __asm__ volatile ( | |||||
| "movq "MANGLE(ff_pb_80)", %%mm0 \n\t" /* mm0 = ff_pb_80 (presumably 0x80 in every byte; defined outside this view) */ | |||||
| "lea (%3, %3, 2), %1 \n\t" /* line_skip3 = 3 * line_skip */ | |||||
| put_signed_pixels_clamped_mmx_half(0) /* rows 0-3 */ | |||||
| "lea (%0, %3, 4), %0 \n\t" /* advance the destination by four rows */ | |||||
| put_signed_pixels_clamped_mmx_half(64) /* rows 4-7, 64 bytes into block */ | |||||
| : "+&r" (pixels), "=&r" (line_skip3) /* both are modified before all inputs are consumed, hence early-clobber */ | |||||
| : "r" (block), "r" (line_skip) | |||||
| : "memory"); | |||||
| } | |||||
| /* Add an 8x8 block of int16 values to the 8-bit pixels already stored at pixels and write the result back: bytes are widened to words, added with signed saturation, then narrowed with unsigned saturation. Two rows are handled per loop iteration, four iterations in total. */ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||||
| int line_size) | |||||
| { | |||||
| const int16_t *p; | |||||
| uint8_t *pix; | |||||
| int i; | |||||
| /* set up the working pointers */ | |||||
| p = block; | |||||
| pix = pixels; | |||||
| MOVQ_ZERO(mm7); /* presumably clears mm7 (macro defined outside this view); NOTE(review): the loop below relies on mm7 keeping this value across separate asm statements */ | |||||
| i = 4; | |||||
| do { | |||||
| __asm__ volatile ( | |||||
| "movq (%2), %%mm0 \n\t" /* mm0..mm3 = 16 coefficients for two rows */ | |||||
| "movq 8(%2), %%mm1 \n\t" | |||||
| "movq 16(%2), %%mm2 \n\t" | |||||
| "movq 24(%2), %%mm3 \n\t" | |||||
| "movq %0, %%mm4 \n\t" /* current pixels of the first row */ | |||||
| "movq %1, %%mm6 \n\t" /* current pixels of the second row */ | |||||
| "movq %%mm4, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm4 \n\t" /* interleave with mm7 to widen bytes to words */ | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddsw %%mm4, %%mm0 \n\t" /* saturating 16-bit add of pixels and coefficients */ | |||||
| "paddsw %%mm5, %%mm1 \n\t" | |||||
| "movq %%mm6, %%mm5 \n\t" | |||||
| "punpcklbw %%mm7, %%mm6 \n\t" | |||||
| "punpckhbw %%mm7, %%mm5 \n\t" | |||||
| "paddsw %%mm6, %%mm2 \n\t" | |||||
| "paddsw %%mm5, %%mm3 \n\t" | |||||
| "packuswb %%mm1, %%mm0 \n\t" /* narrow back to bytes with unsigned saturation */ | |||||
| "packuswb %%mm3, %%mm2 \n\t" | |||||
| "movq %%mm0, %0 \n\t" /* write both rows back in place */ | |||||
| "movq %%mm2, %1 \n\t" | |||||
| : "+m" (*pix), "+m" (*(pix + line_size)) | |||||
| : "r" (p) | |||||
| : "memory"); | |||||
| pix += line_size * 2; /* next pair of rows */ | |||||
| p += 16; | |||||
| } while (--i); | |||||
| } | |||||
| #endif /* HAVE_INLINE_ASM */ | |||||
| @@ -229,7 +229,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||||
| if(s->mb_intra) block[0]= level; | if(s->mb_intra) block[0]= level; | ||||
| else block[0]= temp_block[0]; | else block[0]= temp_block[0]; | ||||
| if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){ | |||||
| if (s->idsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM) { | |||||
| if(last_non_zero_p1 <= 1) goto end; | if(last_non_zero_p1 <= 1) goto end; | ||||
| block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; | block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; | ||||
| block[0x20] = temp_block[0x10]; | block[0x20] = temp_block[0x10]; | ||||
| @@ -22,7 +22,7 @@ | |||||
| #include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
| #include "libavutil/x86/cpu.h" | #include "libavutil/x86/cpu.h" | ||||
| #include "libavcodec/dsputil.h" | |||||
| #include "libavcodec/idctdsp.h" | |||||
| #include "libavcodec/proresdsp.h" | #include "libavcodec/proresdsp.h" | ||||
| void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, | void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, | ||||
| @@ -23,7 +23,7 @@ | |||||
| #include "libavutil/internal.h" | #include "libavutil/internal.h" | ||||
| #include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
| #include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
| #include "dsputil_x86.h" | |||||
| #include "idctdsp.h" | |||||
| #if HAVE_INLINE_ASM | #if HAVE_INLINE_ASM | ||||