Signed-off-by: Martin Storsjö <martin@martin.st>
tags/n2.0
| @@ -1360,6 +1360,7 @@ CONFIG_EXTRA=" | |||
| aandcttables | |||
| ac3dsp | |||
| audio_frame_queue | |||
| dsputil | |||
| gcrypt | |||
| golomb | |||
| gplv3 | |||
| @@ -1503,51 +1504,67 @@ log2_deps="!msvcrt" | |||
| # subsystems | |||
| dct_select="rdft" | |||
| error_resilience_select="dsputil" | |||
| lpc_select="dsputil" | |||
| mdct_select="fft" | |||
| rdft_select="fft" | |||
| mpegaudio_select="mpegaudiodsp" | |||
| mpegaudiodsp_select="dct" | |||
| mpegvideo_select="videodsp" | |||
| mpegvideo_select="dsputil videodsp" | |||
| mpegvideoenc_select="mpegvideo" | |||
| # decoders / encoders | |||
| aac_decoder_select="mdct sinewin" | |||
| aac_encoder_select="audio_frame_queue mdct sinewin" | |||
| aac_latm_decoder_select="aac_decoder aac_latm_parser" | |||
| ac3_decoder_select="mdct ac3dsp ac3_parser" | |||
| ac3_encoder_select="mdct ac3dsp" | |||
| ac3_fixed_encoder_select="mdct ac3dsp" | |||
| ac3_decoder_select="mdct ac3dsp ac3_parser dsputil" | |||
| ac3_encoder_select="mdct ac3dsp dsputil" | |||
| ac3_fixed_encoder_select="mdct ac3dsp dsputil" | |||
| alac_encoder_select="lpc" | |||
| als_decoder_select="dsputil" | |||
| amrnb_decoder_select="lsp" | |||
| amrwb_decoder_select="lsp" | |||
| amv_decoder_select="dsputil" | |||
| ape_decoder_select="dsputil" | |||
| asv1_decoder_select="dsputil" | |||
| asv1_encoder_select="dsputil" | |||
| asv2_decoder_select="dsputil" | |||
| asv2_encoder_select="dsputil" | |||
| atrac1_decoder_select="mdct sinewin" | |||
| atrac3_decoder_select="mdct" | |||
| bink_decoder_select="dsputil" | |||
| binkaudio_dct_decoder_select="mdct rdft dct sinewin" | |||
| binkaudio_rdft_decoder_select="mdct rdft sinewin" | |||
| cavs_decoder_select="golomb h264chroma videodsp" | |||
| cavs_decoder_select="dsputil golomb h264chroma videodsp" | |||
| cllc_decoder_select="dsputil" | |||
| comfortnoise_encoder_select="lpc" | |||
| cook_decoder_select="mdct sinewin" | |||
| cook_decoder_select="dsputil mdct sinewin" | |||
| cscd_decoder_select="lzo" | |||
| cscd_decoder_suggest="zlib" | |||
| dca_decoder_select="mdct" | |||
| dnxhd_encoder_select="aandcttables mpegvideoenc" | |||
| dnxhd_decoder_select="dsputil" | |||
| dnxhd_encoder_select="aandcttables dsputil mpegvideoenc" | |||
| dvvideo_decoder_select="dsputil" | |||
| dvvideo_encoder_select="dsputil" | |||
| dxa_decoder_select="zlib" | |||
| eac3_decoder_select="ac3_decoder" | |||
| eac3_encoder_select="ac3_encoder" | |||
| eamad_decoder_select="aandcttables mpegvideo" | |||
| eatgq_decoder_select="aandcttables" | |||
| eamad_decoder_select="aandcttables dsputil mpegvideo" | |||
| eatgq_decoder_select="aandcttables dsputil" | |||
| eatqi_decoder_select="aandcttables error_resilience mpegvideo" | |||
| ffv1_decoder_select="golomb rangecoder" | |||
| ffv1_encoder_select="rangecoder" | |||
| ffvhuff_encoder_select="huffman" | |||
| ffv1_decoder_select="dsputil golomb rangecoder" | |||
| ffv1_encoder_select="dsputil rangecoder" | |||
| ffvhuff_decoder_select="dsputil" | |||
| ffvhuff_encoder_select="dsputil huffman" | |||
| flac_decoder_select="golomb" | |||
| flac_encoder_select="golomb lpc" | |||
| flac_encoder_select="dsputil golomb lpc" | |||
| flashsv_decoder_select="zlib" | |||
| flashsv_encoder_select="zlib" | |||
| flashsv2_decoder_select="zlib" | |||
| flv_decoder_select="h263_decoder" | |||
| flv_encoder_select="h263_encoder" | |||
| fraps_decoder_select="huffman" | |||
| fourxm_decoder_select="dsputil" | |||
| fraps_decoder_select="dsputil huffman" | |||
| h261_decoder_select="error_resilience mpegvideo" | |||
| h261_encoder_select="aandcttables mpegvideoenc" | |||
| h263_decoder_select="error_resilience h263_parser mpegvideo" | |||
| @@ -1556,16 +1573,25 @@ h263i_decoder_select="h263_decoder" | |||
| h263p_encoder_select="h263_encoder" | |||
| h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel videodsp" | |||
| h264_decoder_suggest="error_resilience" | |||
| huffyuv_encoder_select="huffman" | |||
| iac_decoder_select="fft mdct sinewin" | |||
| imc_decoder_select="fft mdct sinewin" | |||
| jpegls_decoder_select="golomb" | |||
| huffyuv_decoder_select="dsputil" | |||
| huffyuv_encoder_select="dsputil huffman" | |||
| iac_decoder_select="dsputil fft mdct sinewin" | |||
| imc_decoder_select="dsputil fft mdct sinewin" | |||
| indeo3_decoder_select="dsputil" | |||
| interplay_video_decoder_select="dsputil" | |||
| jpegls_decoder_select="dsputil golomb" | |||
| jpegls_encoder_select="golomb" | |||
| jv_decoder_select="dsputil" | |||
| lagarith_decoder_select="dsputil" | |||
| ljpeg_encoder_select="aandcttables mpegvideoenc" | |||
| loco_decoder_select="golomb" | |||
| mdec_decoder_select="error_resilience mpegvideo" | |||
| mjpeg_encoder_select="aandcttables mpegvideoenc" | |||
| mlp_decoder_select="mlp_parser" | |||
| mdec_decoder_select="dsputil error_resilience mpegvideo" | |||
| mimic_decoder_select="dsputil" | |||
| mjpeg_decoder_select="dsputil" | |||
| mjpegb_decoder_select="dsputil" | |||
| mjpeg_encoder_select="aandcttables dsputil mpegvideoenc" | |||
| mlp_decoder_select="dsputil mlp_parser" | |||
| motionpixels_decoder_select="dsputil" | |||
| mp1_decoder_select="mpegaudio" | |||
| mp1float_decoder_select="mpegaudio" | |||
| mp2_decoder_select="mpegaudio" | |||
| @@ -1576,8 +1602,8 @@ mp3adufloat_decoder_select="mpegaudio" | |||
| mp3float_decoder_select="mpegaudio" | |||
| mp3on4_decoder_select="mpegaudio" | |||
| mp3on4float_decoder_select="mpegaudio" | |||
| mpc7_decoder_select="mpegaudiodsp" | |||
| mpc8_decoder_select="mpegaudiodsp" | |||
| mpc7_decoder_select="dsputil mpegaudiodsp" | |||
| mpc8_decoder_select="dsputil mpegaudiodsp" | |||
| mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h" | |||
| mpeg_xvmc_decoder_select="mpeg2video_decoder" | |||
| mpeg1video_decoder_select="error_resilience mpegvideo" | |||
| @@ -1592,11 +1618,14 @@ msmpeg4v2_encoder_select="h263_encoder" | |||
| msmpeg4v3_decoder_select="h263_decoder" | |||
| msmpeg4v3_encoder_select="h263_encoder" | |||
| mss2_decoder_select="error_resilience vc1_decoder" | |||
| mxpeg_decoder_select="dsputil" | |||
| nellymoser_decoder_select="mdct sinewin" | |||
| nellymoser_encoder_select="audio_frame_queue mdct sinewin" | |||
| nuv_decoder_select="lzo" | |||
| png_decoder_select="zlib" | |||
| png_encoder_select="zlib" | |||
| nuv_decoder_select="dsputil lzo" | |||
| png_decoder_select="dsputil zlib" | |||
| png_encoder_select="dsputil zlib" | |||
| prores_decoder_select="dsputil" | |||
| prores_encoder_select="dsputil" | |||
| qcelp_decoder_select="lsp" | |||
| qdm2_decoder_select="mdct rdft mpegaudiodsp" | |||
| ra_144_encoder_select="audio_frame_queue lpc" | |||
| @@ -1609,23 +1638,31 @@ rv30_decoder_select="error_resilience golomb h264chroma h264pred h264qpel mpegvi | |||
| rv40_decoder_select="error_resilience golomb h264chroma h264pred h264qpel mpegvideo videodsp" | |||
| shorten_decoder_select="golomb" | |||
| sipr_decoder_select="lsp" | |||
| svq1_encoder_select="aandcttables mpegvideoenc" | |||
| sp5x_decoder_select="dsputil" | |||
| svq1_decoder_select="dsputil" | |||
| svq1_encoder_select="aandcttables dsputil mpegvideoenc" | |||
| svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel mpegvideo videodsp" | |||
| svq3_decoder_suggest="error_resilience zlib" | |||
| tak_decoder_select="dsputil" | |||
| theora_decoder_select="vp3_decoder" | |||
| tiff_decoder_suggest="zlib" | |||
| tiff_encoder_suggest="zlib" | |||
| thp_decoder_select="dsputil" | |||
| truehd_decoder_select="mlp_decoder" | |||
| truemotion2_decoder_select="dsputil" | |||
| truespeech_decoder_select="dsputil" | |||
| tscc_decoder_select="zlib" | |||
| twinvq_decoder_select="mdct lsp sinewin" | |||
| utvideo_encoder_select="huffman" | |||
| utvideo_decoder_select="dsputil" | |||
| utvideo_encoder_select="dsputil huffman" | |||
| vble_decoder_select="dsputil" | |||
| vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel" | |||
| vc1image_decoder_select="vc1_decoder" | |||
| vorbis_decoder_select="mdct" | |||
| vorbis_encoder_select="mdct" | |||
| vp3_decoder_select="vp3dsp videodsp" | |||
| vp5_decoder_select="h264chroma videodsp vp3dsp" | |||
| vp6_decoder_select="h264chroma huffman videodsp vp3dsp" | |||
| vp3_decoder_select="dsputil vp3dsp videodsp" | |||
| vp5_decoder_select="dsputil h264chroma videodsp vp3dsp" | |||
| vp6_decoder_select="dsputil h264chroma huffman videodsp vp3dsp" | |||
| vp6a_decoder_select="vp6_decoder" | |||
| vp6f_decoder_select="vp6_decoder" | |||
| vp8_decoder_select="h264pred videodsp" | |||
| @@ -17,17 +17,13 @@ OBJS = allcodecs.o \ | |||
| bitstream.o \ | |||
| bitstream_filter.o \ | |||
| codec_desc.o \ | |||
| dsputil.o \ | |||
| faanidct.o \ | |||
| fmtconvert.o \ | |||
| imgconvert.o \ | |||
| jrevdct.o \ | |||
| log2_tab.o \ | |||
| mathtables.o \ | |||
| options.o \ | |||
| parser.o \ | |||
| raw.o \ | |||
| simple_idct.o \ | |||
| utils.o \ | |||
| # parts needed for many different codecs | |||
| @@ -36,6 +32,8 @@ OBJS-$(CONFIG_AC3DSP) += ac3dsp.o | |||
| OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o | |||
| OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o | |||
| OBJS-$(CONFIG_DXVA2) += dxva2.o | |||
| OBJS-$(CONFIG_DSPUTIL) += dsputil.o faanidct.o \ | |||
| simple_idct.o jrevdct.o | |||
| OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o | |||
| OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o | |||
| FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o | |||
| @@ -117,7 +117,8 @@ static void avcodec_init(void) | |||
| return; | |||
| initialized = 1; | |||
| ff_dsputil_static_init(); | |||
| if (CONFIG_DSPUTIL) | |||
| ff_dsputil_static_init(); | |||
| } | |||
| int av_codec_is_encoder(const AVCodec *codec) | |||
| @@ -1,4 +1,5 @@ | |||
| OBJS += x86/fmtconvert_init.o | |||
| OBJS += x86/constants.o \ | |||
| x86/fmtconvert_init.o \ | |||
| OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp_init.o | |||
| OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o | |||
| @@ -29,7 +30,7 @@ OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o | |||
| OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o | |||
| OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o | |||
| MMX-OBJS += x86/dsputil_mmx.o \ | |||
| MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | |||
| x86/fdct.o \ | |||
| x86/idct_mmx_xvid.o \ | |||
| x86/idct_sse2_xvid.o \ | |||
| @@ -42,6 +43,10 @@ MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o | |||
| YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o | |||
| YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o | |||
| YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o | |||
| YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \ | |||
| x86/hpeldsp.o \ | |||
| x86/mpeg4qpel.o \ | |||
| x86/qpel.o | |||
| YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o | |||
| YASM-OBJS-$(CONFIG_FFT) += x86/fft.o | |||
| YASM-OBJS-$(CONFIG_H263_DECODER) += x86/h263_loopfilter.o | |||
| @@ -57,7 +62,8 @@ YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ | |||
| YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \ | |||
| x86/h264_intrapred_10bit.o | |||
| YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \ | |||
| x86/h264_qpel_10bit.o | |||
| x86/h264_qpel_10bit.o \ | |||
| x86/qpel.o | |||
| YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | |||
| YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o | |||
| YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o | |||
| @@ -71,9 +77,5 @@ YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o | |||
| YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp.o | |||
| YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o | |||
| YASM-OBJS += x86/dsputil.o \ | |||
| x86/deinterlace.o \ | |||
| YASM-OBJS += x86/deinterlace.o \ | |||
| x86/fmtconvert.o \ | |||
| x86/hpeldsp.o \ | |||
| x86/mpeg4qpel.o \ | |||
| x86/qpel.o \ | |||
| @@ -0,0 +1,42 @@ | |||
| /* | |||
| * MMX/SSE constants used across x86 dsp optimizations. | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/mem.h" | |||
| #include "libavutil/x86/asm.h" // for xmm_reg | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x0001000100010001ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8) = { 0x0008000800080008ULL, 0x0008000800080008ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL }; | |||
| @@ -36,47 +36,22 @@ | |||
| //#include <assert.h> | |||
| /* pixel operations */ | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x0001000100010001ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8) = { 0x0008000800080008ULL, 0x0008000800080008ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL }; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = 0x000F000F000F000FULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_17) = { 0x0011001100110011ULL, 0x0011001100110011ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL }; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = 0x0014001400140014ULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27) = { 0x001B001B001B001BULL, 0x001B001B001B001BULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28) = { 0x001C001C001C001CULL, 0x001C001C001C001CULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL }; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63) = { 0x003F003F003F003FULL, 0x003F003F003F003FULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL }; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4) = { 0x0404040404040404ULL, 0x0404040404040404ULL }; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pb_7) = 0x0707070707070707ULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F) = 0x1F1F1F1F1F1F1F1FULL; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F) = 0x3F3F3F3F3F3F3F3FULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL }; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pb_81) = 0x8181818181818181ULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_A1) = { 0xA1A1A1A1A1A1A1A1ULL, 0xA1A1A1A1A1A1A1A1ULL }; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8) = { 0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL }; | |||
| DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL; | |||
| DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL }; | |||
| DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; | |||
| DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; | |||
| @@ -28,8 +28,6 @@ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "libavutil/x86/asm.h" | |||
| typedef struct xmm_reg { uint64_t a, b; } xmm_reg; | |||
| extern const uint64_t ff_bone; | |||
| extern const uint64_t ff_wtwo; | |||
| @@ -41,12 +39,9 @@ extern const uint64_t ff_pw_15; | |||
| extern const xmm_reg ff_pw_16; | |||
| extern const xmm_reg ff_pw_18; | |||
| extern const uint64_t ff_pw_20; | |||
| extern const xmm_reg ff_pw_27; | |||
| extern const xmm_reg ff_pw_28; | |||
| extern const xmm_reg ff_pw_32; | |||
| extern const uint64_t ff_pw_42; | |||
| extern const uint64_t ff_pw_53; | |||
| extern const xmm_reg ff_pw_63; | |||
| extern const xmm_reg ff_pw_64; | |||
| extern const uint64_t ff_pw_96; | |||
| extern const uint64_t ff_pw_128; | |||
| @@ -58,10 +53,8 @@ extern const uint64_t ff_pb_7; | |||
| extern const uint64_t ff_pb_1F; | |||
| extern const uint64_t ff_pb_3F; | |||
| extern const uint64_t ff_pb_81; | |||
| extern const xmm_reg ff_pb_A1; | |||
| extern const xmm_reg ff_pb_F8; | |||
| extern const uint64_t ff_pb_FC; | |||
| extern const xmm_reg ff_pb_FE; | |||
| extern const double ff_pd_1[2]; | |||
| extern const double ff_pd_2[2]; | |||
| @@ -60,7 +60,7 @@ rnd_rv40_1d_tbl: times 4 dw 0 | |||
| cextern pw_3 | |||
| cextern pw_4 | |||
| cextern pw_8 | |||
| cextern pw_28 | |||
| pw_28: times 8 dw 28 | |||
| cextern pw_32 | |||
| cextern pw_64 | |||
| @@ -28,6 +28,7 @@ | |||
| SECTION_RODATA | |||
| pb_A1: times 16 db 0xA1 | |||
| pb_3_1: times 4 db 3, 1 | |||
| SECTION .text | |||
| @@ -35,7 +36,6 @@ SECTION .text | |||
| cextern pb_0 | |||
| cextern pb_1 | |||
| cextern pb_3 | |||
| cextern pb_A1 | |||
| ; expands to [base],...,[base+7*stride] | |||
| %define PASS8ROWS(base, base3, stride, stride3) \ | |||
| @@ -143,11 +143,15 @@ filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 | |||
| filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9 | |||
| filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11 | |||
| pw_256: times 8 dw 256 | |||
| pw_27: times 8 dw 27 | |||
| pw_63: times 8 dw 63 | |||
| pw_256: times 8 dw 256 | |||
| pw_20091: times 4 dw 20091 | |||
| pw_17734: times 4 dw 17734 | |||
| pb_4: times 16 db 4 | |||
| pb_F8: times 16 db 0xF8 | |||
| pb_FE: times 16 db 0xFE | |||
| pb_27_63: times 8 db 27, 63 | |||
| pb_18_63: times 8 db 18, 63 | |||
| pb_9_63: times 8 db 9, 63 | |||
| @@ -156,15 +160,10 @@ cextern pb_1 | |||
| cextern pw_3 | |||
| cextern pb_3 | |||
| cextern pw_4 | |||
| cextern pb_4 | |||
| cextern pw_9 | |||
| cextern pw_18 | |||
| cextern pw_27 | |||
| cextern pw_63 | |||
| cextern pw_64 | |||
| cextern pb_80 | |||
| cextern pb_F8 | |||
| cextern pb_FE | |||
| SECTION .text | |||
| @@ -24,6 +24,8 @@ | |||
| #include <stdint.h> | |||
| #include "config.h" | |||
| typedef struct xmm_reg { uint64_t a, b; } xmm_reg; | |||
| #if ARCH_X86_64 | |||
| # define OPSIZE "q" | |||
| # define REG_a "rax" | |||