| @@ -1529,6 +1529,7 @@ CONFIG_EXTRA=" | |||||
| aandcttables | aandcttables | ||||
| ac3dsp | ac3dsp | ||||
| audio_frame_queue | audio_frame_queue | ||||
| audiodsp | |||||
| blockdsp | blockdsp | ||||
| cabac | cabac | ||||
| dsputil | dsputil | ||||
| @@ -1713,8 +1714,8 @@ aac_decoder_select="mdct sinewin" | |||||
| aac_encoder_select="audio_frame_queue mdct sinewin" | aac_encoder_select="audio_frame_queue mdct sinewin" | ||||
| aac_latm_decoder_select="aac_decoder aac_latm_parser" | aac_latm_decoder_select="aac_decoder aac_latm_parser" | ||||
| ac3_decoder_select="mdct ac3dsp ac3_parser dsputil" | ac3_decoder_select="mdct ac3dsp ac3_parser dsputil" | ||||
| ac3_encoder_select="mdct ac3dsp dsputil" | |||||
| ac3_fixed_encoder_select="mdct ac3dsp dsputil" | |||||
| ac3_encoder_select="ac3dsp audiodsp dsputil mdct" | |||||
| ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct" | |||||
| aic_decoder_select="dsputil golomb" | aic_decoder_select="dsputil golomb" | ||||
| alac_encoder_select="lpc" | alac_encoder_select="lpc" | ||||
| als_decoder_select="dsputil" | als_decoder_select="dsputil" | ||||
| @@ -1735,7 +1736,7 @@ binkaudio_rdft_decoder_select="mdct rdft sinewin" | |||||
| cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp" | cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp" | ||||
| cllc_decoder_select="dsputil" | cllc_decoder_select="dsputil" | ||||
| comfortnoise_encoder_select="lpc" | comfortnoise_encoder_select="lpc" | ||||
| cook_decoder_select="dsputil mdct sinewin" | |||||
| cook_decoder_select="audiodsp mdct sinewin" | |||||
| cscd_decoder_select="lzo" | cscd_decoder_select="lzo" | ||||
| cscd_decoder_suggest="zlib" | cscd_decoder_suggest="zlib" | ||||
| dca_decoder_select="mdct" | dca_decoder_select="mdct" | ||||
| @@ -1849,7 +1850,7 @@ svq1_decoder_select="hpeldsp" | |||||
| svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc" | svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc" | ||||
| svq3_decoder_select="h264_decoder hpeldsp tpeldsp" | svq3_decoder_select="h264_decoder hpeldsp tpeldsp" | ||||
| svq3_decoder_suggest="zlib" | svq3_decoder_suggest="zlib" | ||||
| tak_decoder_select="dsputil" | |||||
| tak_decoder_select="audiodsp" | |||||
| theora_decoder_select="vp3_decoder" | theora_decoder_select="vp3_decoder" | ||||
| thp_decoder_select="mjpeg_decoder" | thp_decoder_select="mjpeg_decoder" | ||||
| tiff_decoder_suggest="zlib" | tiff_decoder_suggest="zlib" | ||||
| @@ -28,6 +28,7 @@ OBJS = allcodecs.o \ | |||||
| OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o | OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o | ||||
| OBJS-$(CONFIG_AC3DSP) += ac3dsp.o | OBJS-$(CONFIG_AC3DSP) += ac3dsp.o | ||||
| OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o | OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o | ||||
| OBJS-$(CONFIG_AUDIODSP) += audiodsp.o | |||||
| OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o | OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o | ||||
| OBJS-$(CONFIG_CABAC) += cabac.o | OBJS-$(CONFIG_CABAC) += cabac.o | ||||
| OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o | OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o | ||||
| @@ -37,6 +37,7 @@ | |||||
| #include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| #include "audiodsp.h" | |||||
| #include "ac3dsp.h" | #include "ac3dsp.h" | ||||
| #include "ac3.h" | #include "ac3.h" | ||||
| #include "fft.h" | #include "fft.h" | ||||
| @@ -2480,6 +2481,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx) | |||||
| if (ret) | if (ret) | ||||
| goto init_fail; | goto init_fail; | ||||
| ff_audiodsp_init(&s->adsp); | |||||
| ff_dsputil_init(&s->dsp, avctx); | ff_dsputil_init(&s->dsp, avctx); | ||||
| ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT); | ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT); | ||||
| @@ -39,6 +39,7 @@ | |||||
| #include "fft.h" | #include "fft.h" | ||||
| #include "mathops.h" | #include "mathops.h" | ||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| #include "audiodsp.h" | |||||
| #ifndef CONFIG_AC3ENC_FLOAT | #ifndef CONFIG_AC3ENC_FLOAT | ||||
| #define CONFIG_AC3ENC_FLOAT 0 | #define CONFIG_AC3ENC_FLOAT 0 | ||||
| @@ -162,6 +163,7 @@ typedef struct AC3EncodeContext { | |||||
| AVCodecContext *avctx; ///< parent AVCodecContext | AVCodecContext *avctx; ///< parent AVCodecContext | ||||
| PutBitContext pb; ///< bitstream writer context | PutBitContext pb; ///< bitstream writer context | ||||
| DSPContext dsp; | DSPContext dsp; | ||||
| AudioDSPContext adsp; | |||||
| AVFloatDSPContext fdsp; | AVFloatDSPContext fdsp; | ||||
| AC3DSPContext ac3dsp; ///< AC-3 optimized functions | AC3DSPContext ac3dsp; ///< AC-3 optimized functions | ||||
| FFTContext mdct; ///< FFT context for MDCT calculation | FFTContext mdct; ///< FFT context for MDCT calculation | ||||
| @@ -29,6 +29,7 @@ | |||||
| #define FFT_FLOAT 0 | #define FFT_FLOAT 0 | ||||
| #undef CONFIG_AC3ENC_FLOAT | #undef CONFIG_AC3ENC_FLOAT | ||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "audiodsp.h" | |||||
| #include "ac3enc.h" | #include "ac3enc.h" | ||||
| #include "eac3enc.h" | #include "eac3enc.h" | ||||
| @@ -100,9 +101,10 @@ static void scale_coefficients(AC3EncodeContext *s) | |||||
| /* | /* | ||||
| * Clip MDCT coefficients to allowable range. | * Clip MDCT coefficients to allowable range. | ||||
| */ | */ | ||||
| static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len) | |||||
| static void clip_coefficients(AudioDSPContext *adsp, int32_t *coef, | |||||
| unsigned int len) | |||||
| { | { | ||||
| dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); | |||||
| adsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); | |||||
| } | } | ||||
| @@ -28,6 +28,7 @@ | |||||
| #define CONFIG_AC3ENC_FLOAT 1 | #define CONFIG_AC3ENC_FLOAT 1 | ||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "audiodsp.h" | |||||
| #include "ac3enc.h" | #include "ac3enc.h" | ||||
| #include "eac3enc.h" | #include "eac3enc.h" | ||||
| #include "kbdwin.h" | #include "kbdwin.h" | ||||
| @@ -107,9 +108,10 @@ static void scale_coefficients(AC3EncodeContext *s) | |||||
| /* | /* | ||||
| * Clip MDCT coefficients to allowable range. | * Clip MDCT coefficients to allowable range. | ||||
| */ | */ | ||||
| static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len) | |||||
| static void clip_coefficients(AudioDSPContext *adsp, float *coef, | |||||
| unsigned int len) | |||||
| { | { | ||||
| dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); | |||||
| adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); | |||||
| } | } | ||||
| @@ -30,6 +30,8 @@ | |||||
| #include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
| #include "libavutil/internal.h" | #include "libavutil/internal.h" | ||||
| #include "audiodsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "ac3enc.h" | #include "ac3enc.h" | ||||
| #include "eac3enc.h" | #include "eac3enc.h" | ||||
| @@ -40,7 +42,8 @@ static void scale_coefficients(AC3EncodeContext *s); | |||||
| static int normalize_samples(AC3EncodeContext *s); | static int normalize_samples(AC3EncodeContext *s); | ||||
| static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len); | |||||
| static void clip_coefficients(AudioDSPContext *adsp, CoefType *coef, | |||||
| unsigned int len); | |||||
| static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl); | static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl); | ||||
| @@ -161,7 +164,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) | |||||
| } | } | ||||
| /* coefficients must be clipped in order to be encoded */ | /* coefficients must be clipped in order to be encoded */ | ||||
| clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs); | |||||
| clip_coefficients(&s->adsp, cpl_coef, num_cpl_coefs); | |||||
| } | } | ||||
| /* calculate energy in each band in coupling channel and each fbw channel */ | /* calculate energy in each band in coupling channel and each fbw channel */ | ||||
| @@ -412,7 +415,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt, | |||||
| if (s->fixed_point) | if (s->fixed_point) | ||||
| scale_coefficients(s); | scale_coefficients(s); | ||||
| clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1], | |||||
| clip_coefficients(&s->adsp, s->blocks[0].mdct_coef[1], | |||||
| AC3_MAX_COEFS * s->num_blocks * s->channels); | AC3_MAX_COEFS * s->num_blocks * s->channels); | ||||
| s->cpl_on = s->cpl_enabled; | s->cpl_on = s->cpl_enabled; | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "acelp_pitch_delay.h" | #include "acelp_pitch_delay.h" | ||||
| #include "celp_math.h" | #include "celp_math.h" | ||||
| #include "audiodsp.h" | |||||
| int ff_acelp_decode_8bit_to_1st_delay3(int ac_index) | int ff_acelp_decode_8bit_to_1st_delay3(int ac_index) | ||||
| { | { | ||||
| @@ -90,7 +91,7 @@ void ff_acelp_update_past_gain( | |||||
| } | } | ||||
| int16_t ff_acelp_decode_gain_code( | int16_t ff_acelp_decode_gain_code( | ||||
| DSPContext *dsp, | |||||
| AudioDSPContext *adsp, | |||||
| int gain_corr_factor, | int gain_corr_factor, | ||||
| const int16_t* fc_v, | const int16_t* fc_v, | ||||
| int mr_energy, | int mr_energy, | ||||
| @@ -107,7 +108,7 @@ int16_t ff_acelp_decode_gain_code( | |||||
| mr_energy += quant_energy[i] * ma_prediction_coeff[i]; | mr_energy += quant_energy[i] * ma_prediction_coeff[i]; | ||||
| mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) / | mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) / | ||||
| sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size)); | |||||
| sqrt(adsp->scalarproduct_int16(fc_v, fc_v, subframe_size)); | |||||
| return mr_energy >> 12; | return mr_energy >> 12; | ||||
| } | } | ||||
| @@ -24,7 +24,8 @@ | |||||
| #define AVCODEC_ACELP_PITCH_DELAY_H | #define AVCODEC_ACELP_PITCH_DELAY_H | ||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include "dsputil.h" | |||||
| #include "audiodsp.h" | |||||
| #define PITCH_DELAY_MIN 20 | #define PITCH_DELAY_MIN 20 | ||||
| #define PITCH_DELAY_MAX 143 | #define PITCH_DELAY_MAX 143 | ||||
| @@ -139,7 +140,7 @@ void ff_acelp_update_past_gain( | |||||
| /** | /** | ||||
| * @brief Decode the adaptive codebook gain and add | * @brief Decode the adaptive codebook gain and add | ||||
| * correction (4.1.5 and 3.9.1 of G.729). | * correction (4.1.5 and 3.9.1 of G.729). | ||||
| * @param dsp initialized dsputil context | |||||
| * @param adsp initialized audio DSP context | |||||
| * @param gain_corr_factor gain correction factor (2.13) | * @param gain_corr_factor gain correction factor (2.13) | ||||
| * @param fc_v fixed-codebook vector (2.13) | * @param fc_v fixed-codebook vector (2.13) | ||||
| * @param mr_energy mean innovation energy and fixed-point correction (7.13) | * @param mr_energy mean innovation energy and fixed-point correction (7.13) | ||||
| @@ -208,7 +209,7 @@ void ff_acelp_update_past_gain( | |||||
| * @remark The routine is used in G.729 and AMR (all modes). | * @remark The routine is used in G.729 and AMR (all modes). | ||||
| */ | */ | ||||
| int16_t ff_acelp_decode_gain_code( | int16_t ff_acelp_decode_gain_code( | ||||
| DSPContext *dsp, | |||||
| AudioDSPContext *adsp, | |||||
| int gain_corr_factor, | int gain_corr_factor, | ||||
| const int16_t* fc_v, | const int16_t* fc_v, | ||||
| int mr_energy, | int mr_energy, | ||||
| @@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o | |||||
| OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ | OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ | ||||
| arm/ac3dsp_arm.o | arm/ac3dsp_arm.o | ||||
| OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o | |||||
| OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o | OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o | ||||
| OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ | OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ | ||||
| arm/dsputil_arm.o \ | arm/dsputil_arm.o \ | ||||
| @@ -77,11 +78,13 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ | |||||
| NEON-OBJS += arm/fmtconvert_neon.o | NEON-OBJS += arm/fmtconvert_neon.o | ||||
| NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o | NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o | ||||
| NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \ | |||||
| arm/audiodsp_neon.o \ | |||||
| arm/int_neon.o | |||||
| NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ | NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ | ||||
| arm/blockdsp_neon.o | arm/blockdsp_neon.o | ||||
| NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ | NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ | ||||
| arm/dsputil_neon.o \ | arm/dsputil_neon.o \ | ||||
| arm/int_neon.o \ | |||||
| arm/simple_idct_neon.o | arm/simple_idct_neon.o | ||||
| NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ | NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ | ||||
| arm/fft_fixed_neon.o | arm/fft_fixed_neon.o | ||||
| @@ -0,0 +1,26 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_ARM_AUDIODSP_ARM_H | |||||
| #define AVCODEC_ARM_AUDIODSP_ARM_H | |||||
| #include "libavcodec/audiodsp.h" | |||||
| void ff_audiodsp_init_neon(AudioDSPContext *c); | |||||
| #endif /* AVCODEC_ARM_AUDIODSP_ARM_H */ | |||||
| @@ -0,0 +1,33 @@ | |||||
| /* | |||||
| * ARM optimized audio functions | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/arm/cpu.h" | |||||
| #include "libavcodec/audiodsp.h" | |||||
| #include "audiodsp_arm.h" | |||||
| av_cold void ff_audiodsp_init_arm(AudioDSPContext *c) | |||||
| { | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| if (have_neon(cpu_flags)) | |||||
| ff_audiodsp_init_neon(c); | |||||
| } | |||||
| @@ -0,0 +1,41 @@ | |||||
| /* | |||||
| * ARM NEON optimised audio functions | |||||
| * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavcodec/audiodsp.h" | |||||
| #include "audiodsp_arm.h" | |||||
| void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, | |||||
| int len); | |||||
| void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, | |||||
| int32_t max, unsigned int len); | |||||
| int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); | |||||
| av_cold void ff_audiodsp_init_neon(AudioDSPContext *c) | |||||
| { | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_neon; | |||||
| c->vector_clipf = ff_vector_clipf_neon; | |||||
| c->scalarproduct_int16 = ff_scalarproduct_int16_neon; | |||||
| } | |||||
| @@ -0,0 +1,64 @@ | |||||
| /* | |||||
| * ARM NEON optimised audio functions | |||||
| * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/arm/asm.S" | |||||
/*
 * void ff_vector_clipf_neon(float *dst, const float *src,
 *                           float min, float max, int len)
 *
 * Clamp every float read from src (r1) to [min, max] and store the
 * result to dst (r0), eight elements (two q registers) per iteration.
 * All loads and stores carry a :128 alignment hint, so both pointers
 * must be 16-byte aligned.
 * The loop exits only when the counter reaches exactly zero (beq), so
 * len has to be a positive multiple of 8.
 */
| function ff_vector_clipf_neon, export=1 | |||||
/*
 * Two argument-passing variants, selected by the VFP/NOVFP prefixes
 * (macros from asm.S, not shown here; presumably hard-float vs.
 * soft-float calling convention, TODO confirm):
 *   VFP:   min = d0[0], max = d0[1], len already in r2
 *   NOVFP: min = r2,    max = r3,    len fetched from the stack
 * In both cases q0 ends up holding min and q1 holding max in all lanes.
 */
/* q1 must be written first: q0 overlaps d0 and would clobber max. */
| VFP vdup.32 q1, d0[1] | |||||
| VFP vdup.32 q0, d0[0] | |||||
| NOVFP vdup.32 q0, r2 | |||||
| NOVFP vdup.32 q1, r3 | |||||
| NOVFP ldr r2, [sp] | |||||
/* Prologue: load the first 8 floats and apply the upper bound. */
| vld1.f32 {q2},[r1,:128]! | |||||
| vmin.f32 q10, q2, q1 | |||||
| vld1.f32 {q3},[r1,:128]! | |||||
| vmin.f32 q11, q3, q1 | |||||
/* Loop head: apply the lower bound to the pending 8 floats. */
| 1: vmax.f32 q8, q10, q0 | |||||
| vmax.f32 q9, q11, q0 | |||||
/* Count down by 8; when nothing is left, only the final store remains. */
| subs r2, r2, #8 | |||||
| beq 2f | |||||
/* Start on the next 8 floats before storing the finished ones. */
| vld1.f32 {q2},[r1,:128]! | |||||
| vmin.f32 q10, q2, q1 | |||||
| vld1.f32 {q3},[r1,:128]! | |||||
| vmin.f32 q11, q3, q1 | |||||
| vst1.f32 {q8},[r0,:128]! | |||||
| vst1.f32 {q9},[r0,:128]! | |||||
| b 1b | |||||
/* Epilogue: store the last 8 clipped floats and return. */
| 2: vst1.f32 {q8},[r0,:128]! | |||||
| vst1.f32 {q9},[r0,:128]! | |||||
| bx lr | |||||
| endfunc | |||||
/*
 * void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src,
 *                                int32_t min, int32_t max,
 *                                unsigned int len)
 *
 * Clamp every int32_t read from src (r1) to [min, max] and store the
 * result to dst (r0), eight elements per iteration. Loads and stores
 * carry a :128 alignment hint, so both pointers must be 16-byte aligned.
 * At least one iteration always runs, and the loop continues while the
 * remaining count is greater than zero as a signed value (bgt).
 */
| function ff_vector_clip_int32_neon, export=1 | |||||
/* Broadcast min (r2) into q0 and max (r3) into q1. */
| vdup.32 q0, r2 | |||||
| vdup.32 q1, r3 | |||||
/* len is taken from the stack; r2 becomes the loop counter. */
| ldr r2, [sp] | |||||
| 1: | |||||
| vld1.32 {q2-q3}, [r1,:128]! | |||||
/* Apply the upper bound first, then the lower bound. */
| vmin.s32 q2, q2, q1 | |||||
| vmin.s32 q3, q3, q1 | |||||
| vmax.s32 q2, q2, q0 | |||||
| vmax.s32 q3, q3, q0 | |||||
| vst1.32 {q2-q3}, [r0,:128]! | |||||
| subs r2, r2, #8 | |||||
| bgt 1b | |||||
| bx lr | |||||
| endfunc | |||||
| @@ -34,13 +34,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); | |||||
| void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
| void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); | void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); | ||||
| void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, | |||||
| int len); | |||||
| void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, | |||||
| int32_t max, unsigned int len); | |||||
| int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); | |||||
| av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, | av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, | ||||
| unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
| { | { | ||||
| @@ -57,9 +50,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, | |||||
| c->add_pixels_clamped = ff_add_pixels_clamped_neon; | c->add_pixels_clamped = ff_add_pixels_clamped_neon; | ||||
| c->put_pixels_clamped = ff_put_pixels_clamped_neon; | c->put_pixels_clamped = ff_put_pixels_clamped_neon; | ||||
| c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; | c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; | ||||
| c->vector_clipf = ff_vector_clipf_neon; | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_neon; | |||||
| c->scalarproduct_int16 = ff_scalarproduct_int16_neon; | |||||
| } | } | ||||
| @@ -126,45 +126,3 @@ function ff_add_pixels_clamped_neon, export=1 | |||||
| vst1.8 {d6}, [r3,:64], r2 | vst1.8 {d6}, [r3,:64], r2 | ||||
| bx lr | bx lr | ||||
| endfunc | endfunc | ||||
| function ff_vector_clipf_neon, export=1 | |||||
| VFP vdup.32 q1, d0[1] | |||||
| VFP vdup.32 q0, d0[0] | |||||
| NOVFP vdup.32 q0, r2 | |||||
| NOVFP vdup.32 q1, r3 | |||||
| NOVFP ldr r2, [sp] | |||||
| vld1.f32 {q2},[r1,:128]! | |||||
| vmin.f32 q10, q2, q1 | |||||
| vld1.f32 {q3},[r1,:128]! | |||||
| vmin.f32 q11, q3, q1 | |||||
| 1: vmax.f32 q8, q10, q0 | |||||
| vmax.f32 q9, q11, q0 | |||||
| subs r2, r2, #8 | |||||
| beq 2f | |||||
| vld1.f32 {q2},[r1,:128]! | |||||
| vmin.f32 q10, q2, q1 | |||||
| vld1.f32 {q3},[r1,:128]! | |||||
| vmin.f32 q11, q3, q1 | |||||
| vst1.f32 {q8},[r0,:128]! | |||||
| vst1.f32 {q9},[r0,:128]! | |||||
| b 1b | |||||
| 2: vst1.f32 {q8},[r0,:128]! | |||||
| vst1.f32 {q9},[r0,:128]! | |||||
| bx lr | |||||
| endfunc | |||||
| function ff_vector_clip_int32_neon, export=1 | |||||
| vdup.32 q0, r2 | |||||
| vdup.32 q1, r3 | |||||
| ldr r2, [sp] | |||||
| 1: | |||||
| vld1.32 {q2-q3}, [r1,:128]! | |||||
| vmin.s32 q2, q2, q1 | |||||
| vmin.s32 q3, q3, q1 | |||||
| vmax.s32 q2, q2, q0 | |||||
| vmax.s32 q3, q3, q0 | |||||
| vst1.32 {q2-q3}, [r0,:128]! | |||||
| subs r2, r2, #8 | |||||
| bgt 1b | |||||
| bx lr | |||||
| endfunc | |||||
| @@ -0,0 +1,118 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/common.h" | |||||
| #include "audiodsp.h" | |||||
/*
 * Reinterpret the object representation of a float as a uint32_t.
 * Going through a union is well defined in C99 and later, whereas reading
 * a float object through a uint32_t pointer violates the aliasing rules.
 */
static inline uint32_t clipf_float_to_bits(float f)
{
    union { float f; uint32_t u; } pun;

    pun.f = f;
    return pun.u;
}

/* Inverse of clipf_float_to_bits(). */
static inline float clipf_bits_to_float(uint32_t u)
{
    union { float f; uint32_t u; } pun;

    pun.u = u;
    return pun.f;
}

/**
 * Clip one value given as a raw 32-bit float bit pattern.
 *
 * Only valid when the lower bound is negative and the upper bound is
 * positive (the caller checks this), and relies on sign-magnitude ordering
 * of the float bit patterns:
 *  - mini has its sign bit set, so an unsigned "a > mini" is true exactly
 *    for negative values whose magnitude exceeds that of the lower bound;
 *  - flipping the sign bit moves positive values above all negative ones,
 *    so "(a ^ sign) > maxisign" is true exactly for positive values whose
 *    magnitude exceeds the upper bound.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with its sign bit flipped
 * @return bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    else
        return a;
}

/* Float-in/float-out wrapper around clipf_c_one(). */
static inline float clipf_c_one_float(float a, uint32_t mini,
                                      uint32_t maxi, uint32_t maxisign)
{
    return clipf_bits_to_float(clipf_c_one(clipf_float_to_bits(a),
                                           mini, maxi, maxisign));
}

/**
 * Clip a float vector to [*min, *max] using integer comparisons only.
 *
 * Elements are processed in groups of eight, so len is expected to be a
 * multiple of 8; otherwise the last group reads and writes past len.
 * dst and src may point to the same array.
 *
 * @param dst destination array
 * @param src source array
 * @param min pointer to the lower bound, which must be negative
 * @param max pointer to the upper bound, which must be positive
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    int i;
    const uint32_t mini     = clipf_float_to_bits(*min);
    const uint32_t maxi     = clipf_float_to_bits(*max);
    const uint32_t maxisign = maxi ^ (1U << 31);

    for (i = 0; i < len; i += 8) {
        dst[i + 0] = clipf_c_one_float(src[i + 0], mini, maxi, maxisign);
        dst[i + 1] = clipf_c_one_float(src[i + 1], mini, maxi, maxisign);
        dst[i + 2] = clipf_c_one_float(src[i + 2], mini, maxi, maxisign);
        dst[i + 3] = clipf_c_one_float(src[i + 3], mini, maxi, maxisign);
        dst[i + 4] = clipf_c_one_float(src[i + 4], mini, maxi, maxisign);
        dst[i + 5] = clipf_c_one_float(src[i + 5], mini, maxi, maxisign);
        dst[i + 6] = clipf_c_one_float(src[i + 6], mini, maxi, maxisign);
        dst[i + 7] = clipf_c_one_float(src[i + 7], mini, maxi, maxisign);
    }
}
/**
 * Portable implementation of AudioDSPContext.vector_clipf.
 *
 * When the bounds straddle zero, the integer-compare routine is used.
 * Otherwise each element goes through av_clipf(). Either way the data is
 * handled in groups of eight elements, so len is expected to be a
 * multiple of 8.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src,
                           float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
}
/**
 * Portable implementation of AudioDSPContext.scalarproduct_int16.
 *
 * The sum is accumulated in an unsigned integer so that a result exceeding
 * the 32-bit range wraps around instead of triggering signed-overflow
 * undefined behaviour. Each individual product of two int16_t values
 * always fits in an int.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements to multiply; values <= 0 yield 0
 * @return sum of v1[i] * v2[i], reduced modulo 2^32
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    unsigned res = 0;
    int i;

    for (i = 0; i < order; i++)
        res += (unsigned) (v1[i] * v2[i]);

    return (int32_t) res;
}
/**
 * Portable implementation of AudioDSPContext.vector_clip_int32.
 *
 * The main loop is unrolled eight times. Unlike a do/while that counts an
 * unsigned length down by 8, this form cannot wrap around: a length of
 * zero is a no-op, and a length that is not a multiple of 8 is finished
 * element by element instead of running past the end of the arrays.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    for (; len >= 8; len -= 8) {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
    }

    /* Tail: at most seven leftover elements. */
    for (; len > 0; len--)
        *dst++ = av_clip(*src++, min, max);
}
| av_cold void ff_audiodsp_init(AudioDSPContext *c) | |||||
| { | |||||
| c->scalarproduct_int16 = scalarproduct_int16_c; | |||||
| c->vector_clip_int32 = vector_clip_int32_c; | |||||
| c->vector_clipf = vector_clipf_c; | |||||
| if (ARCH_ARM) | |||||
| ff_audiodsp_init_arm(c); | |||||
| if (ARCH_PPC) | |||||
| ff_audiodsp_init_ppc(c); | |||||
| if (ARCH_X86) | |||||
| ff_audiodsp_init_x86(c); | |||||
| } | |||||
| @@ -0,0 +1,59 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_AUDIODSP_H | |||||
| #define AVCODEC_AUDIODSP_H | |||||
| #include <stdint.h> | |||||
/**
 * Table of audio DSP routines.
 * ff_audiodsp_init() fills it with C implementations and then lets the
 * architecture-specific initialisers override individual entries.
 */
| typedef struct AudioDSPContext { | |||||
| /** | |||||
| * Calculate scalar product of two vectors. | |||||
| * @param v1 first vector | |||||
| * @param v2 second vector, 16-byte aligned | |||||
| * @param len length of vectors, should be multiple of 16 | |||||
| * @return sum of v1[i] * v2[i] over the first len elements | |||||
| */ | |||||
| int32_t (*scalarproduct_int16)(const int16_t *v1, | |||||
| const int16_t *v2 /* align 16 */, int len); | |||||
| /** | |||||
| * Clip each element in an array of int32_t to a given minimum and | |||||
| * maximum value. | |||||
| * @param dst destination array | |||||
| * constraints: 16-byte aligned | |||||
| * @param src source array | |||||
| * constraints: 16-byte aligned | |||||
| * @param min minimum value | |||||
| * constraints: must be in the range [-(1 << 24), 1 << 24] | |||||
| * @param max maximum value | |||||
| * constraints: must be in the range [-(1 << 24), 1 << 24] | |||||
| * @param len number of elements in the array | |||||
| * constraints: multiple of 32 greater than zero | |||||
| */ | |||||
| void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, | |||||
| int32_t max, unsigned int len); | |||||
/**
 * Clip each element in an array of float to the range [min, max].
 * NOTE(review): the comment below asks for len to be a multiple of 8,
 * while the len parameter itself is annotated "align 16". Confirm which
 * constraint the optimized implementations really need and make the two
 * agree.
 */
| /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | |||||
| void (*vector_clipf)(float *dst /* align 16 */, | |||||
| const float *src /* align 16 */, | |||||
| float min, float max, int len /* align 16 */); | |||||
| } AudioDSPContext; | |||||
| void ff_audiodsp_init(AudioDSPContext *c); | |||||
| void ff_audiodsp_init_arm(AudioDSPContext *c); | |||||
| void ff_audiodsp_init_ppc(AudioDSPContext *c); | |||||
| void ff_audiodsp_init_x86(AudioDSPContext *c); | |||||
| #endif /* AVCODEC_AUDIODSP_H */ | |||||
| @@ -44,9 +44,10 @@ | |||||
| #include "libavutil/channel_layout.h" | #include "libavutil/channel_layout.h" | ||||
| #include "libavutil/lfg.h" | #include "libavutil/lfg.h" | ||||
| #include "audiodsp.h" | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "dsputil.h" | |||||
| #include "bytestream.h" | #include "bytestream.h" | ||||
| #include "fft.h" | #include "fft.h" | ||||
| #include "internal.h" | #include "internal.h" | ||||
| @@ -122,7 +123,7 @@ typedef struct cook { | |||||
| void (*saturate_output)(struct cook *q, float *out); | void (*saturate_output)(struct cook *q, float *out); | ||||
| AVCodecContext* avctx; | AVCodecContext* avctx; | ||||
| DSPContext dsp; | |||||
| AudioDSPContext adsp; | |||||
| GetBitContext gb; | GetBitContext gb; | ||||
| /* stream data */ | /* stream data */ | ||||
| int num_vectors; | int num_vectors; | ||||
| @@ -865,8 +866,8 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p, | |||||
| */ | */ | ||||
| static void saturate_output_float(COOKContext *q, float *out) | static void saturate_output_float(COOKContext *q, float *out) | ||||
| { | { | ||||
| q->dsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, | |||||
| -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); | |||||
| q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, | |||||
| -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); | |||||
| } | } | ||||
| @@ -1065,7 +1066,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx) | |||||
| /* Initialize RNG. */ | /* Initialize RNG. */ | ||||
| av_lfg_init(&q->random_state, 0); | av_lfg_init(&q->random_state, 0); | ||||
| ff_dsputil_init(&q->dsp, avctx); | |||||
| ff_audiodsp_init(&q->adsp); | |||||
| while (edata_ptr < edata_ptr_end) { | while (edata_ptr < edata_ptr_end) { | ||||
| /* 8 for mono, 16 for stereo, ? for multichannel | /* 8 for mono, 16 for stereo, ? for multichannel | ||||
| @@ -1267,87 +1267,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | |||||
| WRAPPER8_16_SQ(rd8x8_c, rd16_c) | WRAPPER8_16_SQ(rd8x8_c, rd16_c) | ||||
| WRAPPER8_16_SQ(bit8x8_c, bit16_c) | WRAPPER8_16_SQ(bit8x8_c, bit16_c) | ||||
/**
 * Clip a single float that is handled as its raw 32-bit pattern, using only
 * unsigned integer comparisons.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the lower bound
 * @param maxi     bit pattern of the upper bound
 * @param maxisign maxi with bit 31 (the sign bit) flipped
 * @return mini when a compares above mini as an unsigned integer, otherwise
 *         maxi when a with bit 31 flipped compares above maxisign,
 *         otherwise a unchanged
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    const uint32_t sign_bit = 1U << 31;

    if (a > mini)
        return mini;

    return ((a ^ sign_bit) > maxisign) ? maxi : a;
}
/**
 * Clip an array of floats to [*min, *max] by comparing the raw 32-bit
 * patterns as unsigned integers (see clipf_c_one()) instead of comparing
 * floats.
 *
 * The bit patterns are obtained through a union rather than by casting
 * float pointers to uint32_t pointers, so no object is accessed through an
 * incompatible pointer type.
 *
 * @param dst destination array
 * @param src source array
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements; they are processed in groups of 8, so any
 *            remainder of a partial group is processed as a full group
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    union { float f; uint32_t u; } lo, hi, v;
    uint32_t hi_flipped;
    int i, j;

    lo.f       = *min;
    hi.f       = *max;
    hi_flipped = hi.u ^ (1U << 31);

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++) {
            v.f        = src[i + j];
            v.u        = clipf_c_one(v.u, lo.u, hi.u, hi_flipped);
            dst[i + j] = v.f;
        }
    }
}
/**
 * Clip an array of floats to [min, max].
 *
 * When min is negative and max is positive the work is handed to
 * vector_clipf_c_opposite_sign(); otherwise every element goes through
 * av_clipf(). Elements are processed in groups of 8.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src,
                           float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
}
/**
 * Calculate the scalar product of two int16_t vectors.
 *
 * The sum is accumulated in an unsigned integer: a total that does not fit
 * into an int wraps around instead of triggering signed-overflow undefined
 * behaviour. Each individual product of two int16_t values always fits
 * into an int.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements to multiply and add; must not be negative
 * @return the sum of v1[i] * v2[i], reduced modulo 2^32
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    unsigned res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Clip each element of an int32_t array to [min, max] with av_clip().
 *
 * Elements are handled in groups of 8 and the first group is processed
 * before len is tested. len is unsigned and is decreased by 8 per group,
 * so it has to be a non-zero multiple of 8 for the loop to stop at the end
 * of the array.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int k;

        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);

        dst += 8;
        src += 8;
        len -= 8;
    } while (len != 0);
}
| static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block) | static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block) | ||||
| { | { | ||||
| ff_j_rev_dct(block); | ff_j_rev_dct(block); | ||||
| @@ -1502,10 +1421,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
| c->try_8x8basis = try_8x8basis_c; | c->try_8x8basis = try_8x8basis_c; | ||||
| c->add_8x8basis = add_8x8basis_c; | c->add_8x8basis = add_8x8basis_c; | ||||
| c->scalarproduct_int16 = scalarproduct_int16_c; | |||||
| c->vector_clip_int32 = vector_clip_int32_c; | |||||
| c->vector_clipf = vector_clipf_c; | |||||
| c->shrink[0] = av_image_copy_plane; | c->shrink[0] = av_image_copy_plane; | ||||
| c->shrink[1] = ff_shrink22; | c->shrink[1] = ff_shrink22; | ||||
| c->shrink[2] = ff_shrink44; | c->shrink[2] = ff_shrink44; | ||||
| @@ -125,11 +125,6 @@ typedef struct DSPContext { | |||||
| void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); | void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); | ||||
| void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); | void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); | ||||
| /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | |||||
| void (*vector_clipf)(float *dst /* align 16 */, | |||||
| const float *src /* align 16 */, | |||||
| float min, float max, int len /* align 16 */); | |||||
| /* (I)DCT */ | /* (I)DCT */ | ||||
| void (*fdct)(int16_t *block /* align 16 */); | void (*fdct)(int16_t *block /* align 16 */); | ||||
| void (*fdct248)(int16_t *block /* align 16 */); | void (*fdct248)(int16_t *block /* align 16 */); | ||||
| @@ -189,30 +184,6 @@ typedef struct DSPContext { | |||||
| void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, | void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, | ||||
| int src_wrap, int width, int height); | int src_wrap, int width, int height); | ||||
| /** | |||||
| * Calculate scalar product of two vectors. | |||||
| * @param len length of vectors, should be multiple of 16 | |||||
| */ | |||||
| int32_t (*scalarproduct_int16)(const int16_t *v1, | |||||
| const int16_t *v2 /* align 16 */, int len); | |||||
| /** | |||||
| * Clip each element in an array of int32_t to a given minimum and | |||||
| * maximum value. | |||||
| * @param dst destination array | |||||
| * constraints: 16-byte aligned | |||||
| * @param src source array | |||||
| * constraints: 16-byte aligned | |||||
| * @param min minimum value | |||||
| * constraints: must be in the range [-(1 << 24), 1 << 24] | |||||
| * @param max maximum value | |||||
| * constraints: must be in the range [-(1 << 24), 1 << 24] | |||||
| * @param len number of elements in the array | |||||
| * constraints: multiple of 32 greater than zero | |||||
| */ | |||||
| void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, | |||||
| int32_t max, unsigned int len); | |||||
| } DSPContext; | } DSPContext; | ||||
| void ff_dsputil_static_init(void); | void ff_dsputil_static_init(void); | ||||
| @@ -1,5 +1,6 @@ | |||||
| OBJS += ppc/fmtconvert_altivec.o \ | OBJS += ppc/fmtconvert_altivec.o \ | ||||
| OBJS-$(CONFIG_AUDIODSP) += ppc/audiodsp.o | |||||
| OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o | OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o | ||||
| OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o | OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o | ||||
| OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o | OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o | ||||
| @@ -24,7 +25,6 @@ ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ | |||||
| ppc/fdct_altivec.o \ | ppc/fdct_altivec.o \ | ||||
| ppc/gmc_altivec.o \ | ppc/gmc_altivec.o \ | ||||
| ppc/idct_altivec.o \ | ppc/idct_altivec.o \ | ||||
| ppc/int_altivec.o \ | |||||
| FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o | FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o | ||||
| ALTIVEC-OBJS-$(CONFIG_FFT) += $(FFT-OBJS-yes) | ALTIVEC-OBJS-$(CONFIG_FFT) += $(FFT-OBJS-yes) | ||||
| @@ -20,7 +20,7 @@ | |||||
| /** | /** | ||||
| * @file | * @file | ||||
| * miscellaneous integer operations | |||||
| * miscellaneous audio operations | |||||
| */ | */ | ||||
| #include "config.h" | #include "config.h" | ||||
| @@ -29,10 +29,13 @@ | |||||
| #endif | #endif | ||||
| #include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/ppc/cpu.h" | |||||
| #include "libavutil/ppc/types_altivec.h" | #include "libavutil/ppc/types_altivec.h" | ||||
| #include "libavutil/ppc/util_altivec.h" | #include "libavutil/ppc/util_altivec.h" | ||||
| #include "libavcodec/dsputil.h" | |||||
| #include "dsputil_altivec.h" | |||||
| #include "libavcodec/audiodsp.h" | |||||
| #if HAVE_ALTIVEC | |||||
| static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, | static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, | ||||
| int order) | int order) | ||||
| @@ -56,7 +59,14 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, | |||||
| return ires; | return ires; | ||||
| } | } | ||||
| av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx) | |||||
| #endif /* HAVE_ALTIVEC */ | |||||
| av_cold void ff_audiodsp_init_ppc(AudioDSPContext *c) | |||||
| { | { | ||||
| #if HAVE_ALTIVEC | |||||
| if (!PPC_ALTIVEC(av_get_cpu_flags())) | |||||
| return; | |||||
| c->scalarproduct_int16 = scalarproduct_int16_altivec; | c->scalarproduct_int16 = scalarproduct_int16_altivec; | ||||
| #endif /* HAVE_ALTIVEC */ | |||||
| } | } | ||||
| @@ -35,6 +35,5 @@ void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |||||
| void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | ||||
| unsigned high_bit_depth); | unsigned high_bit_depth); | ||||
| void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx); | |||||
| #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ | #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ | ||||
| @@ -34,7 +34,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, | |||||
| { | { | ||||
| if (PPC_ALTIVEC(av_get_cpu_flags())) { | if (PPC_ALTIVEC(av_get_cpu_flags())) { | ||||
| ff_dsputil_init_altivec(c, avctx, high_bit_depth); | ff_dsputil_init_altivec(c, avctx, high_bit_depth); | ||||
| ff_int_init_altivec(c, avctx); | |||||
| c->gmc1 = ff_gmc1_altivec; | c->gmc1 = ff_gmc1_altivec; | ||||
| if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
| @@ -28,8 +28,8 @@ | |||||
| #include "libavutil/internal.h" | #include "libavutil/internal.h" | ||||
| #include "libavutil/samplefmt.h" | #include "libavutil/samplefmt.h" | ||||
| #include "tak.h" | #include "tak.h" | ||||
| #include "audiodsp.h" | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "unary.h" | #include "unary.h" | ||||
| @@ -45,7 +45,7 @@ typedef struct MCDParam { | |||||
| typedef struct TAKDecContext { | typedef struct TAKDecContext { | ||||
| AVCodecContext *avctx; // parent AVCodecContext | AVCodecContext *avctx; // parent AVCodecContext | ||||
| DSPContext dsp; | |||||
| AudioDSPContext adsp; | |||||
| TAKStreamInfo ti; | TAKStreamInfo ti; | ||||
| GetBitContext gb; // bitstream reader initialized to start at the current frame | GetBitContext gb; // bitstream reader initialized to start at the current frame | ||||
| @@ -172,7 +172,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx) | |||||
| { | { | ||||
| TAKDecContext *s = avctx->priv_data; | TAKDecContext *s = avctx->priv_data; | ||||
| ff_dsputil_init(&s->dsp, avctx); | |||||
| ff_audiodsp_init(&s->adsp); | |||||
| s->avctx = avctx; | s->avctx = avctx; | ||||
| @@ -484,8 +484,8 @@ static int decode_subframe(TAKDecContext *s, int32_t *decoded, | |||||
| for (i = 0; i < subframe_size - filter_order; i++) { | for (i = 0; i < subframe_size - filter_order; i++) { | ||||
| int v = 1 << (filter_quant - 1); | int v = 1 << (filter_quant - 1); | ||||
| v += s->dsp.scalarproduct_int16(&s->residues[i], filter, | |||||
| FFALIGN(filter_order, 16)); | |||||
| v += s->adsp.scalarproduct_int16(&s->residues[i], filter, | |||||
| FFALIGN(filter_order, 16)); | |||||
| v = (av_clip(v >> filter_quant, -8192, 8191) << dshift) - *decoded; | v = (av_clip(v >> filter_quant, -8192, 8191) << dshift) - *decoded; | ||||
| *decoded++ = v; | *decoded++ = v; | ||||
| @@ -654,8 +654,8 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length) | |||||
| for (i = 0; i < length2; i++) { | for (i = 0; i < length2; i++) { | ||||
| int v = 1 << 9; | int v = 1 << 9; | ||||
| v += s->dsp.scalarproduct_int16(&s->residues[i], filter, | |||||
| FFALIGN(filter_order, 16)); | |||||
| v += s->adsp.scalarproduct_int16(&s->residues[i], filter, | |||||
| FFALIGN(filter_order, 16)); | |||||
| p1[i] = (av_clip(v >> 10, -8192, 8191) << dshift) - p1[i]; | p1[i] = (av_clip(v >> 10, -8192, 8191) << dshift) - p1[i]; | ||||
| } | } | ||||
| @@ -2,6 +2,7 @@ OBJS += x86/constants.o \ | |||||
| x86/fmtconvert_init.o \ | x86/fmtconvert_init.o \ | ||||
| OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o | OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o | ||||
| OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o | |||||
| OBJS-$(CONFIG_DCT) += x86/dct_init.o | OBJS-$(CONFIG_DCT) += x86/dct_init.o | ||||
| OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o | OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o | ||||
| OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \ | OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \ | ||||
| @@ -44,6 +45,7 @@ OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp_init.o | |||||
| OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o | OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o | ||||
| OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | ||||
| MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o | |||||
| MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o | MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o | ||||
| MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | ||||
| x86/idct_mmx_xvid.o \ | x86/idct_mmx_xvid.o \ | ||||
| @@ -61,6 +63,7 @@ YASM-OBJS += x86/deinterlace.o \ | |||||
| x86/fmtconvert.o \ | x86/fmtconvert.o \ | ||||
| YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o | YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o | ||||
| YASM-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp.o | |||||
| YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o | YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o | ||||
| YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o | YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o | ||||
| YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o | YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o | ||||
| @@ -0,0 +1,137 @@ | |||||
| ;****************************************************************************** | |||||
| ;* optimized audio functions | |||||
| ;* Copyright (c) 2008 Loren Merritt | |||||
| ;* | |||||
| ;* This file is part of Libav. | |||||
| ;* | |||||
| ;* Libav is free software; you can redistribute it and/or | |||||
| ;* modify it under the terms of the GNU Lesser General Public | |||||
| ;* License as published by the Free Software Foundation; either | |||||
| ;* version 2.1 of the License, or (at your option) any later version. | |||||
| ;* | |||||
| ;* Libav is distributed in the hope that it will be useful, | |||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| ;* Lesser General Public License for more details. | |||||
| ;* | |||||
| ;* You should have received a copy of the GNU Lesser General Public | |||||
| ;* License along with Libav; if not, write to the Free Software | |||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| ;****************************************************************************** | |||||
| %include "libavutil/x86/x86util.asm" | |||||
| SECTION_TEXT | |||||
; Scalar product of two int16_t vectors. The macro is expanded once per
; instruction set selected with INIT_MMX / INIT_XMM below.
;
; The element count is turned into a byte count, both pointers are moved to
; the end of their vectors and the loop walks a negative byte offset up to
; zero, consuming 2*mmsize bytes (mmsize int16_t elements) per iteration.
; The loop body runs before the first test, so order has to be a positive
; multiple of mmsize elements.
%macro SCALARPRODUCT 0
; int32_t ff_scalarproduct_int16(const int16_t *v1, const int16_t *v2, int order)
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
    shl orderq, 1                       ; elements -> bytes (2 bytes per int16_t)
    add v1q, orderq                     ; v1 -> one past its last element
    add v2q, orderq                     ; v2 -> one past its last element
    neg orderq                          ; offset = -bytes, counts up towards 0
    pxor m2, m2                         ; m2 = accumulator of 32-bit partial sums
.loop:
    movu m0, [v1q + orderq]             ; v1 is read with unaligned loads
    movu m1, [v1q + orderq + mmsize]
    pmaddwd m0, [v2q + orderq]          ; v2 is used directly as a memory operand
                                        ; NOTE(review): presumably requires v2 to be
                                        ; mmsize-aligned for SSE2 - confirm with callers
    pmaddwd m1, [v2q + orderq + mmsize]
    paddd m2, m0
    paddd m2, m1
    add orderq, mmsize*2
    jl .loop                            ; repeat while the offset is still negative
%if mmsize == 16
    movhlps m0, m2                      ; bring the upper 64 bits down
    paddd m2, m0
    pshuflw m0, m2, 0x4e                ; swap the two low dwords
%else
    pshufw m0, m2, 0x4e                 ; swap the two dwords
%endif
    paddd m2, m0                        ; low dword = sum of all partial sums
    movd eax, m2                        ; return value
    RET
%endmacro

INIT_MMX mmxext
SCALARPRODUCT
INIT_XMM sse2
SCALARPRODUCT
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
;                           int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
; Clip every element of src to [min, max] and store the result in dst.
; All loads and stores use mova. One loop iteration is executed before len
; is tested, and each iteration handles mmsize*(%2+%3) elements.
;
; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
;
; %%i is the offset, in units of mmsize, of the chunk group handled by the
; current unrolled pass. It starts at 0 and advances by the number of chunks
; one pass covers (4, or 8 when %3=1), so consecutive passes work on
; consecutive, non-overlapping data. Using it as a multiplier instead would
; make a second pass (%2=2) revisit chunks 0, 2, 4, 6 and skip chunks 5 and 7.
;
; The pointer/length step mmsize*4*(%2+%3) equals the amount of data really
; processed only when %2 == 1 or %3 == 0; every instantiation below satisfies
; that.
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%if %4
    cvtsi2ss m4, minm                   ; bounds converted to float for CLIPD_SSE2
    cvtsi2ss m5, maxm
%else
    movd m4, minm
    movd m5, maxm
%endif
    SPLATD m4                           ; broadcast min to all lanes
    SPLATD m5                           ; broadcast max to all lanes
.loop:
%assign %%i 0
%rep %2
    mova m0, [srcq+mmsize*(0+%%i)]
    mova m1, [srcq+mmsize*(1+%%i)]
    mova m2, [srcq+mmsize*(2+%%i)]
    mova m3, [srcq+mmsize*(3+%%i)]
%if %3
    mova m7, [srcq+mmsize*(4+%%i)]
    mova m8, [srcq+mmsize*(5+%%i)]
    mova m9, [srcq+mmsize*(6+%%i)]
    mova m10, [srcq+mmsize*(7+%%i)]
%endif
    CLIPD m0, m4, m5, m6
    CLIPD m1, m4, m5, m6
    CLIPD m2, m4, m5, m6
    CLIPD m3, m4, m5, m6
%if %3
    CLIPD m7, m4, m5, m6
    CLIPD m8, m4, m5, m6
    CLIPD m9, m4, m5, m6
    CLIPD m10, m4, m5, m6
%endif
    mova [dstq+mmsize*(0+%%i)], m0
    mova [dstq+mmsize*(1+%%i)], m1
    mova [dstq+mmsize*(2+%%i)], m2
    mova [dstq+mmsize*(3+%%i)], m3
%if %3
    mova [dstq+mmsize*(4+%%i)], m7
    mova [dstq+mmsize*(5+%%i)], m8
    mova [dstq+mmsize*(6+%%i)], m9
    mova [dstq+mmsize*(7+%%i)], m10
%endif
%assign %%i %%i+4*(1+%3)
%endrep
    add srcq, mmsize*4*(%2+%3)
    add dstq, mmsize*4*(%2+%3)
    sub lend, mmsize*(%2+%3)
    jg .loop
    REP_RET
%endmacro

INIT_MMX mmx
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM sse2
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
%define CLIPD CLIPD_SSE41
%ifdef m8
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
| @@ -0,0 +1,25 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_X86_AUDIODSP_H | |||||
| #define AVCODEC_X86_AUDIODSP_H | |||||
| void ff_vector_clipf_sse(float *dst, const float *src, | |||||
| float min, float max, int len); | |||||
| #endif /* AVCODEC_X86_AUDIODSP_H */ | |||||
| @@ -0,0 +1,66 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "config.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/x86/asm.h" | |||||
| #include "libavutil/x86/cpu.h" | |||||
| #include "libavcodec/audiodsp.h" | |||||
| #include "audiodsp.h" | |||||
| int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, | |||||
| int order); | |||||
| int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, | |||||
| int order); | |||||
| void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, | |||||
| int32_t min, int32_t max, unsigned int len); | |||||
| void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, | |||||
| int32_t min, int32_t max, unsigned int len); | |||||
| void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, | |||||
| int32_t min, int32_t max, unsigned int len); | |||||
| void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, | |||||
| int32_t min, int32_t max, unsigned int len); | |||||
| av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) | |||||
| { | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| if (EXTERNAL_MMX(cpu_flags)) | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_mmx; | |||||
| if (EXTERNAL_MMXEXT(cpu_flags)) | |||||
| c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; | |||||
| if (INLINE_SSE(cpu_flags)) | |||||
| c->vector_clipf = ff_vector_clipf_sse; | |||||
| if (EXTERNAL_SSE2(cpu_flags)) { | |||||
| c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; | |||||
| if (cpu_flags & AV_CPU_FLAG_ATOM) | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; | |||||
| else | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_sse2; | |||||
| } | |||||
| if (EXTERNAL_SSE4(cpu_flags)) | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_sse4; | |||||
| } | |||||
| @@ -0,0 +1,58 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/x86/asm.h" | |||||
| #include "audiodsp.h" | |||||
| #if HAVE_INLINE_ASM | |||||
/**
 * Clip len floats from src to [min, max] and store them in dst, using SSE
 * inline assembly.
 *
 * The data is walked from the end towards the start in groups of 16 floats
 * (64 bytes), and the loop body runs once before the offset is tested.
 * len therefore has to be a multiple of 16 and at least 16. src and dst are
 * accessed with movaps.
 *
 * NOTE(review): xmm0-xmm5 are written by the asm statement but only
 * "memory" is listed as a clobber - confirm this is intended.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
void ff_vector_clipf_sse(float *dst, const float *src,
                         float min, float max, int len)
{
    /* byte offset of the last group of 16 floats */
    x86_reg i = (len - 16) * 4;
    __asm__ volatile (
        "movss %3, %%xmm4 \n\t"             // xmm4 = min
        "movss %4, %%xmm5 \n\t"             // xmm5 = max
        "shufps $0, %%xmm4, %%xmm4 \n\t"    // broadcast min to all four lanes
        "shufps $0, %%xmm5, %%xmm5 \n\t"    // broadcast max to all four lanes
        "1: \n\t"
        "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
        "movaps 16(%2, %0), %%xmm1 \n\t"
        "movaps 32(%2, %0), %%xmm2 \n\t"
        "movaps 48(%2, %0), %%xmm3 \n\t"
        "maxps %%xmm4, %%xmm0 \n\t"         // raise values below min
        "maxps %%xmm4, %%xmm1 \n\t"
        "maxps %%xmm4, %%xmm2 \n\t"
        "maxps %%xmm4, %%xmm3 \n\t"
        "minps %%xmm5, %%xmm0 \n\t"         // lower values above max
        "minps %%xmm5, %%xmm1 \n\t"
        "minps %%xmm5, %%xmm2 \n\t"
        "minps %%xmm5, %%xmm3 \n\t"
        "movaps %%xmm0, (%1, %0) \n\t"
        "movaps %%xmm1, 16(%1, %0) \n\t"
        "movaps %%xmm2, 32(%1, %0) \n\t"
        "movaps %%xmm3, 48(%1, %0) \n\t"
        "sub $64, %0 \n\t"                  // step back one group of 16 floats
        "jge 1b \n\t"                       // continue while the offset is >= 0
        : "+&r" (i)
        : "r" (dst), "r" (src), "m" (min), "m" (max)
        : "memory");
}
| #endif /* HAVE_INLINE_ASM */ | |||||
| @@ -26,119 +26,6 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | |||||
| SECTION_TEXT | SECTION_TEXT | ||||
; Scalar product of two int16_t vectors. The macro is expanded once per
; instruction set selected with INIT_MMX / INIT_XMM below.
;
; The element count is turned into a byte count, both pointers are moved to
; the end of their vectors and the loop walks a negative byte offset up to
; zero, consuming 2*mmsize bytes (mmsize int16_t elements) per iteration.
; The loop body runs before the first test, so order has to be a positive
; multiple of mmsize elements.
%macro SCALARPRODUCT 0
; int32_t ff_scalarproduct_int16(const int16_t *v1, const int16_t *v2, int order)
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
    shl orderq, 1                       ; elements -> bytes (2 bytes per int16_t)
    add v1q, orderq                     ; v1 -> one past its last element
    add v2q, orderq                     ; v2 -> one past its last element
    neg orderq                          ; offset = -bytes, counts up towards 0
    pxor m2, m2                         ; m2 = accumulator of 32-bit partial sums
.loop:
    movu m0, [v1q + orderq]             ; v1 is read with unaligned loads
    movu m1, [v1q + orderq + mmsize]
    pmaddwd m0, [v2q + orderq]          ; v2 is used directly as a memory operand
                                        ; NOTE(review): presumably requires v2 to be
                                        ; mmsize-aligned for SSE2 - confirm with callers
    pmaddwd m1, [v2q + orderq + mmsize]
    paddd m2, m0
    paddd m2, m1
    add orderq, mmsize*2
    jl .loop                            ; repeat while the offset is still negative
%if mmsize == 16
    movhlps m0, m2                      ; bring the upper 64 bits down
    paddd m2, m0
    pshuflw m0, m2, 0x4e                ; swap the two low dwords
%else
    pshufw m0, m2, 0x4e                 ; swap the two dwords
%endif
    paddd m2, m0                        ; low dword = sum of all partial sums
    movd eax, m2                        ; return value
    RET
%endmacro

INIT_MMX mmxext
SCALARPRODUCT
INIT_XMM sse2
SCALARPRODUCT
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
;                           int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
; Clip every element of src to [min, max] and store the result in dst.
; All loads and stores use mova. One loop iteration is executed before len
; is tested, and each iteration handles mmsize*(%2+%3) elements.
;
; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
;
; %%i is the offset, in units of mmsize, of the chunk group handled by the
; current unrolled pass. It starts at 0 and advances by the number of chunks
; one pass covers (4, or 8 when %3=1), so consecutive passes work on
; consecutive, non-overlapping data. Using it as a multiplier instead would
; make a second pass (%2=2) revisit chunks 0, 2, 4, 6 and skip chunks 5 and 7.
;
; The pointer/length step mmsize*4*(%2+%3) equals the amount of data really
; processed only when %2 == 1 or %3 == 0; every instantiation below satisfies
; that.
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%if %4
    cvtsi2ss m4, minm                   ; bounds converted to float for CLIPD_SSE2
    cvtsi2ss m5, maxm
%else
    movd m4, minm
    movd m5, maxm
%endif
    SPLATD m4                           ; broadcast min to all lanes
    SPLATD m5                           ; broadcast max to all lanes
.loop:
%assign %%i 0
%rep %2
    mova m0, [srcq+mmsize*(0+%%i)]
    mova m1, [srcq+mmsize*(1+%%i)]
    mova m2, [srcq+mmsize*(2+%%i)]
    mova m3, [srcq+mmsize*(3+%%i)]
%if %3
    mova m7, [srcq+mmsize*(4+%%i)]
    mova m8, [srcq+mmsize*(5+%%i)]
    mova m9, [srcq+mmsize*(6+%%i)]
    mova m10, [srcq+mmsize*(7+%%i)]
%endif
    CLIPD m0, m4, m5, m6
    CLIPD m1, m4, m5, m6
    CLIPD m2, m4, m5, m6
    CLIPD m3, m4, m5, m6
%if %3
    CLIPD m7, m4, m5, m6
    CLIPD m8, m4, m5, m6
    CLIPD m9, m4, m5, m6
    CLIPD m10, m4, m5, m6
%endif
    mova [dstq+mmsize*(0+%%i)], m0
    mova [dstq+mmsize*(1+%%i)], m1
    mova [dstq+mmsize*(2+%%i)], m2
    mova [dstq+mmsize*(3+%%i)], m3
%if %3
    mova [dstq+mmsize*(4+%%i)], m7
    mova [dstq+mmsize*(5+%%i)], m8
    mova [dstq+mmsize*(6+%%i)], m9
    mova [dstq+mmsize*(7+%%i)], m10
%endif
%assign %%i %%i+4*(1+%3)
%endrep
    add srcq, mmsize*4*(%2+%3)
    add dstq, mmsize*4*(%2+%3)
    sub lend, mmsize*(%2+%3)
    jg .loop
    REP_RET
%endmacro

INIT_MMX mmx
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM sse2
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
%define CLIPD CLIPD_SSE41
%ifdef m8
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
| ; %1 = aligned/unaligned | ; %1 = aligned/unaligned | ||||
| %macro BSWAP_LOOPS 1 | %macro BSWAP_LOOPS 1 | ||||
| mov r3, r2 | mov r3, r2 | ||||
| @@ -26,23 +26,9 @@ | |||||
| #include "dsputil_x86.h" | #include "dsputil_x86.h" | ||||
| #include "idct_xvid.h" | #include "idct_xvid.h" | ||||
| int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, | |||||
| int order); | |||||
| int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, | |||||
| int order); | |||||
| void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); | void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); | ||||
| void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); | void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); | ||||
| void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, | |||||
| int32_t min, int32_t max, unsigned int len); | |||||
| void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, | |||||
| int32_t min, int32_t max, unsigned int len); | |||||
| void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, | |||||
| int32_t min, int32_t max, unsigned int len); | |||||
| void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, | |||||
| int32_t min, int32_t max, unsigned int len); | |||||
| static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | ||||
| int cpu_flags, unsigned high_bit_depth) | int cpu_flags, unsigned high_bit_depth) | ||||
| { | { | ||||
| @@ -72,10 +58,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||
| c->gmc = ff_gmc_mmx; | c->gmc = ff_gmc_mmx; | ||||
| #endif /* HAVE_MMX_INLINE */ | #endif /* HAVE_MMX_INLINE */ | ||||
| #if HAVE_MMX_EXTERNAL | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_mmx; | |||||
| #endif /* HAVE_MMX_EXTERNAL */ | |||||
| } | } | ||||
| static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, | static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, | ||||
| @@ -88,18 +70,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, | |||||
| c->idct = ff_idct_xvid_mmxext; | c->idct = ff_idct_xvid_mmxext; | ||||
| } | } | ||||
| #endif /* HAVE_MMXEXT_INLINE */ | #endif /* HAVE_MMXEXT_INLINE */ | ||||
| #if HAVE_MMXEXT_EXTERNAL | |||||
| c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; | |||||
| #endif /* HAVE_MMXEXT_EXTERNAL */ | |||||
| } | |||||
/*
 * Point c->vector_clipf at ff_vector_clipf_sse when HAVE_SSE_INLINE is
 * non-zero; otherwise the function body is empty.
 * avctx, cpu_flags and high_bit_depth are not read here.
 */
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
                                     int cpu_flags, unsigned high_bit_depth)
{
#if HAVE_SSE_INLINE
    c->vector_clipf = ff_vector_clipf_sse;
#endif /* HAVE_SSE_INLINE */
}
| static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, | static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, | ||||
| @@ -115,12 +85,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, | |||||
| #endif /* HAVE_SSE2_INLINE */ | #endif /* HAVE_SSE2_INLINE */ | ||||
| #if HAVE_SSE2_EXTERNAL | #if HAVE_SSE2_EXTERNAL | ||||
| c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; | |||||
| if (cpu_flags & AV_CPU_FLAG_ATOM) { | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; | |||||
| } else { | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_sse2; | |||||
| } | |||||
| c->bswap_buf = ff_bswap32_buf_sse2; | c->bswap_buf = ff_bswap32_buf_sse2; | ||||
| #endif /* HAVE_SSE2_EXTERNAL */ | #endif /* HAVE_SSE2_EXTERNAL */ | ||||
| } | } | ||||
| @@ -133,14 +97,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, | |||||
| #endif /* HAVE_SSSE3_EXTERNAL */ | #endif /* HAVE_SSSE3_EXTERNAL */ | ||||
| } | } | ||||
| static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, | |||||
| int cpu_flags, unsigned high_bit_depth) | |||||
| { | |||||
| #if HAVE_SSE4_EXTERNAL | |||||
| c->vector_clip_int32 = ff_vector_clip_int32_sse4; | |||||
| #endif /* HAVE_SSE4_EXTERNAL */ | |||||
| } | |||||
| av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | ||||
| unsigned high_bit_depth) | unsigned high_bit_depth) | ||||
| { | { | ||||
| @@ -152,18 +108,12 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | |||||
| if (X86_MMXEXT(cpu_flags)) | if (X86_MMXEXT(cpu_flags)) | ||||
| dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth); | dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth); | ||||
| if (X86_SSE(cpu_flags)) | |||||
| dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth); | |||||
| if (X86_SSE2(cpu_flags)) | if (X86_SSE2(cpu_flags)) | ||||
| dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth); | dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth); | ||||
| if (EXTERNAL_SSSE3(cpu_flags)) | if (EXTERNAL_SSSE3(cpu_flags)) | ||||
| dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); | dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); | ||||
| if (EXTERNAL_SSE4(cpu_flags)) | |||||
| dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth); | |||||
| if (CONFIG_ENCODERS) | if (CONFIG_ENCODERS) | ||||
| ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); | ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); | ||||
| } | } | ||||
| @@ -25,7 +25,6 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| #include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
| #include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
| #include "constants.h" | |||||
| #include "dsputil_x86.h" | #include "dsputil_x86.h" | ||||
| #include "inline_asm.h" | #include "inline_asm.h" | ||||
| @@ -375,37 +374,4 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src, | |||||
| } | } | ||||
| } | } | ||||
| void ff_vector_clipf_sse(float *dst, const float *src, | |||||
| float min, float max, int len) | |||||
| { | |||||
| x86_reg i = (len - 16) * 4; | |||||
| __asm__ volatile ( | |||||
| "movss %3, %%xmm4 \n\t" | |||||
| "movss %4, %%xmm5 \n\t" | |||||
| "shufps $0, %%xmm4, %%xmm4 \n\t" | |||||
| "shufps $0, %%xmm5, %%xmm5 \n\t" | |||||
| "1: \n\t" | |||||
| "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel | |||||
| "movaps 16(%2, %0), %%xmm1 \n\t" | |||||
| "movaps 32(%2, %0), %%xmm2 \n\t" | |||||
| "movaps 48(%2, %0), %%xmm3 \n\t" | |||||
| "maxps %%xmm4, %%xmm0 \n\t" | |||||
| "maxps %%xmm4, %%xmm1 \n\t" | |||||
| "maxps %%xmm4, %%xmm2 \n\t" | |||||
| "maxps %%xmm4, %%xmm3 \n\t" | |||||
| "minps %%xmm5, %%xmm0 \n\t" | |||||
| "minps %%xmm5, %%xmm1 \n\t" | |||||
| "minps %%xmm5, %%xmm2 \n\t" | |||||
| "minps %%xmm5, %%xmm3 \n\t" | |||||
| "movaps %%xmm0, (%1, %0) \n\t" | |||||
| "movaps %%xmm1, 16(%1, %0) \n\t" | |||||
| "movaps %%xmm2, 32(%1, %0) \n\t" | |||||
| "movaps %%xmm3, 48(%1, %0) \n\t" | |||||
| "sub $64, %0 \n\t" | |||||
| "jge 1b \n\t" | |||||
| : "+&r" (i) | |||||
| : "r" (dst), "r" (src), "m" (min), "m" (max) | |||||
| : "memory"); | |||||
| } | |||||
| #endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
| @@ -46,7 +46,4 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src, | |||||
| int dxx, int dxy, int dyx, int dyy, | int dxx, int dxy, int dyx, int dyy, | ||||
| int shift, int r, int width, int height); | int shift, int r, int width, int height); | ||||
| void ff_vector_clipf_sse(float *dst, const float *src, | |||||
| float min, float max, int len); | |||||
| #endif /* AVCODEC_X86_DSPUTIL_X86_H */ | #endif /* AVCODEC_X86_DSPUTIL_X86_H */ | ||||