This is required for SIMD optimisations.
Signed-off-by: Mans Rullgard <mans@mansr.com>
tags/n0.11
| @@ -27,6 +27,7 @@ | |||||
| #include "aacps.h" | #include "aacps.h" | ||||
| #include "aacps_tablegen.h" | #include "aacps_tablegen.h" | ||||
| #include "aacpsdata.c" | #include "aacpsdata.c" | ||||
| #include "dsputil.h" | |||||
| #define PS_BASELINE 0 ///< Operate in Baseline PS mode | #define PS_BASELINE 0 ///< Operate in Baseline PS mode | ||||
| ///< Baseline implies 10 or 20 stereo bands, | ///< Baseline implies 10 or 20 stereo bands, | ||||
| @@ -284,7 +285,7 @@ err: | |||||
| /** Split one subband into 2 subsubbands with a symmetric real filter. | /** Split one subband into 2 subsubbands with a symmetric real filter. | ||||
| * The filter must have its non-center even coefficients equal to zero. */ | * The filter must have its non-center even coefficients equal to zero. */ | ||||
| static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7], int len, int reverse) | |||||
| static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[8], int len, int reverse) | |||||
| { | { | ||||
| int i, j; | int i, j; | ||||
| for (i = 0; i < len; i++, in++) { | for (i = 0; i < len; i++, in++) { | ||||
| @@ -304,11 +305,11 @@ static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7 | |||||
| } | } | ||||
| /** Split one subband into 6 subsubbands with a complex filter */ | /** Split one subband into 6 subsubbands with a complex filter */ | ||||
| static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[7][2], int len) | |||||
| static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[8][2], int len) | |||||
| { | { | ||||
| int i; | int i; | ||||
| int N = 8; | int N = 8; | ||||
| float temp[8][2]; | |||||
| LOCAL_ALIGNED_16(float, temp, [8], [2]); | |||||
| for (i = 0; i < len; i++, in++) { | for (i = 0; i < len; i++, in++) { | ||||
| dsp->hybrid_analysis(temp, in, filter, 1, N); | dsp->hybrid_analysis(temp, in, filter, 1, N); | ||||
| @@ -327,7 +328,7 @@ static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], c | |||||
| } | } | ||||
| } | } | ||||
| static void hybrid4_8_12_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[7][2], int N, int len) | |||||
| static void hybrid4_8_12_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[8][2], int N, int len) | |||||
| { | { | ||||
| int i; | int i; | ||||
| @@ -607,8 +608,8 @@ static void map_val_20_to_34(float par[PS_MAX_NR_IIDICC]) | |||||
| static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[32][2], int is34) | static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[32][2], int is34) | ||||
| { | { | ||||
| float power[34][PS_QMF_TIME_SLOTS] = {{0}}; | |||||
| float transient_gain[34][PS_QMF_TIME_SLOTS]; | |||||
| LOCAL_ALIGNED_16(float, power, [34], [PS_QMF_TIME_SLOTS]); | |||||
| LOCAL_ALIGNED_16(float, transient_gain, [34], [PS_QMF_TIME_SLOTS]); | |||||
| float *peak_decay_nrg = ps->peak_decay_nrg; | float *peak_decay_nrg = ps->peak_decay_nrg; | ||||
| float *power_smooth = ps->power_smooth; | float *power_smooth = ps->power_smooth; | ||||
| float *peak_decay_diff_smooth = ps->peak_decay_diff_smooth; | float *peak_decay_diff_smooth = ps->peak_decay_diff_smooth; | ||||
| @@ -621,6 +622,8 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3 | |||||
| int i, k, m, n; | int i, k, m, n; | ||||
| int n0 = 0, nL = 32; | int n0 = 0, nL = 32; | ||||
| memset(power, 0, 34 * sizeof(*power)); | |||||
| if (is34 != ps->is34bands_old) { | if (is34 != ps->is34bands_old) { | ||||
| memset(ps->peak_decay_nrg, 0, sizeof(ps->peak_decay_nrg)); | memset(ps->peak_decay_nrg, 0, sizeof(ps->peak_decay_nrg)); | ||||
| memset(ps->power_smooth, 0, sizeof(ps->power_smooth)); | memset(ps->power_smooth, 0, sizeof(ps->power_smooth)); | ||||
| @@ -883,8 +886,8 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2 | |||||
| int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float R[2][38][64], int top) | int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float R[2][38][64], int top) | ||||
| { | { | ||||
| float Lbuf[91][32][2]; | |||||
| float Rbuf[91][32][2]; | |||||
| LOCAL_ALIGNED_16(float, Lbuf, [91], [32][2]); | |||||
| LOCAL_ALIGNED_16(float, Rbuf, [91], [32][2]); | |||||
| const int len = 32; | const int len = 32; | ||||
| int is34 = ps->is34bands; | int is34 = ps->is34bands; | ||||
| @@ -61,16 +61,16 @@ typedef struct { | |||||
| int is34bands; | int is34bands; | ||||
| int is34bands_old; | int is34bands_old; | ||||
| float in_buf[5][44][2]; | |||||
| float delay[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2]; | |||||
| float ap_delay[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2]; | |||||
| float peak_decay_nrg[34]; | |||||
| float power_smooth[34]; | |||||
| float peak_decay_diff_smooth[34]; | |||||
| float H11[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC]; | |||||
| float H12[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC]; | |||||
| float H21[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC]; | |||||
| float H22[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC]; | |||||
| DECLARE_ALIGNED(16, float, in_buf)[5][44][2]; | |||||
| DECLARE_ALIGNED(16, float, delay)[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2]; | |||||
| DECLARE_ALIGNED(16, float, ap_delay)[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2]; | |||||
| DECLARE_ALIGNED(16, float, peak_decay_nrg)[34]; | |||||
| DECLARE_ALIGNED(16, float, power_smooth)[34]; | |||||
| DECLARE_ALIGNED(16, float, peak_decay_diff_smooth)[34]; | |||||
| DECLARE_ALIGNED(16, float, H11)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC]; | |||||
| DECLARE_ALIGNED(16, float, H12)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC]; | |||||
| DECLARE_ALIGNED(16, float, H21)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC]; | |||||
| DECLARE_ALIGNED(16, float, H22)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC]; | |||||
| int8_t opd_hist[PS_MAX_NR_IIDICC]; | int8_t opd_hist[PS_MAX_NR_IIDICC]; | ||||
| int8_t ipd_hist[PS_MAX_NR_IIDICC]; | int8_t ipd_hist[PS_MAX_NR_IIDICC]; | ||||
| PSDSPContext dsp; | PSDSPContext dsp; | ||||
| @@ -69,23 +69,23 @@ int main(void) | |||||
| write_float_3d_array(HB, 46, 8, 4); | write_float_3d_array(HB, 46, 8, 4); | ||||
| printf("};\n"); | printf("};\n"); | ||||
| printf("static const float f20_0_8[8][7][2] = {\n"); | |||||
| write_float_3d_array(f20_0_8, 8, 7, 2); | |||||
| printf("static const DECLARE_ALIGNED(16, float, f20_0_8)[8][8][2] = {\n"); | |||||
| write_float_3d_array(f20_0_8, 8, 8, 2); | |||||
| printf("};\n"); | printf("};\n"); | ||||
| printf("static const float f34_0_12[12][7][2] = {\n"); | |||||
| write_float_3d_array(f34_0_12, 12, 7, 2); | |||||
| printf("static const DECLARE_ALIGNED(16, float, f34_0_12)[12][8][2] = {\n"); | |||||
| write_float_3d_array(f34_0_12, 12, 8, 2); | |||||
| printf("};\n"); | printf("};\n"); | ||||
| printf("static const float f34_1_8[8][7][2] = {\n"); | |||||
| write_float_3d_array(f34_1_8, 8, 7, 2); | |||||
| printf("static const DECLARE_ALIGNED(16, float, f34_1_8)[8][8][2] = {\n"); | |||||
| write_float_3d_array(f34_1_8, 8, 8, 2); | |||||
| printf("};\n"); | printf("};\n"); | ||||
| printf("static const float f34_2_4[4][7][2] = {\n"); | |||||
| write_float_3d_array(f34_2_4, 4, 7, 2); | |||||
| printf("static const DECLARE_ALIGNED(16, float, f34_2_4)[4][8][2] = {\n"); | |||||
| write_float_3d_array(f34_2_4, 4, 8, 2); | |||||
| printf("};\n"); | printf("};\n"); | ||||
| printf("static const float Q_fract_allpass[2][50][3][2] = {\n"); | |||||
| printf("static const DECLARE_ALIGNED(16, float, Q_fract_allpass)[2][50][3][2] = {\n"); | |||||
| write_float_4d_array(Q_fract_allpass, 2, 50, 3, 2); | write_float_4d_array(Q_fract_allpass, 2, 50, 3, 2); | ||||
| printf("};\n"); | printf("};\n"); | ||||
| printf("static const float phi_fract[2][50][2] = {\n"); | |||||
| printf("static const DECLARE_ALIGNED(16, float, phi_fract)[2][50][2] = {\n"); | |||||
| write_float_3d_array(phi_fract, 2, 50, 2); | write_float_3d_array(phi_fract, 2, 50, 2); | ||||
| printf("};\n"); | printf("};\n"); | ||||
| @@ -31,6 +31,7 @@ | |||||
| #else | #else | ||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "libavutil/mathematics.h" | #include "libavutil/mathematics.h" | ||||
| #include "libavutil/mem.h" | |||||
| #define NR_ALLPASS_BANDS20 30 | #define NR_ALLPASS_BANDS20 30 | ||||
| #define NR_ALLPASS_BANDS34 50 | #define NR_ALLPASS_BANDS34 50 | ||||
| #define PS_AP_LINKS 3 | #define PS_AP_LINKS 3 | ||||
| @@ -38,12 +39,12 @@ static float pd_re_smooth[8*8*8]; | |||||
| static float pd_im_smooth[8*8*8]; | static float pd_im_smooth[8*8*8]; | ||||
| static float HA[46][8][4]; | static float HA[46][8][4]; | ||||
| static float HB[46][8][4]; | static float HB[46][8][4]; | ||||
| static float f20_0_8 [ 8][7][2]; | |||||
| static float f34_0_12[12][7][2]; | |||||
| static float f34_1_8 [ 8][7][2]; | |||||
| static float f34_2_4 [ 4][7][2]; | |||||
| static float Q_fract_allpass[2][50][3][2]; | |||||
| static float phi_fract[2][50][2]; | |||||
| static DECLARE_ALIGNED(16, float, f20_0_8) [ 8][8][2]; | |||||
| static DECLARE_ALIGNED(16, float, f34_0_12)[12][8][2]; | |||||
| static DECLARE_ALIGNED(16, float, f34_1_8) [ 8][8][2]; | |||||
| static DECLARE_ALIGNED(16, float, f34_2_4) [ 4][8][2]; | |||||
| static DECLARE_ALIGNED(16, float, Q_fract_allpass)[2][50][3][2]; | |||||
| static DECLARE_ALIGNED(16, float, phi_fract)[2][50][2]; | |||||
| static const float g0_Q8[] = { | static const float g0_Q8[] = { | ||||
| 0.00746082949812f, 0.02270420949825f, 0.04546865930473f, 0.07266113929591f, | 0.00746082949812f, 0.02270420949825f, 0.04546865930473f, 0.07266113929591f, | ||||
| @@ -65,7 +66,7 @@ static const float g2_Q4[] = { | |||||
| 0.16486303567403f, 0.23279856662996f, 0.25f | 0.16486303567403f, 0.23279856662996f, 0.25f | ||||
| }; | }; | ||||
| static void make_filters_from_proto(float (*filter)[7][2], const float *proto, int bands) | |||||
| static void make_filters_from_proto(float (*filter)[8][2], const float *proto, int bands) | |||||
| { | { | ||||
| int q, n; | int q, n; | ||||
| for (q = 0; q < bands; q++) { | for (q = 0; q < bands; q++) { | ||||
| @@ -40,7 +40,7 @@ static void ps_mul_pair_single_c(float (*dst)[2], float (*src0)[2], float *src1, | |||||
| } | } | ||||
| static void ps_hybrid_analysis_c(float (*out)[2], float (*in)[2], | static void ps_hybrid_analysis_c(float (*out)[2], float (*in)[2], | ||||
| const float (*filter)[7][2], | |||||
| const float (*filter)[8][2], | |||||
| int stride, int n) | int stride, int n) | ||||
| { | { | ||||
| int i, j; | int i, j; | ||||
| @@ -30,7 +30,7 @@ typedef struct PSDSPContext { | |||||
| void (*mul_pair_single)(float (*dst)[2], float (*src0)[2], float *src1, | void (*mul_pair_single)(float (*dst)[2], float (*src0)[2], float *src1, | ||||
| int n); | int n); | ||||
| void (*hybrid_analysis)(float (*out)[2], float (*in)[2], | void (*hybrid_analysis)(float (*out)[2], float (*in)[2], | ||||
| const float (*filter)[7][2], | |||||
| const float (*filter)[8][2], | |||||
| int stride, int n); | int stride, int n); | ||||
| void (*hybrid_analysis_ileave)(float (*out)[32][2], float L[2][38][64], | void (*hybrid_analysis_ileave)(float (*out)[32][2], float L[2][38][64], | ||||
| int i, int len); | int i, int len); | ||||