| @@ -46,49 +46,6 @@ static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs) | |||
| } | |||
| } | |||
/**
 * OR together the absolute values of all elements of an int16_t array.
 *
 * The result is not the maximum itself, but its most significant set bit is
 * the same as that of the largest absolute value in the array.
 *
 * @param src input array
 * @param len number of values in the array; nothing is read when len <= 0
 * @return bitwise OR of abs(src[k]) over all k, 0 for an empty array
 */
static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
{
    int remaining;
    int acc = 0;

    for (remaining = len; remaining > 0; remaining--) {
        acc |= abs(*src++);
    }

    return acc;
}
/**
 * Left-shift each value in an array of int16_t by a specified amount.
 *
 * Each element is shifted on its own and truncated to 16 bits, so bits pushed
 * past bit 15 are discarded and never leak into a neighbouring element.
 * The array is accessed only through its own element type (no type punning
 * to a wider integer), and exactly len elements are touched, so len does not
 * have to be a multiple of any unroll factor.
 *
 * @param src   array to shift in place
 * @param len   number of values in the array (0 is a no-op)
 * @param shift left shift amount, range [0,15]
 */
static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        /* Shift in unsigned arithmetic: left-shifting a negative signed value
         * is undefined, and the unsigned form makes the 16-bit wrap explicit. */
        const unsigned int widened = (unsigned int)(uint16_t)src[i] << shift;

        /* Keep the low 16 bits and reinterpret them as a signed sample. */
        src[i] = (int16_t)(uint16_t)widened;
    }
}
/**
 * Right-shift each value in an array of int32_t by a specified amount.
 *
 * Exactly len elements are shifted. A length of zero is a no-op and lengths
 * that are not a multiple of 8 are handled correctly; a counted loop is used
 * instead of an 8-way unrolled do/while whose unsigned counter would wrap
 * around for such lengths and run past the end of the array.
 *
 * @param src   array to shift in place
 * @param len   number of values in the array
 * @param shift right shift amount, range [0,31]
 */
static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        /* Signed shift: negative values keep their sign on the usual
         * arithmetic-shift implementations, as in the unrolled form. */
        src[i] >>= shift;
    }
}
| static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len) | |||
| { | |||
| const float scale = 1 << 24; | |||
| @@ -376,19 +333,6 @@ void ff_ac3dsp_downmix_fixed(AC3DSPContext *c, int32_t **samples, int16_t **matr | |||
| ac3_downmix_c_fixed(samples, matrix, out_ch, in_ch, len); | |||
| } | |||
/**
 * Apply a symmetric window to 16-bit samples.
 *
 * Only the first len/2 window coefficients are read; coefficient k scales
 * both sample k and its mirror sample len-1-k. Each product is rounded by
 * adding 1 << 14 before the right shift by 15.
 *
 * @param output destination array
 * @param input  source array
 * @param window window coefficients, at least len/2 elements
 * @param len    full window length
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int lo;

    for (lo = 0; lo < half; lo++) {
        const unsigned int hi = len - lo - 1;   /* mirrored position */
        const int16_t coef    = window[lo];

        output[lo] = (MUL16(input[lo], coef) + (1 << 14)) >> 15;
        output[hi] = (MUL16(input[hi], coef) + (1 << 14)) >> 15;
    }
}
| void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix, | |||
| int out_ch, int in_ch, int len) | |||
| { | |||
| @@ -424,9 +368,6 @@ void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix, | |||
| av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) | |||
| { | |||
| c->ac3_exponent_min = ac3_exponent_min_c; | |||
| c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c; | |||
| c->ac3_lshift_int16 = ac3_lshift_int16_c; | |||
| c->ac3_rshift_int32 = ac3_rshift_int32_c; | |||
| c->float_to_fixed24 = float_to_fixed24_c; | |||
| c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c; | |||
| c->update_bap_counts = ac3_update_bap_counts_c; | |||
| @@ -438,7 +379,6 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) | |||
| c->out_channels = 0; | |||
| c->downmix = NULL; | |||
| c->downmix_fixed = NULL; | |||
| c->apply_window_int16 = apply_window_int16_c; | |||
| if (ARCH_ARM) | |||
| ff_ac3dsp_init_arm(c, bit_exact); | |||
| @@ -42,39 +42,6 @@ typedef struct AC3DSPContext { | |||
| */ | |||
| void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs); | |||
| /** | |||
| * Calculate the maximum MSB of the absolute value of each element in an | |||
| * array of int16_t. | |||
| * @param src input array | |||
| * constraints: align 16. values must be in range [-32767,32767] | |||
| * @param len number of values in the array | |||
| * constraints: multiple of 16 greater than 0 | |||
| * @return a value with the same MSB as max(abs(src[])) | |||
| */ | |||
| int (*ac3_max_msb_abs_int16)(const int16_t *src, int len); | |||
| /** | |||
| * Left-shift each value in an array of int16_t by a specified amount. | |||
| * @param src input array | |||
| * constraints: align 16 | |||
| * @param len number of values in the array | |||
| * constraints: multiple of 32 greater than 0 | |||
| * @param shift left shift amount | |||
| * constraints: range [0,15] | |||
| */ | |||
| void (*ac3_lshift_int16)(int16_t *src, unsigned int len, unsigned int shift); | |||
| /** | |||
| * Right-shift each value in an array of int32_t by a specified amount. | |||
| * @param src input array | |||
| * constraints: align 16 | |||
| * @param len number of values in the array | |||
| * constraints: multiple of 16 greater than 0 | |||
| * @param shift right shift amount | |||
| * constraints: range [0,31] | |||
| */ | |||
| void (*ac3_rshift_int32)(int32_t *src, unsigned int len, unsigned int shift); | |||
| /** | |||
| * Convert an array of float in range [-1.0,1.0] to int32_t with range | |||
| * [-(1<<24),(1<<24)] | |||
| @@ -136,20 +103,6 @@ typedef struct AC3DSPContext { | |||
| int in_channels; | |||
| void (*downmix)(float **samples, float **matrix, int len); | |||
| void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int len); | |||
| /** | |||
| * Apply symmetric window in 16-bit fixed-point. | |||
| * @param output destination array | |||
| * constraints: 16-byte aligned | |||
| * @param input source array | |||
| * constraints: 16-byte aligned | |||
| * @param window window array | |||
| * constraints: 16-byte aligned, at least len/2 elements | |||
| * @param len full window length | |||
| * constraints: multiple of ? greater than zero | |||
| */ | |||
| void (*apply_window_int16)(int16_t *output, const int16_t *input, | |||
| const int16_t *window, unsigned int len); | |||
| } AC3DSPContext; | |||
| void ff_ac3dsp_init (AC3DSPContext *c, int bit_exact); | |||
| @@ -147,44 +147,6 @@ const uint8_t ff_eac3_default_cpl_band_struct[18] = { | |||
| 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 | |||
| }; | |||
| /* AC-3 MDCT window */ | |||
| /* MDCT window */ | |||
/*
 * ff_ac3_window: table of AC3_WINDOW_SIZE/2 int16_t window coefficients,
 * declared with 16-byte alignment. 256 values are listed (32 rows of 8);
 * they are non-decreasing, starting at 4 and saturating at 32767.
 * NOTE(review): presumably the rising half of a symmetric window in Q15
 * fixed point (32767 ~= 1.0) - confirm against the code that applies it.
 */
| DECLARE_ALIGNED(16, const int16_t, ff_ac3_window)[AC3_WINDOW_SIZE/2] = { | |||
| 4, 7, 12, 16, 21, 28, 34, 42, | |||
| 51, 61, 72, 84, 97, 111, 127, 145, | |||
| 164, 184, 207, 231, 257, 285, 315, 347, | |||
| 382, 419, 458, 500, 544, 591, 641, 694, | |||
| 750, 810, 872, 937, 1007, 1079, 1155, 1235, | |||
| 1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016, | |||
| 2132, 2253, 2379, 2509, 2644, 2783, 2927, 3076, | |||
| 3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444, | |||
| 4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127, | |||
| 6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112, | |||
| 8380, 8652, 8927, 9207, 9491, 9778,10069,10363, | |||
| 10660,10960,11264,11570,11879,12190,12504,12820, | |||
| 13138,13458,13780,14103,14427,14753,15079,15407, | |||
| 15735,16063,16392,16720,17049,17377,17705,18032, | |||
| 18358,18683,19007,19330,19651,19970,20287,20602, | |||
| 20914,21225,21532,21837,22139,22438,22733,23025, | |||
| 23314,23599,23880,24157,24430,24699,24964,25225, | |||
| 25481,25732,25979,26221,26459,26691,26919,27142, | |||
| 27359,27572,27780,27983,28180,28373,28560,28742, | |||
| 28919,29091,29258,29420,29577,29729,29876,30018, | |||
| 30155,30288,30415,30538,30657,30771,30880,30985, | |||
| 31086,31182,31274,31363,31447,31528,31605,31678, | |||
| 31747,31814,31877,31936,31993,32046,32097,32145, | |||
| 32190,32232,32272,32310,32345,32378,32409,32438, | |||
| 32465,32490,32513,32535,32556,32574,32592,32608, | |||
| 32623,32636,32649,32661,32671,32681,32690,32698, | |||
| 32705,32712,32718,32724,32729,32733,32737,32741, | |||
| 32744,32747,32750,32752,32754,32756,32757,32759, | |||
| 32760,32761,32762,32763,32764,32764,32765,32765, | |||
| 32766,32766,32766,32766,32767,32767,32767,32767, | |||
| 32767,32767,32767,32767,32767,32767,32767,32767, | |||
| 32767,32767,32767,32767,32767,32767,32767,32767, | |||
| }; | |||
| const uint8_t ff_ac3_log_add_tab[260]= { | |||
| 0x40,0x3f,0x3e,0x3d,0x3c,0x3b,0x3a,0x39,0x38,0x37, | |||
| 0x36,0x35,0x34,0x34,0x33,0x32,0x31,0x30,0x2f,0x2f, | |||
| @@ -37,7 +37,6 @@ extern const int ff_ac3_sample_rate_tab[]; | |||
| extern const uint16_t ff_ac3_bitrate_tab[19]; | |||
| extern const uint8_t ff_ac3_rematrix_band_tab[5]; | |||
| extern const uint8_t ff_eac3_default_cpl_band_struct[18]; | |||
| extern const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2]; | |||
| extern const uint8_t ff_ac3_log_add_tab[260]; | |||
| extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3]; | |||
| extern const uint8_t ff_ac3_bap_tab[64]; | |||
| @@ -26,13 +26,8 @@ | |||
| #include "config.h" | |||
| void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs); | |||
| int ff_ac3_max_msb_abs_int16_neon(const int16_t *src, int len); | |||
| void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift); | |||
| void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift); | |||
| void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len); | |||
| void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs); | |||
| void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, | |||
| const int16_t *window, unsigned n); | |||
| void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4], | |||
| const int32_t *coef0, | |||
| const int32_t *coef1, | |||
| @@ -61,12 +56,8 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) | |||
| if (have_neon(cpu_flags)) { | |||
| c->ac3_exponent_min = ff_ac3_exponent_min_neon; | |||
| c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon; | |||
| c->ac3_lshift_int16 = ff_ac3_lshift_int16_neon; | |||
| c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon; | |||
| c->float_to_fixed24 = ff_float_to_fixed24_neon; | |||
| c->extract_exponents = ff_ac3_extract_exponents_neon; | |||
| c->apply_window_int16 = ff_apply_window_int16_neon; | |||
| c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon; | |||
| c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon; | |||
| } | |||
| @@ -35,10 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 | |||
| cextern pd_1 | |||
| pd_151: times 4 dd 151 | |||
| ; used in ff_apply_window_int16() | |||
| pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0 | |||
| pd_16384: times 4 dd 16384 | |||
| SECTION .text | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -81,133 +77,6 @@ AC3_EXPONENT_MIN | |||
| %endif | |||
| %undef LOOP_ALIGN | |||
| ;----------------------------------------------------------------------------- | |||
| ; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len) | |||
| ; | |||
| ; This function uses 2 different methods to calculate a valid result. | |||
| ; 1) logical 'or' of abs of each element | |||
| ; This is used for ssse3 because of the pabsw instruction. | |||
| ; It is also used for mmx because of the lack of min/max instructions. | |||
| ; 2) calculate min/max for the array, then or(abs(min),abs(max)) | |||
| ; This is used for mmxext and sse2 because they have pminsw/pmaxsw. | |||
| ;----------------------------------------------------------------------------- | |||
| ; logical 'or' of 4 or 8 words in an mmx or xmm register into the low word | |||
; OR_WORDS_HORIZ src, tmp
;   %1 (src): register whose 16-bit words are OR-reduced; on exit its low
;             word holds the OR of all of its words. The remaining words are
;             left holding partial results, so only the low word is meaningful.
;   %2 (tmp): scratch register, clobbered.
; Every branch works by repeatedly folding the upper half onto the lower half.
| %macro OR_WORDS_HORIZ 2 ; src, tmp | |||
| %if cpuflag(sse2) | |||
; fold the high 64 bits onto the low 64 bits
| movhlps %2, %1 | |||
| por %1, %2 | |||
; bring words 2-3 down to words 0-1 and fold
| pshuflw %2, %1, q0032 | |||
| por %1, %2 | |||
; bring word 1 down to word 0 and fold
| pshuflw %2, %1, q0001 | |||
| por %1, %2 | |||
| %elif cpuflag(mmxext) | |||
; 64-bit register: same two folding steps using pshufw
| pshufw %2, %1, q0032 | |||
| por %1, %2 | |||
| pshufw %2, %1, q0001 | |||
| por %1, %2 | |||
| %else ; mmx | |||
; no word shuffle here: copy, shift the upper part down, then fold
| movq %2, %1 | |||
| psrlq %2, 32 | |||
| por %1, %2 | |||
| movq %2, %1 | |||
| psrlq %2, 16 | |||
| por %1, %2 | |||
| %endif | |||
| %endmacro | |||
; AC3_MAX_MSB_ABS_INT16 method
;   %1: "min_max" selects the pminsw/pmaxsw strategy; any other value
;       (instantiated as "or_abs") selects the OR-of-absolute-values strategy.
; Emits ac3_max_msb_abs_int16(src, len) for the current instruction set.
; Each loop iteration consumes two registers' worth of input (2*mmsize bytes,
; i.e. mmsize int16 values), which is why len is decremented by mmsize.
; The result is returned in eax, masked to its low 16 bits.
| %macro AC3_MAX_MSB_ABS_INT16 1 | |||
| cglobal ac3_max_msb_abs_int16, 2,2,5, src, len | |||
; m2/m3 are the accumulators. Starting from zero means the running minimum
; (m2) is never positive and the running maximum (m3) is never negative.
| pxor m2, m2 | |||
| pxor m3, m3 | |||
| .loop: | |||
| %ifidn %1, min_max | |||
| mova m0, [srcq] | |||
| mova m1, [srcq+mmsize] | |||
| pminsw m2, m0 | |||
| pminsw m2, m1 | |||
| pmaxsw m3, m0 | |||
| pmaxsw m3, m1 | |||
| %else ; or_abs | |||
| %if notcpuflag(ssse3) | |||
| mova m0, [srcq] | |||
| mova m1, [srcq+mmsize] | |||
; m3 and m4 are passed as the last two ABS2 operands (presumably scratch);
; m3 is not used as an accumulator in this branch
| ABS2 m0, m1, m3, m4 | |||
| %else ; ssse3 | |||
| ; using memory args is faster for ssse3 | |||
| pabsw m0, [srcq] | |||
| pabsw m1, [srcq+mmsize] | |||
| %endif | |||
| por m2, m0 | |||
| por m2, m1 | |||
| %endif | |||
| add srcq, mmsize*2 | |||
| sub lend, mmsize | |||
; unsigned "above": keep looping while elements remain after the subtraction
| ja .loop | |||
| %ifidn %1, min_max | |||
; combine |min| and |max| so m2 carries the MSB of the largest magnitude
| ABS2 m2, m3, m0, m1 | |||
| por m2, m3 | |||
| %endif | |||
| OR_WORDS_HORIZ m2, m0 | |||
| movd eax, m2 | |||
; only the low word of the horizontal OR is meaningful
| and eax, 0xFFFF | |||
| RET | |||
| %endmacro | |||
| INIT_MMX mmx | |||
| AC3_MAX_MSB_ABS_INT16 or_abs | |||
| INIT_MMX mmxext | |||
| AC3_MAX_MSB_ABS_INT16 min_max | |||
| INIT_XMM sse2 | |||
| AC3_MAX_MSB_ABS_INT16 min_max | |||
| INIT_XMM ssse3 | |||
| AC3_MAX_MSB_ABS_INT16 or_abs | |||
| ;----------------------------------------------------------------------------- | |||
| ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() | |||
| ;----------------------------------------------------------------------------- | |||
; AC3_SHIFT dir, bits, insn
;   %1: "l" or "r", used only to build the function name
;   %2: element width in bits (16 or 32), used in the name and the loop count
;   %3: packed shift instruction applied to every register (e.g. psllw, psrad)
; NOTE(review): the trailing comment on the %macro line also lists
; "instruction set", but the macro takes only 3 parameters; the instruction
; set comes from the INIT_MMX/INIT_XMM in effect at instantiation.
; Emits ac3_<dir>shift_int<bits>(src, len, shift), shifting the array in place.
; Each iteration handles 4 registers = 4*mmsize bytes = mmsize*32/%2 elements.
| %macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction, instruction set | |||
| cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift | |||
; shift count goes into a vector register so %3 can take it as an operand
| movd m0, shiftd | |||
| .loop: | |||
| mova m1, [srcq ] | |||
| mova m2, [srcq+mmsize ] | |||
| mova m3, [srcq+mmsize*2] | |||
| mova m4, [srcq+mmsize*3] | |||
| %3 m1, m0 | |||
| %3 m2, m0 | |||
| %3 m3, m0 | |||
| %3 m4, m0 | |||
| mova [srcq ], m1 | |||
| mova [srcq+mmsize ], m2 | |||
| mova [srcq+mmsize*2], m3 | |||
| mova [srcq+mmsize*3], m4 | |||
| add srcq, mmsize*4 | |||
| sub lend, mmsize*32/%2 | |||
; unsigned "above": loop while elements remain after the subtraction
| ja .loop | |||
; .end is not referenced anywhere inside this macro
| .end: | |||
| REP_RET | |||
| %endmacro | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift) | |||
| ;----------------------------------------------------------------------------- | |||
| INIT_MMX mmx | |||
| AC3_SHIFT l, 16, psllw | |||
| INIT_XMM sse2 | |||
| AC3_SHIFT l, 16, psllw | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift) | |||
| ;----------------------------------------------------------------------------- | |||
| INIT_MMX mmx | |||
| AC3_SHIFT r, 32, psrad | |||
| INIT_XMM sse2 | |||
| AC3_SHIFT r, 32, psrad | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len) | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -423,130 +292,3 @@ AC3_EXTRACT_EXPONENTS | |||
| INIT_XMM ssse3 | |||
| AC3_EXTRACT_EXPONENTS | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_apply_window_int16(int16_t *output, const int16_t *input, | |||
| ; const int16_t *window, unsigned int len) | |||
| ;----------------------------------------------------------------------------- | |||
; REVERSE_WORDS reg [, mask]
;   %1: register whose 16-bit words are reversed in place
;   %2: pshufb control mask register; only used by the ssse3 (non-atom) branch
; sse2 branch: reverse the 4 low words, reverse the 4 high words, then swap
; the two 64-bit halves. mmxext branch: one pshufw reverses all 4 words.
; There is no plain-mmx branch; the macro expands to nothing in that case.
| %macro REVERSE_WORDS 1-2 | |||
| %if cpuflag(ssse3) && notcpuflag(atom) | |||
| pshufb %1, %2 | |||
| %elif cpuflag(sse2) | |||
| pshuflw %1, %1, 0x1B | |||
| pshufhw %1, %1, 0x1B | |||
| pshufd %1, %1, 0x4E | |||
| %elif cpuflag(mmxext) | |||
| pshufw %1, %1, 0x1B | |||
| %endif | |||
| %endmacro | |||
; MUL16FIXED dst, src, tmp
;   %1: multiplicand, overwritten with the 16-bit fixed-point product
;   %2: multiplier (not modified)
;   %3: scratch register; only used (and clobbered) by the mmxext branch
; ssse3 branch rounds (pmulhrsw); the mmxext branch truncates, rebuilding
; bits 30..15 of the 32-bit product from the high and low partial products.
| %macro MUL16FIXED 3 | |||
| %if cpuflag(ssse3) ; dst, src, unused | |||
| ; dst = ((dst * src) + (1<<14)) >> 15 | |||
| pmulhrsw %1, %2 | |||
| %elif cpuflag(mmxext) ; dst, src, temp | |||
| ; dst = (dst * src) >> 15 | |||
| ; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back | |||
| ; in from the pmullw result. | |||
| mova %3, %1 | |||
; %1 = product bits 31..16
| pmulhw %1, %2 | |||
; %3 = product bits 15..0
| pmullw %3, %2 | |||
; keep only product bit 15, moved to bit 0
| psrlw %3, 15 | |||
| psllw %1, 1 | |||
| por %1, %3 | |||
| %endif | |||
| %endmacro | |||
; APPLY_WINDOW_INT16 bitexact
;   %1 != 0: emit apply_window_int16 (bit-exact variant)
;   %1 == 0: emit apply_window_int16_round
; Arguments: output, input, window, offset. The 4th argument is used directly
; as a byte offset into the int16 arrays; NOTE(review): this presumably is the
; "len" element count of the C prototype, which equals the byte offset of the
; array midpoint - confirm against the callers.
; offset walks upward from that point while offset2 walks downward from
; offset - mmsize, so each iteration handles one register on each side.
; The window is indexed with offset2 only and word-reversed for the upper side.
| %macro APPLY_WINDOW_INT16 1 ; %1 bitexact version | |||
| %if %1 | |||
| cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2 | |||
| %else | |||
| cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2 | |||
| %endif | |||
| lea offset2q, [offsetq-mmsize] | |||
; m5 is either the pshufb word-reversal mask (ssse3, non-atom) or the
; rounding constant 16384 (any other build with %1 set)
| %if cpuflag(ssse3) && notcpuflag(atom) | |||
| mova m5, [pb_revwords] | |||
| ALIGN 16 | |||
| %elif %1 | |||
| mova m5, [pd_16384] | |||
| %endif | |||
| .loop: | |||
| %if cpuflag(ssse3) | |||
| ; This version does the 16x16->16 multiplication in-place without expanding | |||
| ; to 32-bit. The ssse3 version is bit-identical. | |||
| mova m0, [windowq+offset2q] | |||
| mova m1, [ inputq+offset2q] | |||
| pmulhrsw m1, m0 | |||
| REVERSE_WORDS m0, m5 | |||
| pmulhrsw m0, [ inputq+offsetq ] | |||
| mova [outputq+offset2q], m1 | |||
| mova [outputq+offsetq ], m0 | |||
| %elif %1 | |||
| ; This version expands 16-bit to 32-bit, multiplies by the window, | |||
| ; adds 16384 for rounding, right shifts 15, then repacks back to words to | |||
| ; save to the output. The window is reversed for the second half. | |||
| mova m3, [windowq+offset2q] | |||
| mova m4, [ inputq+offset2q] | |||
; Interleaving with a zeroed register puts 0 in the low word of each dword
; of m0/m2. m1 is not initialised, but its stale words only ever pair with
; those zeros, so each pmaddwd dword is just window*input.
| pxor m0, m0 | |||
| punpcklwd m0, m3 | |||
| punpcklwd m1, m4 | |||
| pmaddwd m0, m1 | |||
| paddd m0, m5 | |||
| psrad m0, 15 | |||
| pxor m2, m2 | |||
| punpckhwd m2, m3 | |||
| punpckhwd m1, m4 | |||
| pmaddwd m2, m1 | |||
| paddd m2, m5 | |||
| psrad m2, 15 | |||
| packssdw m0, m2 | |||
| mova [outputq+offset2q], m0 | |||
; same sequence for the upper side, using the word-reversed window
| REVERSE_WORDS m3 | |||
| mova m4, [ inputq+offsetq] | |||
| pxor m0, m0 | |||
| punpcklwd m0, m3 | |||
| punpcklwd m1, m4 | |||
| pmaddwd m0, m1 | |||
| paddd m0, m5 | |||
| psrad m0, 15 | |||
| pxor m2, m2 | |||
| punpckhwd m2, m3 | |||
| punpckhwd m1, m4 | |||
| pmaddwd m2, m1 | |||
| paddd m2, m5 | |||
| psrad m2, 15 | |||
| packssdw m0, m2 | |||
| mova [outputq+offsetq], m0 | |||
| %else | |||
| ; This version does the 16x16->16 multiplication in-place without expanding | |||
| ; to 32-bit. The mmxext and sse2 versions do not use rounding, and | |||
| ; therefore are not bit-identical to the C version. | |||
| mova m0, [windowq+offset2q] | |||
| mova m1, [ inputq+offset2q] | |||
| mova m2, [ inputq+offsetq ] | |||
| MUL16FIXED m1, m0, m3 | |||
| REVERSE_WORDS m0 | |||
| MUL16FIXED m2, m0, m3 | |||
| mova [outputq+offset2q], m1 | |||
| mova [outputq+offsetq ], m2 | |||
| %endif | |||
| add offsetd, mmsize | |||
| sub offset2d, mmsize | |||
; loop until offset2 would drop below zero (no borrow from the subtraction)
| jae .loop | |||
| REP_RET | |||
| %endmacro | |||
| INIT_MMX mmxext | |||
| APPLY_WINDOW_INT16 0 | |||
| INIT_XMM sse2 | |||
| APPLY_WINDOW_INT16 0 | |||
| INIT_MMX mmxext | |||
| APPLY_WINDOW_INT16 1 | |||
| INIT_XMM sse2 | |||
| APPLY_WINDOW_INT16 1 | |||
| INIT_XMM ssse3 | |||
| APPLY_WINDOW_INT16 1 | |||
| INIT_XMM ssse3, atom | |||
| APPLY_WINDOW_INT16 1 | |||
| @@ -30,17 +30,6 @@ void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs | |||
| void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs); | |||
| void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs); | |||
| int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len); | |||
| int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len); | |||
| int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len); | |||
| int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len); | |||
| void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift); | |||
| void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift); | |||
| void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift); | |||
| void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift); | |||
| void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len); | |||
| void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len); | |||
| void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len); | |||
| @@ -50,28 +39,12 @@ int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]); | |||
| void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs); | |||
| void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs); | |||
| void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input, | |||
| const int16_t *window, unsigned int len); | |||
| void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input, | |||
| const int16_t *window, unsigned int len); | |||
| void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input, | |||
| const int16_t *window, unsigned int len); | |||
| void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input, | |||
| const int16_t *window, unsigned int len); | |||
| void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input, | |||
| const int16_t *window, unsigned int len); | |||
| void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, | |||
| const int16_t *window, unsigned int len); | |||
| av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (EXTERNAL_MMX(cpu_flags)) { | |||
| c->ac3_exponent_min = ff_ac3_exponent_min_mmx; | |||
| c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx; | |||
| c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx; | |||
| c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; | |||
| } | |||
| if (EXTERNAL_AMD3DNOW(cpu_flags)) { | |||
| if (!bit_exact) { | |||
| @@ -80,43 +53,20 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | |||
| } | |||
| if (EXTERNAL_MMXEXT(cpu_flags)) { | |||
| c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; | |||
| c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext; | |||
| if (bit_exact) { | |||
| c->apply_window_int16 = ff_apply_window_int16_mmxext; | |||
| } else { | |||
| c->apply_window_int16 = ff_apply_window_int16_round_mmxext; | |||
| } | |||
| } | |||
| if (EXTERNAL_SSE(cpu_flags)) { | |||
| c->float_to_fixed24 = ff_float_to_fixed24_sse; | |||
| } | |||
| if (EXTERNAL_SSE2(cpu_flags)) { | |||
| c->ac3_exponent_min = ff_ac3_exponent_min_sse2; | |||
| c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; | |||
| c->float_to_fixed24 = ff_float_to_fixed24_sse2; | |||
| c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; | |||
| c->extract_exponents = ff_ac3_extract_exponents_sse2; | |||
| if (bit_exact) { | |||
| c->apply_window_int16 = ff_apply_window_int16_sse2; | |||
| } | |||
| } | |||
| if (EXTERNAL_SSE2_FAST(cpu_flags)) { | |||
| c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; | |||
| c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; | |||
| if (!bit_exact) { | |||
| c->apply_window_int16 = ff_apply_window_int16_round_sse2; | |||
| } | |||
| } | |||
| if (EXTERNAL_SSSE3(cpu_flags)) { | |||
| c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; | |||
| if (cpu_flags & AV_CPU_FLAG_ATOM) { | |||
| c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; | |||
| } else { | |||
| if (!(cpu_flags & AV_CPU_FLAG_ATOM)) | |||
| c->extract_exponents = ff_ac3_extract_exponents_ssse3; | |||
| c->apply_window_int16 = ff_apply_window_int16_ssse3; | |||
| } | |||
| } | |||
| } | |||