ac3enc_fixed: drop unnecessary fixed-point DSP code

5 years ago · 9e05421dbe
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -46,49 +46,6 @@ static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
    }
 }

 static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
 {
    int i, v = 0;
    for (i = 0; i < len; i++)
        v |= abs(src[i]);
    return v;
 }

 static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
                               unsigned int shift)
 {
    uint32_t *src32 = (uint32_t *)src;
    const uint32_t mask = ~(((1 << shift) - 1) << 16);
    int i;
    len >>= 1;
    for (i = 0; i < len; i += 8) {
        src32[i  ] = (src32[i  ] << shift) & mask;
        src32[i+1] = (src32[i+1] << shift) & mask;
        src32[i+2] = (src32[i+2] << shift) & mask;
        src32[i+3] = (src32[i+3] << shift) & mask;
        src32[i+4] = (src32[i+4] << shift) & mask;
        src32[i+5] = (src32[i+5] << shift) & mask;
        src32[i+6] = (src32[i+6] << shift) & mask;
        src32[i+7] = (src32[i+7] << shift) & mask;
    }
 }

 static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
                               unsigned int shift)
 {
    do {
        *src++ >>= shift;
        *src++ >>= shift;
        *src++ >>= shift;
        *src++ >>= shift;
        *src++ >>= shift;
        *src++ >>= shift;
        *src++ >>= shift;
        *src++ >>= shift;
        len -= 8;
    } while (len > 0);
 }

 static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len)
 {
    const float scale = 1 << 24;
@@ -376,19 +333,6 @@ void ff_ac3dsp_downmix_fixed(AC3DSPContext *c, int32_t **samples, int16_t **matr
        ac3_downmix_c_fixed(samples, matrix, out_ch, in_ch, len);
 }

 static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
 {
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w       = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
 }

 void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
                       int out_ch, int in_ch, int len)
 {
@@ -424,9 +368,6 @@ void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
 av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
 {
    c->ac3_exponent_min = ac3_exponent_min_c;
    c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
    c->ac3_lshift_int16 = ac3_lshift_int16_c;
    c->ac3_rshift_int32 = ac3_rshift_int32_c;
    c->float_to_fixed24 = float_to_fixed24_c;
    c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
    c->update_bap_counts = ac3_update_bap_counts_c;
@@ -438,7 +379,6 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
    c->out_channels          = 0;
    c->downmix               = NULL;
    c->downmix_fixed         = NULL;
    c->apply_window_int16 = apply_window_int16_c;

    if (ARCH_ARM)
        ff_ac3dsp_init_arm(c, bit_exact);
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -42,39 +42,6 @@ typedef struct AC3DSPContext {
     */
    void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);

    /**
     * Calculate the maximum MSB of the absolute value of each element in an
     * array of int16_t.
     * @param src input array
     *            constraints: align 16. values must be in range [-32767,32767]
     * @param len number of values in the array
     *            constraints: multiple of 16 greater than 0
     * @return    a value with the same MSB as max(abs(src[]))
     */
    int (*ac3_max_msb_abs_int16)(const int16_t *src, int len);

    /**
     * Left-shift each value in an array of int16_t by a specified amount.
     * @param src    input array
     *               constraints: align 16
     * @param len    number of values in the array
     *               constraints: multiple of 32 greater than 0
     * @param shift  left shift amount
     *               constraints: range [0,15]
     */
    void (*ac3_lshift_int16)(int16_t *src, unsigned int len, unsigned int shift);

    /**
     * Right-shift each value in an array of int32_t by a specified amount.
     * @param src    input array
     *               constraints: align 16
     * @param len    number of values in the array
     *               constraints: multiple of 16 greater than 0
     * @param shift  right shift amount
     *               constraints: range [0,31]
     */
    void (*ac3_rshift_int32)(int32_t *src, unsigned int len, unsigned int shift);

    /**
     * Convert an array of float in range [-1.0,1.0] to int32_t with range
     * [-(1<<24),(1<<24)]
@@ -136,20 +103,6 @@ typedef struct AC3DSPContext {
    int in_channels;
    void (*downmix)(float **samples, float **matrix, int len);
    void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int len);

    /**
     * Apply symmetric window in 16-bit fixed-point.
     * @param output destination array
     *               constraints: 16-byte aligned
     * @param input  source array
     *               constraints: 16-byte aligned
     * @param window window array
     *               constraints: 16-byte aligned, at least len/2 elements
     * @param len    full window length
     *               constraints: multiple of ? greater than zero
     */
    void (*apply_window_int16)(int16_t *output, const int16_t *input,
                               const int16_t *window, unsigned int len);
 } AC3DSPContext;

 void ff_ac3dsp_init    (AC3DSPContext *c, int bit_exact);
--- a/libavcodec/ac3tab.c
+++ b/libavcodec/ac3tab.c
@@ -147,44 +147,6 @@ const uint8_t ff_eac3_default_cpl_band_struct[18] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1
 };

 /* AC-3 MDCT window */

 /* MDCT window */
 DECLARE_ALIGNED(16, const int16_t, ff_ac3_window)[AC3_WINDOW_SIZE/2] = {
    4,    7,   12,   16,   21,   28,   34,   42,
   51,   61,   72,   84,   97,  111,  127,  145,
  164,  184,  207,  231,  257,  285,  315,  347,
  382,  419,  458,  500,  544,  591,  641,  694,
  750,  810,  872,  937, 1007, 1079, 1155, 1235,
 1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016,
 2132, 2253, 2379, 2509, 2644, 2783, 2927, 3076,
 3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444,
 4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127,
 6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112,
 8380, 8652, 8927, 9207, 9491, 9778,10069,10363,
 10660,10960,11264,11570,11879,12190,12504,12820,
 13138,13458,13780,14103,14427,14753,15079,15407,
 15735,16063,16392,16720,17049,17377,17705,18032,
 18358,18683,19007,19330,19651,19970,20287,20602,
 20914,21225,21532,21837,22139,22438,22733,23025,
 23314,23599,23880,24157,24430,24699,24964,25225,
 25481,25732,25979,26221,26459,26691,26919,27142,
 27359,27572,27780,27983,28180,28373,28560,28742,
 28919,29091,29258,29420,29577,29729,29876,30018,
 30155,30288,30415,30538,30657,30771,30880,30985,
 31086,31182,31274,31363,31447,31528,31605,31678,
 31747,31814,31877,31936,31993,32046,32097,32145,
 32190,32232,32272,32310,32345,32378,32409,32438,
 32465,32490,32513,32535,32556,32574,32592,32608,
 32623,32636,32649,32661,32671,32681,32690,32698,
 32705,32712,32718,32724,32729,32733,32737,32741,
 32744,32747,32750,32752,32754,32756,32757,32759,
 32760,32761,32762,32763,32764,32764,32765,32765,
 32766,32766,32766,32766,32767,32767,32767,32767,
 32767,32767,32767,32767,32767,32767,32767,32767,
 32767,32767,32767,32767,32767,32767,32767,32767,
 };

 const uint8_t ff_ac3_log_add_tab[260]= {
 0x40,0x3f,0x3e,0x3d,0x3c,0x3b,0x3a,0x39,0x38,0x37,
 0x36,0x35,0x34,0x34,0x33,0x32,0x31,0x30,0x2f,0x2f,
--- a/libavcodec/ac3tab.h
+++ b/libavcodec/ac3tab.h
@@ -37,7 +37,6 @@ extern const int      ff_ac3_sample_rate_tab[];
 extern const uint16_t ff_ac3_bitrate_tab[19];
 extern const uint8_t  ff_ac3_rematrix_band_tab[5];
 extern const uint8_t  ff_eac3_default_cpl_band_struct[18];
 extern const int16_t  ff_ac3_window[AC3_WINDOW_SIZE/2];
 extern const uint8_t  ff_ac3_log_add_tab[260];
 extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3];
 extern const uint8_t  ff_ac3_bap_tab[64];
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -26,13 +26,8 @@
 #include "config.h"

 void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
 int ff_ac3_max_msb_abs_int16_neon(const int16_t *src, int len);
 void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift);
 void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift);
 void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len);
 void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
 void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
                                const int16_t *window, unsigned n);
 void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
                                            const int32_t *coef0,
                                            const int32_t *coef1,
@@ -61,12 +56,8 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)

    if (have_neon(cpu_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_neon;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon;
        c->ac3_lshift_int16      = ff_ac3_lshift_int16_neon;
        c->ac3_rshift_int32      = ff_ac3_rshift_int32_neon;
        c->float_to_fixed24      = ff_float_to_fixed24_neon;
        c->extract_exponents     = ff_ac3_extract_exponents_neon;
        c->apply_window_int16    = ff_apply_window_int16_neon;
        c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
        c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
    }
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -35,10 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
 cextern pd_1
 pd_151: times 4 dd 151

 ; used in ff_apply_window_int16()
 pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
 pd_16384: times 4 dd 16384

 SECTION .text

 ;-----------------------------------------------------------------------------
@@ -81,133 +77,6 @@ AC3_EXPONENT_MIN
 %endif
 %undef LOOP_ALIGN

 ;-----------------------------------------------------------------------------
 ; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
 ;
 ; This function uses 2 different methods to calculate a valid result.
 ; 1) logical 'or' of abs of each element
 ;        This is used for ssse3 because of the pabsw instruction.
 ;        It is also used for mmx because of the lack of min/max instructions.
 ; 2) calculate min/max for the array, then or(abs(min),abs(max))
 ;        This is used for mmxext and sse2 because they have pminsw/pmaxsw.
 ;-----------------------------------------------------------------------------

 ; logical 'or' of 4 or 8 words in an mmx or xmm register into the low word
 %macro OR_WORDS_HORIZ 2 ; src, tmp
 %if cpuflag(sse2)
    movhlps     %2, %1
    por         %1, %2
    pshuflw     %2, %1, q0032
    por         %1, %2
    pshuflw     %2, %1, q0001
    por         %1, %2
 %elif cpuflag(mmxext)
    pshufw      %2, %1, q0032
    por         %1, %2
    pshufw      %2, %1, q0001
    por         %1, %2
 %else ; mmx
    movq        %2, %1
    psrlq       %2, 32
    por         %1, %2
    movq        %2, %1
    psrlq       %2, 16
    por         %1, %2
 %endif
 %endmacro

 %macro AC3_MAX_MSB_ABS_INT16 1
 cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
    pxor        m2, m2
    pxor        m3, m3
 .loop:
 %ifidn %1, min_max
    mova        m0, [srcq]
    mova        m1, [srcq+mmsize]
    pminsw      m2, m0
    pminsw      m2, m1
    pmaxsw      m3, m0
    pmaxsw      m3, m1
 %else ; or_abs
 %if notcpuflag(ssse3)
    mova        m0, [srcq]
    mova        m1, [srcq+mmsize]
    ABS2        m0, m1, m3, m4
 %else ; ssse3
    ; using memory args is faster for ssse3
    pabsw       m0, [srcq]
    pabsw       m1, [srcq+mmsize]
 %endif
    por         m2, m0
    por         m2, m1
 %endif
    add       srcq, mmsize*2
    sub       lend, mmsize
    ja .loop
 %ifidn %1, min_max
    ABS2        m2, m3, m0, m1
    por         m2, m3
 %endif
    OR_WORDS_HORIZ m2, m0
    movd       eax, m2
    and        eax, 0xFFFF
    RET
 %endmacro

 INIT_MMX mmx
 AC3_MAX_MSB_ABS_INT16 or_abs
 INIT_MMX mmxext
 AC3_MAX_MSB_ABS_INT16 min_max
 INIT_XMM sse2
 AC3_MAX_MSB_ABS_INT16 min_max
 INIT_XMM ssse3
 AC3_MAX_MSB_ABS_INT16 or_abs

 ;-----------------------------------------------------------------------------
 ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
 ;-----------------------------------------------------------------------------

 %macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction, instruction set
 cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift
    movd      m0, shiftd
 .loop:
    mova      m1, [srcq         ]
    mova      m2, [srcq+mmsize  ]
    mova      m3, [srcq+mmsize*2]
    mova      m4, [srcq+mmsize*3]
    %3        m1, m0
    %3        m2, m0
    %3        m3, m0
    %3        m4, m0
    mova  [srcq         ], m1
    mova  [srcq+mmsize  ], m2
    mova  [srcq+mmsize*2], m3
    mova  [srcq+mmsize*3], m4
    add     srcq, mmsize*4
    sub     lend, mmsize*32/%2
    ja .loop
 .end:
    REP_RET
 %endmacro

 ;-----------------------------------------------------------------------------
 ; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
 ;-----------------------------------------------------------------------------

 INIT_MMX mmx
 AC3_SHIFT l, 16, psllw
 INIT_XMM sse2
 AC3_SHIFT l, 16, psllw

 ;-----------------------------------------------------------------------------
 ; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
 ;-----------------------------------------------------------------------------

 INIT_MMX mmx
 AC3_SHIFT r, 32, psrad
 INIT_XMM sse2
 AC3_SHIFT r, 32, psrad

 ;-----------------------------------------------------------------------------
 ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
 ;-----------------------------------------------------------------------------
@@ -423,130 +292,3 @@ AC3_EXTRACT_EXPONENTS
 INIT_XMM ssse3
 AC3_EXTRACT_EXPONENTS
 %endif

 ;-----------------------------------------------------------------------------
 ; void ff_apply_window_int16(int16_t *output, const int16_t *input,
 ;                            const int16_t *window, unsigned int len)
 ;-----------------------------------------------------------------------------

 %macro REVERSE_WORDS 1-2
 %if cpuflag(ssse3) && notcpuflag(atom)
    pshufb  %1, %2
 %elif cpuflag(sse2)
    pshuflw  %1, %1, 0x1B
    pshufhw  %1, %1, 0x1B
    pshufd   %1, %1, 0x4E
 %elif cpuflag(mmxext)
    pshufw   %1, %1, 0x1B
 %endif
 %endmacro

 %macro MUL16FIXED 3
 %if cpuflag(ssse3) ; dst, src, unused
 ; dst = ((dst * src) + (1<<14)) >> 15
    pmulhrsw   %1, %2
 %elif cpuflag(mmxext) ; dst, src, temp
 ; dst = (dst * src) >> 15
 ; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
 ; in from the pmullw result.
    mova    %3, %1
    pmulhw  %1, %2
    pmullw  %3, %2
    psrlw   %3, 15
    psllw   %1, 1
    por     %1, %3
 %endif
 %endmacro

 %macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
 %if %1
 cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
 %else
 cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
 %endif
    lea     offset2q, [offsetq-mmsize]
 %if cpuflag(ssse3) && notcpuflag(atom)
    mova          m5, [pb_revwords]
    ALIGN 16
 %elif %1
    mova          m5, [pd_16384]
 %endif
 .loop:
 %if cpuflag(ssse3)
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The ssse3 version is bit-identical.
    mova          m0, [windowq+offset2q]
    mova          m1, [ inputq+offset2q]
    pmulhrsw      m1, m0
    REVERSE_WORDS m0, m5
    pmulhrsw      m0, [ inputq+offsetq ]
    mova  [outputq+offset2q], m1
    mova  [outputq+offsetq ], m0
 %elif %1
    ; This version expands 16-bit to 32-bit, multiplies by the window,
    ; adds 16384 for rounding, right shifts 15, then repacks back to words to
    ; save to the output. The window is reversed for the second half.
    mova          m3, [windowq+offset2q]
    mova          m4, [ inputq+offset2q]
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova  [outputq+offset2q], m0
    REVERSE_WORDS m3
    mova          m4, [ inputq+offsetq]
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova  [outputq+offsetq], m0
 %else
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
    ; therefore are not bit-identical to the C version.
    mova          m0, [windowq+offset2q]
    mova          m1, [ inputq+offset2q]
    mova          m2, [ inputq+offsetq ]
    MUL16FIXED    m1, m0, m3
    REVERSE_WORDS m0
    MUL16FIXED    m2, m0, m3
    mova  [outputq+offset2q], m1
    mova  [outputq+offsetq ], m2
 %endif
    add      offsetd, mmsize
    sub     offset2d, mmsize
    jae .loop
    REP_RET
 %endmacro

 INIT_MMX mmxext
 APPLY_WINDOW_INT16 0
 INIT_XMM sse2
 APPLY_WINDOW_INT16 0

 INIT_MMX mmxext
 APPLY_WINDOW_INT16 1
 INIT_XMM sse2
 APPLY_WINDOW_INT16 1
 INIT_XMM ssse3
 APPLY_WINDOW_INT16 1
 INIT_XMM ssse3, atom
 APPLY_WINDOW_INT16 1
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -30,17 +30,6 @@ void ff_ac3_exponent_min_mmx   (uint8_t *exp, int num_reuse_blocks, int nb_coefs
 void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
 void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);

 int ff_ac3_max_msb_abs_int16_mmx  (const int16_t *src, int len);
 int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
 int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
 int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);

 void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
 void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);

 void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
 void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);

 void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
 void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
 void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
@@ -50,28 +39,12 @@ int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
 void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
 void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);

 void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input,
                                        const int16_t *window, unsigned int len);
 void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
 void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input,
                                  const int16_t *window, unsigned int len);
 void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input,
                                const int16_t *window, unsigned int len);
 void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len);
 void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);

 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
 {
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
    }
    if (EXTERNAL_AMD3DNOW(cpu_flags)) {
        if (!bit_exact) {
@@ -80,43 +53,20 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_mmxext;
        } else {
            c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
        }
    }
    if (EXTERNAL_SSE(cpu_flags)) {
        c->float_to_fixed24 = ff_float_to_fixed24_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        c->extract_exponents = ff_ac3_extract_exponents_sse2;
        if (bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_sse2;
        }
    }

    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
        c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
        if (!bit_exact) {
            c->apply_window_int16 = ff_apply_window_int16_round_sse2;
        }
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
        if (cpu_flags & AV_CPU_FLAG_ATOM) {
            c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
        } else {
        if (!(cpu_flags & AV_CPU_FLAG_ATOM))
            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
            c->apply_window_int16 = ff_apply_window_int16_ssse3;
        }
    }
 }