Move array specifiers outside DECLARE_ALIGNED() invocations

Originally committed as revision 21377 to svn://svn.ffmpeg.org/ffmpeg/trunk
16 years ago · c67278098d
--- a/ffplay.c
+++ b/ffplay.c
@@ -133,8 +133,8 @@ typedef struct VideoState {
    int audio_hw_buf_size;
    /* samples output by the codec. we reserve more space for avsync
       compensation */
    DECLARE_ALIGNED(16,uint8_t,audio_buf1[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2]);
    DECLARE_ALIGNED(16,uint8_t,audio_buf2[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2]);
    DECLARE_ALIGNED(16,uint8_t,audio_buf1)[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
    DECLARE_ALIGNED(16,uint8_t,audio_buf2)[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
    uint8_t *audio_buf;
    unsigned int audio_buf_size; /* in bytes */
    int audio_buf_index; /* in bytes */
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -137,7 +137,7 @@ typedef struct FourXContext{
    int mv[256];
    VLC pre_vlc;
    int last_dc;
    DECLARE_ALIGNED_16(DCTELEM, block[6][64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
    void *bitstream_buffer;
    unsigned int bitstream_buffer_size;
    int version;
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -214,9 +214,9 @@ typedef struct {
    float sf[120];                            ///< scalefactors
    int sf_idx[128];                          ///< scalefactor indices (used by encoder)
    uint8_t zeroes[128];                      ///< band is not coded (used by encoder)
    DECLARE_ALIGNED_16(float, coeffs[1024]);  ///< coefficients for IMDCT
    DECLARE_ALIGNED_16(float, saved[1024]);   ///< overlap
    DECLARE_ALIGNED_16(float, ret[1024]);     ///< PCM output
    DECLARE_ALIGNED_16(float, coeffs)[1024];  ///< coefficients for IMDCT
    DECLARE_ALIGNED_16(float, saved)[1024];   ///< overlap
    DECLARE_ALIGNED_16(float, ret)[1024];     ///< PCM output
    PredictorState predictor_state[MAX_PREDICTORS];
 } SingleChannelElement;

@@ -261,7 +261,7 @@ typedef struct {
     * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
     * @{
     */
    DECLARE_ALIGNED_16(float, buf_mdct[1024]);
    DECLARE_ALIGNED_16(float, buf_mdct)[1024];
    /** @} */

    /**
@@ -284,7 +284,7 @@ typedef struct {
    int sf_offset;                                    ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16
    /** @} */

    DECLARE_ALIGNED(16, float, temp[128]);
    DECLARE_ALIGNED(16, float, temp)[128];

    enum OCStatus output_configured;
 } AACContext;
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -52,7 +52,7 @@ typedef struct AACEncContext {
    FFTContext mdct1024;                         ///< long (1024 samples) frame transform context
    FFTContext mdct128;                          ///< short (128 samples) frame transform context
    DSPContext  dsp;
    DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients
    DECLARE_ALIGNED_16(FFTSample, output)[2048]; ///< temporary buffer for MDCT input coefficients
    int16_t* samples;                            ///< saved preprocessed input

    int samplerate_index;                        ///< MPEG-4 samplerate index
@@ -64,8 +64,8 @@ typedef struct AACEncContext {
    int cur_channel;
    int last_frame;
    float lambda;
    DECLARE_ALIGNED_16(int,   qcoefs[96][2]);    ///< quantized coefficients
    DECLARE_ALIGNED_16(float, scoefs[1024]);     ///< scaled coefficients
    DECLARE_ALIGNED_16(int,   qcoefs)[96][2];    ///< quantized coefficients
    DECLARE_ALIGNED_16(float, scoefs)[1024];     ///< scaled coefficients
 } AACEncContext;

 #endif /* AVCODEC_AACENC_H */
--- a/libavcodec/aactab.c
+++ b/libavcodec/aactab.c
@@ -32,8 +32,8 @@

 #include <stdint.h>

 DECLARE_ALIGNED(16, float,  ff_aac_kbd_long_1024[1024]);
 DECLARE_ALIGNED(16, float,  ff_aac_kbd_short_128[128]);
 DECLARE_ALIGNED(16, float,  ff_aac_kbd_long_1024)[1024];
 DECLARE_ALIGNED(16, float,  ff_aac_kbd_short_128)[128];

 const uint8_t ff_aac_num_swb_1024[] = {
    41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40
@@ -409,7 +409,7 @@ const uint16_t ff_aac_spectral_sizes[11] = {
 * 64.0f is a special value indicating the existence of an escape code in the
 * bitstream.
 */
 static const DECLARE_ALIGNED_16(float, codebook_vector0[324]) = {
 static const DECLARE_ALIGNED_16(float, codebook_vector0)[324] = {
 -1.0000000, -1.0000000, -1.0000000, -1.0000000,
 -1.0000000, -1.0000000, -1.0000000,  0.0000000,
 -1.0000000, -1.0000000, -1.0000000,  1.0000000,
@@ -493,7 +493,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector0[324]) = {
  1.0000000,  1.0000000,  1.0000000,  1.0000000,
 };

 static const DECLARE_ALIGNED_16(float, codebook_vector2[324]) = {
 static const DECLARE_ALIGNED_16(float, codebook_vector2)[324] = {
  0.0000000,  0.0000000,  0.0000000,  0.0000000,
  0.0000000,  0.0000000,  0.0000000,  1.0000000,
  0.0000000,  0.0000000,  0.0000000,  2.5198421,
@@ -577,7 +577,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector2[324]) = {
  2.5198421,  2.5198421,  2.5198421,  2.5198421,
 };

 static const DECLARE_ALIGNED_16(float, codebook_vector4[162]) = {
 static const DECLARE_ALIGNED_16(float, codebook_vector4)[162] = {
 -6.3496042, -6.3496042, -6.3496042, -4.3267487,
 -6.3496042, -2.5198421, -6.3496042, -1.0000000,
 -6.3496042,  0.0000000, -6.3496042,  1.0000000,
@@ -621,7 +621,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector4[162]) = {
  6.3496042,  6.3496042,
 };

 static const DECLARE_ALIGNED_16(float, codebook_vector6[128]) = {
 static const DECLARE_ALIGNED_16(float, codebook_vector6)[128] = {
  0.0000000,  0.0000000,  0.0000000,  1.0000000,
  0.0000000,  2.5198421,  0.0000000,  4.3267487,
  0.0000000,  6.3496042,  0.0000000,  8.5498797,
@@ -656,7 +656,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector6[128]) = {
 13.3905183, 10.9027236, 13.3905183, 13.3905183,
 };

 static const DECLARE_ALIGNED_16(float, codebook_vector8[338]) = {
 static const DECLARE_ALIGNED_16(float, codebook_vector8)[338] = {
  0.0000000,  0.0000000,  0.0000000,  1.0000000,
  0.0000000,  2.5198421,  0.0000000,  4.3267487,
  0.0000000,  6.3496042,  0.0000000,  8.5498797,
@@ -744,7 +744,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector8[338]) = {
 27.4731418, 27.4731418,
 };

 static const DECLARE_ALIGNED_16(float, codebook_vector10[578]) = {
 static const DECLARE_ALIGNED_16(float, codebook_vector10)[578] = {
  0.0000000,  0.0000000,  0.0000000,  1.0000000,
  0.0000000,  2.5198421,  0.0000000,  4.3267487,
  0.0000000,  6.3496042,  0.0000000,  8.5498797,
--- a/libavcodec/aactab.h
+++ b/libavcodec/aactab.h
@@ -43,8 +43,8 @@
 /* @name window coefficients
 * @{
 */
 DECLARE_ALIGNED(16, extern float,  ff_aac_kbd_long_1024[1024]);
 DECLARE_ALIGNED(16, extern float,  ff_aac_kbd_short_128[128]);
 DECLARE_ALIGNED(16, extern float,  ff_aac_kbd_long_1024)[1024];
 DECLARE_ALIGNED(16, extern float,  ff_aac_kbd_short_128)[128];
 // @}

 /* @name number of scalefactor window bands for long and short transform windows respectively
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h
@@ -157,12 +157,12 @@ typedef struct {
 ///@}

 ///@defgroup arrays aligned arrays
    DECLARE_ALIGNED_16(int,   fixed_coeffs[AC3_MAX_CHANNELS][AC3_MAX_COEFS]);       ///> fixed-point transform coefficients
    DECLARE_ALIGNED_16(float, transform_coeffs[AC3_MAX_CHANNELS][AC3_MAX_COEFS]);   ///< transform coefficients
    DECLARE_ALIGNED_16(float, delay[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]);             ///< delay - added to the next block
    DECLARE_ALIGNED_16(float, window[AC3_BLOCK_SIZE]);                              ///< window coefficients
    DECLARE_ALIGNED_16(float, tmp_output[AC3_BLOCK_SIZE]);                          ///< temporary storage for output before windowing
    DECLARE_ALIGNED_16(float, output[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]);            ///< output after imdct transform and windowing
    DECLARE_ALIGNED_16(int,   fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///> fixed-point transform coefficients
    DECLARE_ALIGNED_16(float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
    DECLARE_ALIGNED_16(float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
    DECLARE_ALIGNED_16(float, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
    DECLARE_ALIGNED_16(float, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
    DECLARE_ALIGNED_16(float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
 ///@}
 } AC3DecodeContext;

--- a/libavcodec/asv1.c
+++ b/libavcodec/asv1.c
@@ -48,7 +48,7 @@ typedef struct ASV1Context{
    int mb_height;
    int mb_width2;
    int mb_height2;
    DECLARE_ALIGNED_16(DCTELEM, block[6][64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
    uint16_t intra_matrix[64];
    int q_intra_matrix[64];
    uint8_t *bitstream_buffer;
--- a/libavcodec/atrac1.c
+++ b/libavcodec/atrac1.c
@@ -58,11 +58,11 @@ typedef struct {
    int                 log2_block_count[AT1_QMF_BANDS];    ///< log2 number of blocks in a band
    int                 num_bfus;                           ///< number of Block Floating Units
    float*              spectrum[2];
    DECLARE_ALIGNED_16(float, spec1[AT1_SU_SAMPLES]);       ///< mdct buffer
    DECLARE_ALIGNED_16(float, spec2[AT1_SU_SAMPLES]);       ///< mdct buffer
    DECLARE_ALIGNED_16(float, fst_qmf_delay[46]);           ///< delay line for the 1st stacked QMF filter
    DECLARE_ALIGNED_16(float, snd_qmf_delay[46]);           ///< delay line for the 2nd stacked QMF filter
    DECLARE_ALIGNED_16(float, last_qmf_delay[256+23]);      ///< delay line for the last stacked QMF filter
    DECLARE_ALIGNED_16(float, spec1)[AT1_SU_SAMPLES];     ///< mdct buffer
    DECLARE_ALIGNED_16(float, spec2)[AT1_SU_SAMPLES];     ///< mdct buffer
    DECLARE_ALIGNED_16(float, fst_qmf_delay)[46];         ///< delay line for the 1st stacked QMF filter
    DECLARE_ALIGNED_16(float, snd_qmf_delay)[46];         ///< delay line for the 2nd stacked QMF filter
    DECLARE_ALIGNED_16(float, last_qmf_delay)[256+23];    ///< delay line for the last stacked QMF filter
 } AT1SUCtx;

 /**
@@ -70,13 +70,13 @@ typedef struct {
 */
 typedef struct {
    AT1SUCtx            SUs[AT1_MAX_CHANNELS];              ///< channel sound unit
    DECLARE_ALIGNED_16(float, spec[AT1_SU_SAMPLES]);        ///< the mdct spectrum buffer
    DECLARE_ALIGNED_16(float, spec)[AT1_SU_SAMPLES];      ///< the mdct spectrum buffer

    DECLARE_ALIGNED_16(float,  low[256]);
    DECLARE_ALIGNED_16(float,  mid[256]);
    DECLARE_ALIGNED_16(float, high[512]);
    DECLARE_ALIGNED_16(float,  low)[256];
    DECLARE_ALIGNED_16(float,  mid)[256];
    DECLARE_ALIGNED_16(float, high)[512];
    float*              bands[3];
    DECLARE_ALIGNED_16(float, out_samples[AT1_MAX_CHANNELS][AT1_SU_SAMPLES]);
    DECLARE_ALIGNED_16(float, out_samples)[AT1_MAX_CHANNELS][AT1_SU_SAMPLES];
    FFTContext          mdct_ctx[3];
    int                 channels;
    DSPContext          dsp;
--- a/libavcodec/atrac3.c
+++ b/libavcodec/atrac3.c
@@ -73,8 +73,8 @@ typedef struct {
    int               gcBlkSwitch;
    gain_block        gainBlock[2];

    DECLARE_ALIGNED_16(float, spectrum[1024]);
    DECLARE_ALIGNED_16(float, IMDCT_buf[1024]);
    DECLARE_ALIGNED_16(float, spectrum)[1024];
    DECLARE_ALIGNED_16(float, IMDCT_buf)[1024];

    float             delayBuf1[46]; ///<qmf delay buffers
    float             delayBuf2[46];
@@ -119,7 +119,7 @@ typedef struct {
    //@}
 } ATRAC3Context;

 static DECLARE_ALIGNED_16(float,mdct_window[512]);
 static DECLARE_ALIGNED_16(float,mdct_window)[512];
 static VLC              spectral_coeff_tab[7];
 static float            gain_tab1[16];
 static float            gain_tab2[31];
--- a/libavcodec/cavs.c
+++ b/libavcodec/cavs.c
@@ -73,7 +73,7 @@ static inline int get_bs(cavs_vector *mvP, cavs_vector *mvQ, int b) {
 *
 */
 void ff_cavs_filter(AVSContext *h, enum cavs_mb mb_type) {
    DECLARE_ALIGNED_8(uint8_t, bs[8]);
    DECLARE_ALIGNED_8(uint8_t, bs)[8];
    int qp_avg, alpha, beta, tc;
    int i;

--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -150,7 +150,7 @@ typedef struct cook {
    /* data buffers */

    uint8_t*            decoded_bytes_buffer;
    DECLARE_ALIGNED_16(float,mono_mdct_output[2048]);
    DECLARE_ALIGNED_16(float,mono_mdct_output)[2048];
    float               decode_buffer_1[1024];
    float               decode_buffer_2[1024];
    float               decode_buffer_0[1060]; /* static allocation for joint decode */
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -228,16 +228,16 @@ typedef struct {

    /* Subband samples history (for ADPCM) */
    float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
    DECLARE_ALIGNED_16(float, subband_fir_hist[DCA_PRIM_CHANNELS_MAX][512]);
    DECLARE_ALIGNED_16(float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512];
    float subband_fir_noidea[DCA_PRIM_CHANNELS_MAX][32];
    int hist_index[DCA_PRIM_CHANNELS_MAX];
    DECLARE_ALIGNED_16(float, raXin[32]);
    DECLARE_ALIGNED_16(float, raXin)[32];

    int output;                 ///< type of output
    float add_bias;             ///< output bias
    float scale_bias;           ///< output scale

    DECLARE_ALIGNED_16(float, samples[1536]);  /* 6 * 256 = 1536, might only need 5 */
    DECLARE_ALIGNED_16(float, samples)[1536];  /* 6 * 256 = 1536, might only need 5 */
    const float *samples_chanptr[6];

    uint8_t dca_buffer[DCA_MAX_FRAME_SIZE];
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -186,9 +186,9 @@ static void idct_mmx_init(void)
    }
 }

 DECLARE_ALIGNED(16, static DCTELEM, block[64]);
 DECLARE_ALIGNED(8, static DCTELEM, block1[64]);
 DECLARE_ALIGNED(8, static DCTELEM, block_org[64]);
 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
 DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
 DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];

 static inline void mmx_emms(void)
 {
@@ -384,8 +384,8 @@ static void dct_error(const char *name, int is_idct,
 #endif
 }

 DECLARE_ALIGNED(8, static uint8_t, img_dest[64]);
 DECLARE_ALIGNED(8, static uint8_t, img_dest1[64]);
 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];

 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
 {
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -39,7 +39,7 @@ typedef struct {
    VLC ac_vlc, dc_vlc, run_vlc;
    int last_dc[3];
    DSPContext dsp;
    DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]);
    DECLARE_ALIGNED_16(DCTELEM, blocks)[8][64];
    ScanTable scantable;
    const CIDEntry *cid_table;
 } DNXHDContext;
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -414,7 +414,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
        dnxhd_get_blocks(ctx, mb_x, mb_y);

        for (i = 0; i < 8; i++) {
            DECLARE_ALIGNED_16(DCTELEM, block[64]);
            DECLARE_ALIGNED_16(DCTELEM, block)[64];
            DCTELEM *src_block = ctx->blocks[i];
            int overflow, nbits, diff, last_index;
            int n = dnxhd_switch_matrix(ctx, i);
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -55,7 +55,7 @@ typedef struct DNXHDEncContext {
    int interlaced;
    int cur_field;

    DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]);
    DECLARE_ALIGNED_16(DCTELEM, blocks)[8][64];

    int      (*qmatrix_c)     [64];
    int      (*qmatrix_l)     [64];
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -87,7 +87,7 @@ const uint8_t ff_zigzag248_direct[64] = {
 };

 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
 DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16[64]);
 DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16)[64];

 const uint8_t ff_alternate_horizontal_scan[64] = {
    0,  1,   2,  3,  8,  9, 16, 17,
@@ -3788,7 +3788,7 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_

 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8];
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    assert(h==8);
@@ -3853,7 +3853,7 @@ static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s

 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8];
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int sum=0, i;

@@ -3870,7 +3870,7 @@ static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2

 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64*2/8];
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
    int sum=0, i;
@@ -3895,9 +3895,9 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_16(uint64_t, aligned_src1[8]);
    DECLARE_ALIGNED_16(uint64_t, aligned_src2[8]);
    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8];
    DECLARE_ALIGNED_16(uint64_t, aligned_src1)[8];
    DECLARE_ALIGNED_16(uint64_t, aligned_src2)[8];
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const lsrc1 = (uint8_t*)aligned_src1;
    uint8_t * const lsrc2 = (uint8_t*)aligned_src2;
@@ -3974,7 +3974,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8];
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -178,7 +178,7 @@ typedef struct ScanTable{
    uint8_t raster_end[64];
 #if ARCH_PPC
                /** Used by dct_quantize_altivec to find last-non-zero */
    DECLARE_ALIGNED(16, uint8_t, inverse[64]);
    DECLARE_ALIGNED(16, uint8_t, inverse)[64];
 #endif
 } ScanTable;

@@ -656,8 +656,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);

 #define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)
 #define DECLARE_ALIGNED_8(t, v)  DECLARE_ALIGNED(8, t, v)
 #define DECLARE_ALIGNED_16(t, v, ...) DECLARE_ALIGNED(16, t, v)
 #define DECLARE_ALIGNED_8(t, v, ...)  DECLARE_ALIGNED(8, t, v)

 #if HAVE_MMX

@@ -749,11 +749,11 @@ typedef struct FFTContext {
 #endif

 #define COSTABLE(size) \
    COSTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_cos_##size[size/2])
    COSTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_cos_##size)[size/2]
 #define SINTABLE(size) \
    SINTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_sin_##size[size/2])
    SINTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_sin_##size)[size/2]
 #define SINETABLE(size) \
    SINETABLE_CONST DECLARE_ALIGNED_16(float, ff_sine_##size[size])
    SINETABLE_CONST DECLARE_ALIGNED_16(float, ff_sine_##size)[size]
 extern COSTABLE(16);
 extern COSTABLE(32);
 extern COSTABLE(64);
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -532,9 +532,9 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
    PutBitContext pb, vs_pb;
    GetBitContext gb;
    BlockInfo mb_data[5 * DV_MAX_BPM], *mb, *mb1;
    DECLARE_ALIGNED_16(DCTELEM, sblock[5*DV_MAX_BPM][64]);
    DECLARE_ALIGNED_16(uint8_t, mb_bit_buffer[80 + 4]); /* allow some slack */
    DECLARE_ALIGNED_16(uint8_t, vs_bit_buffer[5 * 80 + 4]); /* allow some slack */
    DECLARE_ALIGNED_16(DCTELEM, sblock)[5*DV_MAX_BPM][64];
    DECLARE_ALIGNED_16(uint8_t, mb_bit_buffer)[80 + 4]; /* allow some slack */
    DECLARE_ALIGNED_16(uint8_t, vs_bit_buffer)[5 * 80 + 4]; /* allow some slack */
    const int log2_blocksize = 3-s->avctx->lowres;
    int is_field_mode[5];

@@ -833,7 +833,7 @@ static av_always_inline int dv_init_enc_block(EncBlockInfo* bi, uint8_t *data, i
 {
    const int *weight;
    const uint8_t* zigzag_scan;
    DECLARE_ALIGNED_16(DCTELEM, blk[64]);
    DECLARE_ALIGNED_16(DCTELEM, blk)[64];
    int i, area;
    /* We offer two different methods for class number assignment: the
       method suggested in SMPTE 314M Table 22, and an improved
--- a/libavcodec/eamad.c
+++ b/libavcodec/eamad.c
@@ -46,7 +46,7 @@ typedef struct MadContext {
    AVFrame last_frame;
    void *bitstream_buf;
    unsigned int bitstream_buf_size;
    DECLARE_ALIGNED_16(DCTELEM, block[64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[64];
 } MadContext;

 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int count)
--- a/libavcodec/eatgq.c
+++ b/libavcodec/eatgq.c
@@ -42,7 +42,7 @@ typedef struct TgqContext {
    int width,height;
    ScanTable scantable;
    int qtable[64];
    DECLARE_ALIGNED_16(DCTELEM, block[6][64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
 } TgqContext;

 static av_cold int tgq_decode_init(AVCodecContext *avctx){
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -40,7 +40,7 @@ typedef struct TqiContext {
    AVFrame frame;
    void *bitstream_buf;
    unsigned int bitstream_buf_size;
    DECLARE_ALIGNED_16(DCTELEM, block[6][64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
 } TqiContext;

 static av_cold int tqi_decode_init(AVCodecContext *avctx)
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -299,7 +299,7 @@ typedef struct H264Context{
     * non zero coeff count cache.
     * is 64 if not available.
     */
    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8];

    /*
    .UU.YYYY
@@ -312,8 +312,8 @@ typedef struct H264Context{
    /**
     * Motion vector cache.
     */
    DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
    DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
    DECLARE_ALIGNED_8(int16_t, mv_cache)[2][5*8][2];
    DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8];
 #define LIST_NOT_USED -1 //FIXME rename?
 #define PART_NOT_AVAILABLE -2

@@ -377,7 +377,7 @@ typedef struct H264Context{
    int mb_field_decoding_flag;
    int mb_mbaff;              ///< mb_aff_frame && mb_field_decoding_flag

    DECLARE_ALIGNED_8(uint16_t, sub_mb_type[4]);
    DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4];

    //POC stuff
    int poc_lsb;
@@ -456,7 +456,7 @@ typedef struct H264Context{
    GetBitContext *intra_gb_ptr;
    GetBitContext *inter_gb_ptr;

    DECLARE_ALIGNED_16(DCTELEM, mb[16*24]);
    DECLARE_ALIGNED_16(DCTELEM, mb)[16*24];
    DCTELEM mb_padding[256];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb

    /**
@@ -475,7 +475,7 @@ typedef struct H264Context{
    uint8_t     *chroma_pred_mode_table;
    int         last_qscale_diff;
    int16_t     (*mvd_table[2])[2];
    DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
    DECLARE_ALIGNED_8(int16_t, mvd_cache)[2][5*8][2];
    uint8_t     *direct_table;
    uint8_t     direct_cache[5*8];

--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1041,7 +1041,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
    return ctx + 4 * cat;
 }

 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -372,7 +372,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
        filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
        return;
    } else {
        DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
        DECLARE_ALIGNED_8(int16_t, bS)[2][4][4];
        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
        int edges;
        if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
@@ -457,7 +457,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
        int j;

        for(j=0; j<2; j++, mbn_xy += s->mb_stride){
            DECLARE_ALIGNED_8(int16_t, bS[4]);
            DECLARE_ALIGNED_8(int16_t, bS)[4];
            int qp;
            if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) {
                *(uint64_t*)bS= 0x0003000300030003ULL;
@@ -488,7 +488,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
        /* mbn_xy: neighbor macroblock */
        const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
        const int mbn_type = s->current_picture.mb_type[mbn_xy];
        DECLARE_ALIGNED_8(int16_t, bS[4]);
        DECLARE_ALIGNED_8(int16_t, bS)[4];
        int qp;

        if( (edge&1) && IS_8x8DCT(mb_type) )
@@ -632,7 +632,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         */
        const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
        const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
        DECLARE_ALIGNED_8(int16_t, bS[8]);
        DECLARE_ALIGNED_8(int16_t, bS)[8];
        int qp[2];
        int bqp[2];
        int rqp[2];
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -84,8 +84,8 @@ typedef struct {

    DSPContext dsp;
    FFTContext fft;
    DECLARE_ALIGNED_16(FFTComplex, samples[COEFFS/2]);
    DECLARE_ALIGNED_16(float, out_samples[COEFFS]);
    DECLARE_ALIGNED_16(FFTComplex, samples)[COEFFS/2];
    DECLARE_ALIGNED_16(float, out_samples)[COEFFS];
 } IMCContext;

 static VLC huffman_vlc[4][4];
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c
@@ -563,7 +563,7 @@ not_coded:

 static int h263_skip_b_part(MpegEncContext *s, int cbp)
 {
    DECLARE_ALIGNED(16, DCTELEM, dblock[64]);
    DECLARE_ALIGNED(16, DCTELEM, dblock)[64];
    int i, mbi;

    /* we have to set s->mb_intra to zero to decode B-part of PB-frame correctly
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -44,7 +44,7 @@ typedef struct MDECContext{
    int mb_width;
    int mb_height;
    int mb_x, mb_y;
    DECLARE_ALIGNED_16(DCTELEM, block[6][64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
    uint8_t *bitstream_buffer;
    unsigned int bitstream_buffer_size;
    int block_last_index[6];
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -45,7 +45,7 @@ typedef struct {
    AVFrame         buf_ptrs    [16];
    AVPicture       flipped_ptrs[16];

    DECLARE_ALIGNED_16(DCTELEM, dct_block[64]);
    DECLARE_ALIGNED_16(DCTELEM, dct_block)[64];

    GetBitContext   gb;
    ScanTable       scantable;
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h
@@ -84,7 +84,7 @@ typedef struct MJpegDecodeContext {
    int got_picture;                                ///< we found a SOF and picture is valid, too.
    int linesize[MAX_COMPONENTS];                   ///< linesize << interlaced
    int8_t *qscale_table;
    DECLARE_ALIGNED_16(DCTELEM, block[64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[64];
    DCTELEM (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
    uint8_t *last_nnz[MAX_COMPONENTS];
    uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@@ -65,9 +65,9 @@ typedef struct {
    AVLFG rnd;
    int frames_to_skip;
    /* for synthesis */
    DECLARE_ALIGNED_16(MPA_INT, synth_buf[MPA_MAX_CHANNELS][512*2]);
    DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
    int synth_buf_offset[MPA_MAX_CHANNELS];
    DECLARE_ALIGNED_16(int32_t, sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT]);
    DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
 } MPCContext;

 void ff_mpc_init(void);
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -132,9 +132,9 @@ typedef struct MPADecodeContext {
    uint32_t free_format_next_header;
    GetBitContext gb;
    GetBitContext in_gb;
    DECLARE_ALIGNED_16(MPA_INT, synth_buf[MPA_MAX_CHANNELS][512 * 2]);
    DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
    int synth_buf_offset[MPA_MAX_CHANNELS];
    DECLARE_ALIGNED_16(int32_t, sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT]);
    DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
    int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
    GranuleDef granules[2][2]; /* Used in Layer 3 */
 #ifdef DEBUG
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -95,7 +95,7 @@ static const int32_t scale_factor_mult2[3][3] = {
    SCALE_GEN(4.0 / 9.0), /* 9 steps */
 };

 DECLARE_ALIGNED_16(MPA_INT, ff_mpa_synth_window[512]);
 DECLARE_ALIGNED_16(MPA_INT, ff_mpa_synth_window)[512];

 /**
 * Convert region offsets to region sizes and truncate
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -3311,7 +3311,7 @@ static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                        DCTELEM *block, int16_t *weight, DCTELEM *orig,
                        int n, int qscale){
    int16_t rem[64];
    DECLARE_ALIGNED_16(DCTELEM, d1[64]);
    DECLARE_ALIGNED_16(DCTELEM, d1)[64];
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
 //    unsigned int threshold1, threshold2;
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -43,7 +43,7 @@

 typedef struct NellyMoserDecodeContext {
    AVCodecContext* avctx;
    DECLARE_ALIGNED_16(float,float_buf[NELLY_SAMPLES]);
    DECLARE_ALIGNED_16(float,float_buf)[NELLY_SAMPLES];
    float           state[128];
    AVLFG           random_state;
    GetBitContext   gb;
@@ -51,7 +51,7 @@ typedef struct NellyMoserDecodeContext {
    float           scale_bias;
    DSPContext      dsp;
    FFTContext      imdct_ctx;
    DECLARE_ALIGNED_16(float,imdct_out[NELLY_BUF_LEN * 2]);
    DECLARE_ALIGNED_16(float,imdct_out)[NELLY_BUF_LEN * 2];
 } NellyMoserDecodeContext;

 static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in)
--- a/libavcodec/nellymoserenc.c
+++ b/libavcodec/nellymoserenc.c
@@ -53,9 +53,9 @@ typedef struct NellyMoserEncodeContext {
    int             have_saved;
    DSPContext      dsp;
    FFTContext      mdct_ctx;
    DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]);
    DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]);
    DECLARE_ALIGNED_16(float, buf[2][3 * NELLY_BUF_LEN]);     ///< sample buffer
    DECLARE_ALIGNED_16(float, mdct_out)[NELLY_SAMPLES];
    DECLARE_ALIGNED_16(float, in_buff)[NELLY_SAMPLES];
    DECLARE_ALIGNED_16(float, buf)[2][3 * NELLY_BUF_LEN];     ///< sample buffer
    float           (*opt )[NELLY_BANDS];
    uint8_t         (*path)[NELLY_BANDS];
 } NellyMoserEncodeContext;
--- a/libavcodec/ppc/float_altivec.c
+++ b/libavcodec/ppc/float_altivec.c
@@ -226,7 +226,7 @@ float_to_int16_interleave_altivec(int16_t *dst, const float **src,
            dst+=8;
        }
    } else {
        DECLARE_ALIGNED(16, int16_t, tmp[len]);
        DECLARE_ALIGNED(16, int16_t, tmp)[len];
        int c, j;
        for (c = 0; c < channels; c++) {
            float_to_int16_altivec(tmp, src[c], len);
--- a/libavcodec/ppc/gmc_altivec.c
+++ b/libavcodec/ppc/gmc_altivec.c
@@ -34,7 +34,7 @@ void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int str
 {
 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
    const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder;
    const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
    const DECLARE_ALIGNED_16(unsigned short, ABCD)[8] =
        {
            (16-x16)*(16-y16), /* A */
            (   x16)*(16-y16), /* B */
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -79,7 +79,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uin
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
 }\
@@ -89,13 +89,13 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
 }\
@@ -105,79 +105,79 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
@@ -590,7 +590,7 @@ static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, D
 static inline void write16x4(uint8_t *dst, int dst_stride,
                             register vec_u8 r0, register vec_u8 r1,
                             register vec_u8 r2, register vec_u8 r3) {
    DECLARE_ALIGNED_16(unsigned char, result[64]);
    DECLARE_ALIGNED_16(unsigned char, result)[64];
    uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
    int int_dst_stride = dst_stride/4;

@@ -770,7 +770,7 @@ static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
 }

 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) {            \
    DECLARE_ALIGNED_16(unsigned char, temp[16]);                                             \
    DECLARE_ALIGNED_16(unsigned char, temp)[16];                                             \
    register vec_u8 alphavec;                                                              \
    register vec_u8 betavec;                                                               \
    register vec_u8 mask;                                                                  \
@@ -850,7 +850,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
    vec_u8 vblock;
    vec_s16 vtemp, vweight, voffset, v0, v1;
    vec_u16 vlog2_denom;
    DECLARE_ALIGNED_16(int32_t, temp[4]);
    DECLARE_ALIGNED_16(int32_t, temp)[4];
    LOAD_ZERO;

    offset <<= log2_denom;
@@ -896,7 +896,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
    vec_u8 vsrc, vdst;
    vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
    vec_u16 vlog2_denom;
    DECLARE_ALIGNED_16(int32_t, temp[4]);
    DECLARE_ALIGNED_16(int32_t, temp)[4];
    LOAD_ZERO;

    offset = ((offset + 1) | 1) << log2_denom;
--- a/libavcodec/ppc/h264_template_altivec.c
+++ b/libavcodec/ppc/h264_template_altivec.c
@@ -78,7 +78,7 @@
 void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
                                    int stride, int h, int x, int y) {
  POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
    DECLARE_ALIGNED_16(signed int, ABCD[4]) =
    DECLARE_ALIGNED_16(signed int, ABCD)[4] =
                        {((8 - x) * (8 - y)),
                         ((    x) * (8 - y)),
                         ((8 - x) * (    y)),
@@ -208,7 +208,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,

 /* this code assume that stride % 16 == 0 */
 void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
   DECLARE_ALIGNED_16(signed int, ABCD[4]) =
   DECLARE_ALIGNED_16(signed int, ABCD)[4] =
                        {((8 - x) * (8 - y)),
                         ((    x) * (8 - y)),
                         ((8 - x) * (    y)),
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -122,7 +122,7 @@ typedef struct {
 } FFTCoefficient;

 typedef struct {
    DECLARE_ALIGNED_16(QDM2Complex, complex[MPA_MAX_CHANNELS][256]);
    DECLARE_ALIGNED_16(QDM2Complex, complex)[MPA_MAX_CHANNELS][256];
 } QDM2FFT;

 /**
@@ -172,9 +172,9 @@ typedef struct {
    float output_buffer[1024];

    /// Synthesis filter
    DECLARE_ALIGNED_16(MPA_INT, synth_buf[MPA_MAX_CHANNELS][512*2]);
    DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
    int synth_buf_offset[MPA_MAX_CHANNELS];
    DECLARE_ALIGNED_16(int32_t, sb_samples[MPA_MAX_CHANNELS][128][SBLIMIT]);
    DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];

    /// Mixed temporary data used in decoding
    float tone_level[MPA_MAX_CHANNELS][30][64];
--- a/libavcodec/rtjpeg.h
+++ b/libavcodec/rtjpeg.h
@@ -31,7 +31,7 @@ typedef struct {
    uint8_t scan[64];
    uint32_t lquant[64];
    uint32_t cquant[64];
    DECLARE_ALIGNED_16(DCTELEM, block[64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[64];
 } RTJpegContext;

 void rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp,
--- a/libavcodec/rv34.h
+++ b/libavcodec/rv34.h
@@ -111,7 +111,7 @@ typedef struct RV34DecContext{
    int      *deblock_coefs; ///< deblock coefficients for each macroblock

    /** 8x8 block available flags (for MV prediction) */
    DECLARE_ALIGNED_8(uint32_t, avail_cache[3*4]);
    DECLARE_ALIGNED_8(uint32_t, avail_cache)[3*4];

    int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
    int (*decode_mb_info)(struct RV34DecContext *r);
--- a/libavcodec/sipr.h
+++ b/libavcodec/sipr.h
@@ -64,7 +64,7 @@ typedef struct {

    float excitation[L_INTERPOL + PITCH_MAX + 2 * L_SUBFR_16k];

    DECLARE_ALIGNED_16(float, synth_buf[LP_FILTER_ORDER + 5*SUBFR_SIZE + 6]);
    DECLARE_ALIGNED_16(float, synth_buf)[LP_FILTER_ORDER + 5*SUBFR_SIZE + 6];

    float lsp_history[LP_FILTER_ORDER];
    float gain_mem;
--- a/libavcodec/sparc/simple_idct_vis.c
+++ b/libavcodec/sparc/simple_idct_vis.c
@@ -24,7 +24,7 @@

 #include "libavcodec/dsputil.h"

 static const DECLARE_ALIGNED_8(int16_t, coeffs[28]) = {
 static const DECLARE_ALIGNED_8(int16_t, coeffs)[28] = {
    - 1259,- 1259,- 1259,- 1259,
    - 4989,- 4989,- 4989,- 4989,
    -11045,-11045,-11045,-11045,
@@ -33,13 +33,13 @@ static const DECLARE_ALIGNED_8(int16_t, coeffs[28]) = {
     25080, 25080, 25080, 25080,
     12785, 12785, 12785, 12785
 };
 static const DECLARE_ALIGNED_8(uint16_t, scale[4]) = {
 static const DECLARE_ALIGNED_8(uint16_t, scale)[4] = {
    65536>>6, 65536>>6, 65536>>6, 65536>>6
 };
 static const DECLARE_ALIGNED_8(uint16_t, rounder[4]) = {
 static const DECLARE_ALIGNED_8(uint16_t, rounder)[4] = {
    1<<5, 1<<5, 1<<5, 1<<5
 };
 static const DECLARE_ALIGNED_8(uint16_t, expand[4]) = {
 static const DECLARE_ALIGNED_8(uint16_t, expand)[4] = {
    1<<14, 1<<14, 1<<14, 1<<14
 };

@@ -386,7 +386,7 @@ static const DECLARE_ALIGNED_8(uint16_t, expand[4]) = {

 void ff_simple_idct_vis(DCTELEM *data) {
    int out1, out2, out3, out4;
    DECLARE_ALIGNED_8(int16_t, temp[8*8]);
    DECLARE_ALIGNED_8(int16_t, temp)[8*8];

    __asm__ volatile(
        INIT_IDCT
--- a/libavcodec/vorbis_data.c
+++ b/libavcodec/vorbis_data.c
@@ -40,7 +40,7 @@ const int64_t ff_vorbis_channel_layouts[7] = {
    0
 };

 DECLARE_ALIGNED_16(static const float, vwin64[32]) = {
 DECLARE_ALIGNED_16(static const float, vwin64)[32] = {
    0.0009460463F, 0.0085006468F, 0.0235352254F, 0.0458950567F,
    0.0753351908F, 0.1115073077F, 0.1539457973F, 0.2020557475F,
    0.2551056759F, 0.3122276645F, 0.3724270287F, 0.4346027792F,
@@ -51,7 +51,7 @@ DECLARE_ALIGNED_16(static const float, vwin64[32]) = {
    0.9989462667F, 0.9997230082F, 0.9999638688F, 0.9999995525F,
 };

 DECLARE_ALIGNED_16(static const float, vwin128[64]) = {
 DECLARE_ALIGNED_16(static const float, vwin128)[64] = {
    0.0002365472F, 0.0021280687F, 0.0059065254F, 0.0115626550F,
    0.0190823442F, 0.0284463735F, 0.0396300935F, 0.0526030430F,
    0.0673285281F, 0.0837631763F, 0.1018564887F, 0.1215504095F,
@@ -70,7 +70,7 @@ DECLARE_ALIGNED_16(static const float, vwin128[64]) = {
    0.9999331503F, 0.9999825563F, 0.9999977357F, 0.9999999720F,
 };

 DECLARE_ALIGNED_16(static const float, vwin256[128]) = {
 DECLARE_ALIGNED_16(static const float, vwin256)[128] = {
    0.0000591390F, 0.0005321979F, 0.0014780301F, 0.0028960636F,
    0.0047854363F, 0.0071449926F, 0.0099732775F, 0.0132685298F,
    0.0170286741F, 0.0212513119F, 0.0259337111F, 0.0310727950F,
@@ -105,7 +105,7 @@ DECLARE_ALIGNED_16(static const float, vwin256[128]) = {
    0.9999958064F, 0.9999989077F, 0.9999998584F, 0.9999999983F,
 };

 DECLARE_ALIGNED_16(static const float, vwin512[256]) = {
 DECLARE_ALIGNED_16(static const float, vwin512)[256] = {
    0.0000147849F, 0.0001330607F, 0.0003695946F, 0.0007243509F,
    0.0011972759F, 0.0017882983F, 0.0024973285F, 0.0033242588F,
    0.0042689632F, 0.0053312973F, 0.0065110982F, 0.0078081841F,
@@ -172,7 +172,7 @@ DECLARE_ALIGNED_16(static const float, vwin512[256]) = {
    0.9999997377F, 0.9999999317F, 0.9999999911F, 0.9999999999F,
 };

 DECLARE_ALIGNED_16(static const float, vwin1024[512]) = {
 DECLARE_ALIGNED_16(static const float, vwin1024)[512] = {
    0.0000036962F, 0.0000332659F, 0.0000924041F, 0.0001811086F,
    0.0002993761F, 0.0004472021F, 0.0006245811F, 0.0008315063F,
    0.0010679699F, 0.0013339631F, 0.0016294757F, 0.0019544965F,
@@ -303,7 +303,7 @@ DECLARE_ALIGNED_16(static const float, vwin1024[512]) = {
    0.9999999836F, 0.9999999957F, 0.9999999994F, 1.0000000000F,
 };

 DECLARE_ALIGNED_16(static const float, vwin2048[1024]) = {
 DECLARE_ALIGNED_16(static const float, vwin2048)[1024] = {
    0.0000009241F, 0.0000083165F, 0.0000231014F, 0.0000452785F,
    0.0000748476F, 0.0001118085F, 0.0001561608F, 0.0002079041F,
    0.0002670379F, 0.0003335617F, 0.0004074748F, 0.0004887765F,
@@ -562,7 +562,7 @@ DECLARE_ALIGNED_16(static const float, vwin2048[1024]) = {
    0.9999999990F, 0.9999999997F, 1.0000000000F, 1.0000000000F,
 };

 DECLARE_ALIGNED_16(static const float, vwin4096[2048]) = {
 DECLARE_ALIGNED_16(static const float, vwin4096)[2048] = {
    0.0000002310F, 0.0000020791F, 0.0000057754F, 0.0000113197F,
    0.0000187121F, 0.0000279526F, 0.0000390412F, 0.0000519777F,
    0.0000667623F, 0.0000833949F, 0.0001018753F, 0.0001222036F,
@@ -1077,7 +1077,7 @@ DECLARE_ALIGNED_16(static const float, vwin4096[2048]) = {
    0.9999999999F, 1.0000000000F, 1.0000000000F, 1.0000000000F,
 };

 DECLARE_ALIGNED_16(static const float, vwin8192[4096]) = {
 DECLARE_ALIGNED_16(static const float, vwin8192)[4096] = {
    0.0000000578F, 0.0000005198F, 0.0000014438F, 0.0000028299F,
    0.0000046780F, 0.0000069882F, 0.0000097604F, 0.0000129945F,
    0.0000166908F, 0.0000208490F, 0.0000254692F, 0.0000305515F,
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -200,7 +200,7 @@ typedef struct Vp3DecodeContext {

    /* these arrays need to be on 16-byte boundaries since SSE2 operations
     * index into them */
    DECLARE_ALIGNED_16(int16_t, qmat[3][2][3][64]);     //<qmat[qpi][is_inter][plane]
    DECLARE_ALIGNED_16(int16_t, qmat)[3][2][3][64];     //<qmat[qpi][is_inter][plane]

    /* This table contains superblock_count * 16 entries. Each set of 16
     * numbers corresponds to the fragment indexes 0..15 of the superblock.
@@ -238,7 +238,7 @@ typedef struct Vp3DecodeContext {
    uint16_t huffman_table[80][32][2];

    uint8_t filter_limit_values[64];
    DECLARE_ALIGNED_8(int, bounding_values_array[256+2]);
    DECLARE_ALIGNED_8(int, bounding_values_array)[256+2];
 } Vp3DecodeContext;

 /************************************************************************
@@ -1397,7 +1397,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
 {
    int x;
    int16_t *dequantizer;
    DECLARE_ALIGNED_16(DCTELEM, block[64]);
    DECLARE_ALIGNED_16(DCTELEM, block)[64];
    int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
    int motion_halfpel_index;
    uint8_t *motion_source;
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -120,7 +120,7 @@ struct vp56_context {
    /* blocks / macroblock */
    VP56mb mb_type;
    VP56Macroblock *macroblocks;
    DECLARE_ALIGNED_16(DCTELEM, block_coeff[6][64]);
    DECLARE_ALIGNED_16(DCTELEM, block_coeff)[6][64];

    /* motion vectors */
    VP56mv mv[6];  /* vectors for each block in MB */
--- a/libavcodec/wma.h
+++ b/libavcodec/wma.h
@@ -111,15 +111,15 @@ typedef struct WMACodecContext {
    uint8_t ms_stereo;                      ///< true if mid/side stereo mode
    uint8_t channel_coded[MAX_CHANNELS];    ///< true if channel is coded
    int exponents_bsize[MAX_CHANNELS];      ///< log2 ratio frame/exp. length
    DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]);
    DECLARE_ALIGNED_16(float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE];
    float max_exponent[MAX_CHANNELS];
    WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
    DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
    DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
    DECLARE_ALIGNED_16(float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
    DECLARE_ALIGNED_16(FFTSample, output)[BLOCK_MAX_SIZE * 2];
    FFTContext mdct_ctx[BLOCK_NB_SIZES];
    float *windows[BLOCK_NB_SIZES];
    /* output buffer for one frame and the last for IMDCT windowing */
    DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]);
    DECLARE_ALIGNED_16(float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
    /* last frame info */
    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
    int last_bitoffset;
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -142,7 +142,7 @@ typedef struct {
    int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
    uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
    float*   coeffs;                                  ///< pointer to the subframe decode buffer
    DECLARE_ALIGNED_16(float, out[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]); ///< output buffer
    DECLARE_ALIGNED_16(float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
 } WMAProChannelCtx;

 /**
@@ -167,7 +167,7 @@ typedef struct WMAProDecodeCtx {
                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
    PutBitContext    pb;                            ///< context for filling the frame_data buffer
    FFTContext       mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
    DECLARE_ALIGNED_16(float, tmp[WMAPRO_BLOCK_MAX_SIZE]); ///< IMDCT output buffer
    DECLARE_ALIGNED_16(float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
    float*           windows[WMAPRO_BLOCK_SIZES];   ///< windows for the different block sizes

    /* frame size dependent frame information (set during initialization) */
--- a/libavcodec/wmv2.h
+++ b/libavcodec/wmv2.h
@@ -50,7 +50,7 @@ typedef struct Wmv2Context{
    int hshift;

    ScanTable abt_scantable[2];
    DECLARE_ALIGNED_16(DCTELEM, abt_block2[6][64]);
    DECLARE_ALIGNED_16(DCTELEM, abt_block2)[6][64];
 }Wmv2Context;

 void ff_wmv2_common_init(Wmv2Context * w);
--- a/libavcodec/x86/cavsdsp_mmx.c
+++ b/libavcodec/x86/cavsdsp_mmx.c
@@ -113,7 +113,7 @@ static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
 {
    int i;
    DECLARE_ALIGNED_8(int16_t, b2[64]);
    DECLARE_ALIGNED_8(int16_t, b2)[64];

    for(i=0; i<2; i++){
        DECLARE_ALIGNED_8(uint64_t, tmp);
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -42,7 +42,7 @@ int mm_flags; /* multimedia extension flags */
 DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;
 DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;

 DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000[2]) =
 DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] =
 {0x8000000080000000ULL, 0x8000000080000000ULL};

 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3  ) = 0x0003000300030003ULL;
@@ -69,8 +69,8 @@ DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;

 DECLARE_ALIGNED_16(const double, ff_pd_1[2]) = { 1.0, 1.0 };
 DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 };
 DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 };
 DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 };

 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
 #define MOVQ_ZERO(regd)  __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
@@ -277,7 +277,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
            :"memory");
 }

 DECLARE_ASM_CONST(8, uint8_t, ff_vector128[8]) =
 DECLARE_ASM_CONST(8, uint8_t, ff_vector128)[8] =
  { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };

 #define put_signed_pixels_clamped_mmx_half(off) \
@@ -754,7 +754,7 @@ static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int
 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    const int strength= ff_h263_loop_filter_strength[qscale];
    DECLARE_ALIGNED(8, uint64_t, temp[4]);
    DECLARE_ALIGNED(8, uint64_t, temp)[4];
    uint8_t *btemp= (uint8_t*)temp;

    src -= 2;
@@ -2026,7 +2026,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c
    } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
        MIX5(IF1,IF0);
    } else {
        DECLARE_ALIGNED_16(float, matrix_simd[in_ch][2][4]);
        DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4];
        j = 2*in_ch*sizeof(float);
        __asm__ volatile(
            "1: \n"
@@ -2413,7 +2413,7 @@ static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int al
 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
    DECLARE_ALIGNED_16(int16_t, tmp[len]);\
    DECLARE_ALIGNED_16(int16_t, tmp)[len];\
    int i,j,c;\
    for(c=0; c<channels; c++){\
        float_to_int16_##cpu(tmp, src[c], len);\
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -1063,7 +1063,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c

 #define HADAMARD8_DIFF_MMX(cpu) \
 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\
    DECLARE_ALIGNED_8(uint64_t, temp[13]);\
    DECLARE_ALIGNED_8(uint64_t, temp)[13];\
    int sum;\
 \
    assert(h==8);\
@@ -1146,7 +1146,7 @@ WRAPPER8_16_SQ(hadamard8_diff_##cpu, hadamard8_diff16_##cpu)

 #define HADAMARD8_DIFF_SSE2(cpu) \
 static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\
    DECLARE_ALIGNED_16(uint64_t, temp[4]);\
    DECLARE_ALIGNED_16(uint64_t, temp)[4];\
    int sum;\
 \
    assert(h==8);\
--- a/libavcodec/x86/fft_3dn2.c
+++ b/libavcodec/x86/fft_3dn2.c
@@ -23,7 +23,7 @@
 #include "libavcodec/dsputil.h"
 #include "fft.h"

 DECLARE_ALIGNED_8(static const int, m1m1[2]) = { 1<<31, 1<<31 };
 DECLARE_ALIGNED_8(static const int, m1m1)[2] = { 1<<31, 1<<31 };

 #ifdef EMULATE_3DNOWEXT
 #define PSWAPD(s,d)\
--- a/libavcodec/x86/fft_sse.c
+++ b/libavcodec/x86/fft_sse.c
@@ -23,7 +23,7 @@
 #include "libavcodec/dsputil.h"
 #include "fft.h"

 DECLARE_ALIGNED(16, static const int, m1m1m1m1[4]) =
 DECLARE_ALIGNED(16, static const int, m1m1m1m1)[4] =
    { 1 << 31, 1 << 31, 1 << 31, 1 << 31 };

 void ff_fft_dispatch_sse(FFTComplex *z, int nbits);
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -157,7 +157,7 @@ static inline void h264_idct8_1d(int16_t *block)
 static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
 {
    int i;
    DECLARE_ALIGNED_8(int16_t, b2[64]);
    DECLARE_ALIGNED_8(int16_t, b2)[64];

    block[0] += 32;

@@ -628,7 +628,7 @@ static void ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, DCTE

 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
 {
    DECLARE_ALIGNED_8(uint64_t, tmp0[2]);
    DECLARE_ALIGNED_8(uint64_t, tmp0)[2];

    __asm__ volatile(
        "movq    (%2,%4), %%mm0    \n\t" //p1
@@ -690,7 +690,7 @@ static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, in
 {
    //FIXME: could cut some load/stores by merging transpose with filter
    // also, it only needs to transpose 6x8
    DECLARE_ALIGNED_8(uint8_t, trans[8*8]);
    DECLARE_ALIGNED_8(uint8_t, trans)[8*8];
    int i;
    for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
        if((tc0[0] & tc0[1]) < 0)
@@ -734,7 +734,7 @@ static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha,
 static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
    //FIXME: could cut some load/stores by merging transpose with filter
    DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
    transpose4x4(trans, pix-2, 8, stride);
    transpose4x4(trans+4, pix-2+4*stride, 8, stride);
    h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
@@ -784,7 +784,7 @@ static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
 static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
 {
    //FIXME: could cut some load/stores by merging transpose with filter
    DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
    transpose4x4(trans, pix-2, 8, stride);
    transpose4x4(trans+4, pix-2+4*stride, 8, stride);
    h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
@@ -1974,7 +1974,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *

 #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
 }\
@@ -1984,43 +1984,43 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
 }\

 #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint16_t, temp[SIZE*(SIZE<8?12:24)]);\
    DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    assert(((int)temp & 7) == 0);\
@@ -2029,7 +2029,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    assert(((int)temp & 7) == 0);\
@@ -2038,7 +2038,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    assert(((int)temp & 7) == 0);\
@@ -2047,7 +2047,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
    uint8_t * const halfHV= temp;\
    int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
    assert(((int)temp & 7) == 0);\
@@ -2110,7 +2110,7 @@ H264_MC_816(H264_MC_HV, ssse3)
 #endif

 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
 DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg[4]) = {
 DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = {
    0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
 };

--- a/libavcodec/x86/idct_mmx_xvid.c
+++ b/libavcodec/x86/idct_mmx_xvid.c
@@ -64,13 +64,13 @@
 //-----------------------------------------------------------------------------


 DECLARE_ALIGNED(8, static const int16_t, tg_1_16[4*4]) = {
 DECLARE_ALIGNED(8, static const int16_t, tg_1_16)[4*4] = {
  13036,13036,13036,13036,        // tg * (2<<16) + 0.5
  27146,27146,27146,27146,        // tg * (2<<16) + 0.5
  -21746,-21746,-21746,-21746,    // tg * (2<<16) + 0.5
  23170,23170,23170,23170};       // cos * (2<<15) + 0.5

 DECLARE_ALIGNED(8, static const int32_t, rounder_0[2*8]) = {
 DECLARE_ALIGNED(8, static const int32_t, rounder_0)[2*8] = {
  65536,65536,
  3597,3597,
  2260,2260,
@@ -140,7 +140,7 @@ DECLARE_ALIGNED(8, static const int32_t, rounder_0[2*8]) = {
 //-----------------------------------------------------------------------------

 // Table for rows 0,4 - constants are multiplied by cos_4_16
 DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmx[32*4]) = {
 DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmx)[32*4] = {
  16384,16384,16384,-16384,       // movq-> w06 w04 w02 w00
  21407,8867,8867,-21407,         // w07 w05 w03 w01
  16384,-16384,16384,16384,       // w14 w12 w10 w08
@@ -182,7 +182,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmx[32*4]) = {
 //-----------------------------------------------------------------------------

 // %3 for rows 0,4 - constants are multiplied by cos_4_16
 DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm[32*4]) = {
 DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm)[32*4] = {
  16384,21407,16384,8867,      // movq-> w05 w04 w01 w00
  16384,8867,-16384,-21407,    // w07 w06 w03 w02
  16384,-8867,16384,-21407,    // w13 w12 w09 w08
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -52,41 +52,41 @@
 #define ROW_SHIFT 11
 #define COL_SHIFT 6

 DECLARE_ASM_CONST(16, int16_t, tan1[]) = {X8(13036)}; // tan( pi/16)
 DECLARE_ASM_CONST(16, int16_t, tan2[]) = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1
 DECLARE_ASM_CONST(16, int16_t, tan3[]) = {X8(43790)}; // tan(3pi/16)-1
 DECLARE_ASM_CONST(16, int16_t, sqrt2[])= {X8(23170)}; // 0.5/sqrt(2)
 DECLARE_ASM_CONST(8,  uint8_t, m127[]) = {X8(127)};
 DECLARE_ASM_CONST(16, int16_t, tan1)[] = {X8(13036)}; // tan( pi/16)
 DECLARE_ASM_CONST(16, int16_t, tan2)[] = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1
 DECLARE_ASM_CONST(16, int16_t, tan3)[] = {X8(43790)}; // tan(3pi/16)-1
 DECLARE_ASM_CONST(16, int16_t, sqrt2)[]= {X8(23170)}; // 0.5/sqrt(2)
 DECLARE_ASM_CONST(8,  uint8_t, m127)[] = {X8(127)};

 DECLARE_ASM_CONST(16, int16_t, iTab1[]) = {
 DECLARE_ASM_CONST(16, int16_t, iTab1)[] = {
 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,
 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,
 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,
 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b
 };

 DECLARE_ASM_CONST(16, int16_t, iTab2[]) = {
 DECLARE_ASM_CONST(16, int16_t, iTab2)[] = {
 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,
 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,
 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,
 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df
 };

 DECLARE_ASM_CONST(16, int16_t, iTab3[]) = {
 DECLARE_ASM_CONST(16, int16_t, iTab3)[] = {
 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,
 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,
 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,
 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04
 };

 DECLARE_ASM_CONST(16, int16_t, iTab4[]) = {
 DECLARE_ASM_CONST(16, int16_t, iTab4)[] = {
 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,
 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,
 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,
 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e
 };

 DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = {
 DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders)[] = {
 65536, 65536, 65536, 65536,
  3597,  3597,  3597,  3597,
  2260,  2260,  2260,  2260,
--- a/libavcodec/x86/motion_est_mmx.c
+++ b/libavcodec/x86/motion_est_mmx.c
@@ -26,7 +26,7 @@
 #include "libavcodec/dsputil.h"
 #include "dsputil_mmx.h"

 DECLARE_ASM_CONST(8, uint64_t, round_tab[3])={
 DECLARE_ASM_CONST(8, uint64_t, round_tab)[3]={
 0x0000000000000000ULL,
 0x0001000100010001ULL,
 0x0002000200020002ULL,
--- a/libavcodec/x86/mpegvideo_mmx_template.c
+++ b/libavcodec/x86/mpegvideo_mmx_template.c
@@ -98,7 +98,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
    x86_reg last_non_zero_p1;
    int level=0, q; //=0 is because gcc says uninitialized ...
    const uint16_t *qmat, *bias;
    DECLARE_ALIGNED_16(int16_t, temp_block[64]);
    DECLARE_ALIGNED_16(int16_t, temp_block)[64];

    assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?

--- a/libavcodec/x86/rv40dsp_mmx.c
+++ b/libavcodec/x86/rv40dsp_mmx.c
@@ -24,7 +24,7 @@
 #include "dsputil_mmx.h"

 /* bias interleaved with bias div 8, use p+1 to access bias div 8 */
 DECLARE_ALIGNED_8(static const uint64_t, rv40_bias_reg[4][8]) = {
 DECLARE_ALIGNED_8(static const uint64_t, rv40_bias_reg)[4][8] = {
    { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0010001000100010ULL, 0x0002000200020002ULL,
      0x0020002000200020ULL, 0x0004000400040004ULL, 0x0010001000100010ULL, 0x0002000200020002ULL },
    { 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL,
--- a/libavcodec/x86/simple_idct_mmx.c
+++ b/libavcodec/x86/simple_idct_mmx.c
@@ -52,7 +52,7 @@
 DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL;
 DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL;

 DECLARE_ALIGNED(8, static const int16_t, coeffs[])= {
 DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
        1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
 //        1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0,
 //        0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16),
@@ -211,7 +211,7 @@ row[7] = input[13];

 static inline void idct(int16_t *block)
 {
        DECLARE_ALIGNED(8, int64_t, align_tmp[16]);
        DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
        int16_t * const temp= (int16_t*)align_tmp;

        __asm__ volatile(
--- a/libavcodec/x86/snowdsp_mmx.c
+++ b/libavcodec/x86/snowdsp_mmx.c
@@ -25,7 +25,7 @@

 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
    const int w2= (width+1)>>1;
    DECLARE_ALIGNED_16(IDWTELEM, temp[width>>1]);
    DECLARE_ALIGNED_16(IDWTELEM, temp)[width>>1];
    const int w_l= (width>>1);
    const int w_r= w2 - 1;
    int i;
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -442,7 +442,7 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
            static const int shift_value[] = { 0, 5, 1, 5 };\
            int              shift = (shift_value[hmode]+shift_value[vmode])>>1;\
            int              r;\
            DECLARE_ALIGNED_16(int16_t, tmp[12*8]);\
            DECLARE_ALIGNED_16(int16_t, tmp)[12*8];\
 \
            r = (1<<(shift-1)) + rnd-1;\
            vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\
--- a/libavcodec/x86/vp3dsp_sse2.c
+++ b/libavcodec/x86/vp3dsp_sse2.c
@@ -26,7 +26,7 @@
 #include "libavcodec/dsputil.h"
 #include "dsputil_mmx.h"

 DECLARE_ALIGNED_16(const uint16_t, ff_vp3_idct_data[7 * 8]) =
 DECLARE_ALIGNED_16(const uint16_t, ff_vp3_idct_data)[7 * 8] =
 {
    64277,64277,64277,64277,64277,64277,64277,64277,
    60547,60547,60547,60547,60547,60547,60547,60547,
--- a/libavutil/des.c
+++ b/libavutil/des.c
@@ -339,10 +339,10 @@ static uint64_t rand64(void) {
 }

 static const uint8_t test_key[] = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0};
 static const DECLARE_ALIGNED(8, uint8_t, plain[]) = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10};
 static const DECLARE_ALIGNED(8, uint8_t, crypt[]) = {0x4a, 0xb6, 0x5b, 0x3d, 0x4b, 0x06, 0x15, 0x18};
 static DECLARE_ALIGNED(8, uint8_t, tmp[8]);
 static DECLARE_ALIGNED(8, uint8_t, large_buffer[10002][8]);
 static const DECLARE_ALIGNED(8, uint8_t, plain)[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10};
 static const DECLARE_ALIGNED(8, uint8_t, crypt)[] = {0x4a, 0xb6, 0x5b, 0x3d, 0x4b, 0x06, 0x15, 0x18};
 static DECLARE_ALIGNED(8, uint8_t, tmp)[8];
 static DECLARE_ALIGNED(8, uint8_t, large_buffer)[10002][8];
 static const uint8_t cbc_key[] = {
    0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
    0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01,
--- a/libpostproc/postprocess_altivec_template.c
+++ b/libpostproc/postprocess_altivec_template.c
@@ -62,7 +62,7 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
    vector by assuming (stride % 16) == 0, unfortunately
    this is not always true.
    */
    DECLARE_ALIGNED(16, short, data[8]) =
    DECLARE_ALIGNED(16, short, data)[8] =
                    {
                        ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1,
                        data[0] * 2 + 1,
@@ -222,7 +222,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
    const vector signed int zero = vec_splat_s32(0);
    const int properStride = (stride % 16);
    const int srcAlign = ((unsigned long)src2 % 16);
    DECLARE_ALIGNED(16, short, qp[8]) = {c->QP};
    DECLARE_ALIGNED(16, short, qp)[8] = {c->QP};
    vector signed short vqp = vec_ld(0, qp);
    vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9;
    vector unsigned char vbA0, av_uninit(vbA1), av_uninit(vbA2), av_uninit(vbA3), av_uninit(vbA4), av_uninit(vbA5), av_uninit(vbA6), av_uninit(vbA7), av_uninit(vbA8), vbA9;
@@ -418,7 +418,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
    */
    uint8_t *src2 = src + stride*3;
    const vector signed int zero = vec_splat_s32(0);
    DECLARE_ALIGNED(16, short, qp[8]) = {8*c->QP};
    DECLARE_ALIGNED(16, short, qp)[8] = {8*c->QP};
    vector signed short vqp = vec_splat(
                                (vector signed short)vec_ld(0, qp), 0);

@@ -538,7 +538,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
    src & stride :-(
    */
    uint8_t *srcCopy = src;
    DECLARE_ALIGNED(16, uint8_t, dt[16]);
    DECLARE_ALIGNED(16, uint8_t, dt)[16];
    const vector signed int zero = vec_splat_s32(0);
    vector unsigned char v_dt;
    dt[0] = deringThreshold;
@@ -602,7 +602,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
    v_avg = vec_avg(v_min, v_max);
    }

    DECLARE_ALIGNED(16, signed int, S[8]);
    DECLARE_ALIGNED(16, signed int, S)[8];
    {
    const vector unsigned short mask1 = (vector unsigned short)
                                        {0x0001, 0x0002, 0x0004, 0x0008,
@@ -698,7 +698,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
    /* I'm not sure the following is actually faster
       than straight, unvectorized C code :-( */

    DECLARE_ALIGNED(16, int, tQP2[4]);
    DECLARE_ALIGNED(16, int, tQP2)[4];
    tQP2[0]= c->QP/2 + 1;
    vector signed int vQP2 = vec_ld(0, tQP2);
    vQP2 = vec_splat(vQP2, 0);
--- a/libpostproc/postprocess_internal.h
+++ b/libpostproc/postprocess_internal.h
@@ -143,8 +143,8 @@ typedef struct PPContext{
    DECLARE_ALIGNED(8, uint64_t, pQPb);
    DECLARE_ALIGNED(8, uint64_t, pQPb2);

    DECLARE_ALIGNED(8, uint64_t, mmxDcOffset[64]);
    DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold[64]);
    DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64];
    DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64];

    QP_STORE_T *stdQPTable;       ///< used to fix MPEG2 style qscale
    QP_STORE_T *nonBQPTable;
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3514,7 +3514,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                    horizX1Filter(dstBlock-4, stride, QP);
                else if(mode & H_DEBLOCK){
 #if HAVE_ALTIVEC
                    DECLARE_ALIGNED(16, unsigned char, tempBlock[272]);
                    DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
                    transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);

                    const int t=vertClassify_altivec(tempBlock-48, 16, &c);