Originally committed as revision 21377 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.6)
@@ -133,8 +133,8 @@ typedef struct VideoState { | |||||
int audio_hw_buf_size; | int audio_hw_buf_size; | ||||
/* samples output by the codec. we reserve more space for avsync | /* samples output by the codec. we reserve more space for avsync | ||||
compensation */ | compensation */ | ||||
DECLARE_ALIGNED(16,uint8_t,audio_buf1[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2]); | |||||
DECLARE_ALIGNED(16,uint8_t,audio_buf2[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2]); | |||||
DECLARE_ALIGNED(16,uint8_t,audio_buf1)[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2]; | |||||
DECLARE_ALIGNED(16,uint8_t,audio_buf2)[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2]; | |||||
uint8_t *audio_buf; | uint8_t *audio_buf; | ||||
unsigned int audio_buf_size; /* in bytes */ | unsigned int audio_buf_size; /* in bytes */ | ||||
int audio_buf_index; /* in bytes */ | int audio_buf_index; /* in bytes */ | ||||
@@ -137,7 +137,7 @@ typedef struct FourXContext{ | |||||
int mv[256]; | int mv[256]; | ||||
VLC pre_vlc; | VLC pre_vlc; | ||||
int last_dc; | int last_dc; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[6][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[6][64]; | |||||
void *bitstream_buffer; | void *bitstream_buffer; | ||||
unsigned int bitstream_buffer_size; | unsigned int bitstream_buffer_size; | ||||
int version; | int version; | ||||
@@ -214,9 +214,9 @@ typedef struct { | |||||
float sf[120]; ///< scalefactors | float sf[120]; ///< scalefactors | ||||
int sf_idx[128]; ///< scalefactor indices (used by encoder) | int sf_idx[128]; ///< scalefactor indices (used by encoder) | ||||
uint8_t zeroes[128]; ///< band is not coded (used by encoder) | uint8_t zeroes[128]; ///< band is not coded (used by encoder) | ||||
DECLARE_ALIGNED_16(float, coeffs[1024]); ///< coefficients for IMDCT | |||||
DECLARE_ALIGNED_16(float, saved[1024]); ///< overlap | |||||
DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output | |||||
DECLARE_ALIGNED_16(float, coeffs)[1024]; ///< coefficients for IMDCT | |||||
DECLARE_ALIGNED_16(float, saved)[1024]; ///< overlap | |||||
DECLARE_ALIGNED_16(float, ret)[1024]; ///< PCM output | |||||
PredictorState predictor_state[MAX_PREDICTORS]; | PredictorState predictor_state[MAX_PREDICTORS]; | ||||
} SingleChannelElement; | } SingleChannelElement; | ||||
@@ -261,7 +261,7 @@ typedef struct { | |||||
* @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.) | * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.) | ||||
* @{ | * @{ | ||||
*/ | */ | ||||
DECLARE_ALIGNED_16(float, buf_mdct[1024]); | |||||
DECLARE_ALIGNED_16(float, buf_mdct)[1024]; | |||||
/** @} */ | /** @} */ | ||||
/** | /** | ||||
@@ -284,7 +284,7 @@ typedef struct { | |||||
int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16 | int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16 | ||||
/** @} */ | /** @} */ | ||||
DECLARE_ALIGNED(16, float, temp[128]); | |||||
DECLARE_ALIGNED(16, float, temp)[128]; | |||||
enum OCStatus output_configured; | enum OCStatus output_configured; | ||||
} AACContext; | } AACContext; | ||||
@@ -52,7 +52,7 @@ typedef struct AACEncContext { | |||||
FFTContext mdct1024; ///< long (1024 samples) frame transform context | FFTContext mdct1024; ///< long (1024 samples) frame transform context | ||||
FFTContext mdct128; ///< short (128 samples) frame transform context | FFTContext mdct128; ///< short (128 samples) frame transform context | ||||
DSPContext dsp; | DSPContext dsp; | ||||
DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients | |||||
DECLARE_ALIGNED_16(FFTSample, output)[2048]; ///< temporary buffer for MDCT input coefficients | |||||
int16_t* samples; ///< saved preprocessed input | int16_t* samples; ///< saved preprocessed input | ||||
int samplerate_index; ///< MPEG-4 samplerate index | int samplerate_index; ///< MPEG-4 samplerate index | ||||
@@ -64,8 +64,8 @@ typedef struct AACEncContext { | |||||
int cur_channel; | int cur_channel; | ||||
int last_frame; | int last_frame; | ||||
float lambda; | float lambda; | ||||
DECLARE_ALIGNED_16(int, qcoefs[96][2]); ///< quantized coefficients | |||||
DECLARE_ALIGNED_16(float, scoefs[1024]); ///< scaled coefficients | |||||
DECLARE_ALIGNED_16(int, qcoefs)[96][2]; ///< quantized coefficients | |||||
DECLARE_ALIGNED_16(float, scoefs)[1024]; ///< scaled coefficients | |||||
} AACEncContext; | } AACEncContext; | ||||
#endif /* AVCODEC_AACENC_H */ | #endif /* AVCODEC_AACENC_H */ |
@@ -32,8 +32,8 @@ | |||||
#include <stdint.h> | #include <stdint.h> | ||||
DECLARE_ALIGNED(16, float, ff_aac_kbd_long_1024[1024]); | |||||
DECLARE_ALIGNED(16, float, ff_aac_kbd_short_128[128]); | |||||
DECLARE_ALIGNED(16, float, ff_aac_kbd_long_1024)[1024]; | |||||
DECLARE_ALIGNED(16, float, ff_aac_kbd_short_128)[128]; | |||||
const uint8_t ff_aac_num_swb_1024[] = { | const uint8_t ff_aac_num_swb_1024[] = { | ||||
41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40 | 41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40 | ||||
@@ -409,7 +409,7 @@ const uint16_t ff_aac_spectral_sizes[11] = { | |||||
* 64.0f is a special value indicating the existence of an escape code in the | * 64.0f is a special value indicating the existence of an escape code in the | ||||
* bitstream. | * bitstream. | ||||
*/ | */ | ||||
static const DECLARE_ALIGNED_16(float, codebook_vector0[324]) = { | |||||
static const DECLARE_ALIGNED_16(float, codebook_vector0)[324] = { | |||||
-1.0000000, -1.0000000, -1.0000000, -1.0000000, | -1.0000000, -1.0000000, -1.0000000, -1.0000000, | ||||
-1.0000000, -1.0000000, -1.0000000, 0.0000000, | -1.0000000, -1.0000000, -1.0000000, 0.0000000, | ||||
-1.0000000, -1.0000000, -1.0000000, 1.0000000, | -1.0000000, -1.0000000, -1.0000000, 1.0000000, | ||||
@@ -493,7 +493,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector0[324]) = { | |||||
1.0000000, 1.0000000, 1.0000000, 1.0000000, | 1.0000000, 1.0000000, 1.0000000, 1.0000000, | ||||
}; | }; | ||||
static const DECLARE_ALIGNED_16(float, codebook_vector2[324]) = { | |||||
static const DECLARE_ALIGNED_16(float, codebook_vector2)[324] = { | |||||
0.0000000, 0.0000000, 0.0000000, 0.0000000, | 0.0000000, 0.0000000, 0.0000000, 0.0000000, | ||||
0.0000000, 0.0000000, 0.0000000, 1.0000000, | 0.0000000, 0.0000000, 0.0000000, 1.0000000, | ||||
0.0000000, 0.0000000, 0.0000000, 2.5198421, | 0.0000000, 0.0000000, 0.0000000, 2.5198421, | ||||
@@ -577,7 +577,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector2[324]) = { | |||||
2.5198421, 2.5198421, 2.5198421, 2.5198421, | 2.5198421, 2.5198421, 2.5198421, 2.5198421, | ||||
}; | }; | ||||
static const DECLARE_ALIGNED_16(float, codebook_vector4[162]) = { | |||||
static const DECLARE_ALIGNED_16(float, codebook_vector4)[162] = { | |||||
-6.3496042, -6.3496042, -6.3496042, -4.3267487, | -6.3496042, -6.3496042, -6.3496042, -4.3267487, | ||||
-6.3496042, -2.5198421, -6.3496042, -1.0000000, | -6.3496042, -2.5198421, -6.3496042, -1.0000000, | ||||
-6.3496042, 0.0000000, -6.3496042, 1.0000000, | -6.3496042, 0.0000000, -6.3496042, 1.0000000, | ||||
@@ -621,7 +621,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector4[162]) = { | |||||
6.3496042, 6.3496042, | 6.3496042, 6.3496042, | ||||
}; | }; | ||||
static const DECLARE_ALIGNED_16(float, codebook_vector6[128]) = { | |||||
static const DECLARE_ALIGNED_16(float, codebook_vector6)[128] = { | |||||
0.0000000, 0.0000000, 0.0000000, 1.0000000, | 0.0000000, 0.0000000, 0.0000000, 1.0000000, | ||||
0.0000000, 2.5198421, 0.0000000, 4.3267487, | 0.0000000, 2.5198421, 0.0000000, 4.3267487, | ||||
0.0000000, 6.3496042, 0.0000000, 8.5498797, | 0.0000000, 6.3496042, 0.0000000, 8.5498797, | ||||
@@ -656,7 +656,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector6[128]) = { | |||||
13.3905183, 10.9027236, 13.3905183, 13.3905183, | 13.3905183, 10.9027236, 13.3905183, 13.3905183, | ||||
}; | }; | ||||
static const DECLARE_ALIGNED_16(float, codebook_vector8[338]) = { | |||||
static const DECLARE_ALIGNED_16(float, codebook_vector8)[338] = { | |||||
0.0000000, 0.0000000, 0.0000000, 1.0000000, | 0.0000000, 0.0000000, 0.0000000, 1.0000000, | ||||
0.0000000, 2.5198421, 0.0000000, 4.3267487, | 0.0000000, 2.5198421, 0.0000000, 4.3267487, | ||||
0.0000000, 6.3496042, 0.0000000, 8.5498797, | 0.0000000, 6.3496042, 0.0000000, 8.5498797, | ||||
@@ -744,7 +744,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector8[338]) = { | |||||
27.4731418, 27.4731418, | 27.4731418, 27.4731418, | ||||
}; | }; | ||||
static const DECLARE_ALIGNED_16(float, codebook_vector10[578]) = { | |||||
static const DECLARE_ALIGNED_16(float, codebook_vector10)[578] = { | |||||
0.0000000, 0.0000000, 0.0000000, 1.0000000, | 0.0000000, 0.0000000, 0.0000000, 1.0000000, | ||||
0.0000000, 2.5198421, 0.0000000, 4.3267487, | 0.0000000, 2.5198421, 0.0000000, 4.3267487, | ||||
0.0000000, 6.3496042, 0.0000000, 8.5498797, | 0.0000000, 6.3496042, 0.0000000, 8.5498797, | ||||
@@ -43,8 +43,8 @@ | |||||
/* @name window coefficients | /* @name window coefficients | ||||
* @{ | * @{ | ||||
*/ | */ | ||||
DECLARE_ALIGNED(16, extern float, ff_aac_kbd_long_1024[1024]); | |||||
DECLARE_ALIGNED(16, extern float, ff_aac_kbd_short_128[128]); | |||||
DECLARE_ALIGNED(16, extern float, ff_aac_kbd_long_1024)[1024]; | |||||
DECLARE_ALIGNED(16, extern float, ff_aac_kbd_short_128)[128]; | |||||
// @} | // @} | ||||
/* @name number of scalefactor window bands for long and short transform windows respectively | /* @name number of scalefactor window bands for long and short transform windows respectively | ||||
@@ -157,12 +157,12 @@ typedef struct { | |||||
///@} | ///@} | ||||
///@defgroup arrays aligned arrays | ///@defgroup arrays aligned arrays | ||||
DECLARE_ALIGNED_16(int, fixed_coeffs[AC3_MAX_CHANNELS][AC3_MAX_COEFS]); ///> fixed-point transform coefficients | |||||
DECLARE_ALIGNED_16(float, transform_coeffs[AC3_MAX_CHANNELS][AC3_MAX_COEFS]); ///< transform coefficients | |||||
DECLARE_ALIGNED_16(float, delay[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]); ///< delay - added to the next block | |||||
DECLARE_ALIGNED_16(float, window[AC3_BLOCK_SIZE]); ///< window coefficients | |||||
DECLARE_ALIGNED_16(float, tmp_output[AC3_BLOCK_SIZE]); ///< temporary storage for output before windowing | |||||
DECLARE_ALIGNED_16(float, output[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]); ///< output after imdct transform and windowing | |||||
DECLARE_ALIGNED_16(int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< fixed-point transform coefficients | |||||
DECLARE_ALIGNED_16(float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients | |||||
DECLARE_ALIGNED_16(float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block | |||||
DECLARE_ALIGNED_16(float, window)[AC3_BLOCK_SIZE]; ///< window coefficients | |||||
DECLARE_ALIGNED_16(float, tmp_output)[AC3_BLOCK_SIZE]; ///< temporary storage for output before windowing | |||||
DECLARE_ALIGNED_16(float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< output after imdct transform and windowing | |||||
///@} | ///@} | ||||
} AC3DecodeContext; | } AC3DecodeContext; | ||||
@@ -48,7 +48,7 @@ typedef struct ASV1Context{ | |||||
int mb_height; | int mb_height; | ||||
int mb_width2; | int mb_width2; | ||||
int mb_height2; | int mb_height2; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[6][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[6][64]; | |||||
uint16_t intra_matrix[64]; | uint16_t intra_matrix[64]; | ||||
int q_intra_matrix[64]; | int q_intra_matrix[64]; | ||||
uint8_t *bitstream_buffer; | uint8_t *bitstream_buffer; | ||||
@@ -58,11 +58,11 @@ typedef struct { | |||||
int log2_block_count[AT1_QMF_BANDS]; ///< log2 number of blocks in a band | int log2_block_count[AT1_QMF_BANDS]; ///< log2 number of blocks in a band | ||||
int num_bfus; ///< number of Block Floating Units | int num_bfus; ///< number of Block Floating Units | ||||
float* spectrum[2]; | float* spectrum[2]; | ||||
DECLARE_ALIGNED_16(float, spec1[AT1_SU_SAMPLES]); ///< mdct buffer | |||||
DECLARE_ALIGNED_16(float, spec2[AT1_SU_SAMPLES]); ///< mdct buffer | |||||
DECLARE_ALIGNED_16(float, fst_qmf_delay[46]); ///< delay line for the 1st stacked QMF filter | |||||
DECLARE_ALIGNED_16(float, snd_qmf_delay[46]); ///< delay line for the 2nd stacked QMF filter | |||||
DECLARE_ALIGNED_16(float, last_qmf_delay[256+23]); ///< delay line for the last stacked QMF filter | |||||
DECLARE_ALIGNED_16(float, spec1)[AT1_SU_SAMPLES]; ///< mdct buffer | |||||
DECLARE_ALIGNED_16(float, spec2)[AT1_SU_SAMPLES]; ///< mdct buffer | |||||
DECLARE_ALIGNED_16(float, fst_qmf_delay)[46]; ///< delay line for the 1st stacked QMF filter | |||||
DECLARE_ALIGNED_16(float, snd_qmf_delay)[46]; ///< delay line for the 2nd stacked QMF filter | |||||
DECLARE_ALIGNED_16(float, last_qmf_delay)[256+23]; ///< delay line for the last stacked QMF filter | |||||
} AT1SUCtx; | } AT1SUCtx; | ||||
/** | /** | ||||
@@ -70,13 +70,13 @@ typedef struct { | |||||
*/ | */ | ||||
typedef struct { | typedef struct { | ||||
AT1SUCtx SUs[AT1_MAX_CHANNELS]; ///< channel sound unit | AT1SUCtx SUs[AT1_MAX_CHANNELS]; ///< channel sound unit | ||||
DECLARE_ALIGNED_16(float, spec[AT1_SU_SAMPLES]); ///< the mdct spectrum buffer | |||||
DECLARE_ALIGNED_16(float, spec)[AT1_SU_SAMPLES]; ///< the mdct spectrum buffer | |||||
DECLARE_ALIGNED_16(float, low[256]); | |||||
DECLARE_ALIGNED_16(float, mid[256]); | |||||
DECLARE_ALIGNED_16(float, high[512]); | |||||
DECLARE_ALIGNED_16(float, low)[256]; | |||||
DECLARE_ALIGNED_16(float, mid)[256]; | |||||
DECLARE_ALIGNED_16(float, high)[512]; | |||||
float* bands[3]; | float* bands[3]; | ||||
DECLARE_ALIGNED_16(float, out_samples[AT1_MAX_CHANNELS][AT1_SU_SAMPLES]); | |||||
DECLARE_ALIGNED_16(float, out_samples)[AT1_MAX_CHANNELS][AT1_SU_SAMPLES]; | |||||
FFTContext mdct_ctx[3]; | FFTContext mdct_ctx[3]; | ||||
int channels; | int channels; | ||||
DSPContext dsp; | DSPContext dsp; | ||||
@@ -73,8 +73,8 @@ typedef struct { | |||||
int gcBlkSwitch; | int gcBlkSwitch; | ||||
gain_block gainBlock[2]; | gain_block gainBlock[2]; | ||||
DECLARE_ALIGNED_16(float, spectrum[1024]); | |||||
DECLARE_ALIGNED_16(float, IMDCT_buf[1024]); | |||||
DECLARE_ALIGNED_16(float, spectrum)[1024]; | |||||
DECLARE_ALIGNED_16(float, IMDCT_buf)[1024]; | |||||
float delayBuf1[46]; ///<qmf delay buffers | float delayBuf1[46]; ///<qmf delay buffers | ||||
float delayBuf2[46]; | float delayBuf2[46]; | ||||
@@ -119,7 +119,7 @@ typedef struct { | |||||
//@} | //@} | ||||
} ATRAC3Context; | } ATRAC3Context; | ||||
static DECLARE_ALIGNED_16(float,mdct_window[512]); | |||||
static DECLARE_ALIGNED_16(float,mdct_window)[512]; | |||||
static VLC spectral_coeff_tab[7]; | static VLC spectral_coeff_tab[7]; | ||||
static float gain_tab1[16]; | static float gain_tab1[16]; | ||||
static float gain_tab2[31]; | static float gain_tab2[31]; | ||||
@@ -73,7 +73,7 @@ static inline int get_bs(cavs_vector *mvP, cavs_vector *mvQ, int b) { | |||||
* | * | ||||
*/ | */ | ||||
void ff_cavs_filter(AVSContext *h, enum cavs_mb mb_type) { | void ff_cavs_filter(AVSContext *h, enum cavs_mb mb_type) { | ||||
DECLARE_ALIGNED_8(uint8_t, bs[8]); | |||||
DECLARE_ALIGNED_8(uint8_t, bs)[8]; | |||||
int qp_avg, alpha, beta, tc; | int qp_avg, alpha, beta, tc; | ||||
int i; | int i; | ||||
@@ -150,7 +150,7 @@ typedef struct cook { | |||||
/* data buffers */ | /* data buffers */ | ||||
uint8_t* decoded_bytes_buffer; | uint8_t* decoded_bytes_buffer; | ||||
DECLARE_ALIGNED_16(float,mono_mdct_output[2048]); | |||||
DECLARE_ALIGNED_16(float,mono_mdct_output)[2048]; | |||||
float decode_buffer_1[1024]; | float decode_buffer_1[1024]; | ||||
float decode_buffer_2[1024]; | float decode_buffer_2[1024]; | ||||
float decode_buffer_0[1060]; /* static allocation for joint decode */ | float decode_buffer_0[1060]; /* static allocation for joint decode */ | ||||
@@ -228,16 +228,16 @@ typedef struct { | |||||
/* Subband samples history (for ADPCM) */ | /* Subband samples history (for ADPCM) */ | ||||
float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4]; | float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4]; | ||||
DECLARE_ALIGNED_16(float, subband_fir_hist[DCA_PRIM_CHANNELS_MAX][512]); | |||||
DECLARE_ALIGNED_16(float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512]; | |||||
float subband_fir_noidea[DCA_PRIM_CHANNELS_MAX][32]; | float subband_fir_noidea[DCA_PRIM_CHANNELS_MAX][32]; | ||||
int hist_index[DCA_PRIM_CHANNELS_MAX]; | int hist_index[DCA_PRIM_CHANNELS_MAX]; | ||||
DECLARE_ALIGNED_16(float, raXin[32]); | |||||
DECLARE_ALIGNED_16(float, raXin)[32]; | |||||
int output; ///< type of output | int output; ///< type of output | ||||
float add_bias; ///< output bias | float add_bias; ///< output bias | ||||
float scale_bias; ///< output scale | float scale_bias; ///< output scale | ||||
DECLARE_ALIGNED_16(float, samples[1536]); /* 6 * 256 = 1536, might only need 5 */ | |||||
DECLARE_ALIGNED_16(float, samples)[1536]; /* 6 * 256 = 1536, might only need 5 */ | |||||
const float *samples_chanptr[6]; | const float *samples_chanptr[6]; | ||||
uint8_t dca_buffer[DCA_MAX_FRAME_SIZE]; | uint8_t dca_buffer[DCA_MAX_FRAME_SIZE]; | ||||
@@ -186,9 +186,9 @@ static void idct_mmx_init(void) | |||||
} | } | ||||
} | } | ||||
DECLARE_ALIGNED(16, static DCTELEM, block[64]); | |||||
DECLARE_ALIGNED(8, static DCTELEM, block1[64]); | |||||
DECLARE_ALIGNED(8, static DCTELEM, block_org[64]); | |||||
DECLARE_ALIGNED(16, static DCTELEM, block)[64]; | |||||
DECLARE_ALIGNED(8, static DCTELEM, block1)[64]; | |||||
DECLARE_ALIGNED(8, static DCTELEM, block_org)[64]; | |||||
static inline void mmx_emms(void) | static inline void mmx_emms(void) | ||||
{ | { | ||||
@@ -384,8 +384,8 @@ static void dct_error(const char *name, int is_idct, | |||||
#endif | #endif | ||||
} | } | ||||
DECLARE_ALIGNED(8, static uint8_t, img_dest[64]); | |||||
DECLARE_ALIGNED(8, static uint8_t, img_dest1[64]); | |||||
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64]; | |||||
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64]; | |||||
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block) | static void idct248_ref(uint8_t *dest, int linesize, int16_t *block) | ||||
{ | { | ||||
@@ -39,7 +39,7 @@ typedef struct { | |||||
VLC ac_vlc, dc_vlc, run_vlc; | VLC ac_vlc, dc_vlc, run_vlc; | ||||
int last_dc[3]; | int last_dc[3]; | ||||
DSPContext dsp; | DSPContext dsp; | ||||
DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, blocks)[8][64]; | |||||
ScanTable scantable; | ScanTable scantable; | ||||
const CIDEntry *cid_table; | const CIDEntry *cid_table; | ||||
} DNXHDContext; | } DNXHDContext; | ||||
@@ -414,7 +414,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i | |||||
dnxhd_get_blocks(ctx, mb_x, mb_y); | dnxhd_get_blocks(ctx, mb_x, mb_y); | ||||
for (i = 0; i < 8; i++) { | for (i = 0; i < 8; i++) { | ||||
DECLARE_ALIGNED_16(DCTELEM, block[64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[64]; | |||||
DCTELEM *src_block = ctx->blocks[i]; | DCTELEM *src_block = ctx->blocks[i]; | ||||
int overflow, nbits, diff, last_index; | int overflow, nbits, diff, last_index; | ||||
int n = dnxhd_switch_matrix(ctx, i); | int n = dnxhd_switch_matrix(ctx, i); | ||||
@@ -55,7 +55,7 @@ typedef struct DNXHDEncContext { | |||||
int interlaced; | int interlaced; | ||||
int cur_field; | int cur_field; | ||||
DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, blocks)[8][64]; | |||||
int (*qmatrix_c) [64]; | int (*qmatrix_c) [64]; | ||||
int (*qmatrix_l) [64]; | int (*qmatrix_l) [64]; | ||||
@@ -87,7 +87,7 @@ const uint8_t ff_zigzag248_direct[64] = { | |||||
}; | }; | ||||
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | ||||
DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16[64]); | |||||
DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16)[64]; | |||||
const uint8_t ff_alternate_horizontal_scan[64] = { | const uint8_t ff_alternate_horizontal_scan[64] = { | ||||
0, 1, 2, 3, 8, 9, 16, 17, | 0, 1, 2, 3, 8, 9, 16, 17, | ||||
@@ -3788,7 +3788,7 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_ | |||||
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | |||||
DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
assert(h==8); | assert(h==8); | ||||
@@ -3853,7 +3853,7 @@ static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s | |||||
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | |||||
DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
int sum=0, i; | int sum=0, i; | ||||
@@ -3870,7 +3870,7 @@ static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2 | |||||
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]); | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64*2/8]; | |||||
DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | ||||
int sum=0, i; | int sum=0, i; | ||||
@@ -3895,9 +3895,9 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s | |||||
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
const uint8_t *scantable= s->intra_scantable.permutated; | const uint8_t *scantable= s->intra_scantable.permutated; | ||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_src1[8]); | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_src2[8]); | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_src1)[8]; | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_src2)[8]; | |||||
DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
uint8_t * const lsrc1 = (uint8_t*)aligned_src1; | uint8_t * const lsrc1 = (uint8_t*)aligned_src1; | ||||
uint8_t * const lsrc2 = (uint8_t*)aligned_src2; | uint8_t * const lsrc2 = (uint8_t*)aligned_src2; | ||||
@@ -3974,7 +3974,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int | |||||
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
const uint8_t *scantable= s->intra_scantable.permutated; | const uint8_t *scantable= s->intra_scantable.permutated; | ||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | |||||
DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8]; | |||||
DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
int i, last, run, bits, level, start_i; | int i, last, run, bits, level, start_i; | ||||
const int esc_length= s->ac_esc_length; | const int esc_length= s->ac_esc_length; | ||||
@@ -178,7 +178,7 @@ typedef struct ScanTable{ | |||||
uint8_t raster_end[64]; | uint8_t raster_end[64]; | ||||
#if ARCH_PPC | #if ARCH_PPC | ||||
/** Used by dct_quantize_altivec to find last-non-zero */ | /** Used by dct_quantize_altivec to find last-non-zero */ | ||||
DECLARE_ALIGNED(16, uint8_t, inverse[64]); | |||||
DECLARE_ALIGNED(16, uint8_t, inverse)[64]; | |||||
#endif | #endif | ||||
} ScanTable; | } ScanTable; | ||||
@@ -656,8 +656,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); | |||||
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); | ||||
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); | ||||
#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v) | |||||
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v) | |||||
#define DECLARE_ALIGNED_16(t, v, ...) DECLARE_ALIGNED(16, t, v) | |||||
#define DECLARE_ALIGNED_8(t, v, ...) DECLARE_ALIGNED(8, t, v) | |||||
#if HAVE_MMX | #if HAVE_MMX | ||||
@@ -749,11 +749,11 @@ typedef struct FFTContext { | |||||
#endif | #endif | ||||
#define COSTABLE(size) \ | #define COSTABLE(size) \ | ||||
COSTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_cos_##size[size/2]) | |||||
COSTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_cos_##size)[size/2] | |||||
#define SINTABLE(size) \ | #define SINTABLE(size) \ | ||||
SINTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_sin_##size[size/2]) | |||||
SINTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_sin_##size)[size/2] | |||||
#define SINETABLE(size) \ | #define SINETABLE(size) \ | ||||
SINETABLE_CONST DECLARE_ALIGNED_16(float, ff_sine_##size[size]) | |||||
SINETABLE_CONST DECLARE_ALIGNED_16(float, ff_sine_##size)[size] | |||||
extern COSTABLE(16); | extern COSTABLE(16); | ||||
extern COSTABLE(32); | extern COSTABLE(32); | ||||
extern COSTABLE(64); | extern COSTABLE(64); | ||||
@@ -532,9 +532,9 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg) | |||||
PutBitContext pb, vs_pb; | PutBitContext pb, vs_pb; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
BlockInfo mb_data[5 * DV_MAX_BPM], *mb, *mb1; | BlockInfo mb_data[5 * DV_MAX_BPM], *mb, *mb1; | ||||
DECLARE_ALIGNED_16(DCTELEM, sblock[5*DV_MAX_BPM][64]); | |||||
DECLARE_ALIGNED_16(uint8_t, mb_bit_buffer[80 + 4]); /* allow some slack */ | |||||
DECLARE_ALIGNED_16(uint8_t, vs_bit_buffer[5 * 80 + 4]); /* allow some slack */ | |||||
DECLARE_ALIGNED_16(DCTELEM, sblock)[5*DV_MAX_BPM][64]; | |||||
DECLARE_ALIGNED_16(uint8_t, mb_bit_buffer)[80 + 4]; /* allow some slack */ | |||||
DECLARE_ALIGNED_16(uint8_t, vs_bit_buffer)[5 * 80 + 4]; /* allow some slack */ | |||||
const int log2_blocksize = 3-s->avctx->lowres; | const int log2_blocksize = 3-s->avctx->lowres; | ||||
int is_field_mode[5]; | int is_field_mode[5]; | ||||
@@ -833,7 +833,7 @@ static av_always_inline int dv_init_enc_block(EncBlockInfo* bi, uint8_t *data, i | |||||
{ | { | ||||
const int *weight; | const int *weight; | ||||
const uint8_t* zigzag_scan; | const uint8_t* zigzag_scan; | ||||
DECLARE_ALIGNED_16(DCTELEM, blk[64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, blk)[64]; | |||||
int i, area; | int i, area; | ||||
/* We offer two different methods for class number assignment: the | /* We offer two different methods for class number assignment: the | ||||
method suggested in SMPTE 314M Table 22, and an improved | method suggested in SMPTE 314M Table 22, and an improved | ||||
@@ -46,7 +46,7 @@ typedef struct MadContext { | |||||
AVFrame last_frame; | AVFrame last_frame; | ||||
void *bitstream_buf; | void *bitstream_buf; | ||||
unsigned int bitstream_buf_size; | unsigned int bitstream_buf_size; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[64]; | |||||
} MadContext; | } MadContext; | ||||
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int count) | static void bswap16_buf(uint16_t *dst, const uint16_t *src, int count) | ||||
@@ -42,7 +42,7 @@ typedef struct TgqContext { | |||||
int width,height; | int width,height; | ||||
ScanTable scantable; | ScanTable scantable; | ||||
int qtable[64]; | int qtable[64]; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[6][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[6][64]; | |||||
} TgqContext; | } TgqContext; | ||||
static av_cold int tgq_decode_init(AVCodecContext *avctx){ | static av_cold int tgq_decode_init(AVCodecContext *avctx){ | ||||
@@ -40,7 +40,7 @@ typedef struct TqiContext { | |||||
AVFrame frame; | AVFrame frame; | ||||
void *bitstream_buf; | void *bitstream_buf; | ||||
unsigned int bitstream_buf_size; | unsigned int bitstream_buf_size; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[6][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[6][64]; | |||||
} TqiContext; | } TqiContext; | ||||
static av_cold int tqi_decode_init(AVCodecContext *avctx) | static av_cold int tqi_decode_init(AVCodecContext *avctx) | ||||
@@ -299,7 +299,7 @@ typedef struct H264Context{ | |||||
* non zero coeff count cache. | * non zero coeff count cache. | ||||
* is 64 if not available. | * is 64 if not available. | ||||
*/ | */ | ||||
DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]); | |||||
DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8]; | |||||
/* | /* | ||||
.UU.YYYY | .UU.YYYY | ||||
@@ -312,8 +312,8 @@ typedef struct H264Context{ | |||||
/** | /** | ||||
* Motion vector cache. | * Motion vector cache. | ||||
*/ | */ | ||||
DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]); | |||||
DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]); | |||||
DECLARE_ALIGNED_8(int16_t, mv_cache)[2][5*8][2]; | |||||
DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8]; | |||||
#define LIST_NOT_USED -1 //FIXME rename? | #define LIST_NOT_USED -1 //FIXME rename? | ||||
#define PART_NOT_AVAILABLE -2 | #define PART_NOT_AVAILABLE -2 | ||||
@@ -377,7 +377,7 @@ typedef struct H264Context{ | |||||
int mb_field_decoding_flag; | int mb_field_decoding_flag; | ||||
int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag | int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag | ||||
DECLARE_ALIGNED_8(uint16_t, sub_mb_type[4]); | |||||
DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4]; | |||||
//POC stuff | //POC stuff | ||||
int poc_lsb; | int poc_lsb; | ||||
@@ -456,7 +456,7 @@ typedef struct H264Context{ | |||||
GetBitContext *intra_gb_ptr; | GetBitContext *intra_gb_ptr; | ||||
GetBitContext *inter_gb_ptr; | GetBitContext *inter_gb_ptr; | ||||
DECLARE_ALIGNED_16(DCTELEM, mb[16*24]); | |||||
DECLARE_ALIGNED_16(DCTELEM, mb)[16*24]; | |||||
DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb | DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb | ||||
/** | /** | ||||
@@ -475,7 +475,7 @@ typedef struct H264Context{ | |||||
uint8_t *chroma_pred_mode_table; | uint8_t *chroma_pred_mode_table; | ||||
int last_qscale_diff; | int last_qscale_diff; | ||||
int16_t (*mvd_table[2])[2]; | int16_t (*mvd_table[2])[2]; | ||||
DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]); | |||||
DECLARE_ALIGNED_8(int16_t, mvd_cache)[2][5*8][2]; | |||||
uint8_t *direct_table; | uint8_t *direct_table; | ||||
uint8_t direct_cache[5*8]; | uint8_t direct_cache[5*8]; | ||||
@@ -1041,7 +1041,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, | |||||
return ctx + 4 * cat; | return ctx + 4 * cat; | ||||
} | } | ||||
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = { | |||||
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { | |||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | ||||
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | ||||
@@ -372,7 +372,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, | |||||
filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | ||||
return; | return; | ||||
} else { | } else { | ||||
DECLARE_ALIGNED_8(int16_t, bS[2][4][4]); | |||||
DECLARE_ALIGNED_8(int16_t, bS)[2][4][4]; | |||||
uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; | uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; | ||||
int edges; | int edges; | ||||
if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { | if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { | ||||
@@ -457,7 +457,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u | |||||
int j; | int j; | ||||
for(j=0; j<2; j++, mbn_xy += s->mb_stride){ | for(j=0; j<2; j++, mbn_xy += s->mb_stride){ | ||||
DECLARE_ALIGNED_8(int16_t, bS[4]); | |||||
DECLARE_ALIGNED_8(int16_t, bS)[4]; | |||||
int qp; | int qp; | ||||
if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { | if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { | ||||
*(uint64_t*)bS= 0x0003000300030003ULL; | *(uint64_t*)bS= 0x0003000300030003ULL; | ||||
@@ -488,7 +488,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u | |||||
/* mbn_xy: neighbor macroblock */ | /* mbn_xy: neighbor macroblock */ | ||||
const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; | const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; | ||||
const int mbn_type = s->current_picture.mb_type[mbn_xy]; | const int mbn_type = s->current_picture.mb_type[mbn_xy]; | ||||
DECLARE_ALIGNED_8(int16_t, bS[4]); | |||||
DECLARE_ALIGNED_8(int16_t, bS)[4]; | |||||
int qp; | int qp; | ||||
if( (edge&1) && IS_8x8DCT(mb_type) ) | if( (edge&1) && IS_8x8DCT(mb_type) ) | ||||
@@ -632,7 +632,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint | |||||
*/ | */ | ||||
const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; | const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; | ||||
const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; | const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; | ||||
DECLARE_ALIGNED_8(int16_t, bS[8]); | |||||
DECLARE_ALIGNED_8(int16_t, bS)[8]; | |||||
int qp[2]; | int qp[2]; | ||||
int bqp[2]; | int bqp[2]; | ||||
int rqp[2]; | int rqp[2]; | ||||
@@ -84,8 +84,8 @@ typedef struct { | |||||
DSPContext dsp; | DSPContext dsp; | ||||
FFTContext fft; | FFTContext fft; | ||||
DECLARE_ALIGNED_16(FFTComplex, samples[COEFFS/2]); | |||||
DECLARE_ALIGNED_16(float, out_samples[COEFFS]); | |||||
DECLARE_ALIGNED_16(FFTComplex, samples)[COEFFS/2]; | |||||
DECLARE_ALIGNED_16(float, out_samples)[COEFFS]; | |||||
} IMCContext; | } IMCContext; | ||||
static VLC huffman_vlc[4][4]; | static VLC huffman_vlc[4][4]; | ||||
@@ -563,7 +563,7 @@ not_coded: | |||||
static int h263_skip_b_part(MpegEncContext *s, int cbp) | static int h263_skip_b_part(MpegEncContext *s, int cbp) | ||||
{ | { | ||||
DECLARE_ALIGNED(16, DCTELEM, dblock[64]); | |||||
DECLARE_ALIGNED(16, DCTELEM, dblock)[64]; | |||||
int i, mbi; | int i, mbi; | ||||
/* we have to set s->mb_intra to zero to decode B-part of PB-frame correctly | /* we have to set s->mb_intra to zero to decode B-part of PB-frame correctly | ||||
@@ -44,7 +44,7 @@ typedef struct MDECContext{ | |||||
int mb_width; | int mb_width; | ||||
int mb_height; | int mb_height; | ||||
int mb_x, mb_y; | int mb_x, mb_y; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[6][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[6][64]; | |||||
uint8_t *bitstream_buffer; | uint8_t *bitstream_buffer; | ||||
unsigned int bitstream_buffer_size; | unsigned int bitstream_buffer_size; | ||||
int block_last_index[6]; | int block_last_index[6]; | ||||
@@ -45,7 +45,7 @@ typedef struct { | |||||
AVFrame buf_ptrs [16]; | AVFrame buf_ptrs [16]; | ||||
AVPicture flipped_ptrs[16]; | AVPicture flipped_ptrs[16]; | ||||
DECLARE_ALIGNED_16(DCTELEM, dct_block[64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, dct_block)[64]; | |||||
GetBitContext gb; | GetBitContext gb; | ||||
ScanTable scantable; | ScanTable scantable; | ||||
@@ -84,7 +84,7 @@ typedef struct MJpegDecodeContext { | |||||
int got_picture; ///< we found a SOF and picture is valid, too. | int got_picture; ///< we found a SOF and picture is valid, too. | ||||
int linesize[MAX_COMPONENTS]; ///< linesize << interlaced | int linesize[MAX_COMPONENTS]; ///< linesize << interlaced | ||||
int8_t *qscale_table; | int8_t *qscale_table; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[64]; | |||||
DCTELEM (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode) | DCTELEM (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode) | ||||
uint8_t *last_nnz[MAX_COMPONENTS]; | uint8_t *last_nnz[MAX_COMPONENTS]; | ||||
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode) | uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode) | ||||
@@ -65,9 +65,9 @@ typedef struct { | |||||
AVLFG rnd; | AVLFG rnd; | ||||
int frames_to_skip; | int frames_to_skip; | ||||
/* for synthesis */ | /* for synthesis */ | ||||
DECLARE_ALIGNED_16(MPA_INT, synth_buf[MPA_MAX_CHANNELS][512*2]); | |||||
DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2]; | |||||
int synth_buf_offset[MPA_MAX_CHANNELS]; | int synth_buf_offset[MPA_MAX_CHANNELS]; | ||||
DECLARE_ALIGNED_16(int32_t, sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT]); | |||||
DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT]; | |||||
} MPCContext; | } MPCContext; | ||||
void ff_mpc_init(void); | void ff_mpc_init(void); | ||||
@@ -132,9 +132,9 @@ typedef struct MPADecodeContext { | |||||
uint32_t free_format_next_header; | uint32_t free_format_next_header; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
GetBitContext in_gb; | GetBitContext in_gb; | ||||
DECLARE_ALIGNED_16(MPA_INT, synth_buf[MPA_MAX_CHANNELS][512 * 2]); | |||||
DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2]; | |||||
int synth_buf_offset[MPA_MAX_CHANNELS]; | int synth_buf_offset[MPA_MAX_CHANNELS]; | ||||
DECLARE_ALIGNED_16(int32_t, sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT]); | |||||
DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT]; | |||||
int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ | int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ | ||||
GranuleDef granules[2][2]; /* Used in Layer 3 */ | GranuleDef granules[2][2]; /* Used in Layer 3 */ | ||||
#ifdef DEBUG | #ifdef DEBUG | ||||
@@ -95,7 +95,7 @@ static const int32_t scale_factor_mult2[3][3] = { | |||||
SCALE_GEN(4.0 / 9.0), /* 9 steps */ | SCALE_GEN(4.0 / 9.0), /* 9 steps */ | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(MPA_INT, ff_mpa_synth_window[512]); | |||||
DECLARE_ALIGNED_16(MPA_INT, ff_mpa_synth_window)[512]; | |||||
/** | /** | ||||
* Convert region offsets to region sizes and truncate | * Convert region offsets to region sizes and truncate | ||||
@@ -3311,7 +3311,7 @@ static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise? | |||||
DCTELEM *block, int16_t *weight, DCTELEM *orig, | DCTELEM *block, int16_t *weight, DCTELEM *orig, | ||||
int n, int qscale){ | int n, int qscale){ | ||||
int16_t rem[64]; | int16_t rem[64]; | ||||
DECLARE_ALIGNED_16(DCTELEM, d1[64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, d1)[64]; | |||||
const uint8_t *scantable= s->intra_scantable.scantable; | const uint8_t *scantable= s->intra_scantable.scantable; | ||||
const uint8_t *perm_scantable= s->intra_scantable.permutated; | const uint8_t *perm_scantable= s->intra_scantable.permutated; | ||||
// unsigned int threshold1, threshold2; | // unsigned int threshold1, threshold2; | ||||
@@ -43,7 +43,7 @@ | |||||
typedef struct NellyMoserDecodeContext { | typedef struct NellyMoserDecodeContext { | ||||
AVCodecContext* avctx; | AVCodecContext* avctx; | ||||
DECLARE_ALIGNED_16(float,float_buf[NELLY_SAMPLES]); | |||||
DECLARE_ALIGNED_16(float,float_buf)[NELLY_SAMPLES]; | |||||
float state[128]; | float state[128]; | ||||
AVLFG random_state; | AVLFG random_state; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
@@ -51,7 +51,7 @@ typedef struct NellyMoserDecodeContext { | |||||
float scale_bias; | float scale_bias; | ||||
DSPContext dsp; | DSPContext dsp; | ||||
FFTContext imdct_ctx; | FFTContext imdct_ctx; | ||||
DECLARE_ALIGNED_16(float,imdct_out[NELLY_BUF_LEN * 2]); | |||||
DECLARE_ALIGNED_16(float,imdct_out)[NELLY_BUF_LEN * 2]; | |||||
} NellyMoserDecodeContext; | } NellyMoserDecodeContext; | ||||
static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in) | static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in) | ||||
@@ -53,9 +53,9 @@ typedef struct NellyMoserEncodeContext { | |||||
int have_saved; | int have_saved; | ||||
DSPContext dsp; | DSPContext dsp; | ||||
FFTContext mdct_ctx; | FFTContext mdct_ctx; | ||||
DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]); | |||||
DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]); | |||||
DECLARE_ALIGNED_16(float, buf[2][3 * NELLY_BUF_LEN]); ///< sample buffer | |||||
DECLARE_ALIGNED_16(float, mdct_out)[NELLY_SAMPLES]; | |||||
DECLARE_ALIGNED_16(float, in_buff)[NELLY_SAMPLES]; | |||||
DECLARE_ALIGNED_16(float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer | |||||
float (*opt )[NELLY_BANDS]; | float (*opt )[NELLY_BANDS]; | ||||
uint8_t (*path)[NELLY_BANDS]; | uint8_t (*path)[NELLY_BANDS]; | ||||
} NellyMoserEncodeContext; | } NellyMoserEncodeContext; | ||||
@@ -226,7 +226,7 @@ float_to_int16_interleave_altivec(int16_t *dst, const float **src, | |||||
dst+=8; | dst+=8; | ||||
} | } | ||||
} else { | } else { | ||||
DECLARE_ALIGNED(16, int16_t, tmp[len]); | |||||
DECLARE_ALIGNED(16, int16_t, tmp)[len]; | |||||
int c, j; | int c, j; | ||||
for (c = 0; c < channels; c++) { | for (c = 0; c < channels; c++) { | ||||
float_to_int16_altivec(tmp, src[c], len); | float_to_int16_altivec(tmp, src[c], len); | ||||
@@ -34,7 +34,7 @@ void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int str | |||||
{ | { | ||||
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); | POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); | ||||
const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder; | const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder; | ||||
const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) = | |||||
const DECLARE_ALIGNED_16(unsigned short, ABCD)[8] = | |||||
{ | { | ||||
(16-x16)*(16-y16), /* A */ | (16-x16)*(16-y16), /* A */ | ||||
( x16)*(16-y16), /* B */ | ( x16)*(16-y16), /* B */ | ||||
@@ -79,7 +79,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uin | |||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ | static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ | ||||
DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | ||||
}\ | }\ | ||||
@@ -89,13 +89,13 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint | |||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | ||||
}\ | }\ | ||||
@@ -105,79 +105,79 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint | |||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | ||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | ||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | ||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | ||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | |||||
OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ | OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | |||||
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | ||||
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | |||||
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | ||||
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | ||||
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | ||||
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ | ||||
@@ -590,7 +590,7 @@ static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, D | |||||
static inline void write16x4(uint8_t *dst, int dst_stride, | static inline void write16x4(uint8_t *dst, int dst_stride, | ||||
register vec_u8 r0, register vec_u8 r1, | register vec_u8 r0, register vec_u8 r1, | ||||
register vec_u8 r2, register vec_u8 r3) { | register vec_u8 r2, register vec_u8 r3) { | ||||
DECLARE_ALIGNED_16(unsigned char, result[64]); | |||||
DECLARE_ALIGNED_16(unsigned char, result)[64]; | |||||
uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst; | uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst; | ||||
int int_dst_stride = dst_stride/4; | int int_dst_stride = dst_stride/4; | ||||
@@ -770,7 +770,7 @@ static inline vec_u8 h264_deblock_q1(register vec_u8 p0, | |||||
} | } | ||||
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \ | #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \ | ||||
DECLARE_ALIGNED_16(unsigned char, temp[16]); \ | |||||
DECLARE_ALIGNED_16(unsigned char, temp)[16]; \ | |||||
register vec_u8 alphavec; \ | register vec_u8 alphavec; \ | ||||
register vec_u8 betavec; \ | register vec_u8 betavec; \ | ||||
register vec_u8 mask; \ | register vec_u8 mask; \ | ||||
@@ -850,7 +850,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei | |||||
vec_u8 vblock; | vec_u8 vblock; | ||||
vec_s16 vtemp, vweight, voffset, v0, v1; | vec_s16 vtemp, vweight, voffset, v0, v1; | ||||
vec_u16 vlog2_denom; | vec_u16 vlog2_denom; | ||||
DECLARE_ALIGNED_16(int32_t, temp[4]); | |||||
DECLARE_ALIGNED_16(int32_t, temp)[4]; | |||||
LOAD_ZERO; | LOAD_ZERO; | ||||
offset <<= log2_denom; | offset <<= log2_denom; | ||||
@@ -896,7 +896,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_ | |||||
vec_u8 vsrc, vdst; | vec_u8 vsrc, vdst; | ||||
vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3; | vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3; | ||||
vec_u16 vlog2_denom; | vec_u16 vlog2_denom; | ||||
DECLARE_ALIGNED_16(int32_t, temp[4]); | |||||
DECLARE_ALIGNED_16(int32_t, temp)[4]; | |||||
LOAD_ZERO; | LOAD_ZERO; | ||||
offset = ((offset + 1) | 1) << log2_denom; | offset = ((offset + 1) | 1) << log2_denom; | ||||
@@ -78,7 +78,7 @@ | |||||
void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | ||||
int stride, int h, int x, int y) { | int stride, int h, int x, int y) { | ||||
POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); | POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); | ||||
DECLARE_ALIGNED_16(signed int, ABCD[4]) = | |||||
DECLARE_ALIGNED_16(signed int, ABCD)[4] = | |||||
{((8 - x) * (8 - y)), | {((8 - x) * (8 - y)), | ||||
(( x) * (8 - y)), | (( x) * (8 - y)), | ||||
((8 - x) * ( y)), | ((8 - x) * ( y)), | ||||
@@ -208,7 +208,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | |||||
/* this code assume that stride % 16 == 0 */ | /* this code assume that stride % 16 == 0 */ | ||||
void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { | void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { | ||||
DECLARE_ALIGNED_16(signed int, ABCD[4]) = | |||||
DECLARE_ALIGNED_16(signed int, ABCD)[4] = | |||||
{((8 - x) * (8 - y)), | {((8 - x) * (8 - y)), | ||||
(( x) * (8 - y)), | (( x) * (8 - y)), | ||||
((8 - x) * ( y)), | ((8 - x) * ( y)), | ||||
@@ -122,7 +122,7 @@ typedef struct { | |||||
} FFTCoefficient; | } FFTCoefficient; | ||||
typedef struct { | typedef struct { | ||||
DECLARE_ALIGNED_16(QDM2Complex, complex[MPA_MAX_CHANNELS][256]); | |||||
DECLARE_ALIGNED_16(QDM2Complex, complex)[MPA_MAX_CHANNELS][256]; | |||||
} QDM2FFT; | } QDM2FFT; | ||||
/** | /** | ||||
@@ -172,9 +172,9 @@ typedef struct { | |||||
float output_buffer[1024]; | float output_buffer[1024]; | ||||
/// Synthesis filter | /// Synthesis filter | ||||
DECLARE_ALIGNED_16(MPA_INT, synth_buf[MPA_MAX_CHANNELS][512*2]); | |||||
DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2]; | |||||
int synth_buf_offset[MPA_MAX_CHANNELS]; | int synth_buf_offset[MPA_MAX_CHANNELS]; | ||||
DECLARE_ALIGNED_16(int32_t, sb_samples[MPA_MAX_CHANNELS][128][SBLIMIT]); | |||||
DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT]; | |||||
/// Mixed temporary data used in decoding | /// Mixed temporary data used in decoding | ||||
float tone_level[MPA_MAX_CHANNELS][30][64]; | float tone_level[MPA_MAX_CHANNELS][30][64]; | ||||
@@ -31,7 +31,7 @@ typedef struct { | |||||
uint8_t scan[64]; | uint8_t scan[64]; | ||||
uint32_t lquant[64]; | uint32_t lquant[64]; | ||||
uint32_t cquant[64]; | uint32_t cquant[64]; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[64]; | |||||
} RTJpegContext; | } RTJpegContext; | ||||
void rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp, | void rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp, | ||||
@@ -111,7 +111,7 @@ typedef struct RV34DecContext{ | |||||
int *deblock_coefs; ///< deblock coefficients for each macroblock | int *deblock_coefs; ///< deblock coefficients for each macroblock | ||||
/** 8x8 block available flags (for MV prediction) */ | /** 8x8 block available flags (for MV prediction) */ | ||||
DECLARE_ALIGNED_8(uint32_t, avail_cache[3*4]); | |||||
DECLARE_ALIGNED_8(uint32_t, avail_cache)[3*4]; | |||||
int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si); | int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si); | ||||
int (*decode_mb_info)(struct RV34DecContext *r); | int (*decode_mb_info)(struct RV34DecContext *r); | ||||
@@ -64,7 +64,7 @@ typedef struct { | |||||
float excitation[L_INTERPOL + PITCH_MAX + 2 * L_SUBFR_16k]; | float excitation[L_INTERPOL + PITCH_MAX + 2 * L_SUBFR_16k]; | ||||
DECLARE_ALIGNED_16(float, synth_buf[LP_FILTER_ORDER + 5*SUBFR_SIZE + 6]); | |||||
DECLARE_ALIGNED_16(float, synth_buf)[LP_FILTER_ORDER + 5*SUBFR_SIZE + 6]; | |||||
float lsp_history[LP_FILTER_ORDER]; | float lsp_history[LP_FILTER_ORDER]; | ||||
float gain_mem; | float gain_mem; | ||||
@@ -24,7 +24,7 @@ | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
static const DECLARE_ALIGNED_8(int16_t, coeffs[28]) = { | |||||
static const DECLARE_ALIGNED_8(int16_t, coeffs)[28] = { | |||||
- 1259,- 1259,- 1259,- 1259, | - 1259,- 1259,- 1259,- 1259, | ||||
- 4989,- 4989,- 4989,- 4989, | - 4989,- 4989,- 4989,- 4989, | ||||
-11045,-11045,-11045,-11045, | -11045,-11045,-11045,-11045, | ||||
@@ -33,13 +33,13 @@ static const DECLARE_ALIGNED_8(int16_t, coeffs[28]) = { | |||||
25080, 25080, 25080, 25080, | 25080, 25080, 25080, 25080, | ||||
12785, 12785, 12785, 12785 | 12785, 12785, 12785, 12785 | ||||
}; | }; | ||||
static const DECLARE_ALIGNED_8(uint16_t, scale[4]) = { | |||||
static const DECLARE_ALIGNED_8(uint16_t, scale)[4] = { | |||||
65536>>6, 65536>>6, 65536>>6, 65536>>6 | 65536>>6, 65536>>6, 65536>>6, 65536>>6 | ||||
}; | }; | ||||
static const DECLARE_ALIGNED_8(uint16_t, rounder[4]) = { | |||||
static const DECLARE_ALIGNED_8(uint16_t, rounder)[4] = { | |||||
1<<5, 1<<5, 1<<5, 1<<5 | 1<<5, 1<<5, 1<<5, 1<<5 | ||||
}; | }; | ||||
static const DECLARE_ALIGNED_8(uint16_t, expand[4]) = { | |||||
static const DECLARE_ALIGNED_8(uint16_t, expand)[4] = { | |||||
1<<14, 1<<14, 1<<14, 1<<14 | 1<<14, 1<<14, 1<<14, 1<<14 | ||||
}; | }; | ||||
@@ -386,7 +386,7 @@ static const DECLARE_ALIGNED_8(uint16_t, expand[4]) = { | |||||
void ff_simple_idct_vis(DCTELEM *data) { | void ff_simple_idct_vis(DCTELEM *data) { | ||||
int out1, out2, out3, out4; | int out1, out2, out3, out4; | ||||
DECLARE_ALIGNED_8(int16_t, temp[8*8]); | |||||
DECLARE_ALIGNED_8(int16_t, temp)[8*8]; | |||||
__asm__ volatile( | __asm__ volatile( | ||||
INIT_IDCT | INIT_IDCT | ||||
@@ -40,7 +40,7 @@ const int64_t ff_vorbis_channel_layouts[7] = { | |||||
0 | 0 | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(static const float, vwin64[32]) = { | |||||
DECLARE_ALIGNED_16(static const float, vwin64)[32] = { | |||||
0.0009460463F, 0.0085006468F, 0.0235352254F, 0.0458950567F, | 0.0009460463F, 0.0085006468F, 0.0235352254F, 0.0458950567F, | ||||
0.0753351908F, 0.1115073077F, 0.1539457973F, 0.2020557475F, | 0.0753351908F, 0.1115073077F, 0.1539457973F, 0.2020557475F, | ||||
0.2551056759F, 0.3122276645F, 0.3724270287F, 0.4346027792F, | 0.2551056759F, 0.3122276645F, 0.3724270287F, 0.4346027792F, | ||||
@@ -51,7 +51,7 @@ DECLARE_ALIGNED_16(static const float, vwin64[32]) = { | |||||
0.9989462667F, 0.9997230082F, 0.9999638688F, 0.9999995525F, | 0.9989462667F, 0.9997230082F, 0.9999638688F, 0.9999995525F, | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(static const float, vwin128[64]) = { | |||||
DECLARE_ALIGNED_16(static const float, vwin128)[64] = { | |||||
0.0002365472F, 0.0021280687F, 0.0059065254F, 0.0115626550F, | 0.0002365472F, 0.0021280687F, 0.0059065254F, 0.0115626550F, | ||||
0.0190823442F, 0.0284463735F, 0.0396300935F, 0.0526030430F, | 0.0190823442F, 0.0284463735F, 0.0396300935F, 0.0526030430F, | ||||
0.0673285281F, 0.0837631763F, 0.1018564887F, 0.1215504095F, | 0.0673285281F, 0.0837631763F, 0.1018564887F, 0.1215504095F, | ||||
@@ -70,7 +70,7 @@ DECLARE_ALIGNED_16(static const float, vwin128[64]) = { | |||||
0.9999331503F, 0.9999825563F, 0.9999977357F, 0.9999999720F, | 0.9999331503F, 0.9999825563F, 0.9999977357F, 0.9999999720F, | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(static const float, vwin256[128]) = { | |||||
DECLARE_ALIGNED_16(static const float, vwin256)[128] = { | |||||
0.0000591390F, 0.0005321979F, 0.0014780301F, 0.0028960636F, | 0.0000591390F, 0.0005321979F, 0.0014780301F, 0.0028960636F, | ||||
0.0047854363F, 0.0071449926F, 0.0099732775F, 0.0132685298F, | 0.0047854363F, 0.0071449926F, 0.0099732775F, 0.0132685298F, | ||||
0.0170286741F, 0.0212513119F, 0.0259337111F, 0.0310727950F, | 0.0170286741F, 0.0212513119F, 0.0259337111F, 0.0310727950F, | ||||
@@ -105,7 +105,7 @@ DECLARE_ALIGNED_16(static const float, vwin256[128]) = { | |||||
0.9999958064F, 0.9999989077F, 0.9999998584F, 0.9999999983F, | 0.9999958064F, 0.9999989077F, 0.9999998584F, 0.9999999983F, | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(static const float, vwin512[256]) = { | |||||
DECLARE_ALIGNED_16(static const float, vwin512)[256] = { | |||||
0.0000147849F, 0.0001330607F, 0.0003695946F, 0.0007243509F, | 0.0000147849F, 0.0001330607F, 0.0003695946F, 0.0007243509F, | ||||
0.0011972759F, 0.0017882983F, 0.0024973285F, 0.0033242588F, | 0.0011972759F, 0.0017882983F, 0.0024973285F, 0.0033242588F, | ||||
0.0042689632F, 0.0053312973F, 0.0065110982F, 0.0078081841F, | 0.0042689632F, 0.0053312973F, 0.0065110982F, 0.0078081841F, | ||||
@@ -172,7 +172,7 @@ DECLARE_ALIGNED_16(static const float, vwin512[256]) = { | |||||
0.9999997377F, 0.9999999317F, 0.9999999911F, 0.9999999999F, | 0.9999997377F, 0.9999999317F, 0.9999999911F, 0.9999999999F, | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(static const float, vwin1024[512]) = { | |||||
DECLARE_ALIGNED_16(static const float, vwin1024)[512] = { | |||||
0.0000036962F, 0.0000332659F, 0.0000924041F, 0.0001811086F, | 0.0000036962F, 0.0000332659F, 0.0000924041F, 0.0001811086F, | ||||
0.0002993761F, 0.0004472021F, 0.0006245811F, 0.0008315063F, | 0.0002993761F, 0.0004472021F, 0.0006245811F, 0.0008315063F, | ||||
0.0010679699F, 0.0013339631F, 0.0016294757F, 0.0019544965F, | 0.0010679699F, 0.0013339631F, 0.0016294757F, 0.0019544965F, | ||||
@@ -303,7 +303,7 @@ DECLARE_ALIGNED_16(static const float, vwin1024[512]) = { | |||||
0.9999999836F, 0.9999999957F, 0.9999999994F, 1.0000000000F, | 0.9999999836F, 0.9999999957F, 0.9999999994F, 1.0000000000F, | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(static const float, vwin2048[1024]) = { | |||||
DECLARE_ALIGNED_16(static const float, vwin2048)[1024] = { | |||||
0.0000009241F, 0.0000083165F, 0.0000231014F, 0.0000452785F, | 0.0000009241F, 0.0000083165F, 0.0000231014F, 0.0000452785F, | ||||
0.0000748476F, 0.0001118085F, 0.0001561608F, 0.0002079041F, | 0.0000748476F, 0.0001118085F, 0.0001561608F, 0.0002079041F, | ||||
0.0002670379F, 0.0003335617F, 0.0004074748F, 0.0004887765F, | 0.0002670379F, 0.0003335617F, 0.0004074748F, 0.0004887765F, | ||||
@@ -562,7 +562,7 @@ DECLARE_ALIGNED_16(static const float, vwin2048[1024]) = { | |||||
0.9999999990F, 0.9999999997F, 1.0000000000F, 1.0000000000F, | 0.9999999990F, 0.9999999997F, 1.0000000000F, 1.0000000000F, | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(static const float, vwin4096[2048]) = { | |||||
DECLARE_ALIGNED_16(static const float, vwin4096)[2048] = { | |||||
0.0000002310F, 0.0000020791F, 0.0000057754F, 0.0000113197F, | 0.0000002310F, 0.0000020791F, 0.0000057754F, 0.0000113197F, | ||||
0.0000187121F, 0.0000279526F, 0.0000390412F, 0.0000519777F, | 0.0000187121F, 0.0000279526F, 0.0000390412F, 0.0000519777F, | ||||
0.0000667623F, 0.0000833949F, 0.0001018753F, 0.0001222036F, | 0.0000667623F, 0.0000833949F, 0.0001018753F, 0.0001222036F, | ||||
@@ -1077,7 +1077,7 @@ DECLARE_ALIGNED_16(static const float, vwin4096[2048]) = { | |||||
0.9999999999F, 1.0000000000F, 1.0000000000F, 1.0000000000F, | 0.9999999999F, 1.0000000000F, 1.0000000000F, 1.0000000000F, | ||||
}; | }; | ||||
DECLARE_ALIGNED_16(static const float, vwin8192[4096]) = { | |||||
DECLARE_ALIGNED_16(static const float, vwin8192)[4096] = { | |||||
0.0000000578F, 0.0000005198F, 0.0000014438F, 0.0000028299F, | 0.0000000578F, 0.0000005198F, 0.0000014438F, 0.0000028299F, | ||||
0.0000046780F, 0.0000069882F, 0.0000097604F, 0.0000129945F, | 0.0000046780F, 0.0000069882F, 0.0000097604F, 0.0000129945F, | ||||
0.0000166908F, 0.0000208490F, 0.0000254692F, 0.0000305515F, | 0.0000166908F, 0.0000208490F, 0.0000254692F, 0.0000305515F, | ||||
@@ -200,7 +200,7 @@ typedef struct Vp3DecodeContext { | |||||
/* these arrays need to be on 16-byte boundaries since SSE2 operations | /* these arrays need to be on 16-byte boundaries since SSE2 operations | ||||
* index into them */ | * index into them */ | ||||
DECLARE_ALIGNED_16(int16_t, qmat[3][2][3][64]); //<qmat[qpi][is_inter][plane] | |||||
DECLARE_ALIGNED_16(int16_t, qmat)[3][2][3][64]; //<qmat[qpi][is_inter][plane] | |||||
/* This table contains superblock_count * 16 entries. Each set of 16 | /* This table contains superblock_count * 16 entries. Each set of 16 | ||||
* numbers corresponds to the fragment indexes 0..15 of the superblock. | * numbers corresponds to the fragment indexes 0..15 of the superblock. | ||||
@@ -238,7 +238,7 @@ typedef struct Vp3DecodeContext { | |||||
uint16_t huffman_table[80][32][2]; | uint16_t huffman_table[80][32][2]; | ||||
uint8_t filter_limit_values[64]; | uint8_t filter_limit_values[64]; | ||||
DECLARE_ALIGNED_8(int, bounding_values_array[256+2]); | |||||
DECLARE_ALIGNED_8(int, bounding_values_array)[256+2]; | |||||
} Vp3DecodeContext; | } Vp3DecodeContext; | ||||
/************************************************************************ | /************************************************************************ | ||||
@@ -1397,7 +1397,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) | |||||
{ | { | ||||
int x; | int x; | ||||
int16_t *dequantizer; | int16_t *dequantizer; | ||||
DECLARE_ALIGNED_16(DCTELEM, block[64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block)[64]; | |||||
int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef; | int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef; | ||||
int motion_halfpel_index; | int motion_halfpel_index; | ||||
uint8_t *motion_source; | uint8_t *motion_source; | ||||
@@ -120,7 +120,7 @@ struct vp56_context { | |||||
/* blocks / macroblock */ | /* blocks / macroblock */ | ||||
VP56mb mb_type; | VP56mb mb_type; | ||||
VP56Macroblock *macroblocks; | VP56Macroblock *macroblocks; | ||||
DECLARE_ALIGNED_16(DCTELEM, block_coeff[6][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, block_coeff)[6][64]; | |||||
/* motion vectors */ | /* motion vectors */ | ||||
VP56mv mv[6]; /* vectors for each block in MB */ | VP56mv mv[6]; /* vectors for each block in MB */ | ||||
@@ -111,15 +111,15 @@ typedef struct WMACodecContext { | |||||
uint8_t ms_stereo; ///< true if mid/side stereo mode | uint8_t ms_stereo; ///< true if mid/side stereo mode | ||||
uint8_t channel_coded[MAX_CHANNELS]; ///< true if channel is coded | uint8_t channel_coded[MAX_CHANNELS]; ///< true if channel is coded | ||||
int exponents_bsize[MAX_CHANNELS]; ///< log2 ratio frame/exp. length | int exponents_bsize[MAX_CHANNELS]; ///< log2 ratio frame/exp. length | ||||
DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]); | |||||
DECLARE_ALIGNED_16(float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE]; | |||||
float max_exponent[MAX_CHANNELS]; | float max_exponent[MAX_CHANNELS]; | ||||
WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; | WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; | ||||
DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); | |||||
DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); | |||||
DECLARE_ALIGNED_16(float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE]; | |||||
DECLARE_ALIGNED_16(FFTSample, output)[BLOCK_MAX_SIZE * 2]; | |||||
FFTContext mdct_ctx[BLOCK_NB_SIZES]; | FFTContext mdct_ctx[BLOCK_NB_SIZES]; | ||||
float *windows[BLOCK_NB_SIZES]; | float *windows[BLOCK_NB_SIZES]; | ||||
/* output buffer for one frame and the last for IMDCT windowing */ | /* output buffer for one frame and the last for IMDCT windowing */ | ||||
DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]); | |||||
DECLARE_ALIGNED_16(float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]; | |||||
/* last frame info */ | /* last frame info */ | ||||
uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */ | uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */ | ||||
int last_bitoffset; | int last_bitoffset; | ||||
@@ -142,7 +142,7 @@ typedef struct { | |||||
int* scale_factors; ///< pointer to the scale factor values used for decoding | int* scale_factors; ///< pointer to the scale factor values used for decoding | ||||
uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block | uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block | ||||
float* coeffs; ///< pointer to the subframe decode buffer | float* coeffs; ///< pointer to the subframe decode buffer | ||||
DECLARE_ALIGNED_16(float, out[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]); ///< output buffer | |||||
DECLARE_ALIGNED_16(float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer | |||||
} WMAProChannelCtx; | } WMAProChannelCtx; | ||||
/** | /** | ||||
@@ -167,7 +167,7 @@ typedef struct WMAProDecodeCtx { | |||||
FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data | FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data | ||||
PutBitContext pb; ///< context for filling the frame_data buffer | PutBitContext pb; ///< context for filling the frame_data buffer | ||||
FFTContext mdct_ctx[WMAPRO_BLOCK_SIZES]; ///< MDCT context per block size | FFTContext mdct_ctx[WMAPRO_BLOCK_SIZES]; ///< MDCT context per block size | ||||
DECLARE_ALIGNED_16(float, tmp[WMAPRO_BLOCK_MAX_SIZE]); ///< IMDCT output buffer | |||||
DECLARE_ALIGNED_16(float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer | |||||
float* windows[WMAPRO_BLOCK_SIZES]; ///< windows for the different block sizes | float* windows[WMAPRO_BLOCK_SIZES]; ///< windows for the different block sizes | ||||
/* frame size dependent frame information (set during initialization) */ | /* frame size dependent frame information (set during initialization) */ | ||||
@@ -50,7 +50,7 @@ typedef struct Wmv2Context{ | |||||
int hshift; | int hshift; | ||||
ScanTable abt_scantable[2]; | ScanTable abt_scantable[2]; | ||||
DECLARE_ALIGNED_16(DCTELEM, abt_block2[6][64]); | |||||
DECLARE_ALIGNED_16(DCTELEM, abt_block2)[6][64]; | |||||
}Wmv2Context; | }Wmv2Context; | ||||
void ff_wmv2_common_init(Wmv2Context * w); | void ff_wmv2_common_init(Wmv2Context * w); | ||||
@@ -113,7 +113,7 @@ static inline void cavs_idct8_1d(int16_t *block, uint64_t bias) | |||||
static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
{ | { | ||||
int i; | int i; | ||||
DECLARE_ALIGNED_8(int16_t, b2[64]); | |||||
DECLARE_ALIGNED_8(int16_t, b2)[64]; | |||||
for(i=0; i<2; i++){ | for(i=0; i<2; i++){ | ||||
DECLARE_ALIGNED_8(uint64_t, tmp); | DECLARE_ALIGNED_8(uint64_t, tmp); | ||||
@@ -42,7 +42,7 @@ int mm_flags; /* multimedia extension flags */ | |||||
DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL; | DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL; | ||||
DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL; | DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL; | ||||
DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000[2]) = | |||||
DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] = | |||||
{0x8000000080000000ULL, 0x8000000080000000ULL}; | {0x8000000080000000ULL, 0x8000000080000000ULL}; | ||||
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; | DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; | ||||
@@ -69,8 +69,8 @@ DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; | |||||
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; | DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; | ||||
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; | DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; | ||||
DECLARE_ALIGNED_16(const double, ff_pd_1[2]) = { 1.0, 1.0 }; | |||||
DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 }; | |||||
DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 }; | |||||
DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 }; | |||||
#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) | #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) | ||||
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) | #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) | ||||
@@ -277,7 +277,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size | |||||
:"memory"); | :"memory"); | ||||
} | } | ||||
DECLARE_ASM_CONST(8, uint8_t, ff_vector128[8]) = | |||||
DECLARE_ASM_CONST(8, uint8_t, ff_vector128)[8] = | |||||
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | ||||
#define put_signed_pixels_clamped_mmx_half(off) \ | #define put_signed_pixels_clamped_mmx_half(off) \ | ||||
@@ -754,7 +754,7 @@ static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int | |||||
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | ||||
if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { | if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { | ||||
const int strength= ff_h263_loop_filter_strength[qscale]; | const int strength= ff_h263_loop_filter_strength[qscale]; | ||||
DECLARE_ALIGNED(8, uint64_t, temp[4]); | |||||
DECLARE_ALIGNED(8, uint64_t, temp)[4]; | |||||
uint8_t *btemp= (uint8_t*)temp; | uint8_t *btemp= (uint8_t*)temp; | ||||
src -= 2; | src -= 2; | ||||
@@ -2026,7 +2026,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c | |||||
} else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) { | } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) { | ||||
MIX5(IF1,IF0); | MIX5(IF1,IF0); | ||||
} else { | } else { | ||||
DECLARE_ALIGNED_16(float, matrix_simd[in_ch][2][4]); | |||||
DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4]; | |||||
j = 2*in_ch*sizeof(float); | j = 2*in_ch*sizeof(float); | ||||
__asm__ volatile( | __asm__ volatile( | ||||
"1: \n" | "1: \n" | ||||
@@ -2413,7 +2413,7 @@ static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int al | |||||
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ | #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ | ||||
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ | /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ | ||||
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ | static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ | ||||
DECLARE_ALIGNED_16(int16_t, tmp[len]);\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp)[len];\ | |||||
int i,j,c;\ | int i,j,c;\ | ||||
for(c=0; c<channels; c++){\ | for(c=0; c<channels; c++){\ | ||||
float_to_int16_##cpu(tmp, src[c], len);\ | float_to_int16_##cpu(tmp, src[c], len);\ | ||||
@@ -1063,7 +1063,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c | |||||
#define HADAMARD8_DIFF_MMX(cpu) \ | #define HADAMARD8_DIFF_MMX(cpu) \ | ||||
static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ | static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ | ||||
DECLARE_ALIGNED_8(uint64_t, temp[13]);\ | |||||
DECLARE_ALIGNED_8(uint64_t, temp)[13];\ | |||||
int sum;\ | int sum;\ | ||||
\ | \ | ||||
assert(h==8);\ | assert(h==8);\ | ||||
@@ -1146,7 +1146,7 @@ WRAPPER8_16_SQ(hadamard8_diff_##cpu, hadamard8_diff16_##cpu) | |||||
#define HADAMARD8_DIFF_SSE2(cpu) \ | #define HADAMARD8_DIFF_SSE2(cpu) \ | ||||
static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ | static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\ | ||||
DECLARE_ALIGNED_16(uint64_t, temp[4]);\ | |||||
DECLARE_ALIGNED_16(uint64_t, temp)[4];\ | |||||
int sum;\ | int sum;\ | ||||
\ | \ | ||||
assert(h==8);\ | assert(h==8);\ | ||||
@@ -23,7 +23,7 @@ | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "fft.h" | #include "fft.h" | ||||
DECLARE_ALIGNED_8(static const int, m1m1[2]) = { 1<<31, 1<<31 }; | |||||
DECLARE_ALIGNED_8(static const int, m1m1)[2] = { 1<<31, 1<<31 }; | |||||
#ifdef EMULATE_3DNOWEXT | #ifdef EMULATE_3DNOWEXT | ||||
#define PSWAPD(s,d)\ | #define PSWAPD(s,d)\ | ||||
@@ -23,7 +23,7 @@ | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "fft.h" | #include "fft.h" | ||||
DECLARE_ALIGNED(16, static const int, m1m1m1m1[4]) = | |||||
DECLARE_ALIGNED(16, static const int, m1m1m1m1)[4] = | |||||
{ 1 << 31, 1 << 31, 1 << 31, 1 << 31 }; | { 1 << 31, 1 << 31, 1 << 31, 1 << 31 }; | ||||
void ff_fft_dispatch_sse(FFTComplex *z, int nbits); | void ff_fft_dispatch_sse(FFTComplex *z, int nbits); | ||||
@@ -157,7 +157,7 @@ static inline void h264_idct8_1d(int16_t *block) | |||||
static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) | ||||
{ | { | ||||
int i; | int i; | ||||
DECLARE_ALIGNED_8(int16_t, b2[64]); | |||||
DECLARE_ALIGNED_8(int16_t, b2)[64]; | |||||
block[0] += 32; | block[0] += 32; | ||||
@@ -628,7 +628,7 @@ static void ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, DCTE | |||||
static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) | static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) | ||||
{ | { | ||||
DECLARE_ALIGNED_8(uint64_t, tmp0[2]); | |||||
DECLARE_ALIGNED_8(uint64_t, tmp0)[2]; | |||||
__asm__ volatile( | __asm__ volatile( | ||||
"movq (%2,%4), %%mm0 \n\t" //p1 | "movq (%2,%4), %%mm0 \n\t" //p1 | ||||
@@ -690,7 +690,7 @@ static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, in | |||||
{ | { | ||||
//FIXME: could cut some load/stores by merging transpose with filter | //FIXME: could cut some load/stores by merging transpose with filter | ||||
// also, it only needs to transpose 6x8 | // also, it only needs to transpose 6x8 | ||||
DECLARE_ALIGNED_8(uint8_t, trans[8*8]); | |||||
DECLARE_ALIGNED_8(uint8_t, trans)[8*8]; | |||||
int i; | int i; | ||||
for(i=0; i<2; i++, pix+=8*stride, tc0+=2) { | for(i=0; i<2; i++, pix+=8*stride, tc0+=2) { | ||||
if((tc0[0] & tc0[1]) < 0) | if((tc0[0] & tc0[1]) < 0) | ||||
@@ -734,7 +734,7 @@ static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, | |||||
static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | ||||
{ | { | ||||
//FIXME: could cut some load/stores by merging transpose with filter | //FIXME: could cut some load/stores by merging transpose with filter | ||||
DECLARE_ALIGNED_8(uint8_t, trans[8*4]); | |||||
DECLARE_ALIGNED_8(uint8_t, trans)[8*4]; | |||||
transpose4x4(trans, pix-2, 8, stride); | transpose4x4(trans, pix-2, 8, stride); | ||||
transpose4x4(trans+4, pix-2+4*stride, 8, stride); | transpose4x4(trans+4, pix-2+4*stride, 8, stride); | ||||
h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0); | h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0); | ||||
@@ -784,7 +784,7 @@ static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a | |||||
static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) | static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) | ||||
{ | { | ||||
//FIXME: could cut some load/stores by merging transpose with filter | //FIXME: could cut some load/stores by merging transpose with filter | ||||
DECLARE_ALIGNED_8(uint8_t, trans[8*4]); | |||||
DECLARE_ALIGNED_8(uint8_t, trans)[8*4]; | |||||
transpose4x4(trans, pix-2, 8, stride); | transpose4x4(trans, pix-2, 8, stride); | ||||
transpose4x4(trans+4, pix-2+4*stride, 8, stride); | transpose4x4(trans+4, pix-2+4*stride, 8, stride); | ||||
h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1); | h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1); | ||||
@@ -1974,7 +1974,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t * | |||||
#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \ | #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ | ||||
}\ | }\ | ||||
@@ -1984,43 +1984,43 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t * | |||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | ||||
OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ | ||||
}\ | }\ | ||||
#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \ | #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | ||||
OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ | OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ | ||||
OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ | OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ | ||||
OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ | OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ | |||||
put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ | ||||
OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ | OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint16_t, temp[SIZE*(SIZE<8?12:24)]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\ | |||||
OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\ | OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\ | ||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ | |||||
uint8_t * const halfHV= temp;\ | uint8_t * const halfHV= temp;\ | ||||
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | ||||
assert(((int)temp & 7) == 0);\ | assert(((int)temp & 7) == 0);\ | ||||
@@ -2029,7 +2029,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t * | |||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ | |||||
uint8_t * const halfHV= temp;\ | uint8_t * const halfHV= temp;\ | ||||
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | ||||
assert(((int)temp & 7) == 0);\ | assert(((int)temp & 7) == 0);\ | ||||
@@ -2038,7 +2038,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t * | |||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ | |||||
uint8_t * const halfHV= temp;\ | uint8_t * const halfHV= temp;\ | ||||
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | ||||
assert(((int)temp & 7) == 0);\ | assert(((int)temp & 7) == 0);\ | ||||
@@ -2047,7 +2047,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t * | |||||
}\ | }\ | ||||
\ | \ | ||||
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ | ||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ | |||||
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ | |||||
uint8_t * const halfHV= temp;\ | uint8_t * const halfHV= temp;\ | ||||
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ | ||||
assert(((int)temp & 7) == 0);\ | assert(((int)temp & 7) == 0);\ | ||||
@@ -2110,7 +2110,7 @@ H264_MC_816(H264_MC_HV, ssse3) | |||||
#endif | #endif | ||||
/* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */ | /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */ | ||||
DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg[4]) = { | |||||
DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = { | |||||
0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL | 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL | ||||
}; | }; | ||||
@@ -64,13 +64,13 @@ | |||||
//----------------------------------------------------------------------------- | //----------------------------------------------------------------------------- | ||||
DECLARE_ALIGNED(8, static const int16_t, tg_1_16[4*4]) = { | |||||
DECLARE_ALIGNED(8, static const int16_t, tg_1_16)[4*4] = { | |||||
13036,13036,13036,13036, // tg * (2<<16) + 0.5 | 13036,13036,13036,13036, // tg * (2<<16) + 0.5 | ||||
27146,27146,27146,27146, // tg * (2<<16) + 0.5 | 27146,27146,27146,27146, // tg * (2<<16) + 0.5 | ||||
-21746,-21746,-21746,-21746, // tg * (2<<16) + 0.5 | -21746,-21746,-21746,-21746, // tg * (2<<16) + 0.5 | ||||
23170,23170,23170,23170}; // cos * (2<<15) + 0.5 | 23170,23170,23170,23170}; // cos * (2<<15) + 0.5 | ||||
DECLARE_ALIGNED(8, static const int32_t, rounder_0[2*8]) = { | |||||
DECLARE_ALIGNED(8, static const int32_t, rounder_0)[2*8] = { | |||||
65536,65536, | 65536,65536, | ||||
3597,3597, | 3597,3597, | ||||
2260,2260, | 2260,2260, | ||||
@@ -140,7 +140,7 @@ DECLARE_ALIGNED(8, static const int32_t, rounder_0[2*8]) = { | |||||
//----------------------------------------------------------------------------- | //----------------------------------------------------------------------------- | ||||
// Table for rows 0,4 - constants are multiplied by cos_4_16 | // Table for rows 0,4 - constants are multiplied by cos_4_16 | ||||
DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmx[32*4]) = { | |||||
DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmx)[32*4] = { | |||||
16384,16384,16384,-16384, // movq-> w06 w04 w02 w00 | 16384,16384,16384,-16384, // movq-> w06 w04 w02 w00 | ||||
21407,8867,8867,-21407, // w07 w05 w03 w01 | 21407,8867,8867,-21407, // w07 w05 w03 w01 | ||||
16384,-16384,16384,16384, // w14 w12 w10 w08 | 16384,-16384,16384,16384, // w14 w12 w10 w08 | ||||
@@ -182,7 +182,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmx[32*4]) = { | |||||
//----------------------------------------------------------------------------- | //----------------------------------------------------------------------------- | ||||
// %3 for rows 0,4 - constants are multiplied by cos_4_16 | // %3 for rows 0,4 - constants are multiplied by cos_4_16 | ||||
DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm[32*4]) = { | |||||
DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm)[32*4] = { | |||||
16384,21407,16384,8867, // movq-> w05 w04 w01 w00 | 16384,21407,16384,8867, // movq-> w05 w04 w01 w00 | ||||
16384,8867,-16384,-21407, // w07 w06 w03 w02 | 16384,8867,-16384,-21407, // w07 w06 w03 w02 | ||||
16384,-8867,16384,-21407, // w13 w12 w09 w08 | 16384,-8867,16384,-21407, // w13 w12 w09 w08 | ||||
@@ -52,41 +52,41 @@ | |||||
#define ROW_SHIFT 11 | #define ROW_SHIFT 11 | ||||
#define COL_SHIFT 6 | #define COL_SHIFT 6 | ||||
DECLARE_ASM_CONST(16, int16_t, tan1[]) = {X8(13036)}; // tan( pi/16) | |||||
DECLARE_ASM_CONST(16, int16_t, tan2[]) = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1 | |||||
DECLARE_ASM_CONST(16, int16_t, tan3[]) = {X8(43790)}; // tan(3pi/16)-1 | |||||
DECLARE_ASM_CONST(16, int16_t, sqrt2[])= {X8(23170)}; // 0.5/sqrt(2) | |||||
DECLARE_ASM_CONST(8, uint8_t, m127[]) = {X8(127)}; | |||||
DECLARE_ASM_CONST(16, int16_t, tan1)[] = {X8(13036)}; // tan( pi/16) | |||||
DECLARE_ASM_CONST(16, int16_t, tan2)[] = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1 | |||||
DECLARE_ASM_CONST(16, int16_t, tan3)[] = {X8(43790)}; // tan(3pi/16)-1 | |||||
DECLARE_ASM_CONST(16, int16_t, sqrt2)[]= {X8(23170)}; // 0.5/sqrt(2) | |||||
DECLARE_ASM_CONST(8, uint8_t, m127)[] = {X8(127)}; | |||||
DECLARE_ASM_CONST(16, int16_t, iTab1[]) = { | |||||
DECLARE_ASM_CONST(16, int16_t, iTab1)[] = { | |||||
0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d, | 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d, | ||||
0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61, | 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61, | ||||
0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7, | 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7, | ||||
0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b | 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b | ||||
}; | }; | ||||
DECLARE_ASM_CONST(16, int16_t, iTab2[]) = { | |||||
DECLARE_ASM_CONST(16, int16_t, iTab2)[] = { | |||||
0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5, | 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5, | ||||
0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04, | 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04, | ||||
0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41, | 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41, | ||||
0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df | 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df | ||||
}; | }; | ||||
DECLARE_ASM_CONST(16, int16_t, iTab3[]) = { | |||||
DECLARE_ASM_CONST(16, int16_t, iTab3)[] = { | |||||
0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf, | 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf, | ||||
0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf, | 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf, | ||||
0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d, | 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d, | ||||
0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04 | 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04 | ||||
}; | }; | ||||
DECLARE_ASM_CONST(16, int16_t, iTab4[]) = { | |||||
DECLARE_ASM_CONST(16, int16_t, iTab4)[] = { | |||||
0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746, | 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746, | ||||
0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac, | 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac, | ||||
0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df, | 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df, | ||||
0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e | 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e | ||||
}; | }; | ||||
DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = { | |||||
DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders)[] = { | |||||
65536, 65536, 65536, 65536, | 65536, 65536, 65536, 65536, | ||||
3597, 3597, 3597, 3597, | 3597, 3597, 3597, 3597, | ||||
2260, 2260, 2260, 2260, | 2260, 2260, 2260, 2260, | ||||
@@ -26,7 +26,7 @@ | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "dsputil_mmx.h" | #include "dsputil_mmx.h" | ||||
DECLARE_ASM_CONST(8, uint64_t, round_tab[3])={ | |||||
DECLARE_ASM_CONST(8, uint64_t, round_tab)[3]={ | |||||
0x0000000000000000ULL, | 0x0000000000000000ULL, | ||||
0x0001000100010001ULL, | 0x0001000100010001ULL, | ||||
0x0002000200020002ULL, | 0x0002000200020002ULL, | ||||
@@ -98,7 +98,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||||
x86_reg last_non_zero_p1; | x86_reg last_non_zero_p1; | ||||
int level=0, q; //=0 is because gcc says uninitialized ... | int level=0, q; //=0 is because gcc says uninitialized ... | ||||
const uint16_t *qmat, *bias; | const uint16_t *qmat, *bias; | ||||
DECLARE_ALIGNED_16(int16_t, temp_block[64]); | |||||
DECLARE_ALIGNED_16(int16_t, temp_block)[64]; | |||||
assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly? | assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly? | ||||
@@ -24,7 +24,7 @@ | |||||
#include "dsputil_mmx.h" | #include "dsputil_mmx.h" | ||||
/* bias interleaved with bias div 8, use p+1 to access bias div 8 */ | /* bias interleaved with bias div 8, use p+1 to access bias div 8 */ | ||||
DECLARE_ALIGNED_8(static const uint64_t, rv40_bias_reg[4][8]) = { | |||||
DECLARE_ALIGNED_8(static const uint64_t, rv40_bias_reg)[4][8] = { | |||||
{ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0010001000100010ULL, 0x0002000200020002ULL, | { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0010001000100010ULL, 0x0002000200020002ULL, | ||||
0x0020002000200020ULL, 0x0004000400040004ULL, 0x0010001000100010ULL, 0x0002000200020002ULL }, | 0x0020002000200020ULL, 0x0004000400040004ULL, 0x0010001000100010ULL, 0x0002000200020002ULL }, | ||||
{ 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL, | { 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL, | ||||
@@ -52,7 +52,7 @@ | |||||
DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL; | DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL; | ||||
DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL; | DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL; | ||||
DECLARE_ALIGNED(8, static const int16_t, coeffs[])= { | |||||
DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= { | |||||
1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, | 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, | ||||
// 1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0, | // 1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0, | ||||
// 0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16), | // 0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16), | ||||
@@ -211,7 +211,7 @@ row[7] = input[13]; | |||||
static inline void idct(int16_t *block) | static inline void idct(int16_t *block) | ||||
{ | { | ||||
DECLARE_ALIGNED(8, int64_t, align_tmp[16]); | |||||
DECLARE_ALIGNED(8, int64_t, align_tmp)[16]; | |||||
int16_t * const temp= (int16_t*)align_tmp; | int16_t * const temp= (int16_t*)align_tmp; | ||||
__asm__ volatile( | __asm__ volatile( | ||||
@@ -25,7 +25,7 @@ | |||||
void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | ||||
const int w2= (width+1)>>1; | const int w2= (width+1)>>1; | ||||
DECLARE_ALIGNED_16(IDWTELEM, temp[width>>1]); | |||||
DECLARE_ALIGNED_16(IDWTELEM, temp)[width>>1]; | |||||
const int w_l= (width>>1); | const int w_l= (width>>1); | ||||
const int w_r= w2 - 1; | const int w_r= w2 - 1; | ||||
int i; | int i; | ||||
@@ -442,7 +442,7 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\ | |||||
static const int shift_value[] = { 0, 5, 1, 5 };\ | static const int shift_value[] = { 0, 5, 1, 5 };\ | ||||
int shift = (shift_value[hmode]+shift_value[vmode])>>1;\ | int shift = (shift_value[hmode]+shift_value[vmode])>>1;\ | ||||
int r;\ | int r;\ | ||||
DECLARE_ALIGNED_16(int16_t, tmp[12*8]);\ | |||||
DECLARE_ALIGNED_16(int16_t, tmp)[12*8];\ | |||||
\ | \ | ||||
r = (1<<(shift-1)) + rnd-1;\ | r = (1<<(shift-1)) + rnd-1;\ | ||||
vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\ | vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\ | ||||
@@ -26,7 +26,7 @@ | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "dsputil_mmx.h" | #include "dsputil_mmx.h" | ||||
DECLARE_ALIGNED_16(const uint16_t, ff_vp3_idct_data[7 * 8]) = | |||||
DECLARE_ALIGNED_16(const uint16_t, ff_vp3_idct_data)[7 * 8] = | |||||
{ | { | ||||
64277,64277,64277,64277,64277,64277,64277,64277, | 64277,64277,64277,64277,64277,64277,64277,64277, | ||||
60547,60547,60547,60547,60547,60547,60547,60547, | 60547,60547,60547,60547,60547,60547,60547,60547, | ||||
@@ -339,10 +339,10 @@ static uint64_t rand64(void) { | |||||
} | } | ||||
static const uint8_t test_key[] = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0}; | static const uint8_t test_key[] = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0}; | ||||
static const DECLARE_ALIGNED(8, uint8_t, plain[]) = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}; | |||||
static const DECLARE_ALIGNED(8, uint8_t, crypt[]) = {0x4a, 0xb6, 0x5b, 0x3d, 0x4b, 0x06, 0x15, 0x18}; | |||||
static DECLARE_ALIGNED(8, uint8_t, tmp[8]); | |||||
static DECLARE_ALIGNED(8, uint8_t, large_buffer[10002][8]); | |||||
static const DECLARE_ALIGNED(8, uint8_t, plain)[] = {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}; | |||||
static const DECLARE_ALIGNED(8, uint8_t, crypt)[] = {0x4a, 0xb6, 0x5b, 0x3d, 0x4b, 0x06, 0x15, 0x18}; | |||||
static DECLARE_ALIGNED(8, uint8_t, tmp)[8]; | |||||
static DECLARE_ALIGNED(8, uint8_t, large_buffer)[10002][8]; | |||||
static const uint8_t cbc_key[] = { | static const uint8_t cbc_key[] = { | ||||
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, | 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, | ||||
0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, | 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, | ||||
@@ -62,7 +62,7 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) | |||||
vector by assuming (stride % 16) == 0, unfortunately | vector by assuming (stride % 16) == 0, unfortunately | ||||
this is not always true. | this is not always true. | ||||
*/ | */ | ||||
DECLARE_ALIGNED(16, short, data[8]) = | |||||
DECLARE_ALIGNED(16, short, data)[8] = | |||||
{ | { | ||||
((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1, | ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1, | ||||
data[0] * 2 + 1, | data[0] * 2 + 1, | ||||
@@ -222,7 +222,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) | |||||
const vector signed int zero = vec_splat_s32(0); | const vector signed int zero = vec_splat_s32(0); | ||||
const int properStride = (stride % 16); | const int properStride = (stride % 16); | ||||
const int srcAlign = ((unsigned long)src2 % 16); | const int srcAlign = ((unsigned long)src2 % 16); | ||||
DECLARE_ALIGNED(16, short, qp[8]) = {c->QP}; | |||||
DECLARE_ALIGNED(16, short, qp)[8] = {c->QP}; | |||||
vector signed short vqp = vec_ld(0, qp); | vector signed short vqp = vec_ld(0, qp); | ||||
vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9; | vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9; | ||||
vector unsigned char vbA0, av_uninit(vbA1), av_uninit(vbA2), av_uninit(vbA3), av_uninit(vbA4), av_uninit(vbA5), av_uninit(vbA6), av_uninit(vbA7), av_uninit(vbA8), vbA9; | vector unsigned char vbA0, av_uninit(vbA1), av_uninit(vbA2), av_uninit(vbA3), av_uninit(vbA4), av_uninit(vbA5), av_uninit(vbA6), av_uninit(vbA7), av_uninit(vbA8), vbA9; | ||||
@@ -418,7 +418,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext | |||||
*/ | */ | ||||
uint8_t *src2 = src + stride*3; | uint8_t *src2 = src + stride*3; | ||||
const vector signed int zero = vec_splat_s32(0); | const vector signed int zero = vec_splat_s32(0); | ||||
DECLARE_ALIGNED(16, short, qp[8]) = {8*c->QP}; | |||||
DECLARE_ALIGNED(16, short, qp)[8] = {8*c->QP}; | |||||
vector signed short vqp = vec_splat( | vector signed short vqp = vec_splat( | ||||
(vector signed short)vec_ld(0, qp), 0); | (vector signed short)vec_ld(0, qp), 0); | ||||
@@ -538,7 +538,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
src & stride :-( | src & stride :-( | ||||
*/ | */ | ||||
uint8_t *srcCopy = src; | uint8_t *srcCopy = src; | ||||
DECLARE_ALIGNED(16, uint8_t, dt[16]); | |||||
DECLARE_ALIGNED(16, uint8_t, dt)[16]; | |||||
const vector signed int zero = vec_splat_s32(0); | const vector signed int zero = vec_splat_s32(0); | ||||
vector unsigned char v_dt; | vector unsigned char v_dt; | ||||
dt[0] = deringThreshold; | dt[0] = deringThreshold; | ||||
@@ -602,7 +602,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
v_avg = vec_avg(v_min, v_max); | v_avg = vec_avg(v_min, v_max); | ||||
} | } | ||||
DECLARE_ALIGNED(16, signed int, S[8]); | |||||
DECLARE_ALIGNED(16, signed int, S)[8]; | |||||
{ | { | ||||
const vector unsigned short mask1 = (vector unsigned short) | const vector unsigned short mask1 = (vector unsigned short) | ||||
{0x0001, 0x0002, 0x0004, 0x0008, | {0x0001, 0x0002, 0x0004, 0x0008, | ||||
@@ -698,7 +698,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { | |||||
/* I'm not sure the following is actually faster | /* I'm not sure the following is actually faster | ||||
than straight, unvectorized C code :-( */ | than straight, unvectorized C code :-( */ | ||||
DECLARE_ALIGNED(16, int, tQP2[4]); | |||||
DECLARE_ALIGNED(16, int, tQP2)[4]; | |||||
tQP2[0]= c->QP/2 + 1; | tQP2[0]= c->QP/2 + 1; | ||||
vector signed int vQP2 = vec_ld(0, tQP2); | vector signed int vQP2 = vec_ld(0, tQP2); | ||||
vQP2 = vec_splat(vQP2, 0); | vQP2 = vec_splat(vQP2, 0); | ||||
@@ -143,8 +143,8 @@ typedef struct PPContext{ | |||||
DECLARE_ALIGNED(8, uint64_t, pQPb); | DECLARE_ALIGNED(8, uint64_t, pQPb); | ||||
DECLARE_ALIGNED(8, uint64_t, pQPb2); | DECLARE_ALIGNED(8, uint64_t, pQPb2); | ||||
DECLARE_ALIGNED(8, uint64_t, mmxDcOffset[64]); | |||||
DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold[64]); | |||||
DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64]; | |||||
DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64]; | |||||
QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale | QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale | ||||
QP_STORE_T *nonBQPTable; | QP_STORE_T *nonBQPTable; | ||||
@@ -3514,7 +3514,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ | |||||
horizX1Filter(dstBlock-4, stride, QP); | horizX1Filter(dstBlock-4, stride, QP); | ||||
else if(mode & H_DEBLOCK){ | else if(mode & H_DEBLOCK){ | ||||
#if HAVE_ALTIVEC | #if HAVE_ALTIVEC | ||||
DECLARE_ALIGNED(16, unsigned char, tempBlock[272]); | |||||
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; | |||||
transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); | transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); | ||||
const int t=vertClassify_altivec(tempBlock-48, 16, &c); | const int t=vertClassify_altivec(tempBlock-48, 16, &c); | ||||