Browse Source

Remove DECLARE_ALIGNED_{8,16} macros

These macros are redundant.  All uses are replaced with the generic
DECLARE_ALIGNED macro instead.

Originally committed as revision 22233 to svn://svn.ffmpeg.org/ffmpeg/trunk
tags/v0.6
Måns Rullgård 15 years ago
parent
commit
84dc2d8afa
58 changed files with 199 additions and 202 deletions
  1. +1
    -1
      libavcodec/4xm.c
  2. +4
    -4
      libavcodec/aac.h
  3. +3
    -3
      libavcodec/aacenc.h
  4. +6
    -6
      libavcodec/aactab.c
  5. +6
    -6
      libavcodec/ac3dec.h
  6. +1
    -1
      libavcodec/asv1.c
  7. +10
    -10
      libavcodec/atrac1.c
  8. +3
    -3
      libavcodec/atrac3.c
  9. +2
    -2
      libavcodec/bink.c
  10. +2
    -2
      libavcodec/binkaudio.c
  11. +1
    -1
      libavcodec/cavs.h
  12. +1
    -1
      libavcodec/cook.c
  13. +3
    -3
      libavcodec/dca.c
  14. +1
    -1
      libavcodec/dnxhddec.c
  15. +1
    -1
      libavcodec/dnxhdenc.h
  16. +2
    -2
      libavcodec/dsputil.c
  17. +5
    -5
      libavcodec/dsputil.h
  18. +1
    -1
      libavcodec/eamad.c
  19. +1
    -1
      libavcodec/eatgq.c
  20. +1
    -1
      libavcodec/eatqi.c
  21. +6
    -6
      libavcodec/h264.h
  22. +4
    -4
      libavcodec/h264_loopfilter.c
  23. +2
    -2
      libavcodec/imc.c
  24. +1
    -1
      libavcodec/mdec.c
  25. +1
    -1
      libavcodec/mimic.c
  26. +1
    -1
      libavcodec/mjpegdec.h
  27. +2
    -2
      libavcodec/mpc.h
  28. +2
    -2
      libavcodec/mpegaudio.h
  29. +1
    -1
      libavcodec/mpegaudiodec.c
  30. +2
    -2
      libavcodec/nellymoserdec.c
  31. +3
    -3
      libavcodec/nellymoserenc.c
  32. +2
    -2
      libavcodec/ppc/gmc_altivec.c
  33. +30
    -30
      libavcodec/ppc/h264_altivec.c
  34. +2
    -2
      libavcodec/ppc/h264_template_altivec.c
  35. +2
    -2
      libavcodec/ppc/mpegvideo_altivec.c
  36. +3
    -3
      libavcodec/qdm2.c
  37. +1
    -1
      libavcodec/rtjpeg.h
  38. +1
    -1
      libavcodec/rv34.h
  39. +1
    -1
      libavcodec/sipr.h
  40. +5
    -5
      libavcodec/sparc/simple_idct_vis.c
  41. +8
    -8
      libavcodec/vorbis_data.c
  42. +2
    -2
      libavcodec/vp3.c
  43. +1
    -1
      libavcodec/vp56.h
  44. +4
    -4
      libavcodec/wma.h
  45. +2
    -2
      libavcodec/wmaprodec.c
  46. +1
    -1
      libavcodec/wmv2.h
  47. +2
    -2
      libavcodec/x86/cavsdsp_mmx.c
  48. +2
    -2
      libavcodec/x86/dsputil_h264_template_mmx.c
  49. +31
    -31
      libavcodec/x86/dsputil_mmx.c
  50. +2
    -2
      libavcodec/x86/dsputilenc_mmx.c
  51. +1
    -1
      libavcodec/x86/fft_3dn2.c
  52. +10
    -10
      libavcodec/x86/h264dsp_mmx.c
  53. +1
    -1
      libavcodec/x86/mpegvideo_mmx_template.c
  54. +1
    -1
      libavcodec/x86/rv40dsp_mmx.c
  55. +1
    -1
      libavcodec/x86/snowdsp_mmx.c
  56. +2
    -2
      libavcodec/x86/vc1dsp_mmx.c
  57. +1
    -1
      libavcodec/x86/vp3dsp_sse2.c
  58. +0
    -3
      libavutil/mem.h

+ 1
- 1
libavcodec/4xm.c View File

@@ -137,7 +137,7 @@ typedef struct FourXContext{
int mv[256];
VLC pre_vlc;
int last_dc;
DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
void *bitstream_buffer;
unsigned int bitstream_buffer_size;
int version;


+ 4
- 4
libavcodec/aac.h View File

@@ -214,9 +214,9 @@ typedef struct {
float sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder)
uint8_t zeroes[128]; ///< band is not coded (used by encoder)
DECLARE_ALIGNED_16(float, coeffs)[1024]; ///< coefficients for IMDCT
DECLARE_ALIGNED_16(float, saved)[1024]; ///< overlap
DECLARE_ALIGNED_16(float, ret)[1024]; ///< PCM output
DECLARE_ALIGNED(16, float, coeffs)[1024]; ///< coefficients for IMDCT
DECLARE_ALIGNED(16, float, saved)[1024]; ///< overlap
DECLARE_ALIGNED(16, float, ret)[1024]; ///< PCM output
PredictorState predictor_state[MAX_PREDICTORS];
} SingleChannelElement;

@@ -261,7 +261,7 @@ typedef struct {
* @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
* @{
*/
DECLARE_ALIGNED_16(float, buf_mdct)[1024];
DECLARE_ALIGNED(16, float, buf_mdct)[1024];
/** @} */

/**


+ 3
- 3
libavcodec/aacenc.h View File

@@ -52,7 +52,7 @@ typedef struct AACEncContext {
FFTContext mdct1024; ///< long (1024 samples) frame transform context
FFTContext mdct128; ///< short (128 samples) frame transform context
DSPContext dsp;
DECLARE_ALIGNED_16(FFTSample, output)[2048]; ///< temporary buffer for MDCT input coefficients
DECLARE_ALIGNED(16, FFTSample, output)[2048]; ///< temporary buffer for MDCT input coefficients
int16_t* samples; ///< saved preprocessed input

int samplerate_index; ///< MPEG-4 samplerate index
@@ -64,8 +64,8 @@ typedef struct AACEncContext {
int cur_channel;
int last_frame;
float lambda;
DECLARE_ALIGNED_16(int, qcoefs)[96][2]; ///< quantized coefficients
DECLARE_ALIGNED_16(float, scoefs)[1024]; ///< scaled coefficients
DECLARE_ALIGNED(16, int, qcoefs)[96][2]; ///< quantized coefficients
DECLARE_ALIGNED(16, float, scoefs)[1024]; ///< scaled coefficients
} AACEncContext;

#endif /* AVCODEC_AACENC_H */

+ 6
- 6
libavcodec/aactab.c View File

@@ -409,7 +409,7 @@ const uint16_t ff_aac_spectral_sizes[11] = {
* 64.0f is a special value indicating the existence of an escape code in the
* bitstream.
*/
static const DECLARE_ALIGNED_16(float, codebook_vector0)[324] = {
static const DECLARE_ALIGNED(16, float, codebook_vector0)[324] = {
-1.0000000, -1.0000000, -1.0000000, -1.0000000,
-1.0000000, -1.0000000, -1.0000000, 0.0000000,
-1.0000000, -1.0000000, -1.0000000, 1.0000000,
@@ -493,7 +493,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector0)[324] = {
1.0000000, 1.0000000, 1.0000000, 1.0000000,
};

static const DECLARE_ALIGNED_16(float, codebook_vector2)[324] = {
static const DECLARE_ALIGNED(16, float, codebook_vector2)[324] = {
0.0000000, 0.0000000, 0.0000000, 0.0000000,
0.0000000, 0.0000000, 0.0000000, 1.0000000,
0.0000000, 0.0000000, 0.0000000, 2.5198421,
@@ -577,7 +577,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector2)[324] = {
2.5198421, 2.5198421, 2.5198421, 2.5198421,
};

static const DECLARE_ALIGNED_16(float, codebook_vector4)[162] = {
static const DECLARE_ALIGNED(16, float, codebook_vector4)[162] = {
-6.3496042, -6.3496042, -6.3496042, -4.3267487,
-6.3496042, -2.5198421, -6.3496042, -1.0000000,
-6.3496042, 0.0000000, -6.3496042, 1.0000000,
@@ -621,7 +621,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector4)[162] = {
6.3496042, 6.3496042,
};

static const DECLARE_ALIGNED_16(float, codebook_vector6)[128] = {
static const DECLARE_ALIGNED(16, float, codebook_vector6)[128] = {
0.0000000, 0.0000000, 0.0000000, 1.0000000,
0.0000000, 2.5198421, 0.0000000, 4.3267487,
0.0000000, 6.3496042, 0.0000000, 8.5498797,
@@ -656,7 +656,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector6)[128] = {
13.3905183, 10.9027236, 13.3905183, 13.3905183,
};

static const DECLARE_ALIGNED_16(float, codebook_vector8)[338] = {
static const DECLARE_ALIGNED(16, float, codebook_vector8)[338] = {
0.0000000, 0.0000000, 0.0000000, 1.0000000,
0.0000000, 2.5198421, 0.0000000, 4.3267487,
0.0000000, 6.3496042, 0.0000000, 8.5498797,
@@ -744,7 +744,7 @@ static const DECLARE_ALIGNED_16(float, codebook_vector8)[338] = {
27.4731418, 27.4731418,
};

static const DECLARE_ALIGNED_16(float, codebook_vector10)[578] = {
static const DECLARE_ALIGNED(16, float, codebook_vector10)[578] = {
0.0000000, 0.0000000, 0.0000000, 1.0000000,
0.0000000, 2.5198421, 0.0000000, 4.3267487,
0.0000000, 6.3496042, 0.0000000, 8.5498797,


+ 6
- 6
libavcodec/ac3dec.h View File

@@ -157,12 +157,12 @@ typedef struct {
///@}

///@defgroup arrays aligned arrays
DECLARE_ALIGNED_16(int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///> fixed-point transform coefficients
DECLARE_ALIGNED_16(float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients
DECLARE_ALIGNED_16(float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block
DECLARE_ALIGNED_16(float, window)[AC3_BLOCK_SIZE]; ///< window coefficients
DECLARE_ALIGNED_16(float, tmp_output)[AC3_BLOCK_SIZE]; ///< temporary storage for output before windowing
DECLARE_ALIGNED_16(float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< output after imdct transform and windowing
DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///> fixed-point transform coefficients
DECLARE_ALIGNED(16, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients
DECLARE_ALIGNED(16, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block
DECLARE_ALIGNED(16, float, window)[AC3_BLOCK_SIZE]; ///< window coefficients
DECLARE_ALIGNED(16, float, tmp_output)[AC3_BLOCK_SIZE]; ///< temporary storage for output before windowing
DECLARE_ALIGNED(16, float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< output after imdct transform and windowing
///@}
} AC3DecodeContext;



+ 1
- 1
libavcodec/asv1.c View File

@@ -48,7 +48,7 @@ typedef struct ASV1Context{
int mb_height;
int mb_width2;
int mb_height2;
DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
uint16_t intra_matrix[64];
int q_intra_matrix[64];
uint8_t *bitstream_buffer;


+ 10
- 10
libavcodec/atrac1.c View File

@@ -58,11 +58,11 @@ typedef struct {
int log2_block_count[AT1_QMF_BANDS]; ///< log2 number of blocks in a band
int num_bfus; ///< number of Block Floating Units
float* spectrum[2];
DECLARE_ALIGNED_16(float, spec1)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED_16(float, spec2)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED_16(float, fst_qmf_delay)[46]; ///< delay line for the 1st stacked QMF filter
DECLARE_ALIGNED_16(float, snd_qmf_delay)[46]; ///< delay line for the 2nd stacked QMF filter
DECLARE_ALIGNED_16(float, last_qmf_delay)[256+23]; ///< delay line for the last stacked QMF filter
DECLARE_ALIGNED(16, float, spec1)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED(16, float, spec2)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED(16, float, fst_qmf_delay)[46]; ///< delay line for the 1st stacked QMF filter
DECLARE_ALIGNED(16, float, snd_qmf_delay)[46]; ///< delay line for the 2nd stacked QMF filter
DECLARE_ALIGNED(16, float, last_qmf_delay)[256+23]; ///< delay line for the last stacked QMF filter
} AT1SUCtx;

/**
@@ -70,13 +70,13 @@ typedef struct {
*/
typedef struct {
AT1SUCtx SUs[AT1_MAX_CHANNELS]; ///< channel sound unit
DECLARE_ALIGNED_16(float, spec)[AT1_SU_SAMPLES]; ///< the mdct spectrum buffer
DECLARE_ALIGNED(16, float, spec)[AT1_SU_SAMPLES]; ///< the mdct spectrum buffer

DECLARE_ALIGNED_16(float, low)[256];
DECLARE_ALIGNED_16(float, mid)[256];
DECLARE_ALIGNED_16(float, high)[512];
DECLARE_ALIGNED(16, float, low)[256];
DECLARE_ALIGNED(16, float, mid)[256];
DECLARE_ALIGNED(16, float, high)[512];
float* bands[3];
DECLARE_ALIGNED_16(float, out_samples)[AT1_MAX_CHANNELS][AT1_SU_SAMPLES];
DECLARE_ALIGNED(16, float, out_samples)[AT1_MAX_CHANNELS][AT1_SU_SAMPLES];
FFTContext mdct_ctx[3];
int channels;
DSPContext dsp;


+ 3
- 3
libavcodec/atrac3.c View File

@@ -73,8 +73,8 @@ typedef struct {
int gcBlkSwitch;
gain_block gainBlock[2];

DECLARE_ALIGNED_16(float, spectrum)[1024];
DECLARE_ALIGNED_16(float, IMDCT_buf)[1024];
DECLARE_ALIGNED(16, float, spectrum)[1024];
DECLARE_ALIGNED(16, float, IMDCT_buf)[1024];

float delayBuf1[46]; ///<qmf delay buffers
float delayBuf2[46];
@@ -119,7 +119,7 @@ typedef struct {
//@}
} ATRAC3Context;

static DECLARE_ALIGNED_16(float,mdct_window)[512];
static DECLARE_ALIGNED(16, float,mdct_window)[512];
static VLC spectral_coeff_tab[7];
static float gain_tab1[16];
static float gain_tab2[31];


+ 2
- 2
libavcodec/bink.c View File

@@ -681,8 +681,8 @@ static int bink_decode_plane(BinkContext *c, GetBitContext *gb, int plane_idx,
int v, col[2];
const uint8_t *scan;
int xoff, yoff;
DECLARE_ALIGNED_16(DCTELEM, block[64]);
DECLARE_ALIGNED_16(uint8_t, ublock[64]);
DECLARE_ALIGNED(16, DCTELEM, block[64]);
DECLARE_ALIGNED(16, uint8_t, ublock[64]);
int coordmap[64];

const int stride = c->pic.linesize[plane_idx];


+ 2
- 2
libavcodec/binkaudio.c View File

@@ -49,8 +49,8 @@ typedef struct {
int num_bands;
unsigned int *bands;
float root;
DECLARE_ALIGNED_16(FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE];
DECLARE_ALIGNED_16(short, previous)[BINK_BLOCK_MAX_SIZE / 16]; ///< coeffs from previous audio block
DECLARE_ALIGNED(16, FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, short, previous)[BINK_BLOCK_MAX_SIZE / 16]; ///< coeffs from previous audio block
float *coeffs_ptr[MAX_CHANNELS]; ///< pointers to the coeffs arrays for float_to_int16_interleave
union {
RDFTContext rdft;


+ 1
- 1
libavcodec/cavs.h View File

@@ -136,7 +136,7 @@ enum cavs_mv_loc {
MV_BWD_X3
};

DECLARE_ALIGNED_8(typedef, struct) {
DECLARE_ALIGNED(8, typedef, struct) {
int16_t x;
int16_t y;
int16_t dist;


+ 1
- 1
libavcodec/cook.c View File

@@ -150,7 +150,7 @@ typedef struct cook {
/* data buffers */

uint8_t* decoded_bytes_buffer;
DECLARE_ALIGNED_16(float,mono_mdct_output)[2048];
DECLARE_ALIGNED(16, float,mono_mdct_output)[2048];
float decode_buffer_1[1024];
float decode_buffer_2[1024];
float decode_buffer_0[1060]; /* static allocation for joint decode */


+ 3
- 3
libavcodec/dca.c View File

@@ -228,16 +228,16 @@ typedef struct {

/* Subband samples history (for ADPCM) */
float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
DECLARE_ALIGNED_16(float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512];
DECLARE_ALIGNED(16, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512];
float subband_fir_noidea[DCA_PRIM_CHANNELS_MAX][32];
int hist_index[DCA_PRIM_CHANNELS_MAX];
DECLARE_ALIGNED_16(float, raXin)[32];
DECLARE_ALIGNED(16, float, raXin)[32];

int output; ///< type of output
float add_bias; ///< output bias
float scale_bias; ///< output scale

DECLARE_ALIGNED_16(float, samples)[1536]; /* 6 * 256 = 1536, might only need 5 */
DECLARE_ALIGNED(16, float, samples)[1536]; /* 6 * 256 = 1536, might only need 5 */
const float *samples_chanptr[6];

uint8_t dca_buffer[DCA_MAX_FRAME_SIZE];


+ 1
- 1
libavcodec/dnxhddec.c View File

@@ -39,7 +39,7 @@ typedef struct {
VLC ac_vlc, dc_vlc, run_vlc;
int last_dc[3];
DSPContext dsp;
DECLARE_ALIGNED_16(DCTELEM, blocks)[8][64];
DECLARE_ALIGNED(16, DCTELEM, blocks)[8][64];
ScanTable scantable;
const CIDEntry *cid_table;
} DNXHDContext;


+ 1
- 1
libavcodec/dnxhdenc.h View File

@@ -55,7 +55,7 @@ typedef struct DNXHDEncContext {
int interlaced;
int cur_field;

DECLARE_ALIGNED_16(DCTELEM, blocks)[8][64];
DECLARE_ALIGNED(16, DCTELEM, blocks)[8][64];

int (*qmatrix_c) [64];
int (*qmatrix_l) [64];


+ 2
- 2
libavcodec/dsputil.c View File

@@ -92,7 +92,7 @@ const uint8_t ff_zigzag248_direct[64] = {
};

/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16)[64];
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];

const uint8_t ff_alternate_horizontal_scan[64] = {
0, 1, 2, 3, 8, 9, 16, 17,
@@ -4535,7 +4535,7 @@ av_cold void dsputil_static_init(void)

int ff_check_alignment(void){
static int did_fail=0;
DECLARE_ALIGNED_16(int, aligned);
DECLARE_ALIGNED(16, int, aligned);

if((intptr_t)&aligned & 15){
if(!did_fail){


+ 5
- 5
libavcodec/dsputil.h View File

@@ -709,13 +709,13 @@ static inline void emms(void)
t (*v) __VA_ARGS__ = (void *)FFALIGN((uintptr_t)la_##v, a)

#if HAVE_LOCAL_ALIGNED_8
# define LOCAL_ALIGNED_8(t, v, s, ...) DECLARE_ALIGNED_8(t, v) s __VA_ARGS__
# define LOCAL_ALIGNED_8(t, v, s, ...) DECLARE_ALIGNED(8, t, v) s __VA_ARGS__
#else
# define LOCAL_ALIGNED_8(t, v, s, ...) LOCAL_ALIGNED(8, t, v, s, __VA_ARGS__)
#endif

#if HAVE_LOCAL_ALIGNED_16
# define LOCAL_ALIGNED_16(t, v, s, ...) DECLARE_ALIGNED_16(t, v) s __VA_ARGS__
# define LOCAL_ALIGNED_16(t, v, s, ...) DECLARE_ALIGNED(16, t, v) s __VA_ARGS__
#else
# define LOCAL_ALIGNED_16(t, v, s, ...) LOCAL_ALIGNED(16, t, v, s, __VA_ARGS__)
#endif
@@ -769,11 +769,11 @@ typedef struct FFTContext {
#endif

#define COSTABLE(size) \
COSTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_cos_##size)[size/2]
COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2]
#define SINTABLE(size) \
SINTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_sin_##size)[size/2]
SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
#define SINETABLE(size) \
SINETABLE_CONST DECLARE_ALIGNED_16(float, ff_sine_##size)[size]
SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
extern COSTABLE(16);
extern COSTABLE(32);
extern COSTABLE(64);


+ 1
- 1
libavcodec/eamad.c View File

@@ -46,7 +46,7 @@ typedef struct MadContext {
AVFrame last_frame;
void *bitstream_buf;
unsigned int bitstream_buf_size;
DECLARE_ALIGNED_16(DCTELEM, block)[64];
DECLARE_ALIGNED(16, DCTELEM, block)[64];
} MadContext;

static void bswap16_buf(uint16_t *dst, const uint16_t *src, int count)


+ 1
- 1
libavcodec/eatgq.c View File

@@ -42,7 +42,7 @@ typedef struct TgqContext {
int width,height;
ScanTable scantable;
int qtable[64];
DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
} TgqContext;

static av_cold int tgq_decode_init(AVCodecContext *avctx){


+ 1
- 1
libavcodec/eatqi.c View File

@@ -40,7 +40,7 @@ typedef struct TqiContext {
AVFrame frame;
void *bitstream_buf;
unsigned int bitstream_buf_size;
DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
} TqiContext;

static av_cold int tqi_decode_init(AVCodecContext *avctx)


+ 6
- 6
libavcodec/h264.h View File

@@ -299,7 +299,7 @@ typedef struct H264Context{
* non zero coeff count cache.
* is 64 if not available.
*/
DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8];
DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8];

/*
.UU.YYYY
@@ -312,8 +312,8 @@ typedef struct H264Context{
/**
* Motion vector cache.
*/
DECLARE_ALIGNED_16(int16_t, mv_cache)[2][5*8][2];
DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8];
DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2];
DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8];
#define LIST_NOT_USED -1 //FIXME rename?
#define PART_NOT_AVAILABLE -2

@@ -366,7 +366,7 @@ typedef struct H264Context{
int mb_field_decoding_flag;
int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag

DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4];
DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];

//Weighted pred stuff
int use_weight;
@@ -403,7 +403,7 @@ typedef struct H264Context{
GetBitContext *intra_gb_ptr;
GetBitContext *inter_gb_ptr;

DECLARE_ALIGNED_16(DCTELEM, mb)[16*24];
DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb

/**
@@ -421,7 +421,7 @@ typedef struct H264Context{
uint8_t *chroma_pred_mode_table;
int last_qscale_diff;
uint8_t (*mvd_table[2])[2];
DECLARE_ALIGNED_16(uint8_t, mvd_cache)[2][5*8][2];
DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2];
uint8_t *direct_table;
uint8_t direct_cache[5*8];



+ 4
- 4
libavcodec/h264_loopfilter.c View File

@@ -477,7 +477,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
int j;

for(j=0; j<2; j++, mbn_xy += s->mb_stride){
DECLARE_ALIGNED_8(int16_t, bS)[4];
DECLARE_ALIGNED(8, int16_t, bS)[4];
int qp;
if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) {
AV_WN64A(bS, 0x0003000300030003ULL);
@@ -507,7 +507,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h);
}
}else{
DECLARE_ALIGNED_8(int16_t, bS)[4];
DECLARE_ALIGNED(8, int16_t, bS)[4];
int qp;

if( IS_INTRA(mb_type|mbm_type)) {
@@ -584,7 +584,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u

/* Calculate bS */
for( edge = 1; edge < edges; edge++ ) {
DECLARE_ALIGNED_8(int16_t, bS)[4];
DECLARE_ALIGNED(8, int16_t, bS)[4];
int qp;

if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type)
@@ -669,7 +669,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
/* First vertical edge is different in MBAFF frames
* There are 8 different bS to compute and 2 different Qp
*/
DECLARE_ALIGNED_8(int16_t, bS)[8];
DECLARE_ALIGNED(8, int16_t, bS)[8];
int qp[2];
int bqp[2];
int rqp[2];


+ 2
- 2
libavcodec/imc.c View File

@@ -84,8 +84,8 @@ typedef struct {

DSPContext dsp;
FFTContext fft;
DECLARE_ALIGNED_16(FFTComplex, samples)[COEFFS/2];
DECLARE_ALIGNED_16(float, out_samples)[COEFFS];
DECLARE_ALIGNED(16, FFTComplex, samples)[COEFFS/2];
DECLARE_ALIGNED(16, float, out_samples)[COEFFS];
} IMCContext;

static VLC huffman_vlc[4][4];


+ 1
- 1
libavcodec/mdec.c View File

@@ -44,7 +44,7 @@ typedef struct MDECContext{
int mb_width;
int mb_height;
int mb_x, mb_y;
DECLARE_ALIGNED_16(DCTELEM, block)[6][64];
DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
uint8_t *bitstream_buffer;
unsigned int bitstream_buffer_size;
int block_last_index[6];


+ 1
- 1
libavcodec/mimic.c View File

@@ -45,7 +45,7 @@ typedef struct {
AVFrame buf_ptrs [16];
AVPicture flipped_ptrs[16];

DECLARE_ALIGNED_16(DCTELEM, dct_block)[64];
DECLARE_ALIGNED(16, DCTELEM, dct_block)[64];

GetBitContext gb;
ScanTable scantable;


+ 1
- 1
libavcodec/mjpegdec.h View File

@@ -84,7 +84,7 @@ typedef struct MJpegDecodeContext {
int got_picture; ///< we found a SOF and picture is valid, too.
int linesize[MAX_COMPONENTS]; ///< linesize << interlaced
int8_t *qscale_table;
DECLARE_ALIGNED_16(DCTELEM, block)[64];
DECLARE_ALIGNED(16, DCTELEM, block)[64];
DCTELEM (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
uint8_t *last_nnz[MAX_COMPONENTS];
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)


+ 2
- 2
libavcodec/mpc.h View File

@@ -65,9 +65,9 @@ typedef struct {
AVLFG rnd;
int frames_to_skip;
/* for synthesis */
DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
int synth_buf_offset[MPA_MAX_CHANNELS];
DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
} MPCContext;

void ff_mpc_init(void);


+ 2
- 2
libavcodec/mpegaudio.h View File

@@ -132,9 +132,9 @@ typedef struct MPADecodeContext {
uint32_t free_format_next_header;
GetBitContext gb;
GetBitContext in_gb;
DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
int synth_buf_offset[MPA_MAX_CHANNELS];
DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
GranuleDef granules[2][2]; /* Used in Layer 3 */
#ifdef DEBUG


+ 1
- 1
libavcodec/mpegaudiodec.c View File

@@ -95,7 +95,7 @@ static const int32_t scale_factor_mult2[3][3] = {
SCALE_GEN(4.0 / 9.0), /* 9 steps */
};

DECLARE_ALIGNED_16(MPA_INT, ff_mpa_synth_window)[512];
DECLARE_ALIGNED(16, MPA_INT, ff_mpa_synth_window)[512];

/**
* Convert region offsets to region sizes and truncate


+ 2
- 2
libavcodec/nellymoserdec.c View File

@@ -43,7 +43,7 @@

typedef struct NellyMoserDecodeContext {
AVCodecContext* avctx;
DECLARE_ALIGNED_16(float,float_buf)[NELLY_SAMPLES];
DECLARE_ALIGNED(16, float,float_buf)[NELLY_SAMPLES];
float state[128];
AVLFG random_state;
GetBitContext gb;
@@ -51,7 +51,7 @@ typedef struct NellyMoserDecodeContext {
float scale_bias;
DSPContext dsp;
FFTContext imdct_ctx;
DECLARE_ALIGNED_16(float,imdct_out)[NELLY_BUF_LEN * 2];
DECLARE_ALIGNED(16, float,imdct_out)[NELLY_BUF_LEN * 2];
} NellyMoserDecodeContext;

static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in)


+ 3
- 3
libavcodec/nellymoserenc.c View File

@@ -53,9 +53,9 @@ typedef struct NellyMoserEncodeContext {
int have_saved;
DSPContext dsp;
FFTContext mdct_ctx;
DECLARE_ALIGNED_16(float, mdct_out)[NELLY_SAMPLES];
DECLARE_ALIGNED_16(float, in_buff)[NELLY_SAMPLES];
DECLARE_ALIGNED_16(float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer
DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer
float (*opt )[NELLY_BANDS];
uint8_t (*path)[NELLY_BANDS];
} NellyMoserEncodeContext;


+ 2
- 2
libavcodec/ppc/gmc_altivec.c View File

@@ -33,8 +33,8 @@
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
{
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder;
const DECLARE_ALIGNED_16(unsigned short, ABCD)[8] =
const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] =
{
(16-x16)*(16-y16), /* A */
( x16)*(16-y16), /* B */


+ 30
- 30
libavcodec/ppc/h264_altivec.c View File

@@ -79,7 +79,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uin
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
@@ -89,13 +89,13 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
@@ -105,79 +105,79 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
@@ -480,7 +480,7 @@ static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *bl
vec_s16 dc16;
vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
LOAD_ZERO;
DECLARE_ALIGNED_16(int, dc);
DECLARE_ALIGNED(16, int, dc);
int i;

dc = (block[0] + 32) >> 6;
@@ -590,7 +590,7 @@ static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, D
static inline void write16x4(uint8_t *dst, int dst_stride,
register vec_u8 r0, register vec_u8 r1,
register vec_u8 r2, register vec_u8 r3) {
DECLARE_ALIGNED_16(unsigned char, result)[64];
DECLARE_ALIGNED(16, unsigned char, result)[64];
uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
int int_dst_stride = dst_stride/4;

@@ -770,7 +770,7 @@ static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
}

#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \
DECLARE_ALIGNED_16(unsigned char, temp)[16]; \
DECLARE_ALIGNED(16, unsigned char, temp)[16]; \
register vec_u8 alphavec; \
register vec_u8 betavec; \
register vec_u8 mask; \
@@ -850,7 +850,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
vec_u8 vblock;
vec_s16 vtemp, vweight, voffset, v0, v1;
vec_u16 vlog2_denom;
DECLARE_ALIGNED_16(int32_t, temp)[4];
DECLARE_ALIGNED(16, int32_t, temp)[4];
LOAD_ZERO;

offset <<= log2_denom;
@@ -896,7 +896,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
vec_u8 vsrc, vdst;
vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
vec_u16 vlog2_denom;
DECLARE_ALIGNED_16(int32_t, temp)[4];
DECLARE_ALIGNED(16, int32_t, temp)[4];
LOAD_ZERO;

offset = ((offset + 1) | 1) << log2_denom;


+ 2
- 2
libavcodec/ppc/h264_template_altivec.c View File

@@ -78,7 +78,7 @@
void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
int stride, int h, int x, int y) {
POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
DECLARE_ALIGNED_16(signed int, ABCD)[4] =
DECLARE_ALIGNED(16, signed int, ABCD)[4] =
{((8 - x) * (8 - y)),
(( x) * (8 - y)),
((8 - x) * ( y)),
@@ -208,7 +208,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,

/* this code assume that stride % 16 == 0 */
void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
DECLARE_ALIGNED_16(signed int, ABCD)[4] =
DECLARE_ALIGNED(16, signed int, ABCD)[4] =
{((8 - x) * (8 - y)),
(( x) * (8 - y)),
((8 - x) * ( y)),


+ 2
- 2
libavcodec/ppc/mpegvideo_altivec.c View File

@@ -506,8 +506,8 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);

{
register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
DECLARE_ALIGNED_16(short, qmul8) = qmul;
DECLARE_ALIGNED_16(short, qadd8) = qadd;
DECLARE_ALIGNED(16, short, qmul8) = qmul;
DECLARE_ALIGNED(16, short, qadd8) = qadd;
register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
register vector bool short blockv_null, blockv_neg;
register short backup_0 = block[0];


+ 3
- 3
libavcodec/qdm2.c View File

@@ -122,7 +122,7 @@ typedef struct {
} FFTCoefficient;

typedef struct {
DECLARE_ALIGNED_16(QDM2Complex, complex)[MPA_MAX_CHANNELS][256];
DECLARE_ALIGNED(16, QDM2Complex, complex)[MPA_MAX_CHANNELS][256];
} QDM2FFT;

/**
@@ -172,9 +172,9 @@ typedef struct {
float output_buffer[1024];

/// Synthesis filter
DECLARE_ALIGNED_16(MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
int synth_buf_offset[MPA_MAX_CHANNELS];
DECLARE_ALIGNED_16(int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];
DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];

/// Mixed temporary data used in decoding
float tone_level[MPA_MAX_CHANNELS][30][64];


+ 1
- 1
libavcodec/rtjpeg.h View File

@@ -31,7 +31,7 @@ typedef struct {
uint8_t scan[64];
uint32_t lquant[64];
uint32_t cquant[64];
DECLARE_ALIGNED_16(DCTELEM, block)[64];
DECLARE_ALIGNED(16, DCTELEM, block)[64];
} RTJpegContext;

void rtjpeg_decode_init(RTJpegContext *c, DSPContext *dsp,


+ 1
- 1
libavcodec/rv34.h View File

@@ -111,7 +111,7 @@ typedef struct RV34DecContext{
int *deblock_coefs; ///< deblock coefficients for each macroblock

/** 8x8 block available flags (for MV prediction) */
DECLARE_ALIGNED_8(uint32_t, avail_cache)[3*4];
DECLARE_ALIGNED(8, uint32_t, avail_cache)[3*4];

int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
int (*decode_mb_info)(struct RV34DecContext *r);


+ 1
- 1
libavcodec/sipr.h View File

@@ -64,7 +64,7 @@ typedef struct {

float excitation[L_INTERPOL + PITCH_MAX + 2 * L_SUBFR_16k];

DECLARE_ALIGNED_16(float, synth_buf)[LP_FILTER_ORDER + 5*SUBFR_SIZE + 6];
DECLARE_ALIGNED(16, float, synth_buf)[LP_FILTER_ORDER + 5*SUBFR_SIZE + 6];

float lsp_history[LP_FILTER_ORDER];
float gain_mem;


+ 5
- 5
libavcodec/sparc/simple_idct_vis.c View File

@@ -24,7 +24,7 @@

#include "libavcodec/dsputil.h"

static const DECLARE_ALIGNED_8(int16_t, coeffs)[28] = {
static const DECLARE_ALIGNED(8, int16_t, coeffs)[28] = {
- 1259,- 1259,- 1259,- 1259,
- 4989,- 4989,- 4989,- 4989,
-11045,-11045,-11045,-11045,
@@ -33,13 +33,13 @@ static const DECLARE_ALIGNED_8(int16_t, coeffs)[28] = {
25080, 25080, 25080, 25080,
12785, 12785, 12785, 12785
};
static const DECLARE_ALIGNED_8(uint16_t, scale)[4] = {
static const DECLARE_ALIGNED(8, uint16_t, scale)[4] = {
65536>>6, 65536>>6, 65536>>6, 65536>>6
};
static const DECLARE_ALIGNED_8(uint16_t, rounder)[4] = {
static const DECLARE_ALIGNED(8, uint16_t, rounder)[4] = {
1<<5, 1<<5, 1<<5, 1<<5
};
static const DECLARE_ALIGNED_8(uint16_t, expand)[4] = {
static const DECLARE_ALIGNED(8, uint16_t, expand)[4] = {
1<<14, 1<<14, 1<<14, 1<<14
};

@@ -386,7 +386,7 @@ static const DECLARE_ALIGNED_8(uint16_t, expand)[4] = {

void ff_simple_idct_vis(DCTELEM *data) {
int out1, out2, out3, out4;
DECLARE_ALIGNED_8(int16_t, temp)[8*8];
DECLARE_ALIGNED(8, int16_t, temp)[8*8];

__asm__ volatile(
INIT_IDCT


+ 8
- 8
libavcodec/vorbis_data.c View File

@@ -44,7 +44,7 @@ const int64_t ff_vorbis_channel_layouts[9] = {
0
};

DECLARE_ALIGNED_16(static const float, vwin64)[32] = {
DECLARE_ALIGNED(16, static const float, vwin64)[32] = {
0.0009460463F, 0.0085006468F, 0.0235352254F, 0.0458950567F,
0.0753351908F, 0.1115073077F, 0.1539457973F, 0.2020557475F,
0.2551056759F, 0.3122276645F, 0.3724270287F, 0.4346027792F,
@@ -55,7 +55,7 @@ DECLARE_ALIGNED_16(static const float, vwin64)[32] = {
0.9989462667F, 0.9997230082F, 0.9999638688F, 0.9999995525F,
};

DECLARE_ALIGNED_16(static const float, vwin128)[64] = {
DECLARE_ALIGNED(16, static const float, vwin128)[64] = {
0.0002365472F, 0.0021280687F, 0.0059065254F, 0.0115626550F,
0.0190823442F, 0.0284463735F, 0.0396300935F, 0.0526030430F,
0.0673285281F, 0.0837631763F, 0.1018564887F, 0.1215504095F,
@@ -74,7 +74,7 @@ DECLARE_ALIGNED_16(static const float, vwin128)[64] = {
0.9999331503F, 0.9999825563F, 0.9999977357F, 0.9999999720F,
};

DECLARE_ALIGNED_16(static const float, vwin256)[128] = {
DECLARE_ALIGNED(16, static const float, vwin256)[128] = {
0.0000591390F, 0.0005321979F, 0.0014780301F, 0.0028960636F,
0.0047854363F, 0.0071449926F, 0.0099732775F, 0.0132685298F,
0.0170286741F, 0.0212513119F, 0.0259337111F, 0.0310727950F,
@@ -109,7 +109,7 @@ DECLARE_ALIGNED_16(static const float, vwin256)[128] = {
0.9999958064F, 0.9999989077F, 0.9999998584F, 0.9999999983F,
};

DECLARE_ALIGNED_16(static const float, vwin512)[256] = {
DECLARE_ALIGNED(16, static const float, vwin512)[256] = {
0.0000147849F, 0.0001330607F, 0.0003695946F, 0.0007243509F,
0.0011972759F, 0.0017882983F, 0.0024973285F, 0.0033242588F,
0.0042689632F, 0.0053312973F, 0.0065110982F, 0.0078081841F,
@@ -176,7 +176,7 @@ DECLARE_ALIGNED_16(static const float, vwin512)[256] = {
0.9999997377F, 0.9999999317F, 0.9999999911F, 0.9999999999F,
};

DECLARE_ALIGNED_16(static const float, vwin1024)[512] = {
DECLARE_ALIGNED(16, static const float, vwin1024)[512] = {
0.0000036962F, 0.0000332659F, 0.0000924041F, 0.0001811086F,
0.0002993761F, 0.0004472021F, 0.0006245811F, 0.0008315063F,
0.0010679699F, 0.0013339631F, 0.0016294757F, 0.0019544965F,
@@ -307,7 +307,7 @@ DECLARE_ALIGNED_16(static const float, vwin1024)[512] = {
0.9999999836F, 0.9999999957F, 0.9999999994F, 1.0000000000F,
};

DECLARE_ALIGNED_16(static const float, vwin2048)[1024] = {
DECLARE_ALIGNED(16, static const float, vwin2048)[1024] = {
0.0000009241F, 0.0000083165F, 0.0000231014F, 0.0000452785F,
0.0000748476F, 0.0001118085F, 0.0001561608F, 0.0002079041F,
0.0002670379F, 0.0003335617F, 0.0004074748F, 0.0004887765F,
@@ -566,7 +566,7 @@ DECLARE_ALIGNED_16(static const float, vwin2048)[1024] = {
0.9999999990F, 0.9999999997F, 1.0000000000F, 1.0000000000F,
};

DECLARE_ALIGNED_16(static const float, vwin4096)[2048] = {
DECLARE_ALIGNED(16, static const float, vwin4096)[2048] = {
0.0000002310F, 0.0000020791F, 0.0000057754F, 0.0000113197F,
0.0000187121F, 0.0000279526F, 0.0000390412F, 0.0000519777F,
0.0000667623F, 0.0000833949F, 0.0001018753F, 0.0001222036F,
@@ -1081,7 +1081,7 @@ DECLARE_ALIGNED_16(static const float, vwin4096)[2048] = {
0.9999999999F, 1.0000000000F, 1.0000000000F, 1.0000000000F,
};

DECLARE_ALIGNED_16(static const float, vwin8192)[4096] = {
DECLARE_ALIGNED(16, static const float, vwin8192)[4096] = {
0.0000000578F, 0.0000005198F, 0.0000014438F, 0.0000028299F,
0.0000046780F, 0.0000069882F, 0.0000097604F, 0.0000129945F,
0.0000166908F, 0.0000208490F, 0.0000254692F, 0.0000305515F,


+ 2
- 2
libavcodec/vp3.c View File

@@ -221,7 +221,7 @@ typedef struct Vp3DecodeContext {

/* these arrays need to be on 16-byte boundaries since SSE2 operations
* index into them */
DECLARE_ALIGNED_16(int16_t, qmat)[3][2][3][64]; //<qmat[qpi][is_inter][plane]
DECLARE_ALIGNED(16, int16_t, qmat)[3][2][3][64]; //<qmat[qpi][is_inter][plane]

/* This table contains superblock_count * 16 entries. Each set of 16
* numbers corresponds to the fragment indexes 0..15 of the superblock.
@@ -244,7 +244,7 @@ typedef struct Vp3DecodeContext {
uint16_t huffman_table[80][32][2];

uint8_t filter_limit_values[64];
DECLARE_ALIGNED_8(int, bounding_values_array)[256+2];
DECLARE_ALIGNED(8, int, bounding_values_array)[256+2];
} Vp3DecodeContext;

/************************************************************************


+ 1
- 1
libavcodec/vp56.h View File

@@ -121,7 +121,7 @@ struct vp56_context {
/* blocks / macroblock */
VP56mb mb_type;
VP56Macroblock *macroblocks;
DECLARE_ALIGNED_16(DCTELEM, block_coeff)[6][64];
DECLARE_ALIGNED(16, DCTELEM, block_coeff)[6][64];

/* motion vectors */
VP56mv mv[6]; /* vectors for each block in MB */


+ 4
- 4
libavcodec/wma.h View File

@@ -111,15 +111,15 @@ typedef struct WMACodecContext {
uint8_t ms_stereo; ///< true if mid/side stereo mode
uint8_t channel_coded[MAX_CHANNELS]; ///< true if channel is coded
int exponents_bsize[MAX_CHANNELS]; ///< log2 ratio frame/exp. length
DECLARE_ALIGNED_16(float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE];
float max_exponent[MAX_CHANNELS];
WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED_16(float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED_16(FFTSample, output)[BLOCK_MAX_SIZE * 2];
DECLARE_ALIGNED(16, float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, FFTSample, output)[BLOCK_MAX_SIZE * 2];
FFTContext mdct_ctx[BLOCK_NB_SIZES];
float *windows[BLOCK_NB_SIZES];
/* output buffer for one frame and the last for IMDCT windowing */
DECLARE_ALIGNED_16(float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
DECLARE_ALIGNED(16, float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
/* last frame info */
uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
int last_bitoffset;


+ 2
- 2
libavcodec/wmaprodec.c View File

@@ -142,7 +142,7 @@ typedef struct {
int* scale_factors; ///< pointer to the scale factor values used for decoding
uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block
float* coeffs; ///< pointer to the subframe decode buffer
DECLARE_ALIGNED_16(float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
DECLARE_ALIGNED(16, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
} WMAProChannelCtx;

/**
@@ -167,7 +167,7 @@ typedef struct WMAProDecodeCtx {
FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
PutBitContext pb; ///< context for filling the frame_data buffer
FFTContext mdct_ctx[WMAPRO_BLOCK_SIZES]; ///< MDCT context per block size
DECLARE_ALIGNED_16(float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
DECLARE_ALIGNED(16, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
float* windows[WMAPRO_BLOCK_SIZES]; ///< windows for the different block sizes

/* frame size dependent frame information (set during initialization) */


+ 1
- 1
libavcodec/wmv2.h View File

@@ -50,7 +50,7 @@ typedef struct Wmv2Context{
int hshift;

ScanTable abt_scantable[2];
DECLARE_ALIGNED_16(DCTELEM, abt_block2)[6][64];
DECLARE_ALIGNED(16, DCTELEM, abt_block2)[6][64];
}Wmv2Context;

void ff_wmv2_common_init(Wmv2Context * w);


+ 2
- 2
libavcodec/x86/cavsdsp_mmx.c View File

@@ -113,10 +113,10 @@ static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
{
int i;
DECLARE_ALIGNED_8(int16_t, b2)[64];
DECLARE_ALIGNED(8, int16_t, b2)[64];

for(i=0; i<2; i++){
DECLARE_ALIGNED_8(uint64_t, tmp);
DECLARE_ALIGNED(8, uint64_t, tmp);

cavs_idct8_1d(block+4*i, ff_pw_4);



+ 2
- 2
libavcodec/x86/dsputil_h264_template_mmx.c View File

@@ -27,8 +27,8 @@
*/
static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, const uint64_t *rnd_reg)
{
DECLARE_ALIGNED_8(uint64_t, AA);
DECLARE_ALIGNED_8(uint64_t, DD);
DECLARE_ALIGNED(8, uint64_t, AA);
DECLARE_ALIGNED(8, uint64_t, DD);
int i;

if(y==0 && x==0) {


+ 31
- 31
libavcodec/x86/dsputil_mmx.c View File

@@ -39,38 +39,38 @@
int mm_flags; /* multimedia extension flags */

/* pixel operations */
DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;

DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] =
DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
{0x8000000080000000ULL, 0x8000000080000000ULL};

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
DECLARE_ALIGNED_16(const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 };
DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };

#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
@@ -2026,7 +2026,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c
} else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
MIX5(IF1,IF0);
} else {
DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4];
DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4];
j = 2*in_ch*sizeof(float);
__asm__ volatile(
"1: \n"
@@ -2413,7 +2413,7 @@ static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int al
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
DECLARE_ALIGNED_16(int16_t, tmp)[len];\
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
int i,j,c;\
for(c=0; c<channels; c++){\
float_to_int16_##cpu(tmp, src[c], len);\


+ 2
- 2
libavcodec/x86/dsputilenc_mmx.c View File

@@ -1063,7 +1063,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c

#define HADAMARD8_DIFF_MMX(cpu) \
static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\
DECLARE_ALIGNED_8(uint64_t, temp)[13];\
DECLARE_ALIGNED(8, uint64_t, temp)[13];\
int sum;\
\
assert(h==8);\
@@ -1146,7 +1146,7 @@ WRAPPER8_16_SQ(hadamard8_diff_##cpu, hadamard8_diff16_##cpu)

#define HADAMARD8_DIFF_SSE2(cpu) \
static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){\
DECLARE_ALIGNED_16(uint64_t, temp)[4];\
DECLARE_ALIGNED(16, uint64_t, temp)[4];\
int sum;\
\
assert(h==8);\


+ 1
- 1
libavcodec/x86/fft_3dn2.c View File

@@ -23,7 +23,7 @@
#include "libavcodec/dsputil.h"
#include "fft.h"

DECLARE_ALIGNED_8(static const int, m1m1)[2] = { 1<<31, 1<<31 };
DECLARE_ALIGNED(8, static const int, m1m1)[2] = { 1<<31, 1<<31 };

#ifdef EMULATE_3DNOWEXT
#define PSWAPD(s,d)\


+ 10
- 10
libavcodec/x86/h264dsp_mmx.c View File

@@ -20,8 +20,8 @@

#include "dsputil_mmx.h"

DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;

/***********************************/
/* IDCT */
@@ -157,12 +157,12 @@ static inline void h264_idct8_1d(int16_t *block)
static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
{
int i;
DECLARE_ALIGNED_8(int16_t, b2)[64];
DECLARE_ALIGNED(8, int16_t, b2)[64];

block[0] += 32;

for(i=0; i<2; i++){
DECLARE_ALIGNED_8(uint64_t, tmp);
DECLARE_ALIGNED(8, uint64_t, tmp);

h264_idct8_1d(block+4*i);

@@ -628,7 +628,7 @@ static void ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, DCTE

static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
{
DECLARE_ALIGNED_8(uint64_t, tmp0)[2];
DECLARE_ALIGNED(8, uint64_t, tmp0)[2];

__asm__ volatile(
"movq (%2,%4), %%mm0 \n\t" //p1
@@ -690,7 +690,7 @@ static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, in
{
//FIXME: could cut some load/stores by merging transpose with filter
// also, it only needs to transpose 6x8
DECLARE_ALIGNED_8(uint8_t, trans)[8*8];
DECLARE_ALIGNED(8, uint8_t, trans)[8*8];
int i;
for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
if((tc0[0] & tc0[1]) < 0)
@@ -734,7 +734,7 @@ static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha,
static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
//FIXME: could cut some load/stores by merging transpose with filter
DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
transpose4x4(trans, pix-2, 8, stride);
transpose4x4(trans+4, pix-2+4*stride, 8, stride);
h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
@@ -784,7 +784,7 @@ static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
{
//FIXME: could cut some load/stores by merging transpose with filter
DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
transpose4x4(trans, pix-2, 8, stride);
transpose4x4(trans+4, pix-2+4*stride, 8, stride);
h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
@@ -815,7 +815,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
for( dir=1; dir>=0; dir-- ) {
const x86_reg d_idx = dir ? -8 : -1;
const int mask_mv = dir ? mask_mv1 : mask_mv0;
DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
DECLARE_ALIGNED(8, const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
int b_idx, edge;
for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
__asm__ volatile(
@@ -2106,7 +2106,7 @@ H264_MC_816(H264_MC_HV, ssse3)
#endif

/* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = {
DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = {
0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
};



+ 1
- 1
libavcodec/x86/mpegvideo_mmx_template.c View File

@@ -98,7 +98,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
x86_reg last_non_zero_p1;
int level=0, q; //=0 is because gcc says uninitialized ...
const uint16_t *qmat, *bias;
DECLARE_ALIGNED_16(int16_t, temp_block)[64];
DECLARE_ALIGNED(16, int16_t, temp_block)[64];

assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?



+ 1
- 1
libavcodec/x86/rv40dsp_mmx.c View File

@@ -24,7 +24,7 @@
#include "dsputil_mmx.h"

/* bias interleaved with bias div 8, use p+1 to access bias div 8 */
DECLARE_ALIGNED_8(static const uint64_t, rv40_bias_reg)[4][8] = {
DECLARE_ALIGNED(8, static const uint64_t, rv40_bias_reg)[4][8] = {
{ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0010001000100010ULL, 0x0002000200020002ULL,
0x0020002000200020ULL, 0x0004000400040004ULL, 0x0010001000100010ULL, 0x0002000200020002ULL },
{ 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL,


+ 1
- 1
libavcodec/x86/snowdsp_mmx.c View File

@@ -25,7 +25,7 @@

void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
const int w2= (width+1)>>1;
DECLARE_ALIGNED_16(IDWTELEM, temp)[width>>1];
DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1];
const int w_l= (width>>1);
const int w_r= w2 - 1;
int i;


+ 2
- 2
libavcodec/x86/vc1dsp_mmx.c View File

@@ -73,7 +73,7 @@
"movq %%mm"#R1", "#OFF"(%1) \n\t" \
"add %2, %0 \n\t"

DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL;
DECLARE_ALIGNED(16, const uint64_t, ff_pw_9) = 0x0009000900090009ULL;

/** Sacrifying mm6 allows to pipeline loads from src */
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
@@ -442,7 +442,7 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
static const int shift_value[] = { 0, 5, 1, 5 };\
int shift = (shift_value[hmode]+shift_value[vmode])>>1;\
int r;\
DECLARE_ALIGNED_16(int16_t, tmp)[12*8];\
DECLARE_ALIGNED(16, int16_t, tmp)[12*8];\
\
r = (1<<(shift-1)) + rnd-1;\
vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\


+ 1
- 1
libavcodec/x86/vp3dsp_sse2.c View File

@@ -26,7 +26,7 @@
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"

DECLARE_ALIGNED_16(const uint16_t, ff_vp3_idct_data)[7 * 8] =
DECLARE_ALIGNED(16, const uint16_t, ff_vp3_idct_data)[7 * 8] =
{
64277,64277,64277,64277,64277,64277,64277,64277,
60547,60547,60547,60547,60547,60547,60547,60547,


+ 0
- 3
libavutil/mem.h View File

@@ -49,9 +49,6 @@
#define DECLARE_ASM_CONST(n,t,v) static const t v
#endif

#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v)

#if AV_GCC_VERSION_AT_LEAST(3,1)
#define av_malloc_attrib __attribute__((__malloc__))
#else


Loading…
Cancel
Save