patch by Steve Lhomme, steve .dot. lhomme .at. free .dot. fr. Originally committed as revision 4942 to svn://svn.ffmpeg.org/ffmpeg/trunk [tags/v0.5]
| @@ -121,7 +121,7 @@ typedef struct FourXContext{ | |||||
| int mv[256]; | int mv[256]; | ||||
| VLC pre_vlc; | VLC pre_vlc; | ||||
| int last_dc; | int last_dc; | ||||
| DCTELEM __align8 block[6][64]; | |||||
| DECLARE_ALIGNED_8(DCTELEM, block[6][64]); | |||||
| uint8_t *bitstream_buffer; | uint8_t *bitstream_buffer; | ||||
| unsigned int bitstream_buffer_size; | unsigned int bitstream_buffer_size; | ||||
| CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; | CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; | ||||
| @@ -44,9 +44,9 @@ typedef struct ASV1Context{ | |||||
| int mb_height; | int mb_height; | ||||
| int mb_width2; | int mb_width2; | ||||
| int mb_height2; | int mb_height2; | ||||
| DCTELEM __align8 block[6][64]; | |||||
| uint16_t __align8 intra_matrix[64]; | |||||
| int __align8 q_intra_matrix[64]; | |||||
| DECLARE_ALIGNED_8(DCTELEM, block[6][64]); | |||||
| DECLARE_ALIGNED_8(uint16_t, intra_matrix[64]); | |||||
| DECLARE_ALIGNED_8(int, q_intra_matrix[64]); | |||||
| uint8_t *bitstream_buffer; | uint8_t *bitstream_buffer; | ||||
| unsigned int bitstream_buffer_size; | unsigned int bitstream_buffer_size; | ||||
| } ASV1Context; | } ASV1Context; | ||||
| @@ -62,7 +62,7 @@ const uint8_t ff_zigzag248_direct[64] = { | |||||
| }; | }; | ||||
| /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | ||||
| uint16_t __align8 inv_zigzag_direct16[64] = {0, }; | |||||
| DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, }; | |||||
| const uint8_t ff_alternate_horizontal_scan[64] = { | const uint8_t ff_alternate_horizontal_scan[64] = { | ||||
| 0, 1, 2, 3, 8, 9, 16, 17, | 0, 1, 2, 3, 8, 9, 16, 17, | ||||
| @@ -3402,7 +3402,7 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_ | |||||
| static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
| MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
| uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; | |||||
| DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | |||||
| DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
| int sum=0, i; | int sum=0, i; | ||||
| @@ -3472,7 +3472,7 @@ static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s | |||||
| static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
| MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
| uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; | |||||
| DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | |||||
| DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
| int sum=0, i; | int sum=0, i; | ||||
| @@ -3491,7 +3491,7 @@ void simple_idct(DCTELEM *block); //FIXME | |||||
| static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
| MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
| uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8]; | |||||
| DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]); | |||||
| DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
| DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | ||||
| int sum=0, i; | int sum=0, i; | ||||
| @@ -3516,8 +3516,8 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s | |||||
| static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
| MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
| const uint8_t *scantable= s->intra_scantable.permutated; | const uint8_t *scantable= s->intra_scantable.permutated; | ||||
| uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; | |||||
| uint64_t __align8 aligned_bak[stride]; | |||||
| DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | |||||
| DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]); | |||||
| DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
| uint8_t * const bak= (uint8_t*)aligned_bak; | uint8_t * const bak= (uint8_t*)aligned_bak; | ||||
| int i, last, run, bits, level, distoration, start_i; | int i, last, run, bits, level, distoration, start_i; | ||||
| @@ -3595,7 +3595,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int | |||||
| static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
| MpegEncContext * const s= (MpegEncContext *)c; | MpegEncContext * const s= (MpegEncContext *)c; | ||||
| const uint8_t *scantable= s->intra_scantable.permutated; | const uint8_t *scantable= s->intra_scantable.permutated; | ||||
| uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; | |||||
| DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); | |||||
| DCTELEM * const temp= (DCTELEM*)aligned_temp; | DCTELEM * const temp= (DCTELEM*)aligned_temp; | ||||
| int i, last, run, bits, level, start_i; | int i, last, run, bits, level, start_i; | ||||
| const int esc_length= s->ac_esc_length; | const int esc_length= s->ac_esc_length; | ||||
| @@ -390,7 +390,11 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){ | |||||
| one or more MultiMedia extension */ | one or more MultiMedia extension */ | ||||
| int mm_support(void); | int mm_support(void); | ||||
| #define __align16 __attribute__ ((aligned (16))) | |||||
| #ifdef __GNUC__ | |||||
| #define DECLARE_ALIGNED_16(t,v) t v __attribute__ ((aligned (16))) | |||||
| #else | |||||
| #define DECLARE_ALIGNED_16(t,v) __declspec(align(16)) t v | |||||
| #endif | |||||
| #if defined(HAVE_MMX) | #if defined(HAVE_MMX) | ||||
| @@ -421,7 +425,12 @@ static inline void emms(void) | |||||
| emms();\ | emms();\ | ||||
| } | } | ||||
| #define __align8 __attribute__ ((aligned (8))) | |||||
| #ifdef __GNUC__ | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) | |||||
| #else | |||||
| #define DECLARE_ALIGNED_8(t,v) __declspec(align(8)) t v | |||||
| #endif | |||||
| #define STRIDE_ALIGN 8 | #define STRIDE_ALIGN 8 | ||||
| void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); | ||||
| @@ -431,7 +440,7 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); | |||||
| /* This is to use 4 bytes read to the IDCT pointers for some 'zero' | /* This is to use 4 bytes read to the IDCT pointers for some 'zero' | ||||
| line optimizations */ | line optimizations */ | ||||
| #define __align8 __attribute__ ((aligned (4))) | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (4))) | |||||
| #define STRIDE_ALIGN 4 | #define STRIDE_ALIGN 4 | ||||
| #define MM_IWMMXT 0x0100 /* XScale IWMMXT */ | #define MM_IWMMXT 0x0100 /* XScale IWMMXT */ | ||||
| @@ -443,7 +452,7 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx); | |||||
| #elif defined(HAVE_MLIB) | #elif defined(HAVE_MLIB) | ||||
| /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | ||||
| #define __align8 __attribute__ ((aligned (8))) | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) | |||||
| #define STRIDE_ALIGN 8 | #define STRIDE_ALIGN 8 | ||||
| void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); | ||||
| @@ -451,13 +460,13 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); | |||||
| #elif defined(ARCH_SPARC) | #elif defined(ARCH_SPARC) | ||||
| /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | ||||
| #define __align8 __attribute__ ((aligned (8))) | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) | |||||
| #define STRIDE_ALIGN 8 | #define STRIDE_ALIGN 8 | ||||
| void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); | ||||
| #elif defined(ARCH_ALPHA) | #elif defined(ARCH_ALPHA) | ||||
| #define __align8 __attribute__ ((aligned (8))) | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) | |||||
| #define STRIDE_ALIGN 8 | #define STRIDE_ALIGN 8 | ||||
| void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); | ||||
| @@ -474,28 +483,28 @@ extern int mm_flags; | |||||
| #undef pixel | #undef pixel | ||||
| #endif | #endif | ||||
| #define __align8 __attribute__ ((aligned (16))) | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (16))) | |||||
| #define STRIDE_ALIGN 16 | #define STRIDE_ALIGN 16 | ||||
| void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); | ||||
| #elif defined(HAVE_MMI) | #elif defined(HAVE_MMI) | ||||
| #define __align8 __attribute__ ((aligned (16))) | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (16))) | |||||
| #define STRIDE_ALIGN 16 | #define STRIDE_ALIGN 16 | ||||
| void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); | ||||
| #elif defined(ARCH_SH4) | #elif defined(ARCH_SH4) | ||||
| #define __align8 __attribute__ ((aligned (8))) | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) | |||||
| #define STRIDE_ALIGN 8 | #define STRIDE_ALIGN 8 | ||||
| void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); | void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); | ||||
| #else | #else | ||||
| #define __align8 __attribute__ ((aligned (8))) | |||||
| #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) | |||||
| #define STRIDE_ALIGN 8 | #define STRIDE_ALIGN 8 | ||||
| #endif | #endif | ||||
| @@ -379,9 +379,9 @@ static inline void dv_decode_video_segment(DVVideoContext *s, | |||||
| PutBitContext pb, vs_pb; | PutBitContext pb, vs_pb; | ||||
| GetBitContext gb; | GetBitContext gb; | ||||
| BlockInfo mb_data[5 * 6], *mb, *mb1; | BlockInfo mb_data[5 * 6], *mb, *mb1; | ||||
| DCTELEM sblock[5*6][64] __align8; | |||||
| uint8_t mb_bit_buffer[80 + 4] __align8; /* allow some slack */ | |||||
| uint8_t vs_bit_buffer[5 * 80 + 4] __align8; /* allow some slack */ | |||||
| DECLARE_ALIGNED_8(DCTELEM, sblock[5*6][64]); | |||||
| DECLARE_ALIGNED_8(uint8_t, mb_bit_buffer[80 + 4]); /* allow some slack */ | |||||
| DECLARE_ALIGNED_8(uint8_t, vs_bit_buffer[5 * 80 + 4]); /* allow some slack */ | |||||
| const int log2_blocksize= 3-s->avctx->lowres; | const int log2_blocksize= 3-s->avctx->lowres; | ||||
| assert((((int)mb_bit_buffer)&7)==0); | assert((((int)mb_bit_buffer)&7)==0); | ||||
| @@ -779,7 +779,7 @@ static inline void dv_encode_video_segment(DVVideoContext *s, | |||||
| uint8_t* data; | uint8_t* data; | ||||
| uint8_t* ptr; | uint8_t* ptr; | ||||
| int do_edge_wrap; | int do_edge_wrap; | ||||
| DCTELEM block[64] __align8; | |||||
| DECLARE_ALIGNED_8(DCTELEM, block[64]); | |||||
| EncBlockInfo enc_blks[5*6]; | EncBlockInfo enc_blks[5*6]; | ||||
| PutBitContext pbs[5*6]; | PutBitContext pbs[5*6]; | ||||
| PutBitContext* pb; | PutBitContext* pb; | ||||
| @@ -199,14 +199,14 @@ typedef struct H264Context{ | |||||
| * non zero coeff count cache. | * non zero coeff count cache. | ||||
| * is 64 if not available. | * is 64 if not available. | ||||
| */ | */ | ||||
| uint8_t non_zero_count_cache[6*8] __align8; | |||||
| DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]); | |||||
| uint8_t (*non_zero_count)[16]; | uint8_t (*non_zero_count)[16]; | ||||
| /** | /** | ||||
| * Motion vector cache. | * Motion vector cache. | ||||
| */ | */ | ||||
| int16_t mv_cache[2][5*8][2] __align8; | |||||
| int8_t ref_cache[2][5*8] __align8; | |||||
| DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]); | |||||
| DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]); | |||||
| #define LIST_NOT_USED -1 //FIXME rename? | #define LIST_NOT_USED -1 //FIXME rename? | ||||
| #define PART_NOT_AVAILABLE -2 | #define PART_NOT_AVAILABLE -2 | ||||
| @@ -335,7 +335,7 @@ typedef struct H264Context{ | |||||
| GetBitContext *intra_gb_ptr; | GetBitContext *intra_gb_ptr; | ||||
| GetBitContext *inter_gb_ptr; | GetBitContext *inter_gb_ptr; | ||||
| DCTELEM mb[16*24] __align8; | |||||
| DECLARE_ALIGNED_8(DCTELEM, mb[16*24]); | |||||
| /** | /** | ||||
| * Cabac | * Cabac | ||||
| @@ -352,7 +352,7 @@ typedef struct H264Context{ | |||||
| uint8_t *chroma_pred_mode_table; | uint8_t *chroma_pred_mode_table; | ||||
| int last_qscale_diff; | int last_qscale_diff; | ||||
| int16_t (*mvd_table[2])[2]; | int16_t (*mvd_table[2])[2]; | ||||
| int16_t mvd_cache[2][5*8][2] __align8; | |||||
| DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]); | |||||
| uint8_t *direct_table; | uint8_t *direct_table; | ||||
| uint8_t direct_cache[5*8]; | uint8_t direct_cache[5*8]; | ||||
| @@ -25,8 +25,8 @@ | |||||
| */ | */ | ||||
| static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | ||||
| { | { | ||||
| uint64_t AA __align8; | |||||
| uint64_t DD __align8; | |||||
| DECLARE_ALIGNED_8(uint64_t, AA); | |||||
| DECLARE_ALIGNED_8(uint64_t, DD); | |||||
| int i; | int i; | ||||
| if(y==0 && x==0) { | if(y==0 && x==0) { | ||||
| @@ -242,8 +242,8 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* | |||||
| static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | ||||
| { | { | ||||
| uint64_t AA __align8; | |||||
| uint64_t DD __align8; | |||||
| DECLARE_ALIGNED_8(uint64_t, AA); | |||||
| DECLARE_ALIGNED_8(uint64_t, DD); | |||||
| int i; | int i; | ||||
| /* no special case for mv=(0,0) in 4x*, since it's much less common than in 8x*. | /* no special case for mv=(0,0) in 4x*, since it's much less common than in 8x*. | ||||
| @@ -303,7 +303,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size | |||||
| :"memory"); | :"memory"); | ||||
| } | } | ||||
| static const unsigned char __align8 vector128[8] = | |||||
| static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) = | |||||
| { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; | ||||
| void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | ||||
| @@ -1546,7 +1546,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t | |||||
| "movq "#d", "#o"+48(%1) \n\t"\ | "movq "#d", "#o"+48(%1) \n\t"\ | ||||
| static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
| uint64_t temp[16] __align8; | |||||
| DECLARE_ALIGNED_8(uint64_t, temp[16]); | |||||
| int sum=0; | int sum=0; | ||||
| assert(h==8); | assert(h==8); | ||||
| @@ -1633,7 +1633,7 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, | |||||
| } | } | ||||
| static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ | static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ | ||||
| uint64_t temp[16] __align8; | |||||
| DECLARE_ALIGNED_8(uint64_t, temp[16]); | |||||
| int sum=0; | int sum=0; | ||||
| assert(h==8); | assert(h==8); | ||||
| @@ -51,7 +51,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, | |||||
| long last_non_zero_p1; | long last_non_zero_p1; | ||||
| int level=0, q; //=0 is cuz gcc says uninitalized ... | int level=0, q; //=0 is cuz gcc says uninitalized ... | ||||
| const uint16_t *qmat, *bias; | const uint16_t *qmat, *bias; | ||||
| __align8 int16_t temp_block[64]; | |||||
| DECLARE_ALIGNED_8(int16_t, temp_block[64]); | |||||
| assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly? | assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly? | ||||
| @@ -24,7 +24,7 @@ | |||||
| #include "../dsputil.h" | #include "../dsputil.h" | ||||
| #include "mmx.h" | #include "mmx.h" | ||||
| static const unsigned short __align16 SSE2_dequant_const[] = | |||||
| static DECLARE_ALIGNED_16(const unsigned short, SSE2_dequant_const[]) = | |||||
| { | { | ||||
| 0,65535,65535,0,0,0,0,0, // 0x0000 0000 0000 0000 0000 FFFF FFFF 0000 | 0,65535,65535,0,0,0,0,0, // 0x0000 0000 0000 0000 0000 FFFF FFFF 0000 | ||||
| 0,0,0,0,65535,65535,0,0, // 0x0000 0000 FFFF FFFF 0000 0000 0000 0000 | 0,0,0,0,65535,65535,0,0, // 0x0000 0000 FFFF FFFF 0000 0000 0000 0000 | ||||
| @@ -35,7 +35,7 @@ static const unsigned short __align16 SSE2_dequant_const[] = | |||||
| 0,0,65535,65535, 0,0,0,0 // 0x0000 0000 0000 0000 FFFF FFFF 0000 0000 | 0,0,65535,65535, 0,0,0,0 // 0x0000 0000 0000 0000 FFFF FFFF 0000 0000 | ||||
| }; | }; | ||||
| static const unsigned int __align16 eight_data[] = | |||||
| static DECLARE_ALIGNED_16(const unsigned int, eight_data[]) = | |||||
| { | { | ||||
| 0x00080008, | 0x00080008, | ||||
| 0x00080008, | 0x00080008, | ||||
| @@ -43,7 +43,7 @@ static const unsigned int __align16 eight_data[] = | |||||
| 0x00080008 | 0x00080008 | ||||
| }; | }; | ||||
| static const unsigned short __align16 SSE2_idct_data[7 * 8] = | |||||
| static DECLARE_ALIGNED_16(const unsigned short, SSE2_idct_data[7 * 8]) = | |||||
| { | { | ||||
| 64277,64277,64277,64277,64277,64277,64277,64277, | 64277,64277,64277,64277,64277,64277,64277,64277, | ||||
| 60547,60547,60547,60547,60547,60547,60547,60547, | 60547,60547,60547,60547,60547,60547,60547,60547, | ||||
| @@ -50,8 +50,8 @@ struct ImgReSampleContext { | |||||
| int padtop, padbottom, padleft, padright; | int padtop, padbottom, padleft, padright; | ||||
| int pad_owidth, pad_oheight; | int pad_owidth, pad_oheight; | ||||
| int h_incr, v_incr; | int h_incr, v_incr; | ||||
| int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */ | |||||
| int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */ | |||||
| DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */ | |||||
| DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */ | |||||
| uint8_t *line_buf; | uint8_t *line_buf; | ||||
| }; | }; | ||||
| @@ -45,9 +45,9 @@ typedef struct MDECContext{ | |||||
| int mb_width; | int mb_width; | ||||
| int mb_height; | int mb_height; | ||||
| int mb_x, mb_y; | int mb_x, mb_y; | ||||
| DCTELEM __align8 block[6][64]; | |||||
| uint16_t __align8 intra_matrix[64]; | |||||
| int __align8 q_intra_matrix[64]; | |||||
| DECLARE_ALIGNED_8(DCTELEM, block[6][64]); | |||||
| DECLARE_ALIGNED_8(uint16_t, intra_matrix[64]); | |||||
| DECLARE_ALIGNED_8(int, q_intra_matrix[64]); | |||||
| uint8_t *bitstream_buffer; | uint8_t *bitstream_buffer; | ||||
| unsigned int bitstream_buffer_size; | unsigned int bitstream_buffer_size; | ||||
| int block_last_index[6]; | int block_last_index[6]; | ||||
| @@ -873,7 +873,7 @@ typedef struct MJpegDecodeContext { | |||||
| AVFrame picture; /* picture structure */ | AVFrame picture; /* picture structure */ | ||||
| int linesize[MAX_COMPONENTS]; ///< linesize << interlaced | int linesize[MAX_COMPONENTS]; ///< linesize << interlaced | ||||
| int8_t *qscale_table; | int8_t *qscale_table; | ||||
| DCTELEM block[64] __align8; | |||||
| DECLARE_ALIGNED_8(DCTELEM, block[64]); | |||||
| ScanTable scantable; | ScanTable scantable; | ||||
| void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); | ||||
| @@ -5963,7 +5963,7 @@ static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise? | |||||
| DCTELEM *block, int16_t *weight, DCTELEM *orig, | DCTELEM *block, int16_t *weight, DCTELEM *orig, | ||||
| int n, int qscale){ | int n, int qscale){ | ||||
| int16_t rem[64]; | int16_t rem[64]; | ||||
| DCTELEM d1[64] __align16; | |||||
| DECLARE_ALIGNED_16(DCTELEM, d1[64]); | |||||
| const int *qmat; | const int *qmat; | ||||
| const uint8_t *scantable= s->intra_scantable.scantable; | const uint8_t *scantable= s->intra_scantable.scantable; | ||||
| const uint8_t *perm_scantable= s->intra_scantable.permutated; | const uint8_t *perm_scantable= s->intra_scantable.permutated; | ||||
| @@ -134,7 +134,7 @@ typedef struct ScanTable{ | |||||
| uint8_t raster_end[64]; | uint8_t raster_end[64]; | ||||
| #ifdef ARCH_POWERPC | #ifdef ARCH_POWERPC | ||||
| /** Used by dct_quantise_alitvec to find last-non-zero */ | /** Used by dct_quantise_alitvec to find last-non-zero */ | ||||
| uint8_t __align8 inverse[64]; | |||||
| DECLARE_ALIGNED_8(uint8_t, inverse[64]); | |||||
| #endif | #endif | ||||
| } ScanTable; | } ScanTable; | ||||
| @@ -494,7 +494,7 @@ typedef struct MpegEncContext { | |||||
| uint16_t (*q_inter_matrix16)[2][64]; | uint16_t (*q_inter_matrix16)[2][64]; | ||||
| int block_last_index[12]; ///< last non zero coefficient in block | int block_last_index[12]; ///< last non zero coefficient in block | ||||
| /* scantables */ | /* scantables */ | ||||
| ScanTable __align8 intra_scantable; | |||||
| DECLARE_ALIGNED_8(ScanTable, intra_scantable); | |||||
| ScanTable intra_h_scantable; | ScanTable intra_h_scantable; | ||||
| ScanTable intra_v_scantable; | ScanTable intra_v_scantable; | ||||
| ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce tha cache usage | ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce tha cache usage | ||||
| @@ -71,7 +71,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uin | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ | static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ | ||||
| uint64_t temp[SIZE*SIZE/8] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\ | |||||
| uint8_t * const half= (uint8_t*)temp;\ | uint8_t * const half= (uint8_t*)temp;\ | ||||
| put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | ||||
| OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | ||||
| @@ -82,14 +82,14 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*SIZE/8] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\ | |||||
| uint8_t * const half= (uint8_t*)temp;\ | uint8_t * const half= (uint8_t*)temp;\ | ||||
| put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | ||||
| OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*SIZE/8] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\ | |||||
| uint8_t * const half= (uint8_t*)temp;\ | uint8_t * const half= (uint8_t*)temp;\ | ||||
| put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | ||||
| OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | ||||
| @@ -100,14 +100,14 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*SIZE/8] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\ | |||||
| uint8_t * const half= (uint8_t*)temp;\ | uint8_t * const half= (uint8_t*)temp;\ | ||||
| put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | ||||
| OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ | OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*SIZE/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\ | |||||
| uint8_t * const halfH= (uint8_t*)temp;\ | uint8_t * const halfH= (uint8_t*)temp;\ | ||||
| uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ | uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ | ||||
| put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | ||||
| @@ -116,7 +116,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*SIZE/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\ | |||||
| uint8_t * const halfH= (uint8_t*)temp;\ | uint8_t * const halfH= (uint8_t*)temp;\ | ||||
| uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ | uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ | ||||
| put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | ||||
| @@ -125,7 +125,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*SIZE/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\ | |||||
| uint8_t * const halfH= (uint8_t*)temp;\ | uint8_t * const halfH= (uint8_t*)temp;\ | ||||
| uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ | uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ | ||||
| put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | ||||
| @@ -134,7 +134,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*SIZE/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\ | |||||
| uint8_t * const halfH= (uint8_t*)temp;\ | uint8_t * const halfH= (uint8_t*)temp;\ | ||||
| uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ | uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ | ||||
| put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | ||||
| @@ -143,13 +143,13 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*(SIZE+8)/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4]);\ | |||||
| int16_t * const tmp= (int16_t*)temp;\ | int16_t * const tmp= (int16_t*)temp;\ | ||||
| OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ | OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ | ||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\ | |||||
| uint8_t * const halfH= (uint8_t*)temp;\ | uint8_t * const halfH= (uint8_t*)temp;\ | ||||
| uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ | uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ | ||||
| int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ | int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ | ||||
| @@ -159,7 +159,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\ | |||||
| uint8_t * const halfH= (uint8_t*)temp;\ | uint8_t * const halfH= (uint8_t*)temp;\ | ||||
| uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ | uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ | ||||
| int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ | int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ | ||||
| @@ -169,7 +169,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\ | |||||
| uint8_t * const halfV= (uint8_t*)temp;\ | uint8_t * const halfV= (uint8_t*)temp;\ | ||||
| uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ | uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ | ||||
| int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ | int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ | ||||
| @@ -179,7 +179,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint | |||||
| }\ | }\ | ||||
| \ | \ | ||||
| static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | ||||
| uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\ | |||||
| DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\ | |||||
| uint8_t * const halfV= (uint8_t*)temp;\ | uint8_t * const halfV= (uint8_t*)temp;\ | ||||
| uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ | uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ | ||||
| int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ | int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ | ||||
| @@ -285,9 +285,9 @@ typedef struct Vp3DecodeContext { | |||||
| /* these arrays need to be on 16-byte boundaries since SSE2 operations | /* these arrays need to be on 16-byte boundaries since SSE2 operations | ||||
| * index into them */ | * index into them */ | ||||
| int16_t __align16 intra_y_dequant[64]; | |||||
| int16_t __align16 intra_c_dequant[64]; | |||||
| int16_t __align16 inter_dequant[64]; | |||||
| DECLARE_ALIGNED_16(int16_t, intra_y_dequant[64]); | |||||
| DECLARE_ALIGNED_16(int16_t, intra_c_dequant[64]); | |||||
| DECLARE_ALIGNED_16(int16_t, inter_dequant[64]); | |||||
| /* This table contains superblock_count * 16 entries. Each set of 16 | /* This table contains superblock_count * 16 entries. Each set of 16 | ||||
| * numbers corresponds to the fragment indices 0..15 of the superblock. | * numbers corresponds to the fragment indices 0..15 of the superblock. | ||||
| @@ -1711,7 +1711,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) | |||||
| int m, n; | int m, n; | ||||
| int i; /* indicates current fragment */ | int i; /* indicates current fragment */ | ||||
| int16_t *dequantizer; | int16_t *dequantizer; | ||||
| DCTELEM __align16 block[64]; | |||||
| DECLARE_ALIGNED_16(DCTELEM, block[64]); | |||||
| unsigned char *output_plane; | unsigned char *output_plane; | ||||
| unsigned char *last_plane; | unsigned char *last_plane; | ||||
| unsigned char *golden_plane; | unsigned char *golden_plane; | ||||
| @@ -102,15 +102,15 @@ typedef struct WMADecodeContext { | |||||
| int block_pos; /* current position in frame */ | int block_pos; /* current position in frame */ | ||||
| uint8_t ms_stereo; /* true if mid/side stereo mode */ | uint8_t ms_stereo; /* true if mid/side stereo mode */ | ||||
| uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */ | uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */ | ||||
| float exponents[MAX_CHANNELS][BLOCK_MAX_SIZE] __attribute__((aligned(16))); | |||||
| DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]); | |||||
| float max_exponent[MAX_CHANNELS]; | float max_exponent[MAX_CHANNELS]; | ||||
| int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; | int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; | ||||
| float coefs[MAX_CHANNELS][BLOCK_MAX_SIZE] __attribute__((aligned(16))); | |||||
| DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); | |||||
| MDCTContext mdct_ctx[BLOCK_NB_SIZES]; | MDCTContext mdct_ctx[BLOCK_NB_SIZES]; | ||||
| float *windows[BLOCK_NB_SIZES]; | float *windows[BLOCK_NB_SIZES]; | ||||
| FFTSample mdct_tmp[BLOCK_MAX_SIZE] __attribute__((aligned(16))); /* temporary storage for imdct */ | |||||
| DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */ | |||||
| /* output buffer for one frame and the last for IMDCT windowing */ | /* output buffer for one frame and the last for IMDCT windowing */ | ||||
| float frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2] __attribute__((aligned(16))); | |||||
| DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]); | |||||
| /* last frame info */ | /* last frame info */ | ||||
| uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */ | uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */ | ||||
| int last_bitoffset; | int last_bitoffset; | ||||
| @@ -1097,7 +1097,7 @@ static int wma_decode_block(WMADecodeContext *s) | |||||
| for(ch = 0; ch < s->nb_channels; ch++) { | for(ch = 0; ch < s->nb_channels; ch++) { | ||||
| if (s->channel_coded[ch]) { | if (s->channel_coded[ch]) { | ||||
| FFTSample output[BLOCK_MAX_SIZE * 2] __attribute__((aligned(16))); | |||||
| DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); | |||||
| float *ptr; | float *ptr; | ||||
| int i, n4, index, n; | int i, n4, index, n; | ||||
| @@ -49,7 +49,7 @@ typedef struct Wmv2Context{ | |||||
| int hshift; | int hshift; | ||||
| ScanTable abt_scantable[2]; | ScanTable abt_scantable[2]; | ||||
| DCTELEM abt_block2[6][64] __align8; | |||||
| DECLARE_ALIGNED_8(DCTELEM, abt_block2[6][64]); | |||||
| }Wmv2Context; | }Wmv2Context; | ||||
| static void wmv2_common_init(Wmv2Context * w){ | static void wmv2_common_init(Wmv2Context * w){ | ||||