Originally committed as revision 22267 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.6)
| @@ -23,6 +23,7 @@ | |||
| */ | |||
| #include "config.h" | |||
| #include "dsputil_altivec.h" | |||
| #ifdef __APPLE__ | |||
| #undef _POSIX_C_SOURCE | |||
| @@ -28,8 +28,9 @@ | |||
| #include "dsputil_ppc.h" | |||
| #include "util_altivec.h" | |||
| #include "types_altivec.h" | |||
| #include "dsputil_altivec.h" | |||
| int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| { | |||
| int i; | |||
| int s; | |||
| @@ -74,7 +75,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h | |||
| return s; | |||
| } | |||
| int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| { | |||
| int i; | |||
| int s; | |||
| @@ -130,7 +131,7 @@ int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h | |||
| return s; | |||
| } | |||
| int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| { | |||
| int i; | |||
| int s; | |||
| @@ -225,7 +226,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int | |||
| return s; | |||
| } | |||
| int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| { | |||
| int i; | |||
| int s; | |||
| @@ -267,7 +268,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| return s; | |||
| } | |||
| int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| { | |||
| int i; | |||
| int s; | |||
| @@ -312,7 +313,7 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| return s; | |||
| } | |||
| int pix_norm1_altivec(uint8_t *pix, int line_size) | |||
| static int pix_norm1_altivec(uint8_t *pix, int line_size) | |||
| { | |||
| int i; | |||
| int s; | |||
| @@ -348,7 +349,7 @@ int pix_norm1_altivec(uint8_t *pix, int line_size) | |||
| * AltiVec-enhanced. | |||
| * It's the sad8_altivec code above w/ squaring added. | |||
| */ | |||
| int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| { | |||
| int i; | |||
| int s; | |||
| @@ -402,7 +403,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| * AltiVec-enhanced. | |||
| * It's the sad16_altivec code above w/ squaring added. | |||
| */ | |||
| int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| { | |||
| int i; | |||
| int s; | |||
| @@ -446,7 +447,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |||
| return s; | |||
| } | |||
| int pix_sum_altivec(uint8_t * pix, int line_size) | |||
| static int pix_sum_altivec(uint8_t * pix, int line_size) | |||
| { | |||
| const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); | |||
| vector unsigned char perm, *pixv; | |||
| @@ -479,7 +480,7 @@ int pix_sum_altivec(uint8_t * pix, int line_size) | |||
| return s; | |||
| } | |||
| void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) | |||
| static void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) | |||
| { | |||
| int i; | |||
| vector unsigned char perm, bytes, *pixv; | |||
| @@ -504,7 +505,7 @@ void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line | |||
| } | |||
| } | |||
| void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, | |||
| static void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1, | |||
| const uint8_t *s2, int stride) | |||
| { | |||
| int i; | |||
| @@ -589,7 +590,7 @@ static void clear_block_altivec(DCTELEM *block) { | |||
| } | |||
| void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { | |||
| static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { | |||
| register int i; | |||
| register vector unsigned char vdst, vsrc; | |||
| @@ -687,7 +688,7 @@ POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); | |||
| } | |||
| /* next one assumes that ((line_size % 8) == 0) */ | |||
| void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); | |||
| register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | |||
| @@ -723,7 +724,7 @@ POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); | |||
| } | |||
| /* next one assumes that ((line_size % 8) == 0) */ | |||
| void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); | |||
| register int i; | |||
| @@ -786,7 +787,7 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| } | |||
| /* next one assumes that ((line_size % 8) == 0) */ | |||
| void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| register int i; | |||
| @@ -850,7 +851,7 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| } | |||
| /* next one assumes that ((line_size % 16) == 0) */ | |||
| void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); | |||
| register int i; | |||
| @@ -923,7 +924,7 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| } | |||
| /* next one assumes that ((line_size % 16) == 0) */ | |||
| void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| register int i; | |||
| @@ -996,7 +997,7 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| } | |||
| int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | |||
| static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | |||
| POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); | |||
| int sum; | |||
| register const vector unsigned char vzero = | |||
| @@ -1317,7 +1318,7 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, | |||
| return sum; | |||
| } | |||
| int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | |||
| static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | |||
| POWERPC_PERF_DECLARE(altivec_hadamard8_diff16_num, 1); | |||
| int score; | |||
| POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); | |||
| @@ -1355,7 +1356,7 @@ static void vorbis_inverse_coupling_altivec(float *mag, float *ang, | |||
| } | |||
| /* next one assumes that ((line_size % 8) == 0) */ | |||
| void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1); | |||
| register int i; | |||
| @@ -24,11 +24,29 @@ | |||
| #define AVCODEC_PPC_DSPUTIL_ALTIVEC_H | |||
| #include <stdint.h> | |||
| int has_altivec(void); | |||
| #include "libavcodec/dsputil.h" | |||
| void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||
| void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); | |||
| int has_altivec(void); | |||
| void fdct_altivec(int16_t *block); | |||
| void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, | |||
| int x16, int y16, int rounder); | |||
| void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | |||
| void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |||
| void ff_vp3_idct_altivec(DCTELEM *block); | |||
| void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |||
| void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |||
| void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); | |||
| void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); | |||
| void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx); | |||
| void float_init_altivec(DSPContext* c, AVCodecContext *avctx); | |||
| void int_init_altivec(DSPContext* c, AVCodecContext *avctx); | |||
| #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ | |||
| @@ -26,23 +26,6 @@ | |||
| #include "dsputil_altivec.h" | |||
| void fdct_altivec(int16_t *block); | |||
| void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, | |||
| int x16, int y16, int rounder); | |||
| void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | |||
| void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |||
| void ff_vp3_idct_altivec(DCTELEM *block); | |||
| void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |||
| void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block); | |||
| void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); | |||
| void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); | |||
| void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx); | |||
| void float_init_altivec(DSPContext* c, AVCodecContext *avctx); | |||
| void int_init_altivec(DSPContext* c, AVCodecContext *avctx); | |||
| int mm_flags = 0; | |||
| int mm_support(void) | |||
| @@ -133,7 +116,7 @@ distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. | |||
| see <http://developer.apple.com/technotes/tn/tn2087.html> | |||
| and <http://developer.apple.com/technotes/tn/tn2086.html> | |||
| */ | |||
| void clear_blocks_dcbz32_ppc(DCTELEM *blocks) | |||
| static void clear_blocks_dcbz32_ppc(DCTELEM *blocks) | |||
| { | |||
| POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); | |||
| register int misal = ((unsigned long)blocks & 0x00000010); | |||
| @@ -166,7 +149,7 @@ POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); | |||
| /* same as above, for when dcbzl clears a whole 128B cache line, | |||
| i.e. on the PPC970 aka G5 */ | |||
| #if HAVE_DCBZL | |||
| void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | |||
| static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | |||
| { | |||
| POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); | |||
| register int misal = ((unsigned long)blocks & 0x0000007f); | |||
| @@ -189,7 +172,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||
| POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||
| } | |||
| #else | |||
| void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | |||
| static void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | |||
| { | |||
| memset(blocks, 0, sizeof(DCTELEM)*6*64); | |||
| } | |||
| @@ -201,7 +184,7 @@ void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | |||
| the intended effect (Apple "fixed" dcbz) | |||
| unfortunately this cannot be used unless the assembler | |||
| knows about dcbzl ... */ | |||
| long check_dcbzl_effect(void) | |||
| static long check_dcbzl_effect(void) | |||
| { | |||
| register char *fakedata = av_malloc(1024); | |||
| register char *fakedata_middle; | |||
| @@ -231,7 +214,7 @@ long check_dcbzl_effect(void) | |||
| return count; | |||
| } | |||
| #else | |||
| long check_dcbzl_effect(void) | |||
| static long check_dcbzl_effect(void) | |||
| { | |||
| return 0; | |||
| } | |||
| @@ -25,7 +25,7 @@ | |||
| #include "libavutil/common.h" | |||
| #include "libavcodec/dsputil.h" | |||
| #include "dsputil_ppc.h" | |||
| #include "dsputil_altivec.h" | |||
| #define vs16(v) ((vector signed short)(v)) | |||
| #define vs32(v) ((vector signed int)(v)) | |||
| @@ -23,6 +23,8 @@ | |||
| #include "libavcodec/fft.h" | |||
| #include "dsputil_ppc.h" | |||
| #include "util_altivec.h" | |||
| #include "dsputil_altivec.h" | |||
| /** | |||
| * Do a complex FFT with the parameters defined in ff_fft_init(). The | |||
| * input data must be permuted before with s->revtab table. No | |||
| @@ -34,7 +36,7 @@ | |||
| * that successive MUL + ADD/SUB have been merged into | |||
| * fused multiply-add ('vec_madd' in altivec) | |||
| */ | |||
| void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z) | |||
| static void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); | |||
| register const vector float vczero = (const vector float)vec_splat_u32(0.); | |||
| @@ -24,6 +24,7 @@ | |||
| #include "dsputil_ppc.h" | |||
| #include "util_altivec.h" | |||
| #include "types_altivec.h" | |||
| #include "dsputil_altivec.h" | |||
| /* | |||
| altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8, | |||
| @@ -431,7 +431,7 @@ static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride) | |||
| vec_st( hv, 0, dest ); \ | |||
| } | |||
| void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) { | |||
| static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) { | |||
| vec_s16 s0, s1, s2, s3, s4, s5, s6, s7; | |||
| vec_s16 d0, d1, d2, d3, d4, d5, d6, d7; | |||
| vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7; | |||
| @@ -75,7 +75,7 @@ | |||
| #define noop(a) a | |||
| #define add28(a) vec_add(v28ss, a) | |||
| void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | |||
| static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | |||
| int stride, int h, int x, int y) { | |||
| POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); | |||
| DECLARE_ALIGNED(16, signed int, ABCD)[4] = | |||
| @@ -207,7 +207,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, | |||
| } | |||
| /* this code assumes that stride % 16 == 0 */ | |||
| void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { | |||
| static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { | |||
| DECLARE_ALIGNED(16, signed int, ABCD)[4] = | |||
| {((8 - x) * (8 - y)), | |||
| (( x) * (8 - y)), | |||
| @@ -44,6 +44,7 @@ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "types_altivec.h" | |||
| #include "dsputil_ppc.h" | |||
| #include "dsputil_altivec.h" | |||
| #define IDCT_HALF \ | |||
| /* 1st stage */ \ | |||
| @@ -158,9 +159,10 @@ static const vec_s16 constants[5] = { | |||
| {19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722} | |||
| }; | |||
| void idct_put_altivec(uint8_t* dest, int stride, vec_s16* block) | |||
| void idct_put_altivec(uint8_t* dest, int stride, int16_t *blk) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_idct_put_num, 1); | |||
| vec_s16 *block = (vec_s16*)blk; | |||
| vec_u8 tmp; | |||
| #if CONFIG_POWERPC_PERF | |||
| @@ -185,9 +187,10 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); | |||
| } | |||
| void idct_add_altivec(uint8_t* dest, int stride, vec_s16* block) | |||
| void idct_add_altivec(uint8_t* dest, int stride, int16_t *blk) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_idct_add_num, 1); | |||
| vec_s16 *block = (vec_s16*)blk; | |||
| vec_u8 tmp; | |||
| vec_s16 tmp2, tmp3; | |||
| vec_u8 perm0; | |||
| @@ -29,6 +29,7 @@ | |||
| #include "dsputil_ppc.h" | |||
| #include "util_altivec.h" | |||
| #include "types_altivec.h" | |||
| #include "dsputil_altivec.h" | |||
| // Swaps two variables (used for altivec registers) | |||
| #define SWAP(a,b) \ | |||
| @@ -68,7 +69,7 @@ do { \ | |||
| #define FOUROF(a) {a,a,a,a} | |||
| int dct_quantize_altivec(MpegEncContext* s, | |||
| static int dct_quantize_altivec(MpegEncContext* s, | |||
| DCTELEM* data, int n, | |||
| int qscale, int* overflow) | |||
| { | |||
| @@ -475,7 +476,7 @@ int dct_quantize_altivec(MpegEncContext* s, | |||
| /* AltiVec version of dct_unquantize_h263; | |||
| this code assumes `block' is 16-byte aligned */ | |||
| void dct_unquantize_h263_altivec(MpegEncContext *s, | |||
| static void dct_unquantize_h263_altivec(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale) | |||
| { | |||
| POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1); | |||
| @@ -572,9 +573,6 @@ POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); | |||
| } | |||
| void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); | |||
| void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); | |||
| void MPV_common_init_altivec(MpegEncContext *s) | |||
| { | |||
| if ((mm_flags & FF_MM_ALTIVEC) == 0) return; | |||
| @@ -22,6 +22,7 @@ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "util_altivec.h" | |||
| #include "dsputil_altivec.h" | |||
| // main steps of 8x8 transform | |||
| #define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \ | |||
| @@ -21,6 +21,7 @@ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "util_altivec.h" | |||
| #include "types_altivec.h" | |||
| #include "dsputil_altivec.h" | |||
| static const vec_s16 constants = | |||
| {0, 64277, 60547, 54491, 46341, 36410, 25080, 12785}; | |||