PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless
code around
2) make the PPC perf stuff a configure option
3) make put_pixels16_altivec a bit faster by unrolling the loop by 4
patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk
tags/v0.5
| @@ -27,6 +27,7 @@ make="make" | |||
| strip="strip" | |||
| cpu=`uname -m` | |||
| tune="generic" | |||
| powerpc_perf="no" | |||
| mmx="default" | |||
| altivec="default" | |||
| mmi="default" | |||
| @@ -275,6 +276,8 @@ for opt do | |||
| ;; | |||
| --tune=*) tune=`echo $opt | cut -d '=' -f 2` | |||
| ;; | |||
| --powerpc-perf-enable) powerpc_perf="yes" | |||
| ;; | |||
| --disable-mmx) mmx="no" | |||
| ;; | |||
| --disable-altivec) altivec="no" | |||
| @@ -398,7 +401,7 @@ if test $tune != "generic"; then | |||
| if test $altivec = "no"; then | |||
| echo "WARNING: tuning for PPC74xx but altivec disabled !"; | |||
| fi | |||
| TUNECPU=ppc7450 | |||
| TUNECPU=ppc7400 | |||
| ;; | |||
| G5|970|ppc970|PowerPC970|power4*|Power4*) | |||
| CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc64 -force_cpusubtype_ALL " | |||
| @@ -749,6 +752,7 @@ echo " --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS]" | |||
| echo " --extra-libs=ELIBS add ELIBS [$ELIBS]" | |||
| echo " --cpu=CPU force cpu to CPU [$cpu]" | |||
| echo " --tune=PROCESSOR tune code for a particular CPU (may fails or misperforms on other CPUs)" | |||
| echo " --powerpc-perf-enable enable performance report on PPC (requires enabling PMC)" | |||
| echo " --disable-mmx disable mmx usage" | |||
| echo " --disable-altivec disable AltiVec usage" | |||
| echo " --disable-audio-oss disable OSS audio support [default=no]" | |||
| @@ -847,10 +851,9 @@ elif test "$cpu" = "sparc64" ; then | |||
| elif test "$cpu" = "powerpc" ; then | |||
| echo "TARGET_ARCH_POWERPC=yes" >> config.mak | |||
| echo "#define ARCH_POWERPC 1" >> $TMPH | |||
| echo "// Enable the next line to get PowerPC performance report" >> $TMPH | |||
| echo "// #define POWERPC_TBL_PERFORMANCE_REPORT 1" >> $TMPH | |||
| echo "// Enable the next line to use PMC registers instead of TBL" >> $TMPH | |||
| echo "// #define POWERPC_PERF_USE_PMC 1" >> $TMPH | |||
| if test "$powerpc_perf" = "yes"; then | |||
| echo "#define POWERPC_PERFORMANCE_REPORT 1" >> $TMPH | |||
| fi | |||
| elif test "$cpu" = "mips" ; then | |||
| echo "TARGET_ARCH_MIPS=yes" >> config.mak | |||
| echo "#define ARCH_MIPS 1" >> $TMPH | |||
| @@ -2757,10 +2757,10 @@ int main(int argc, char **argv) | |||
| av_free_static(); | |||
| #ifdef POWERPC_TBL_PERFORMANCE_REPORT | |||
| #ifdef POWERPC_PERFORMANCE_REPORT | |||
| extern void powerpc_display_perf_report(void); | |||
| powerpc_display_perf_report(); | |||
| #endif /* POWERPC_TBL_PERFORMANCE_REPORT */ | |||
| #endif /* POWERPC_PERFORMANCE_REPORT */ | |||
| #ifndef CONFIG_WIN32 | |||
| if (received_sigterm) { | |||
| @@ -655,11 +655,11 @@ void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { | |||
| /* next one assumes that ((line_size % 16) == 0) */ | |||
| void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int i; | |||
| POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); | |||
| for(i=0; i<h; i++) { | |||
| *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); | |||
| @@ -670,15 +670,27 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); | |||
| block +=line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| register vector unsigned char pixelsv1, pixelsv2; | |||
| register vector unsigned char pixelsv1B, pixelsv2B; | |||
| register vector unsigned char pixelsv1C, pixelsv2C; | |||
| register vector unsigned char pixelsv1D, pixelsv2D; | |||
| register vector unsigned char perm = vec_lvsl(0, pixels); | |||
| int i; | |||
| POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); | |||
| register int line_size_2 = line_size << 1; | |||
| register int line_size_3 = line_size + line_size_2; | |||
| register int line_size_4 = line_size << 2; | |||
| POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); | |||
| // hand-unrolling the loop by 4 gains about 15% | |||
| // minimum execution time goes from 74 to 60 cycles | |||
| // it's faster than -funroll-loops, but using | |||
| // -funroll-loops w/ this is bad - 74 cycles again. | |||
| // all this is on a 7450, tuning for the 7450 | |||
| #if 0 | |||
| for(i=0; i<h; i++) { | |||
| pixelsv1 = vec_ld(0, (unsigned char*)pixels); | |||
| pixelsv2 = vec_ld(16, (unsigned char*)pixels); | |||
| @@ -687,8 +699,29 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); | |||
| pixels+=line_size; | |||
| block +=line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); | |||
| #else | |||
| for(i=0; i<h; i+=4) { | |||
| pixelsv1 = vec_ld(0, (unsigned char*)pixels); | |||
| pixelsv2 = vec_ld(16, (unsigned char*)pixels); | |||
| pixelsv1B = vec_ld(line_size, (unsigned char*)pixels); | |||
| pixelsv2B = vec_ld(16 + line_size, (unsigned char*)pixels); | |||
| pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels); | |||
| pixelsv2C = vec_ld(16 + line_size_2, (unsigned char*)pixels); | |||
| pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels); | |||
| pixelsv2D = vec_ld(16 + line_size_3, (unsigned char*)pixels); | |||
| vec_st(vec_perm(pixelsv1, pixelsv2, perm), | |||
| 0, (unsigned char*)block); | |||
| vec_st(vec_perm(pixelsv1B, pixelsv2B, perm), | |||
| line_size, (unsigned char*)block); | |||
| vec_st(vec_perm(pixelsv1C, pixelsv2C, perm), | |||
| line_size_2, (unsigned char*)block); | |||
| vec_st(vec_perm(pixelsv1D, pixelsv2D, perm), | |||
| line_size_3, (unsigned char*)block); | |||
| pixels+=line_size_4; | |||
| block +=line_size_4; | |||
| } | |||
| #endif | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| @@ -697,11 +730,11 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); | |||
| #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) | |||
| void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int i; | |||
| POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); | |||
| for(i=0; i<h; i++) { | |||
| op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); | |||
| @@ -712,14 +745,14 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); | |||
| block +=line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | |||
| register vector unsigned char perm = vec_lvsl(0, pixels); | |||
| int i; | |||
| POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); | |||
| for(i=0; i<h; i++) { | |||
| pixelsv1 = vec_ld(0, (unsigned char*)pixels); | |||
| @@ -732,7 +765,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); | |||
| block +=line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| @@ -740,10 +773,10 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); | |||
| /* next one assumes that ((line_size % 8) == 0) */ | |||
| void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int i; | |||
| POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); | |||
| for (i = 0; i < h; i++) { | |||
| *((uint32_t *) (block)) = | |||
| (((*((uint32_t *) (block))) | | |||
| @@ -761,13 +794,13 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); | |||
| pixels += line_size; | |||
| block += line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | |||
| int i; | |||
| POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); | |||
| for (i = 0; i < h; i++) { | |||
| /* | |||
| @@ -798,7 +831,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); | |||
| block += line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| @@ -806,10 +839,10 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); | |||
| /* next one assumes that ((line_size % 8) == 0) */ | |||
| void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int j; | |||
| POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| for (j = 0; j < 2; j++) { | |||
| int i; | |||
| const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |||
| @@ -842,7 +875,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| block += 4 - line_size * h; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| register int i; | |||
| @@ -873,7 +906,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| (vector unsigned short)pixelsv2); | |||
| pixelssum1 = vec_add(pixelssum1, vctwo); | |||
| POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| for (i = 0; i < h ; i++) { | |||
| int rightside = ((unsigned long)block & 0x0000000F); | |||
| blockv = vec_ld(0, block); | |||
| @@ -914,17 +947,17 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| pixels += line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| /* next one assumes that ((line_size % 8) == 0) */ | |||
| void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int j; | |||
| POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| for (j = 0; j < 2; j++) { | |||
| int i; | |||
| const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |||
| @@ -957,7 +990,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| block += 4 - line_size * h; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| register int i; | |||
| @@ -989,7 +1022,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| (vector unsigned short)pixelsv2); | |||
| pixelssum1 = vec_add(pixelssum1, vcone); | |||
| POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| for (i = 0; i < h ; i++) { | |||
| int rightside = ((unsigned long)block & 0x0000000F); | |||
| blockv = vec_ld(0, block); | |||
| @@ -1030,17 +1063,17 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| pixels += line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| /* next one assumes that ((line_size % 16) == 0) */ | |||
| void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int j; | |||
| POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| for (j = 0; j < 4; j++) { | |||
| int i; | |||
| const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |||
| @@ -1073,7 +1106,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| block += 4 - line_size * h; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| register int i; | |||
| @@ -1087,7 +1120,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); | |||
| register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); | |||
| POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| temp1 = vec_ld(0, pixels); | |||
| temp2 = vec_ld(16, pixels); | |||
| @@ -1151,17 +1184,17 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| pixels += line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| /* next one assumes that ((line_size % 16) == 0) */ | |||
| void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int j; | |||
| POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| for (j = 0; j < 4; j++) { | |||
| int i; | |||
| const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |||
| @@ -1194,7 +1227,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| block += 4 - line_size * h; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| register int i; | |||
| @@ -1209,7 +1242,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); | |||
| register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); | |||
| POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| temp1 = vec_ld(0, pixels); | |||
| temp2 = vec_ld(16, pixels); | |||
| @@ -1273,7 +1306,7 @@ POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| pixels += line_size; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| @@ -41,8 +41,8 @@ int mm_support(void) | |||
| return result; | |||
| } | |||
| #ifdef POWERPC_TBL_PERFORMANCE_REPORT | |||
| unsigned long long perfdata[powerpc_perf_total][powerpc_data_total]; | |||
| #ifdef POWERPC_PERFORMANCE_REPORT | |||
| unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; | |||
| /* list below must match enum in dsputil_ppc.h */ | |||
| static unsigned char* perfname[] = { | |||
| "fft_calc_altivec", | |||
| @@ -60,53 +60,32 @@ static unsigned char* perfname[] = { | |||
| "clear_blocks_dcbz32_ppc", | |||
| "clear_blocks_dcbz128_ppc" | |||
| }; | |||
| #ifdef POWERPC_PERF_USE_PMC | |||
| unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total]; | |||
| unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total]; | |||
| #endif | |||
| #include <stdio.h> | |||
| #endif | |||
| #ifdef POWERPC_TBL_PERFORMANCE_REPORT | |||
| #ifdef POWERPC_PERFORMANCE_REPORT | |||
| void powerpc_display_perf_report(void) | |||
| { | |||
| int i; | |||
| #ifndef POWERPC_PERF_USE_PMC | |||
| fprintf(stderr, "PowerPC performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n"); | |||
| #else /* POWERPC_PERF_USE_PMC */ | |||
| int i, j; | |||
| fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); | |||
| #endif /* POWERPC_PERF_USE_PMC */ | |||
| for(i = 0 ; i < powerpc_perf_total ; i++) | |||
| { | |||
| if (perfdata[i][powerpc_data_num] != (unsigned long long)0) | |||
| fprintf(stderr, " Function \"%s\" (pmc1):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", | |||
| perfname[i], | |||
| perfdata[i][powerpc_data_min], | |||
| perfdata[i][powerpc_data_max], | |||
| (double)perfdata[i][powerpc_data_sum] / | |||
| (double)perfdata[i][powerpc_data_num], | |||
| perfdata[i][powerpc_data_num]); | |||
| #ifdef POWERPC_PERF_USE_PMC | |||
| if (perfdata_pmc2[i][powerpc_data_num] != (unsigned long long)0) | |||
| fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", | |||
| perfname[i], | |||
| perfdata_pmc2[i][powerpc_data_min], | |||
| perfdata_pmc2[i][powerpc_data_max], | |||
| (double)perfdata_pmc2[i][powerpc_data_sum] / | |||
| (double)perfdata_pmc2[i][powerpc_data_num], | |||
| perfdata_pmc2[i][powerpc_data_num]); | |||
| if (perfdata_pmc3[i][powerpc_data_num] != (unsigned long long)0) | |||
| fprintf(stderr, " Function \"%s\" (pmc3):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", | |||
| perfname[i], | |||
| perfdata_pmc3[i][powerpc_data_min], | |||
| perfdata_pmc3[i][powerpc_data_max], | |||
| (double)perfdata_pmc3[i][powerpc_data_sum] / | |||
| (double)perfdata_pmc3[i][powerpc_data_num], | |||
| perfdata_pmc3[i][powerpc_data_num]); | |||
| #endif | |||
| for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | |||
| { | |||
| if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0) | |||
| fprintf(stderr, | |||
| " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", | |||
| perfname[i], | |||
| j+1, | |||
| perfdata[j][i][powerpc_data_min], | |||
| perfdata[j][i][powerpc_data_max], | |||
| (double)perfdata[j][i][powerpc_data_sum] / | |||
| (double)perfdata[j][i][powerpc_data_num], | |||
| perfdata[j][i][powerpc_data_num]); | |||
| } | |||
| } | |||
| } | |||
| #endif /* POWERPC_TBL_PERFORMANCE_REPORT */ | |||
| #endif /* POWERPC_PERFORMANCE_REPORT */ | |||
| /* ***** WARNING ***** WARNING ***** WARNING ***** */ | |||
| /* | |||
| @@ -135,10 +114,10 @@ void powerpc_display_perf_report(void) | |||
| */ | |||
| void clear_blocks_dcbz32_ppc(DCTELEM *blocks) | |||
| { | |||
| POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz32, 1); | |||
| POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1); | |||
| register int misal = ((unsigned long)blocks & 0x00000010); | |||
| register int i = 0; | |||
| POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1); | |||
| POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); | |||
| #if 1 | |||
| if (misal) { | |||
| ((unsigned long*)blocks)[0] = 0L; | |||
| @@ -160,7 +139,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz32, 1); | |||
| #else | |||
| memset(blocks, 0, sizeof(DCTELEM)*6*64); | |||
| #endif | |||
| POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); | |||
| POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); | |||
| } | |||
| /* same as above, when dcbzl clears a whole 128B cache line | |||
| @@ -168,10 +147,10 @@ POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1); | |||
| #ifndef NO_DCBZL | |||
| void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | |||
| { | |||
| POWERPC_TBL_DECLARE(powerpc_clear_blocks_dcbz128, 1); | |||
| POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1); | |||
| register int misal = ((unsigned long)blocks & 0x0000007f); | |||
| register int i = 0; | |||
| POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||
| POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||
| #if 1 | |||
| if (misal) { | |||
| // we could probably also optimize this case, | |||
| @@ -186,7 +165,7 @@ POWERPC_TBL_START_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||
| #else | |||
| memset(blocks, 0, sizeof(DCTELEM)*6*64); | |||
| #endif | |||
| POWERPC_TBL_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||
| POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1); | |||
| } | |||
| #else | |||
| void clear_blocks_dcbz128_ppc(DCTELEM *blocks) | |||
| @@ -277,6 +256,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) | |||
| c->add_bytes= add_bytes_altivec; | |||
| #endif /* 0 */ | |||
| c->put_pixels_tab[0][0] = put_pixels16_altivec; | |||
| /* the two functions do the same thing, so use the same code */ | |||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; | |||
| c->avg_pixels_tab[0][0] = avg_pixels16_altivec; | |||
| // next one disabled as it's untested. | |||
| #if 0 | |||
| @@ -301,28 +282,21 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| #ifdef POWERPC_TBL_PERFORMANCE_REPORT | |||
| #ifdef POWERPC_PERFORMANCE_REPORT | |||
| { | |||
| int i; | |||
| int i, j; | |||
| for (i = 0 ; i < powerpc_perf_total ; i++) | |||
| { | |||
| perfdata[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF; | |||
| perfdata[i][powerpc_data_max] = 0x0000000000000000; | |||
| perfdata[i][powerpc_data_sum] = 0x0000000000000000; | |||
| perfdata[i][powerpc_data_num] = 0x0000000000000000; | |||
| #ifdef POWERPC_PERF_USE_PMC | |||
| perfdata_pmc2[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF; | |||
| perfdata_pmc2[i][powerpc_data_max] = 0x0000000000000000; | |||
| perfdata_pmc2[i][powerpc_data_sum] = 0x0000000000000000; | |||
| perfdata_pmc2[i][powerpc_data_num] = 0x0000000000000000; | |||
| perfdata_pmc3[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF; | |||
| perfdata_pmc3[i][powerpc_data_max] = 0x0000000000000000; | |||
| perfdata_pmc3[i][powerpc_data_sum] = 0x0000000000000000; | |||
| perfdata_pmc3[i][powerpc_data_num] = 0x0000000000000000; | |||
| #endif /* POWERPC_PERF_USE_PMC */ | |||
| } | |||
| for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) | |||
| { | |||
| perfdata[j][i][powerpc_data_min] = (unsigned long long)0xFFFFFFFFFFFFFFFF; | |||
| perfdata[j][i][powerpc_data_max] = (unsigned long long)0x0000000000000000; | |||
| perfdata[j][i][powerpc_data_sum] = (unsigned long long)0x0000000000000000; | |||
| perfdata[j][i][powerpc_data_num] = (unsigned long long)0x0000000000000000; | |||
| } | |||
| } | |||
| } | |||
| #endif /* POWERPC_TBL_PERFORMANCE_REPORT */ | |||
| #endif /* POWERPC_PERFORMANCE_REPORT */ | |||
| } else | |||
| #endif /* HAVE_ALTIVEC */ | |||
| { | |||
| @@ -30,8 +30,10 @@ | |||
| #define NO_DCBZL | |||
| #endif /* CONFIG_DARWIN */ | |||
| #ifdef POWERPC_TBL_PERFORMANCE_REPORT | |||
| #ifdef POWERPC_PERFORMANCE_REPORT | |||
| void powerpc_display_perf_report(void); | |||
| /* the 604* have 2, the G3* have 4, the G4s have 6 */ | |||
| #define POWERPC_NUM_PMC_ENABLED 4 | |||
| /* if you add to the enum below, also add to the perfname array | |||
| in dsputil_ppc.c */ | |||
| enum powerpc_perf_index { | |||
| @@ -58,98 +60,65 @@ enum powerpc_data_index { | |||
| powerpc_data_num, | |||
| powerpc_data_total | |||
| }; | |||
| extern unsigned long long perfdata[powerpc_perf_total][powerpc_data_total]; | |||
| #ifdef POWERPC_PERF_USE_PMC | |||
| extern unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total]; | |||
| extern unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total]; | |||
| #endif | |||
| #ifndef POWERPC_PERF_USE_PMC | |||
| #define POWERPC_GET_CYCLES(a) asm volatile("mftb %0" : "=r" (a)) | |||
| #define POWERPC_TBL_DECLARE(a, cond) register unsigned long tbl_start, tbl_stop | |||
| #define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_CYCLES(tbl_start); } while (0) | |||
| #define POWERPC_TBL_STOP_COUNT(a, cond) do { \ | |||
| POWERPC_GET_CYCLES(tbl_stop); \ | |||
| if (tbl_stop > tbl_start) \ | |||
| { \ | |||
| unsigned long diff = tbl_stop - tbl_start; \ | |||
| if (cond) \ | |||
| { \ | |||
| if (diff < perfdata[a][powerpc_data_min]) \ | |||
| perfdata[a][powerpc_data_min] = diff; \ | |||
| if (diff > perfdata[a][powerpc_data_max]) \ | |||
| perfdata[a][powerpc_data_max] = diff; \ | |||
| perfdata[a][powerpc_data_sum] += diff; \ | |||
| perfdata[a][powerpc_data_num] ++; \ | |||
| } \ | |||
| } \ | |||
| } while (0) | |||
| extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total]; | |||
| #else /* POWERPC_PERF_USE_PMC */ | |||
| #define POWERPC_GET_CYCLES(a) asm volatile("mfspr %0, 937" : "=r" (a)) | |||
| #define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a)) | |||
| #define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a)) | |||
| #if (POWERPC_NUM_PMC_ENABLED > 2) | |||
| #define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a)) | |||
| #define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, pmc2_start, pmc2_stop, pmc3_start, pmc3_stop | |||
| #define POWERPC_TBL_START_COUNT(a, cond) do { \ | |||
| POWERPC_GET_PMC3(pmc3_start); \ | |||
| POWERPC_GET_PMC2(pmc2_start); \ | |||
| POWERPC_GET_CYCLES(cycles_start); } while (0) | |||
| #define POWERPC_TBL_STOP_COUNT(a, cond) do { \ | |||
| POWERPC_GET_CYCLES(cycles_stop); \ | |||
| POWERPC_GET_PMC2(pmc2_stop); \ | |||
| POWERPC_GET_PMC3(pmc3_stop); \ | |||
| if (cycles_stop >= cycles_start) \ | |||
| { \ | |||
| unsigned long diff = \ | |||
| cycles_stop - cycles_start; \ | |||
| if (cond) \ | |||
| { \ | |||
| if (diff < perfdata[a][powerpc_data_min]) \ | |||
| perfdata[a][powerpc_data_min] = diff; \ | |||
| if (diff > perfdata[a][powerpc_data_max]) \ | |||
| perfdata[a][powerpc_data_max] = diff; \ | |||
| perfdata[a][powerpc_data_sum] += diff; \ | |||
| perfdata[a][powerpc_data_num] ++; \ | |||
| } \ | |||
| } \ | |||
| if (pmc2_stop >= pmc2_start) \ | |||
| { \ | |||
| unsigned long diff = \ | |||
| pmc2_stop - pmc2_start; \ | |||
| if (cond) \ | |||
| { \ | |||
| if (diff < perfdata_pmc2[a][powerpc_data_min]) \ | |||
| perfdata_pmc2[a][powerpc_data_min] = diff; \ | |||
| if (diff > perfdata_pmc2[a][powerpc_data_max]) \ | |||
| perfdata_pmc2[a][powerpc_data_max] = diff; \ | |||
| perfdata_pmc2[a][powerpc_data_sum] += diff; \ | |||
| perfdata_pmc2[a][powerpc_data_num] ++; \ | |||
| } \ | |||
| } \ | |||
| if (pmc3_stop >= pmc3_start) \ | |||
| { \ | |||
| unsigned long diff = \ | |||
| pmc3_stop - pmc3_start; \ | |||
| if (cond) \ | |||
| { \ | |||
| if (diff < perfdata_pmc3[a][powerpc_data_min]) \ | |||
| perfdata_pmc3[a][powerpc_data_min] = diff; \ | |||
| if (diff > perfdata_pmc3[a][powerpc_data_max]) \ | |||
| perfdata_pmc3[a][powerpc_data_max] = diff; \ | |||
| perfdata_pmc3[a][powerpc_data_sum] += diff; \ | |||
| perfdata_pmc3[a][powerpc_data_num] ++; \ | |||
| } \ | |||
| } \ | |||
| #define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a)) | |||
| #else | |||
| #define POWERPC_GET_PMC3(a) do {} while (0) | |||
| #define POWERPC_GET_PMC4(a) do {} while (0) | |||
| #endif | |||
| #if (POWERPC_NUM_PMC_ENABLED > 4) | |||
| #define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a)) | |||
| #define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a)) | |||
| #else | |||
| #define POWERPC_GET_PMC5(a) do {} while (0) | |||
| #define POWERPC_GET_PMC6(a) do {} while (0) | |||
| #endif | |||
| #define POWERPC_PERF_DECLARE(a, cond) unsigned long pmc_start[POWERPC_NUM_PMC_ENABLED], pmc_stop[POWERPC_NUM_PMC_ENABLED], pmc_loop_index; | |||
| #define POWERPC_PERF_START_COUNT(a, cond) do { \ | |||
| POWERPC_GET_PMC6(pmc_start[5]); \ | |||
| POWERPC_GET_PMC5(pmc_start[4]); \ | |||
| POWERPC_GET_PMC4(pmc_start[3]); \ | |||
| POWERPC_GET_PMC3(pmc_start[2]); \ | |||
| POWERPC_GET_PMC2(pmc_start[1]); \ | |||
| POWERPC_GET_PMC1(pmc_start[0]); \ | |||
| } while (0) | |||
| #define POWERPC_PERF_STOP_COUNT(a, cond) do { \ | |||
| POWERPC_GET_PMC1(pmc_stop[0]); \ | |||
| POWERPC_GET_PMC2(pmc_stop[1]); \ | |||
| POWERPC_GET_PMC3(pmc_stop[2]); \ | |||
| POWERPC_GET_PMC4(pmc_stop[3]); \ | |||
| POWERPC_GET_PMC5(pmc_stop[4]); \ | |||
| POWERPC_GET_PMC6(pmc_stop[5]); \ | |||
| if (cond) \ | |||
| { \ | |||
| for(pmc_loop_index = 0; \ | |||
| pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \ | |||
| pmc_loop_index++) \ | |||
| { \ | |||
| if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \ | |||
| { \ | |||
| unsigned long diff = \ | |||
| pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \ | |||
| if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \ | |||
| perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \ | |||
| if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \ | |||
| perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \ | |||
| perfdata[pmc_loop_index][a][powerpc_data_sum] += diff; \ | |||
| perfdata[pmc_loop_index][a][powerpc_data_num] ++; \ | |||
| } \ | |||
| } \ | |||
| } \ | |||
| } while (0) | |||
| #endif /* POWERPC_PERF_USE_PMC */ | |||
| #else /* POWERPC_TBL_PERFORMANCE_REPORT */ | |||
| #else /* POWERPC_PERFORMANCE_REPORT */ | |||
| // those are needed to avoid empty statements. | |||
| #define POWERPC_TBL_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused)) | |||
| #define POWERPC_TBL_START_COUNT(a, cond) do {} while (0) | |||
| #define POWERPC_TBL_STOP_COUNT(a, cond) do {} while (0) | |||
| #endif /* POWERPC_TBL_PERFORMANCE_REPORT */ | |||
| #define POWERPC_PERF_DECLARE(a, cond) int altivec_placeholder __attribute__ ((unused)) | |||
| #define POWERPC_PERF_START_COUNT(a, cond) do {} while (0) | |||
| #define POWERPC_PERF_STOP_COUNT(a, cond) do {} while (0) | |||
| #endif /* POWERPC_PERFORMANCE_REPORT */ | |||
| #endif /* _DSPUTIL_PPC_ */ | |||
| @@ -62,7 +62,7 @@ | |||
| */ | |||
| void fft_calc_altivec(FFTContext *s, FFTComplex *z) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6); | |||
| POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| int ln = s->nbits; | |||
| int j, np, np2; | |||
| @@ -72,7 +72,7 @@ POWERPC_TBL_DECLARE(altivec_fft_num, s->nbits >= 6); | |||
| int l; | |||
| FFTSample tmp_re, tmp_im; | |||
| POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| np = 1 << ln; | |||
| @@ -137,7 +137,7 @@ POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| nloops = nloops << 1; | |||
| } while (nblocks != 0); | |||
| POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| #ifdef CONFIG_DARWIN | |||
| @@ -153,7 +153,7 @@ POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| FFTComplex *cptr, *cptr1; | |||
| int k; | |||
| POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| np = 1 << ln; | |||
| @@ -241,7 +241,7 @@ POWERPC_TBL_START_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| nloops = nloops << 1; | |||
| } while (nblocks != 0); | |||
| POWERPC_TBL_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| @@ -31,7 +31,7 @@ | |||
| #define GMC1_PERF_COND (h==8) | |||
| void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); | |||
| POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| const int A=(16-x16)*(16-y16); | |||
| const int B=( x16)*(16-y16); | |||
| @@ -39,7 +39,7 @@ POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); | |||
| const int D=( x16)*( y16); | |||
| int i; | |||
| POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| for(i=0; i<h; i++) | |||
| { | |||
| @@ -55,7 +55,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| src+= stride; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = | |||
| @@ -78,7 +78,7 @@ POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| unsigned long src_really_odd = (unsigned long)src & 0x0000000F; | |||
| POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| tempA = vec_ld(0, (unsigned short*)ABCD); | |||
| Av = vec_splat(tempA, 0); | |||
| @@ -166,7 +166,7 @@ POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| src += stride; | |||
| } | |||
| POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| @@ -165,16 +165,16 @@ static const vector_s16_t constants[5] = { | |||
| void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_idct_put_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_idct_put_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); | |||
| void simple_idct_put(uint8_t *dest, int line_size, int16_t *block); | |||
| simple_idct_put(dest, stride, (int16_t*)block); | |||
| POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| vector_u8_t tmp; | |||
| POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1); | |||
| IDCT | |||
| @@ -192,18 +192,18 @@ POWERPC_TBL_START_COUNT(altivec_idct_put_num, 1); | |||
| COPY (dest, vx6) dest += stride; | |||
| COPY (dest, vx7) | |||
| POWERPC_TBL_STOP_COUNT(altivec_idct_put_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_idct_add_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_idct_add_num, 1); | |||
| #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |||
| POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); | |||
| void simple_idct_add(uint8_t *dest, int line_size, int16_t *block); | |||
| simple_idct_add(dest, stride, (int16_t*)block); | |||
| POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); | |||
| #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| vector_u8_t tmp; | |||
| vector_s16_t tmp2, tmp3; | |||
| @@ -211,7 +211,7 @@ POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1); | |||
| vector_u8_t perm1; | |||
| vector_u8_t p0, p1, p; | |||
| POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1); | |||
| IDCT | |||
| @@ -239,7 +239,7 @@ POWERPC_TBL_START_COUNT(altivec_idct_add_num, 1); | |||
| ADD (dest, vx6, perm0) dest += stride; | |||
| ADD (dest, vx7, perm1) | |||
| POWERPC_TBL_STOP_COUNT(altivec_idct_add_num, 1); | |||
| POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1); | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| } | |||
| @@ -522,13 +522,13 @@ int dct_quantize_altivec(MpegEncContext* s, | |||
| void dct_unquantize_h263_altivec(MpegEncContext *s, | |||
| DCTELEM *block, int n, int qscale) | |||
| { | |||
| POWERPC_TBL_DECLARE(altivec_dct_unquantize_h263_num, 1); | |||
| POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1); | |||
| int i, level, qmul, qadd; | |||
| int nCoeffs; | |||
| assert(s->block_last_index[n]>=0); | |||
| POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1); | |||
| POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); | |||
| qadd = (qscale - 1) | 1; | |||
| qmul = qscale << 1; | |||
| @@ -641,5 +641,5 @@ POWERPC_TBL_START_COUNT(altivec_dct_unquantize_h263_num, 1); | |||
| } | |||
| #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |||
| POWERPC_TBL_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); | |||
| POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); | |||
| } | |||