Instead of filling a local array with the desired value and loading it, load a single element and vec_splat() it to fill the vector.

Originally committed as revision 19691 to svn://svn.ffmpeg.org/ffmpeg/trunk
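A minimal standalone sketch of the pattern the patch switches to (the helper name splat_u16() and the bare <altivec.h> build are illustrative assumptions, not part of the patch). Note that vec_lde() only defines the lane corresponding to the element's offset within its 16-byte block, so the scalar must be 16-byte aligned (as DECLARE_ALIGNED_16 guarantees in the patched code) for lane 0 to hold the value that vec_splat() then broadcasts:

    #include <altivec.h>

    /* Broadcast one aligned unsigned short into all eight lanes.
     * The caller must ensure *x is 16-byte aligned so vec_lde() places
     * the value in element 0; vec_splat(..., 0) copies it everywhere. */
    static vector unsigned short splat_u16(const unsigned short *x)
    {
        return vec_splat(vec_lde(0, x), 0);
    }

This replaces declaring an aligned 8-element array filled with the same runtime value and loading it with vec_ld(), which forces eight scalar stores to the stack just to build the constant.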
@@ -23,6 +23,7 @@
 #include "libavcodec/dsputil.h"
 #include "dsputil_ppc.h"
 #include "util_altivec.h"
+#include "types_altivec.h"
 
 /*
   altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8,
@@ -32,9 +33,7 @@
 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
 {
 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
-    const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) =
-        {rounder, rounder, rounder, rounder,
-         rounder, rounder, rounder, rounder};
+    const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder;
     const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
         {
             (16-x16)*(16-y16), /* A */
@@ -60,7 +59,7 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
     Cv = vec_splat(tempA, 2);
     Dv = vec_splat(tempA, 3);
 
-    rounderV = vec_ld(0, (unsigned short*)rounder_a);
+    rounderV = vec_splat((vec_u16)vec_lde(0, &rounder_a), 0);
 
     // we'll be able to pick-up our 9 char elements
     // at src from those 32 bytes
@@ -28,6 +28,8 @@
 #include "dsputil_ppc.h"
 #include "util_altivec.h"
+#include "types_altivec.h"
+
 // Swaps two variables (used for altivec registers)
 #define SWAP(a,b) \
 do { \
@@ -504,29 +506,16 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
     {
         register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
-        DECLARE_ALIGNED_16(short, qmul8[]) =
-            {
-                qmul, qmul, qmul, qmul,
-                qmul, qmul, qmul, qmul
-            };
-        DECLARE_ALIGNED_16(short, qadd8[]) =
-            {
-                qadd, qadd, qadd, qadd,
-                qadd, qadd, qadd, qadd
-            };
-        DECLARE_ALIGNED_16(short, nqadd8[]) =
-            {
-                -qadd, -qadd, -qadd, -qadd,
-                -qadd, -qadd, -qadd, -qadd
-            };
+        DECLARE_ALIGNED_16(short, qmul8) = qmul;
+        DECLARE_ALIGNED_16(short, qadd8) = qadd;
         register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
         register vector bool short blockv_null, blockv_neg;
         register short backup_0 = block[0];
         register int j = 0;
 
-        qmulv = vec_ld(0, qmul8);
-        qaddv = vec_ld(0, qadd8);
-        nqaddv = vec_ld(0, nqadd8);
+        qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
+        qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
+        nqaddv = vec_sub(vczero, qaddv);
 
 #if 0 // block *is* 16 bytes-aligned, it seems.
         // first make sure block[j] is 16 bytes-aligned
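The last hunk also drops the separate nqadd8 array entirely: rather than splatting -qadd from memory, the negated vector is derived in registers by subtracting the splatted qadd vector from zero. A hedged sketch of the equivalent computation (helper and variable names are assumptions for illustration, not taken from the file):

    #include <altivec.h>

    /* Given a vector with qadd in every lane, derive the -qadd vector
     * without a second memory load: 0 - qadd == -qadd in each lane. */
    static vector signed short negate_splat_s16(vector signed short v_qadd)
    {
        const vector signed short v_zero = vec_splat_s16(0);
        return vec_sub(v_zero, v_qadd);
    }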