@@ -23,7 +23,8 @@
 #include "libavutil/ppc/util_altivec.h"
 #include "dsputil_altivec.h"
 
-static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len)
+static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src,
+                                               float mul, int len)
 {
     union {
         vector float v;
@@ -36,7 +37,7 @@ static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float
     mul_u.s[0] = mul;
     mul_v = vec_splat(mul_u.v, 0);
 
-    for(i=0; i<len; i+=8) {
+    for (i = 0; i < len; i += 8) {
         src1 = vec_ctf(vec_ld(0, src+i), 0);
         src2 = vec_ctf(vec_ld(16, src+i), 0);
         dst1 = vec_madd(src1, mul_v, zero);
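
For readers who do not speak AltiVec: the routine touched by the first two hunks converts len 32-bit integers to float and scales them by mul, eight samples per iteration via vec_ctf (int -> float conversion) and vec_madd against a splatted mul, so it assumes len is a multiple of 8. A scalar sketch of the same contract (the _ref name is illustrative; FFmpeg's actual C fallback lives in libavcodec/fmtconvert.c):

/* Illustrative scalar reference, not part of this patch. */
static void int32_to_float_fmul_scalar_ref(float *dst, const int *src,
                                           float mul, int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src[i] * mul;
}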
@@ -47,8 +48,7 @@ static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float
     }
 }
 
-static vector signed short
-float_to_int16_one_altivec(const float *src)
+static vector signed short float_to_int16_one_altivec(const float *src)
 {
     vector float s0 = vec_ld(0, src);
     vector float s1 = vec_ld(16, src);
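
float_to_int16_one_altivec converts eight floats to saturated int16 samples: vec_round rounds to nearest, vec_cts converts to int32 with saturation, and vec_packs packs the two int32 vectors into one int16 vector, saturating again. A scalar model of a single element, assuming the default round-to-nearest mode (the helper name is mine, not FFmpeg API):

#include <math.h>
#include <stdint.h>

/* Scalar model of one element: round to nearest, clamp to int16. */
static int16_t float_to_int16_one_ref(float s)
{
    long v = lrintf(s);
    if (v >  32767) return  32767;
    if (v < -32768) return -32768;
    return (int16_t)v;
}

The vec_lvsl/vec_lvsr plus vec_perm gymnastics in the next hunk are the standard AltiVec idiom for unaligned stores: vec_st can only write to 16-byte-aligned addresses, so the code loads the surrounding destination vectors, rotates the new data into position, and stores whole aligned vectors back.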
@@ -62,80 +62,82 @@ static void float_to_int16_altivec(int16_t *dst, const float *src, long len)
     int i;
     vector signed short d0, d1, d;
     vector unsigned char align;
-    if(((long)dst)&15) //FIXME
-        for(i=0; i<len-7; i+=8) {
-            d0 = vec_ld(0, dst+i);
-            d = float_to_int16_one_altivec(src+i);
-            d1 = vec_ld(15, dst+i);
-            d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));
-            align = vec_lvsr(0, dst+i);
-            d0 = vec_perm(d1, d, align);
-            d1 = vec_perm(d, d1, align);
-            vec_st(d0, 0, dst+i);
-            vec_st(d1,15, dst+i);
-        }
-    else
-        for(i=0; i<len-7; i+=8) {
-            d = float_to_int16_one_altivec(src+i);
-            vec_st(d, 0, dst+i);
+
+    if (((long)dst) & 15) { //FIXME
+        for (i = 0; i < len - 7; i += 8) {
+            d0 = vec_ld(0, dst+i);
+            d = float_to_int16_one_altivec(src + i);
+            d1 = vec_ld(15, dst+i);
+            d1 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
+            align = vec_lvsr(0, dst + i);
+            d0 = vec_perm(d1, d, align);
+            d1 = vec_perm(d, d1, align);
+            vec_st(d0, 0, dst + i);
+            vec_st(d1, 15, dst + i);
+        }
+    } else {
+        for (i = 0; i < len - 7; i += 8) {
+            d = float_to_int16_one_altivec(src + i);
+            vec_st(d, 0, dst + i);
         }
+    }
 }
 
-static void
-float_to_int16_interleave_altivec(int16_t *dst, const float **src,
-                                  long len, int channels)
+static void float_to_int16_interleave_altivec(int16_t *dst, const float **src,
+                                              long len, int channels)
 {
     int i;
     vector signed short d0, d1, d2, c0, c1, t0, t1;
     vector unsigned char align;
-    if(channels == 1)
+    if (channels == 1)
         float_to_int16_altivec(dst, src[0], len);
-    else
+    else {
         if (channels == 2) {
-        if(((long)dst)&15)
-            for(i=0; i<len-7; i+=8) {
-                d0 = vec_ld(0, dst + i);
-                t0 = float_to_int16_one_altivec(src[0] + i);
-                d1 = vec_ld(31, dst + i);
-                t1 = float_to_int16_one_altivec(src[1] + i);
-                c0 = vec_mergeh(t0, t1);
-                c1 = vec_mergel(t0, t1);
-                d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
-                align = vec_lvsr(0, dst + i);
-                d0 = vec_perm(d2, c0, align);
-                d1 = vec_perm(c0, c1, align);
-                vec_st(d0, 0, dst + i);
-                d0 = vec_perm(c1, d2, align);
-                vec_st(d1, 15, dst + i);
-                vec_st(d0, 31, dst + i);
-                dst+=8;
-            }
-        else
-            for(i=0; i<len-7; i+=8) {
-                t0 = float_to_int16_one_altivec(src[0] + i);
-                t1 = float_to_int16_one_altivec(src[1] + i);
-                d0 = vec_mergeh(t0, t1);
-                d1 = vec_mergel(t0, t1);
-                vec_st(d0, 0, dst + i);
-                vec_st(d1, 16, dst + i);
-                dst+=8;
-            }
-    } else {
-        DECLARE_ALIGNED(16, int16_t, tmp)[len];
-        int c, j;
-        for (c = 0; c < channels; c++) {
-            float_to_int16_altivec(tmp, src[c], len);
-            for (i = 0, j = c; i < len; i++, j+=channels) {
-                dst[j] = tmp[i];
+            if (((long)dst) & 15) {
+                for (i = 0; i < len - 7; i += 8) {
+                    d0 = vec_ld(0, dst + i);
+                    t0 = float_to_int16_one_altivec(src[0] + i);
+                    d1 = vec_ld(31, dst + i);
+                    t1 = float_to_int16_one_altivec(src[1] + i);
+                    c0 = vec_mergeh(t0, t1);
+                    c1 = vec_mergel(t0, t1);
+                    d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
+                    align = vec_lvsr(0, dst + i);
+                    d0 = vec_perm(d2, c0, align);
+                    d1 = vec_perm(c0, c1, align);
+                    vec_st(d0, 0, dst + i);
+                    d0 = vec_perm(c1, d2, align);
+                    vec_st(d1, 15, dst + i);
+                    vec_st(d0, 31, dst + i);
+                    dst += 8;
+                }
+            } else {
+                for (i = 0; i < len - 7; i += 8) {
+                    t0 = float_to_int16_one_altivec(src[0] + i);
+                    t1 = float_to_int16_one_altivec(src[1] + i);
+                    d0 = vec_mergeh(t0, t1);
+                    d1 = vec_mergel(t0, t1);
+                    vec_st(d0, 0, dst + i);
+                    vec_st(d1, 16, dst + i);
+                    dst += 8;
+                }
+            }
+        } else {
+            DECLARE_ALIGNED(16, int16_t, tmp)[len];
+            int c, j;
+            for (c = 0; c < channels; c++) {
+                float_to_int16_altivec(tmp, src[c], len);
+                for (i = 0, j = c; i < len; i++, j+=channels)
+                    dst[j] = tmp[i];
             }
         }
     }
 }
 
 void ff_fmt_convert_init_altivec(FmtConvertContext *c, AVCodecContext *avctx)
 {
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec;
-    if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+    if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
         c->float_to_int16 = float_to_int16_altivec;
         c->float_to_int16_interleave = float_to_int16_interleave_altivec;
     }
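
The final hunk installs the float-to-int16 pointers only when CODEC_FLAG_BITEXACT is unset, presumably because the vector rounding and saturation path is not guaranteed to match the C reference bit for bit; the int32_to_float_fmul_scalar pointer, by contrast, is set unconditionally. For the generic (channels > 2) path above, a scalar model of the data movement, reusing float_to_int16_one_ref from the earlier sketch (again illustrative, not FFmpeg API; include <stdint.h> as before):

/* Illustrative scalar model of the generic interleave path:
 * convert each channel, then scatter sample i of channel c
 * to dst[i * channels + c]. */
static void float_to_int16_interleave_ref(int16_t *dst, const float **src,
                                          long len, int channels)
{
    int c;
    long i;
    for (c = 0; c < channels; c++)
        for (i = 0; i < len; i++)
            dst[i * channels + c] = float_to_int16_one_ref(src[c][i]);
}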