|
|
|
@@ -31,7 +31,8 @@ |
|
|
|
#include "yuv2rgb_altivec.h" |
|
|
|
#include "libavutil/ppc/util_altivec.h" |
|
|
|
|
|
|
|
#if HAVE_ALTIVEC && HAVE_BIGENDIAN |
|
|
|
#if HAVE_ALTIVEC |
|
|
|
#if HAVE_BIGENDIAN |
|
|
|
#define vzero vec_splat_s32(0) |
|
|
|
|
|
|
|
#define GET_LS(a,b,c,s) {\ |
|
|
|
@@ -102,7 +103,137 @@ |
|
|
|
#include "swscale_ppc_template.c" |
|
|
|
#undef FUNC |
|
|
|
|
|
|
|
#endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */ |
|
|
|
#undef vzero |
|
|
|
|
|
|
|
#endif /* HAVE_BIGENDIAN */ |
|
|
|
|
|
|
|
/*
 * Store a clipped 16-bit sample at pos.
 *
 * Computes bias + av_clip_<signedness>16(val >> shift) and writes it with
 * AV_WB16 when big_endian is non-zero, AV_WL16 otherwise.  `shift` and
 * `big_endian` are not parameters: they must be visible in the scope where
 * the macro is expanded.
 *
 * The body is wrapped in do { } while (0) so the expansion is a single
 * statement and cannot capture a following `else`; val and bias are
 * parenthesized so compound expressions keep their meaning.
 */
#define output_pixel(pos, val, bias, signedness) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } else { \
            AV_WL16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } \
    } while (0)
|
|
|
|
|
|
|
/**
 * Scalar conversion of the elements [start, dstW) of one line to float.
 *
 * Each src value is rounded by adding half of (1 << 3), shifted right by 3,
 * clipped with av_clip_uint16() and multiplied by 1/65535 before being
 * stored in dest at the same index.
 *
 * @param src   input samples
 * @param dest  output floats, written in native byte order
 * @param dstW  index one past the last element to convert
 * @param start index of the first element to convert
 */
static void
yuv2plane1_float_u(const int32_t *src, float *dest, int dstW, int start)
{
    enum { SHIFT = 3, ROUND = 1 << (SHIFT - 1) };
    const float scale = 1.0f / 65535.0f;
    int x;

    for (x = start; x < dstW; x++) {
        const int rounded = src[x] + ROUND;
        /* Clip straight into a native 16-bit value; no byte-order round
         * trip is needed because the result is consumed immediately. */
        const uint16_t sample = av_clip_uint16(rounded >> SHIFT);

        dest[x] = scale * (float)sample;
    }
}
|
|
|
|
|
|
|
/**
 * Scalar conversion of the elements [start, dstW) of one line to
 * byte-swapped floats.
 *
 * Each src value is rounded by adding half of (1 << 3), shifted right by 3,
 * clipped with av_clip_uint16() and multiplied by 1/65535; the bit pattern
 * of the resulting float is then byte-swapped with av_bswap32() and stored
 * in dest at the same index.
 *
 * @param src   input samples
 * @param dest  output, one 32-bit word per sample
 * @param dstW  index one past the last element to convert
 * @param start index of the first element to convert
 */
static void
yuv2plane1_float_bswap_u(const int32_t *src, uint32_t *dest, int dstW, int start)
{
    enum { SHIFT = 3, ROUND = 1 << (SHIFT - 1) };
    const float scale = 1.0f / 65535.0f;
    int x;

    for (x = start; x < dstW; x++) {
        const int rounded = src[x] + ROUND;
        const uint16_t sample = av_clip_uint16(rounded >> SHIFT);
        const float normalized = scale * (float)sample;

        dest[x] = av_bswap32(av_float2int(normalized));
    }
}
|
|
|
|
|
|
|
static void yuv2plane1_float_altivec(const int32_t *src, float *dest, int dstW) |
|
|
|
{ |
|
|
|
const int dst_u = -(uintptr_t)dest & 3; |
|
|
|
const int shift = 3; |
|
|
|
const int add = (1 << (shift - 1)); |
|
|
|
const int clip = (1 << 16) - 1; |
|
|
|
const float fmult = 1.0f / 65535.0f; |
|
|
|
const vector uint32_t vadd = (vector uint32_t) {add, add, add, add}; |
|
|
|
const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift); |
|
|
|
const vector uint32_t vlargest = (vector uint32_t) {clip, clip, clip, clip}; |
|
|
|
const vector float vmul = (vector float) {fmult, fmult, fmult, fmult}; |
|
|
|
const vector float vzero = (vector float) {0, 0, 0, 0}; |
|
|
|
vector uint32_t v; |
|
|
|
vector float vd; |
|
|
|
int i; |
|
|
|
|
|
|
|
yuv2plane1_float_u(src, dest, dst_u, 0); |
|
|
|
|
|
|
|
for (i = dst_u; i < dstW - 3; i += 4) { |
|
|
|
v = vec_ld(0, (const uint32_t *) &src[i]); |
|
|
|
v = vec_add(v, vadd); |
|
|
|
v = vec_sr(v, vshift); |
|
|
|
v = vec_min(v, vlargest); |
|
|
|
|
|
|
|
vd = vec_ctf(v, 0); |
|
|
|
vd = vec_madd(vd, vmul, vzero); |
|
|
|
|
|
|
|
vec_st(vd, 0, &dest[i]); |
|
|
|
} |
|
|
|
|
|
|
|
yuv2plane1_float_u(src, dest, dstW, i); |
|
|
|
} |
|
|
|
|
|
|
|
static void yuv2plane1_float_bswap_altivec(const int32_t *src, uint32_t *dest, int dstW) |
|
|
|
{ |
|
|
|
const int dst_u = -(uintptr_t)dest & 3; |
|
|
|
const int shift = 3; |
|
|
|
const int add = (1 << (shift - 1)); |
|
|
|
const int clip = (1 << 16) - 1; |
|
|
|
const float fmult = 1.0f / 65535.0f; |
|
|
|
const vector uint32_t vadd = (vector uint32_t) {add, add, add, add}; |
|
|
|
const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift); |
|
|
|
const vector uint32_t vlargest = (vector uint32_t) {clip, clip, clip, clip}; |
|
|
|
const vector float vmul = (vector float) {fmult, fmult, fmult, fmult}; |
|
|
|
const vector float vzero = (vector float) {0, 0, 0, 0}; |
|
|
|
const vector uint32_t vswapbig = (vector uint32_t) {16, 16, 16, 16}; |
|
|
|
const vector uint16_t vswapsmall = vec_splat_u16(8); |
|
|
|
vector uint32_t v; |
|
|
|
vector float vd; |
|
|
|
int i; |
|
|
|
|
|
|
|
yuv2plane1_float_bswap_u(src, dest, dst_u, 0); |
|
|
|
|
|
|
|
for (i = dst_u; i < dstW - 3; i += 4) { |
|
|
|
v = vec_ld(0, (const uint32_t *) &src[i]); |
|
|
|
v = vec_add(v, vadd); |
|
|
|
v = vec_sr(v, vshift); |
|
|
|
v = vec_min(v, vlargest); |
|
|
|
|
|
|
|
vd = vec_ctf(v, 0); |
|
|
|
vd = vec_madd(vd, vmul, vzero); |
|
|
|
|
|
|
|
vd = (vector float) vec_rl((vector uint32_t) vd, vswapbig); |
|
|
|
vd = (vector float) vec_rl((vector uint16_t) vd, vswapsmall); |
|
|
|
|
|
|
|
vec_st(vd, 0, (float *) &dest[i]); |
|
|
|
} |
|
|
|
|
|
|
|
yuv2plane1_float_bswap_u(src, dest, dstW, i); |
|
|
|
} |
|
|
|
|
|
|
|
/*
 * Define yuv2plane1_float<ENDIAN>_altivec(), a wrapper with the
 * (src, dest, dstW, dither, offset) signature that forwards to impl.
 *
 * src is reinterpreted as const int32_t * and dest as sample_t *; the
 * dither and offset arguments are accepted but not used.
 */
#define yuv2plane1_float(impl, sample_t, ENDIAN) \
static void yuv2plane1_float ## ENDIAN ## _altivec(const int16_t *src, \
                                                   uint8_t *dest, \
                                                   int dstW, \
                                                   const uint8_t *dither, \
                                                   int offset) \
{ \
    impl((const int32_t *)src, (sample_t *)dest, dstW); \
}
|
|
|
|
|
|
|
#if HAVE_BIGENDIAN |
|
|
|
yuv2plane1_float(yuv2plane1_float_altivec, float, BE) |
|
|
|
yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, LE) |
|
|
|
#else |
|
|
|
yuv2plane1_float(yuv2plane1_float_altivec, float, LE) |
|
|
|
yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, BE) |
|
|
|
#endif |
|
|
|
|
|
|
|
#endif /* HAVE_ALTIVEC */ |
|
|
|
|
|
|
|
av_cold void ff_sws_init_swscale_ppc(SwsContext *c) |
|
|
|
{ |
|
|
|
@@ -124,6 +255,12 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c) |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
if (dstFormat == AV_PIX_FMT_GRAYF32BE) { |
|
|
|
c->yuv2plane1 = yuv2plane1_floatBE_altivec; |
|
|
|
} else if (dstFormat == AV_PIX_FMT_GRAYF32LE) { |
|
|
|
c->yuv2plane1 = yuv2plane1_floatLE_altivec; |
|
|
|
} |
|
|
|
|
|
|
|
/* The following list of supported dstFormat values should |
|
|
|
* match what's found in the body of ff_yuv2packedX_altivec() */ |
|
|
|
if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->needAlpha) { |
|
|
|
|