Browse Source

remove neon intrinsics and fix indentation

pull/811/head
Claudio Cabral Claudio Cabral 4 years ago
parent
commit
d8e9c1614e
1 changed files with 9 additions and 143 deletions
  1. +9
    -143
      common/memops.c

+ 9
- 143
common/memops.c View File

@@ -236,26 +236,6 @@ static inline float32x4_t clip(float32x4_t s, float32x4_t min, float32x4_t max)
return vminq_f32(max, vmaxq_f32(s, min));
}

static inline float32x4_t float_32_multiply_extended(float32x4_t samples, double factor)
{
float64x2_t factor64 = vdupq_n_f64(factor);
float64x2_t lower_elements = vcvt_f64_f32(vget_low_f32(samples));
float64x2_t upper_elements = vcvt_high_f64_f32(samples);
lower_elements = vmulq_f64(lower_elements, factor64);
upper_elements = vmulq_f64(upper_elements, factor64);
float32x2_t lower_down_scaled = vcvt_f32_f64(lower_elements);
return vcvt_high_f32_f64(lower_down_scaled, upper_elements);
}

static inline int32x4_t float_32_neon(float32x4_t s)
{
const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
const float32x4_t lower_bound = vdupq_n_f32(NORMALIZED_FLOAT_MIN);

float32x4_t clipped = clip(s, lower_bound, upper_bound);
return vcvtq_s32_f32(float_32_multiply_extended(clipped, SAMPLE_32BIT_SCALING));
}

static inline int32x4_t float_24_neon(float32x4_t s)
{
const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
@@ -335,37 +315,9 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign

void sample_move_d32_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
unsigned long unrolled = nsamples / 4;
nsamples = nsamples & 3;

while (unrolled--) {
float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_32_neon(samples);
converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));

switch(dst_skip) {
case 4:
vst1q_s32((int32_t*)dst, converted);
break;
default:
vst1q_lane_s32((int32_t*)(dst), converted, 0);
vst1q_lane_s32((int32_t*)(dst+dst_skip), converted, 1);
vst1q_lane_s32((int32_t*)(dst+2*dst_skip), converted, 2);
vst1q_lane_s32((int32_t*)(dst+3*dst_skip), converted, 3);
break;
}
dst += 4*dst_skip;
src+= 4;
}
#endif

int32_t z;

while (nsamples--) {
int32_t z;
float_32 (*src, z);

#if __BYTE_ORDER == __LITTLE_ENDIAN
dst[0]=(char)(z>>24);
dst[1]=(char)(z>>16);
@@ -384,40 +336,16 @@ void sample_move_d32_sSs (char *dst, jack_default_audio_sample_t *src, unsigned

void sample_move_d32_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
unsigned long unrolled = nsamples / 4;
nsamples = nsamples & 3;

while (unrolled--) {
float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_32_neon(samples);

switch(dst_skip) {
case 4:
vst1q_s32((int32_t*)dst, converted);
break;
default:
vst1q_lane_s32((int32_t*)(dst), converted, 0);
vst1q_lane_s32((int32_t*)(dst+dst_skip), converted, 1);
vst1q_lane_s32((int32_t*)(dst+2*dst_skip), converted, 2);
vst1q_lane_s32((int32_t*)(dst+3*dst_skip), converted, 3);
break;
}
dst += 4*dst_skip;
while (nsamples--) {
double sample = *((float *)src);
double clipped = fmin(1.0, fmax(sample, -1.0));
double scaled = clipped * SAMPLE_32BIT_MAX_F;
int y = (int)scaled;
*((int *) dst) = y;

src+= 4;
dst += dst_skip;
src++;
}
#endif
while (nsamples--) {
double sample = *((float *)src);
double clipped = fmin(1.0, fmax(sample, -1.0));
double scaled = clipped * SAMPLE_32BIT_MAX_F;
int y = (int)scaled;
*((int *) dst) = y;

dst += dst_skip;
src++;
}
}

void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
@@ -822,39 +750,6 @@ void sample_move_d32l24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
void sample_move_dS_s32s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{
const jack_default_audio_sample_t scaling = 1.0/SAMPLE_32BIT_SCALING;
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
unsigned long unrolled = nsamples / 4;
while (unrolled--) {
uint32x4_t src128;
switch(src_skip)
{
case 4:
src128 = vld1q_u32((uint32_t*)src);
break;
case 8:
src128 = vld2q_u32((uint32_t*)src).val[0];
break;
default:
src128 = vld1q_lane_u32((uint32_t*)src, src128, 0);
src128 = vld1q_lane_u32((uint32_t*)(src+src_skip), src128, 1);
src128 = vld1q_lane_u32((uint32_t*)(src+2*src_skip), src128, 2);
src128 = vld1q_lane_u32((uint32_t*)(src+3*src_skip), src128, 3);
break;
}
src128 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(src128)));
float32x4_t samples = vcvtq_f32_s32(vreinterpretq_s32_u32(src128));
samples = float_32_multiply_extended(samples, scaling);
vst1q_f32(dst, samples);

src += 4*src_skip;
dst += 4;
}
nsamples = nsamples & 3;
#endif

/* ALERT: signed sign-extension portability !!! */


while (nsamples--) {
int32_t x;
#if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -948,35 +843,6 @@ void sample_move_dS_s32l24s (jack_default_audio_sample_t *dst, char *src, unsign
void sample_move_dS_s32 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{
const double scaling = 1.0 / SAMPLE_32BIT_SCALING;
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
unsigned long unrolled = nsamples / 4;
nsamples = nsamples & 3;
while (unrolled--) {
uint32x4_t src128;
switch(src_skip) {
case 4:
src128 = vld1q_u32((uint32_t*)src);
break;
case 8:
src128 = vld2q_u32((uint32_t*)src).val[0];
break;
default:
src128 = vld1q_lane_u32((uint32_t*)src, src128, 0);
src128 = vld1q_lane_u32((uint32_t*)(src+src_skip), src128, 1);
src128 = vld1q_lane_u32((uint32_t*)(src+2*src_skip), src128, 2);
src128 = vld1q_lane_u32((uint32_t*)(src+3*src_skip), src128, 3);
break;
}

float32x4_t samples = vcvtq_f32_s32(vreinterpretq_s32_u32(src128));
samples = float_32_multiply_extended(samples, scaling);
vst1q_f32(dst, samples);

src += 4*src_skip;
dst += 4;
}
#endif

while (nsamples--) {
int32_t val=(*((int32_t*)src));
double extended = val * scaling;


Loading…
Cancel
Save