diff --git a/common/memops.c b/common/memops.c index 3f1b67e1..6cb39172 100644 --- a/common/memops.c +++ b/common/memops.c @@ -236,26 +236,6 @@ static inline float32x4_t clip(float32x4_t s, float32x4_t min, float32x4_t max) return vminq_f32(max, vmaxq_f32(s, min)); } -static inline float32x4_t float_32_multiply_extended(float32x4_t samples, double factor) -{ - float64x2_t factor64 = vdupq_n_f64(factor); - float64x2_t lower_elements = vcvt_f64_f32(vget_low_f32(samples)); - float64x2_t upper_elements = vcvt_high_f64_f32(samples); - lower_elements = vmulq_f64(lower_elements, factor64); - upper_elements = vmulq_f64(upper_elements, factor64); - float32x2_t lower_down_scaled = vcvt_f32_f64(lower_elements); - return vcvt_high_f32_f64(lower_down_scaled, upper_elements); -} - -static inline int32x4_t float_32_neon(float32x4_t s) -{ - const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX); - const float32x4_t lower_bound = vdupq_n_f32(NORMALIZED_FLOAT_MIN); - - float32x4_t clipped = clip(s, lower_bound, upper_bound); - return vcvtq_s32_f32(float_32_multiply_extended(clipped, SAMPLE_32BIT_SCALING)); -} - static inline int32x4_t float_24_neon(float32x4_t s) { const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX); @@ -335,37 +315,9 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign void sample_move_d32_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) { -#if defined (__ARM_NEON__) || defined (__ARM_NEON) - unsigned long unrolled = nsamples / 4; - nsamples = nsamples & 3; - - while (unrolled--) { - float32x4_t samples = vld1q_f32(src); - int32x4_t converted = float_32_neon(samples); - converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted))); - - switch(dst_skip) { - case 4: - vst1q_s32((int32_t*)dst, converted); - break; - default: - vst1q_lane_s32((int32_t*)(dst), converted, 0); - vst1q_lane_s32((int32_t*)(dst+dst_skip), converted, 1); - vst1q_lane_s32((int32_t*)(dst+2*dst_skip), converted, 2); - vst1q_lane_s32((int32_t*)(dst+3*dst_skip), converted, 3); - break; - } - dst += 4*dst_skip; - src+= 4; - } -#endif - - int32_t z; - while (nsamples--) { - + int32_t z; float_32 (*src, z); - #if __BYTE_ORDER == __LITTLE_ENDIAN dst[0]=(char)(z>>24); dst[1]=(char)(z>>16); @@ -384,40 +336,16 @@ void sample_move_d32_sSs (char *dst, jack_default_audio_sample_t *src, unsigned void sample_move_d32_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) { -#if defined (__ARM_NEON__) || defined (__ARM_NEON) - unsigned long unrolled = nsamples / 4; - nsamples = nsamples & 3; - - while (unrolled--) { - float32x4_t samples = vld1q_f32(src); - int32x4_t converted = float_32_neon(samples); - - switch(dst_skip) { - case 4: - vst1q_s32((int32_t*)dst, converted); - break; - default: - vst1q_lane_s32((int32_t*)(dst), converted, 0); - vst1q_lane_s32((int32_t*)(dst+dst_skip), converted, 1); - vst1q_lane_s32((int32_t*)(dst+2*dst_skip), converted, 2); - vst1q_lane_s32((int32_t*)(dst+3*dst_skip), converted, 3); - break; - } - dst += 4*dst_skip; + while (nsamples--) { + double sample = *((float *)src); + double clipped = fmin(1.0, fmax(sample, -1.0)); + double scaled = clipped * SAMPLE_32BIT_MAX_F; + int y = (int)scaled; + *((int *) dst) = y; - src+= 4; + dst += dst_skip; + src++; } -#endif - while (nsamples--) { - double sample = *((float *)src); - double clipped = fmin(1.0, fmax(sample, -1.0)); - double scaled = clipped * SAMPLE_32BIT_MAX_F; - int y = (int)scaled; - *((int *) dst) = y; - - dst += dst_skip; - src++; - } } void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) @@ -822,39 +750,6 @@ void sample_move_d32l24_sS (char *dst, jack_default_audio_sample_t *src, unsigne void sample_move_dS_s32s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) { const jack_default_audio_sample_t scaling = 1.0/SAMPLE_32BIT_SCALING; -#if defined (__ARM_NEON__) || defined (__ARM_NEON) - unsigned long unrolled = nsamples / 4; - while (unrolled--) { - uint32x4_t src128; - switch(src_skip) - { - case 4: - src128 = vld1q_u32((uint32_t*)src); - break; - case 8: - src128 = vld2q_u32((uint32_t*)src).val[0]; - break; - default: - src128 = vld1q_lane_u32((uint32_t*)src, src128, 0); - src128 = vld1q_lane_u32((uint32_t*)(src+src_skip), src128, 1); - src128 = vld1q_lane_u32((uint32_t*)(src+2*src_skip), src128, 2); - src128 = vld1q_lane_u32((uint32_t*)(src+3*src_skip), src128, 3); - break; - } - src128 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(src128))); - float32x4_t samples = vcvtq_f32_s32(vreinterpretq_s32_u32(src128)); - samples = float_32_multiply_extended(samples, scaling); - vst1q_f32(dst, samples); - - src += 4*src_skip; - dst += 4; - } - nsamples = nsamples & 3; -#endif - - /* ALERT: signed sign-extension portability !!! */ - - while (nsamples--) { int32_t x; #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -948,35 +843,6 @@ void sample_move_dS_s32l24s (jack_default_audio_sample_t *dst, char *src, unsign void sample_move_dS_s32 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) { const double scaling = 1.0 / SAMPLE_32BIT_SCALING; -#if defined (__ARM_NEON__) || defined (__ARM_NEON) - unsigned long unrolled = nsamples / 4; - nsamples = nsamples & 3; - while (unrolled--) { - uint32x4_t src128; - switch(src_skip) { - case 4: - src128 = vld1q_u32((uint32_t*)src); - break; - case 8: - src128 = vld2q_u32((uint32_t*)src).val[0]; - break; - default: - src128 = vld1q_lane_u32((uint32_t*)src, src128, 0); - src128 = vld1q_lane_u32((uint32_t*)(src+src_skip), src128, 1); - src128 = vld1q_lane_u32((uint32_t*)(src+2*src_skip), src128, 2); - src128 = vld1q_lane_u32((uint32_t*)(src+3*src_skip), src128, 3); - break; - } - - float32x4_t samples = vcvtq_f32_s32(vreinterpretq_s32_u32(src128)); - samples = float_32_multiply_extended(samples, scaling); - vst1q_f32(dst, samples); - - src += 4*src_skip; - dst += 4; - } -#endif - while (nsamples--) { int32_t val=(*((int32_t*)src)); double extended = val * scaling;