diff --git a/common/memops.c b/common/memops.c
index 5d9f229d..17a2e5e0 100644
--- a/common/memops.c
+++ b/common/memops.c
@@ -83,8 +83,6 @@
 
 #define SAMPLE_24BIT_MAX 8388607
 #define SAMPLE_24BIT_MIN -8388607
-#define SAMPLE_24BIT_MAX_F 8388607.0f
-#define SAMPLE_24BIT_MIN_F -8388607.0f
 
 #define SAMPLE_16BIT_MAX 32767
 #define SAMPLE_16BIT_MIN -32767
@@ -128,13 +126,13 @@
 		(d) = f_round ((s));\
 	}
 
-#define float_24u32(s, d) \
+#define float_32(s, d, scale) \
 	if ((s) <= NORMALIZED_FLOAT_MIN) {\
-		(d) = SAMPLE_24BIT_MIN;\
+		(d) = -scale;\
 	} else if ((s) >= NORMALIZED_FLOAT_MAX) {\
-		(d) = SAMPLE_24BIT_MAX;\
+		(d) = scale;\
 	} else {\
-		(d) = f_round ((s) * SAMPLE_24BIT_SCALING);\
+		(d) = f_round ((s) * scale);\
 	}
 
 
@@ -182,13 +180,13 @@ static inline float32x4_t clip(float32x4_t s, float32x4_t min, float32x4_t max)
 	return vminq_f32(max, vmaxq_f32(s, min));
 }
 
-static inline int32x4_t float_24_neon(float32x4_t s)
+static inline int32x4_t float_32_neon(float32x4_t s, const int32_t scaling)
 {
 	const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
 	const float32x4_t lower_bound = vdupq_n_f32(NORMALIZED_FLOAT_MIN);
 
 	float32x4_t clipped = clip(s, lower_bound, upper_bound);
-	float32x4_t scaled = vmulq_f32(clipped, vdupq_n_f32(SAMPLE_24BIT_SCALING));
+	float32x4_t scaled = vmulq_f32(clipped, vdupq_n_f32(scaling));
 	return vcvtq_s32_f32(scaled);
 }
 
@@ -256,7 +254,7 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign
 
 /* functions for native integer sample data */
 
-void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+static inline void sample_move_d32scal_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state, const int32_t scaling)
 {
 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
 	unsigned long unrolled = nsamples / 4;
@@ -264,7 +262,7 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign
 
 	while (unrolled--) {
 		float32x4_t samples = vld1q_f32(src);
-		int32x4_t converted = float_24_neon(samples);
+		int32x4_t converted = float_32_neon(samples, scaling);
 		converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));
 
 		switch(dst_skip) {
@@ -287,7 +285,7 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign
 
 	while (nsamples--) {
 
-		float_24u32 (*src, z);
+		float_32 (*src, z, scaling);
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 		dst[0]=(char)(z>>24);
@@ -303,12 +301,18 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign
 		dst += dst_skip;
 		src++;
 	}
-}
+}
 
-void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+{
+	sample_move_d32scal_sSs (dst, src, nsamples, dst_skip, state, SAMPLE_24BIT_SCALING);
+}
+
+
+static inline void sample_move_d32scal_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state, const int32_t scaling)
 {
 #if defined (__SSE2__) && !defined (__sun__)
-	__m128 int_max = _mm_set1_ps(SAMPLE_24BIT_MAX_F);
+	__m128 int_max = _mm_set1_ps(scaling);
 	__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
 	__m128 factor = int_max;
 
@@ -361,7 +365,7 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
 
 	while (unrolled--) {
 		float32x4_t samples = vld1q_f32(src);
-		int32x4_t converted = float_24_neon(samples);
+		int32x4_t converted = float_32_neon(samples, scaling);
 
 		switch(dst_skip) {
 			case 4:
@@ -382,12 +386,18 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
 
 #if !defined (__SSE2__)
 	while (nsamples--) {
-		float_24u32 (*src, *((int32_t*) dst));
+		float_32 (*src, *((int32_t*) dst), scaling);
 		dst += dst_skip;
 		src++;
 	}
 #endif
-}
+}
+
+void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+{
+	sample_move_d32scal_sS (dst, src, nsamples, dst_skip, state, SAMPLE_24BIT_SCALING);
+}
+
 
 void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 {
@@ -533,7 +543,7 @@ void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned
 		int i;
 		int32_t z[4];
 		float32x4_t samples = vld1q_f32(src);
-		int32x4_t converted = float_24_neon(samples);
+		int32x4_t converted = float_32_neon(samples, SAMPLE_24BIT_SCALING);
 		converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));
 		vst1q_s32(z, converted);
 
@@ -604,7 +614,7 @@ void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned l
 		int i;
 		int32_t z[4];
 		float32x4_t samples = vld1q_f32(src);
-		int32x4_t converted = float_24_neon(samples);
+		int32x4_t converted = float_32_neon(samples, SAMPLE_24BIT_SCALING);
 		vst1q_s32(z, converted);
 
 		for (i = 0; i != 4; ++i) {