From dafbc20580c572588a6a866fe3bcda849226cb65 Mon Sep 17 00:00:00 2001 From: Timo Wischer Date: Tue, 20 Nov 2018 16:47:37 +0100 Subject: [PATCH] memops: Align S24LE and S32LE to float conversion Thanks to compiler optimization, this commit does not change the generated machine instructions. Tested with GCC version 6.2.0 on ARM64 and x86_64. Change-Id: I2cdb003f0627e30c0593395769d4274e71af1393 Signed-off-by: Timo Wischer --- common/memops.c | 65 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/common/memops.c b/common/memops.c index b7d76a11..c66a8617 100644 --- a/common/memops.c +++ b/common/memops.c @@ -29,6 +29,7 @@ #include #include #include +#include <stdbool.h> #include #ifdef __linux__ #include @@ -412,9 +413,10 @@ void sample_move_d32_sS (char *dst, jack_default_audio_sample_t *src, unsigned l } -void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) +static inline void sample_move_dS_s32s_signext (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip, const bool do_signext) { - const jack_default_audio_sample_t scaling = 1.0 / (SAMPLE_24BIT_SCALING << 8); + const jack_default_audio_sample_t scaling_divisor = do_signext ? 
(SAMPLE_24BIT_SCALING << 8) : SAMPLE_32BIT_SCALING; + const jack_default_audio_sample_t scaling = 1.0 / scaling_divisor; #if defined (__ARM_NEON__) || defined (__ARM_NEON) float32x4_t factor = vdupq_n_f32(scaling); @@ -437,9 +439,11 @@ void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsign break; } src128 = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(src128))); - /* sign extension - left shift will be reverted by scaling */ - int32x4_t shifted = vshlq_n_s32(src128, 8); - float32x4_t as_float = vcvtq_f32_s32(shifted); + if (do_signext) { + /* sign extension - left shift will be reverted by scaling */ + src128 = vshlq_n_s32(src128, 8); + } + float32x4_t as_float = vcvtq_f32_s32(src128); float32x4_t divided = vmulq_f32(as_float, factor); vst1q_f32(dst, divided); @@ -470,16 +474,26 @@ void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsign x <<= 8; x |= (unsigned char)(src[0]); #endif - /* sign extension - left shift will be reverted by scaling */ - *dst = (x << 8) * scaling; + if (do_signext) { + /* sign extension - left shift will be reverted by scaling */ + x <<= 8; + } + *dst = x * scaling; dst++; src += src_skip; } } -void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) +void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) +{ + sample_move_dS_s32s_signext (dst, src, nsamples, src_skip, true); +} + + +static inline void sample_move_dS_s32_signext (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip, const bool do_signext) { - const jack_default_audio_sample_t scaling = 1.0 / (SAMPLE_24BIT_SCALING << 8); + const jack_default_audio_sample_t scaling_divisor = do_signext ? 
(SAMPLE_24BIT_SCALING << 8) : SAMPLE_32BIT_SCALING; + const jack_default_audio_sample_t scaling = 1.0 / scaling_divisor; #if defined (__SSE2__) && !defined (__sun__) unsigned long unrolled = nsamples / 4; @@ -495,11 +509,12 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne int i4 = *((int *) src); src+= src_skip; - __m128i src = _mm_set_epi32(i4, i3, i2, i1); - /* sign extension - left shift will be reverted by scaling */ - __m128i shifted = _mm_slli_epi32(src, 8); - - __m128 as_float = _mm_cvtepi32_ps(shifted); + __m128i src128 = _mm_set_epi32(i4, i3, i2, i1); + if (do_signext) { + /* sign extension - left shift will be reverted by scaling */ + src128 = _mm_slli_epi32(src128, 8); + } + __m128 as_float = _mm_cvtepi32_ps(src128); __m128 divided = _mm_mul_ps(as_float, factor); _mm_storeu_ps(dst, divided); @@ -526,9 +541,11 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne src128 = vld1q_lane_s32((int32_t*)(src+3*src_skip), src128, 3); break; } - /* sign extension - left shift will be reverted by scaling */ - int32x4_t shifted = vshlq_n_s32(src128, 8); - float32x4_t as_float = vcvtq_f32_s32(shifted); + if (do_signext) { + /* sign extension - left shift will be reverted by scaling */ + src128 = vshlq_n_s32(src128, 8); + } + float32x4_t as_float = vcvtq_f32_s32(src128); float32x4_t divided = vmulq_f32(as_float, factor); vst1q_f32(dst, divided); @@ -541,13 +558,23 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne /* ALERT: signed sign-extension portability !!! 
*/ while (nsamples--) { - /* sign extension - left shift will be reverted by scaling */ - *dst = (*((int *) src) << 8) * scaling; + int src32 = *((int *) src); + if (do_signext) { + /* sign extension - left shift will be reverted by scaling */ + src32 <<= 8; + } + *dst = src32 * scaling; dst++; src += src_skip; } } +void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) +{ + sample_move_dS_s32_signext (dst, src, nsamples, src_skip, true); +} + + void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) { #if defined (__ARM_NEON__) || defined (__ARM_NEON)