Browse Source

memops: Align float to S24LE and S32LE conversion

Signed-off-by: Timo Wischer <twischer@de.adit-jv.com>
tags/v1.9.13
Timo Wischer Filipe Coelho <falktx@falktx.com> 6 years ago
parent
commit
4455fe020c
1 changed file with 29 additions and 19 deletions
  1. +29
    -19
      common/memops.c

+ 29
- 19
common/memops.c View File

@@ -83,8 +83,6 @@


#define SAMPLE_24BIT_MAX 8388607 #define SAMPLE_24BIT_MAX 8388607
#define SAMPLE_24BIT_MIN -8388607 #define SAMPLE_24BIT_MIN -8388607
#define SAMPLE_24BIT_MAX_F 8388607.0f
#define SAMPLE_24BIT_MIN_F -8388607.0f


#define SAMPLE_16BIT_MAX 32767 #define SAMPLE_16BIT_MAX 32767
#define SAMPLE_16BIT_MIN -32767 #define SAMPLE_16BIT_MIN -32767
@@ -128,13 +126,13 @@
(d) = f_round ((s));\ (d) = f_round ((s));\
} }


#define float_24u32(s, d) \
#define float_32(s, d, scale) \
if ((s) <= NORMALIZED_FLOAT_MIN) {\ if ((s) <= NORMALIZED_FLOAT_MIN) {\
(d) = SAMPLE_24BIT_MIN;\
(d) = -scale;\
} else if ((s) >= NORMALIZED_FLOAT_MAX) {\ } else if ((s) >= NORMALIZED_FLOAT_MAX) {\
(d) = SAMPLE_24BIT_MAX;\
(d) = scale;\
} else {\ } else {\
(d) = f_round ((s) * SAMPLE_24BIT_SCALING);\
(d) = f_round ((s) * scale);\
} }




@@ -182,13 +180,13 @@ static inline float32x4_t clip(float32x4_t s, float32x4_t min, float32x4_t max)
return vminq_f32(max, vmaxq_f32(s, min)); return vminq_f32(max, vmaxq_f32(s, min));
} }


static inline int32x4_t float_24_neon(float32x4_t s)
static inline int32x4_t float_32_neon(float32x4_t s, const int32_t scaling)
{ {
const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX); const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
const float32x4_t lower_bound = vdupq_n_f32(NORMALIZED_FLOAT_MIN); const float32x4_t lower_bound = vdupq_n_f32(NORMALIZED_FLOAT_MIN);


float32x4_t clipped = clip(s, lower_bound, upper_bound); float32x4_t clipped = clip(s, lower_bound, upper_bound);
float32x4_t scaled = vmulq_f32(clipped, vdupq_n_f32(SAMPLE_24BIT_SCALING));
float32x4_t scaled = vmulq_f32(clipped, vdupq_n_f32(scaling));
return vcvtq_s32_f32(scaled); return vcvtq_s32_f32(scaled);
} }


@@ -256,7 +254,7 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign


/* functions for native integer sample data */ /* functions for native integer sample data */


void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
static inline void sample_move_d32scal_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state, const int32_t scaling)
{ {
#if defined (__ARM_NEON__) || defined (__ARM_NEON) #if defined (__ARM_NEON__) || defined (__ARM_NEON)
unsigned long unrolled = nsamples / 4; unsigned long unrolled = nsamples / 4;
@@ -264,7 +262,7 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign


while (unrolled--) { while (unrolled--) {
float32x4_t samples = vld1q_f32(src); float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples);
int32x4_t converted = float_32_neon(samples, scaling);
converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted))); converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));


switch(dst_skip) { switch(dst_skip) {
@@ -287,7 +285,7 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign


while (nsamples--) { while (nsamples--) {


float_24u32 (*src, z);
float_32 (*src, z, scaling);


#if __BYTE_ORDER == __LITTLE_ENDIAN #if __BYTE_ORDER == __LITTLE_ENDIAN
dst[0]=(char)(z>>24); dst[0]=(char)(z>>24);
@@ -303,12 +301,18 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign
dst += dst_skip; dst += dst_skip;
src++; src++;
} }
}
}


void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
/* Entry point that keeps the pre-existing name sample_move_d32u24_sSs.
 * It forwards dst, src, nsamples, dst_skip and state unchanged to
 * sample_move_d32scal_sSs(), supplying SAMPLE_24BIT_SCALING as the
 * scaling argument. */
void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
sample_move_d32scal_sSs (dst, src, nsamples, dst_skip, state, SAMPLE_24BIT_SCALING);
}


static inline void sample_move_d32scal_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state, const int32_t scaling)
{ {
#if defined (__SSE2__) && !defined (__sun__) #if defined (__SSE2__) && !defined (__sun__)
__m128 int_max = _mm_set1_ps(SAMPLE_24BIT_MAX_F);
__m128 int_max = _mm_set1_ps(scaling);
__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max); __m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
__m128 factor = int_max; __m128 factor = int_max;


@@ -361,7 +365,7 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne


while (unrolled--) { while (unrolled--) {
float32x4_t samples = vld1q_f32(src); float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples);
int32x4_t converted = float_32_neon(samples, scaling);


switch(dst_skip) { switch(dst_skip) {
case 4: case 4:
@@ -382,12 +386,18 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne


#if !defined (__SSE2__) #if !defined (__SSE2__)
while (nsamples--) { while (nsamples--) {
float_24u32 (*src, *((int32_t*) dst));
float_32 (*src, *((int32_t*) dst), scaling);
dst += dst_skip; dst += dst_skip;
src++; src++;
} }
#endif #endif
}
}

/* Entry point that keeps the pre-existing name sample_move_d32u24_sS.
 * It forwards dst, src, nsamples, dst_skip and state unchanged to
 * sample_move_d32scal_sS(), supplying SAMPLE_24BIT_SCALING as the
 * scaling argument. */
void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
sample_move_d32scal_sS (dst, src, nsamples, dst_skip, state, SAMPLE_24BIT_SCALING);
}



void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{ {
@@ -533,7 +543,7 @@ void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned
int i; int i;
int32_t z[4]; int32_t z[4];
float32x4_t samples = vld1q_f32(src); float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples);
int32x4_t converted = float_32_neon(samples, SAMPLE_24BIT_SCALING);
converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted))); converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));
vst1q_s32(z, converted); vst1q_s32(z, converted);


@@ -604,7 +614,7 @@ void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned l
int i; int i;
int32_t z[4]; int32_t z[4];
float32x4_t samples = vld1q_f32(src); float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples);
int32x4_t converted = float_32_neon(samples, SAMPLE_24BIT_SCALING);
vst1q_s32(z, converted); vst1q_s32(z, converted);


for (i = 0; i != 4; ++i) { for (i = 0; i != 4; ++i) {


Loading…
Cancel
Save