Browse Source

memops: Use right-aligned float to S24LE conversion

ALSA expects right-aligned samples (0x00******), as stated in the alsa-lib
header comment for SND_PCM_FORMAT_S24_LE:
"Signed 24 bit Little Endian using low three bytes in 32-bit word"
See http://git.alsa-project.org/?p=alsa-lib.git;a=blob;f=include/pcm.h;h=5b0782315585de1d5ab82c9f2036b62c168f5a48;hb=HEAD#l140

Signed-off-by: Timo Wischer <twischer@de.adit-jv.com>
tags/v1.9.13
Timo Wischer Filipe Coelho <falktx@falktx.com> 7 years ago
parent
commit
831163e516
1 changed file with 24 additions and 27 deletions
  1. +24
    -27
      common/memops.c

+ 24
- 27
common/memops.c View File

@@ -130,11 +130,11 @@


#define float_24u32(s, d) \ #define float_24u32(s, d) \
if ((s) <= NORMALIZED_FLOAT_MIN) {\ if ((s) <= NORMALIZED_FLOAT_MIN) {\
(d) = SAMPLE_24BIT_MIN << 8;\
(d) = SAMPLE_24BIT_MIN;\
} else if ((s) >= NORMALIZED_FLOAT_MAX) {\ } else if ((s) >= NORMALIZED_FLOAT_MAX) {\
(d) = SAMPLE_24BIT_MAX << 8;\
(d) = SAMPLE_24BIT_MAX;\
} else {\ } else {\
(d) = f_round ((s) * SAMPLE_24BIT_SCALING) << 8;\
(d) = f_round ((s) * SAMPLE_24BIT_SCALING);\
} }


/* call this when "s" has already been scaled (e.g. when dithering) /* call this when "s" has already been scaled (e.g. when dithering)
@@ -265,7 +265,7 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign
S - sample is a jack_default_audio_sample_t, currently (October 2008) a 32 bit floating point value S - sample is a jack_default_audio_sample_t, currently (October 2008) a 32 bit floating point value
Ss - like S but reverse endian from the host CPU Ss - like S but reverse endian from the host CPU
32u24 - sample is an signed 32 bit integer value, but data is in upper 24 bits only
32u24 - sample is an signed 32 bit integer value, but data is in lower 24 bits only
32u24s - like 32u24 but reverse endian from the host CPU 32u24s - like 32u24 but reverse endian from the host CPU
24 - sample is an signed 24 bit integer value 24 - sample is an signed 24 bit integer value
24s - like 24 but reverse endian from the host CPU 24s - like 24 but reverse endian from the host CPU
@@ -288,18 +288,17 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign
while (unrolled--) { while (unrolled--) {
float32x4_t samples = vld1q_f32(src); float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples); int32x4_t converted = float_24_neon(samples);
int32x4_t shifted = vshlq_n_s32(converted, 8);
shifted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(shifted)));
converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));


switch(dst_skip) { switch(dst_skip) {
case 4: case 4:
vst1q_s32((int32_t*)dst, shifted);
vst1q_s32((int32_t*)dst, converted);
break; break;
default: default:
vst1q_lane_s32((int32_t*)(dst), shifted, 0);
vst1q_lane_s32((int32_t*)(dst+dst_skip), shifted, 1);
vst1q_lane_s32((int32_t*)(dst+2*dst_skip), shifted, 2);
vst1q_lane_s32((int32_t*)(dst+3*dst_skip), shifted, 3);
vst1q_lane_s32((int32_t*)(dst), converted, 0);
vst1q_lane_s32((int32_t*)(dst+dst_skip), converted, 1);
vst1q_lane_s32((int32_t*)(dst+2*dst_skip), converted, 2);
vst1q_lane_s32((int32_t*)(dst+3*dst_skip), converted, 3);
break; break;
} }
dst += 4*dst_skip; dst += 4*dst_skip;
@@ -345,19 +344,18 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
__m128 clipped = clip(scaled, int_min, int_max); __m128 clipped = clip(scaled, int_min, int_max);


__m128i y = _mm_cvttps_epi32(clipped); __m128i y = _mm_cvttps_epi32(clipped);
__m128i shifted = _mm_slli_epi32(y, 8);


#ifdef __SSE4_1__ #ifdef __SSE4_1__
*(int32_t*)dst = _mm_extract_epi32(shifted, 0);
*(int32_t*)(dst+dst_skip) = _mm_extract_epi32(shifted, 1);
*(int32_t*)(dst+2*dst_skip) = _mm_extract_epi32(shifted, 2);
*(int32_t*)(dst+3*dst_skip) = _mm_extract_epi32(shifted, 3);
*(int32_t*)dst = _mm_extract_epi32(y, 0);
*(int32_t*)(dst+dst_skip) = _mm_extract_epi32(y, 1);
*(int32_t*)(dst+2*dst_skip) = _mm_extract_epi32(y, 2);
*(int32_t*)(dst+3*dst_skip) = _mm_extract_epi32(y, 3);
#else #else
__m128i shuffled1 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(0, 3, 2, 1));
__m128i shuffled2 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(1, 0, 3, 2));
__m128i shuffled3 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(2, 1, 0, 3));
__m128i shuffled1 = _mm_shuffle_epi32(y, _MM_SHUFFLE(0, 3, 2, 1));
__m128i shuffled2 = _mm_shuffle_epi32(y, _MM_SHUFFLE(1, 0, 3, 2));
__m128i shuffled3 = _mm_shuffle_epi32(y, _MM_SHUFFLE(2, 1, 0, 3));


_mm_store_ss((float*)dst, (__m128)shifted);
_mm_store_ss((float*)dst, (__m128)y);


_mm_store_ss((float*)(dst+dst_skip), (__m128)shuffled1); _mm_store_ss((float*)(dst+dst_skip), (__m128)shuffled1);
_mm_store_ss((float*)(dst+2*dst_skip), (__m128)shuffled2); _mm_store_ss((float*)(dst+2*dst_skip), (__m128)shuffled2);
@@ -374,7 +372,7 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
__m128 clipped = _mm_min_ss(int_max, _mm_max_ss(scaled, int_min)); __m128 clipped = _mm_min_ss(int_max, _mm_max_ss(scaled, int_min));


int y = _mm_cvttss_si32(clipped); int y = _mm_cvttss_si32(clipped);
*((int *) dst) = y<<8;
*((int *) dst) = y;


dst += dst_skip; dst += dst_skip;
src++; src++;
@@ -387,17 +385,16 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
while (unrolled--) { while (unrolled--) {
float32x4_t samples = vld1q_f32(src); float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples); int32x4_t converted = float_24_neon(samples);
int32x4_t shifted = vshlq_n_s32(converted, 8);


switch(dst_skip) { switch(dst_skip) {
case 4: case 4:
vst1q_s32((int32_t*)dst, shifted);
vst1q_s32((int32_t*)dst, converted);
break; break;
default: default:
vst1q_lane_s32((int32_t*)(dst), shifted, 0);
vst1q_lane_s32((int32_t*)(dst+dst_skip), shifted, 1);
vst1q_lane_s32((int32_t*)(dst+2*dst_skip), shifted, 2);
vst1q_lane_s32((int32_t*)(dst+3*dst_skip), shifted, 3);
vst1q_lane_s32((int32_t*)(dst), converted, 0);
vst1q_lane_s32((int32_t*)(dst+dst_skip), converted, 1);
vst1q_lane_s32((int32_t*)(dst+2*dst_skip), converted, 2);
vst1q_lane_s32((int32_t*)(dst+3*dst_skip), converted, 3);
break; break;
} }
dst += 4*dst_skip; dst += 4*dst_skip;


Loading…
Cancel
Save