Browse Source

Merge pull request #250 from schnitzeltony/NEON

ARM Neon support for non-dithering sample conversion functions
tags/v1.9.11-RC1
Filipe Coelho GitHub 8 years ago
parent
commit
4cf826c82c
3 changed files with 741 additions and 3 deletions
  1. +349
    -2
      common/memops.c
  2. +390
    -0
      example-clients/simdtests.cpp
  3. +2
    -1
      example-clients/wscript

+ 349
- 2
common/memops.c View File

@@ -42,6 +42,10 @@
#endif #endif
#endif #endif


#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif

/* Notes about these *_SCALING values. /* Notes about these *_SCALING values.


the MAX_<N>BIT values are floating point. when multiplied by the MAX_<N>BIT values are floating point. when multiplied by
@@ -193,6 +197,35 @@ static inline __m128i float_24_sse(__m128 s)
} }
#endif #endif



#ifdef __ARM_NEON__

/* Lane-wise clamp of the four floats in s to the range [min, max]. */
static inline float32x4_t clip(float32x4_t s, float32x4_t min, float32x4_t max)
{
	/* first raise every lane to at least min ... */
	const float32x4_t raised = vmaxq_f32(s, min);

	/* ... then cap every lane at max */
	return vminq_f32(max, raised);
}

/*
 * Convert four float samples to four 32 bit integers scaled by
 * SAMPLE_24BIT_SCALING.
 *
 * Each lane is clamped to [NORMALIZED_FLOAT_MIN, NORMALIZED_FLOAT_MAX]
 * before scaling. The final vcvtq_s32_f32 truncates toward zero.
 */
static inline int32x4_t float_24_neon(float32x4_t s)
{
	const float32x4_t floor_v = vdupq_n_f32(NORMALIZED_FLOAT_MIN);
	const float32x4_t ceil_v = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
	const float32x4_t gain = vdupq_n_f32(SAMPLE_24BIT_SCALING);

	return vcvtq_s32_f32(vmulq_f32(clip(s, floor_v, ceil_v), gain));
}

/*
 * Convert four float samples to four 16 bit integers scaled by
 * SAMPLE_16BIT_SCALING.
 *
 * Each lane is clamped to [NORMALIZED_FLOAT_MIN, NORMALIZED_FLOAT_MAX],
 * scaled, truncated toward zero to 32 bit and then narrowed with
 * vmovn_s32, which keeps the low 16 bits of every lane.
 */
static inline int16x4_t float_16_neon(float32x4_t s)
{
	const float32x4_t floor_v = vdupq_n_f32(NORMALIZED_FLOAT_MIN);
	const float32x4_t ceil_v = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
	const float32x4_t gain = vdupq_n_f32(SAMPLE_16BIT_SCALING);

	const int32x4_t wide = vcvtq_s32_f32(vmulq_f32(clip(s, floor_v, ceil_v), gain));
	return vmovn_s32(wide);
}
#endif

/* Linear Congruential noise generator. From the music-dsp list /* Linear Congruential noise generator. From the music-dsp list
* less random than rand(), but good enough and 10x faster * less random than rand(), but good enough and 10x faster
*/ */
@@ -248,6 +281,32 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign


void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{ {
#ifdef __ARM_NEON__
unsigned long unrolled = nsamples / 4;
nsamples = nsamples & 3;

while (unrolled--) {
float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples);
int32x4_t shifted = vshlq_n_s32(converted, 8);
shifted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(shifted)));

switch(dst_skip) {
case 4:
vst1q_s32((int32_t*)dst, shifted);
break;
default:
vst1q_lane_s32((int32_t*)(dst), shifted, 0);
vst1q_lane_s32((int32_t*)(dst+dst_skip), shifted, 1);
vst1q_lane_s32((int32_t*)(dst+2*dst_skip), shifted, 2);
vst1q_lane_s32((int32_t*)(dst+3*dst_skip), shifted, 3);
break;
}
dst += 4*dst_skip;
src+= 4;
}
#endif

int32_t z; int32_t z;


while (nsamples--) { while (nsamples--) {
@@ -321,7 +380,33 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
src++; src++;
} }


#else
#elif defined(__ARM_NEON__)
unsigned long unrolled = nsamples / 4;
nsamples = nsamples & 3;

while (unrolled--) {
float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples);
int32x4_t shifted = vshlq_n_s32(converted, 8);

switch(dst_skip) {
case 4:
vst1q_s32((int32_t*)dst, shifted);
break;
default:
vst1q_lane_s32((int32_t*)(dst), shifted, 0);
vst1q_lane_s32((int32_t*)(dst+dst_skip), shifted, 1);
vst1q_lane_s32((int32_t*)(dst+2*dst_skip), shifted, 2);
vst1q_lane_s32((int32_t*)(dst+3*dst_skip), shifted, 3);
break;
}
dst += 4*dst_skip;

src+= 4;
}
#endif

#if !defined (__SSE2__)
while (nsamples--) { while (nsamples--) {
float_24u32 (*src, *((int32_t*) dst)); float_24u32 (*src, *((int32_t*) dst));
dst += dst_skip; dst += dst_skip;
@@ -332,6 +417,38 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne


void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{ {
#ifdef __ARM_NEON__
float32x4_t factor = vdupq_n_f32(1.0 / SAMPLE_24BIT_SCALING);
unsigned long unrolled = nsamples / 4;
while (unrolled--) {
int32x4_t src128;
switch(src_skip)
{
case 4:
src128 = vld1q_s32((int32_t*)src);
break;
case 8:
src128 = vld2q_s32((int32_t*)src).val[0];
break;
default:
src128 = vld1q_lane_s32((int32_t*)src, src128, 0);
src128 = vld1q_lane_s32((int32_t*)(src+src_skip), src128, 1);
src128 = vld1q_lane_s32((int32_t*)(src+2*src_skip), src128, 2);
src128 = vld1q_lane_s32((int32_t*)(src+3*src_skip), src128, 3);
break;
}
src128 = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(src128)));
int32x4_t shifted = vshrq_n_s32(src128, 8);
float32x4_t as_float = vcvtq_f32_s32(shifted);
float32x4_t divided = vmulq_f32(as_float, factor);
vst1q_f32(dst, divided);

src += 4*src_skip;
dst += 4;
}
nsamples = nsamples & 3;
#endif

/* ALERT: signed sign-extension portability !!! */ /* ALERT: signed sign-extension portability !!! */


const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING; const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
@@ -389,6 +506,34 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne
dst += 4; dst += 4;
} }
nsamples = nsamples & 3; nsamples = nsamples & 3;
#elif defined(__ARM_NEON__)
unsigned long unrolled = nsamples / 4;
float32x4_t factor = vdupq_n_f32(1.0 / SAMPLE_24BIT_SCALING);
while (unrolled--) {
int32x4_t src128;
switch(src_skip) {
case 4:
src128 = vld1q_s32((int32_t*)src);
break;
case 8:
src128 = vld2q_s32((int32_t*)src).val[0];
break;
default:
src128 = vld1q_lane_s32((int32_t*)src, src128, 0);
src128 = vld1q_lane_s32((int32_t*)(src+src_skip), src128, 1);
src128 = vld1q_lane_s32((int32_t*)(src+2*src_skip), src128, 2);
src128 = vld1q_lane_s32((int32_t*)(src+3*src_skip), src128, 3);
break;
}
int32x4_t shifted = vshrq_n_s32(src128, 8);
float32x4_t as_float = vcvtq_f32_s32(shifted);
float32x4_t divided = vmulq_f32(as_float, factor);
vst1q_f32(dst, divided);

src += 4*src_skip;
dst += 4;
}
nsamples = nsamples & 3;
#endif #endif


/* ALERT: signed sign-extension portability !!! */ /* ALERT: signed sign-extension portability !!! */
@@ -403,6 +548,25 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne


void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{ {
#ifdef __ARM_NEON__
unsigned long unrolled = nsamples / 4;
while (unrolled--) {
int i;
int32_t z[4];
float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples);
converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));
vst1q_s32(z, converted);

for (i = 0; i != 4; ++i) {
memcpy (dst, ((char*)(z+i))+1, 3);
dst += dst_skip;
}
src += 4;
}
nsamples = nsamples & 3;
#endif

int32_t z; int32_t z;


while (nsamples--) { while (nsamples--) {
@@ -455,6 +619,22 @@ void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned l
nsamples -= 4; nsamples -= 4;
src += 4; src += 4;
} }
#elif defined(__ARM_NEON__)
unsigned long unrolled = nsamples / 4;
while (unrolled--) {
int i;
int32_t z[4];
float32x4_t samples = vld1q_f32(src);
int32x4_t converted = float_24_neon(samples);
vst1q_s32(z, converted);

for (i = 0; i != 4; ++i) {
memcpy (dst, z+i, 3);
dst += dst_skip;
}
src += 4;
}
nsamples = nsamples & 3;
#endif #endif


int32_t z; int32_t z;
@@ -473,9 +653,41 @@ void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned l


void sample_move_dS_s24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) void sample_move_dS_s24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{ {
const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;

#ifdef __ARM_NEON__
// we shift 8 to the right by dividing by 256.0 -> no sign extra handling
const float32x4_t vscaling = vdupq_n_f32(scaling/256.0);
int32_t x[4];
memset(x, 0, sizeof(x));
unsigned long unrolled = nsamples / 4;
while (unrolled--) {
#if __BYTE_ORDER == __BIG_ENDIAN /* ARM big endian?? */
// right aligned / inverse sequence below -> *256
memcpy(((char*)&x[0])+1, src, 3);
memcpy(((char*)&x[1])+1, src+src_skip, 3);
memcpy(((char*)&x[2])+1, src+2*src_skip, 3);
memcpy(((char*)&x[3])+1, src+3*src_skip, 3);
#else
memcpy(&x[0], src, 3);
memcpy(&x[1], src+src_skip, 3);
memcpy(&x[2], src+2*src_skip, 3);
memcpy(&x[3], src+3*src_skip, 3);
#endif
src += 4 * src_skip;

int32x4_t source = vld1q_s32(x);
source = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(source)));
float32x4_t converted = vcvtq_f32_s32(source);
float32x4_t scaled = vmulq_f32(converted, vscaling);
vst1q_f32(dst, scaled);
dst += 4;
}
nsamples = nsamples & 3;
#endif

/* ALERT: signed sign-extension portability !!! */ /* ALERT: signed sign-extension portability !!! */


const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
while (nsamples--) { while (nsamples--) {
int x; int x;
#if __BYTE_ORDER == __LITTLE_ENDIAN #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -528,6 +740,34 @@ void sample_move_dS_s24 (jack_default_audio_sample_t *dst, char *src, unsigned l
dst += 4; dst += 4;
nsamples -= 4; nsamples -= 4;
} }
#elif defined(__ARM_NEON__)
// we shift 8 to the right by dividing by 256.0 -> no sign extra handling
const float32x4_t vscaling = vdupq_n_f32(scaling/256.0);
int32_t x[4];
memset(x, 0, sizeof(x));
unsigned long unrolled = nsamples / 4;
while (unrolled--) {
#if __BYTE_ORDER == __BIG_ENDIAN /* ARM big endian?? */
// left aligned -> *256
memcpy(&x[0], src, 3);
memcpy(&x[1], src+src_skip, 3);
memcpy(&x[2], src+2*src_skip, 3);
memcpy(&x[3], src+3*src_skip, 3);
#else
memcpy(((char*)&x[0])+1, src, 3);
memcpy(((char*)&x[1])+1, src+src_skip, 3);
memcpy(((char*)&x[2])+1, src+2*src_skip, 3);
memcpy(((char*)&x[3])+1, src+3*src_skip, 3);
#endif
src += 4 * src_skip;

int32x4_t source = vld1q_s32(x);
float32x4_t converted = vcvtq_f32_s32(source);
float32x4_t scaled = vmulq_f32(converted, vscaling);
vst1q_f32(dst, scaled);
dst += 4;
}
nsamples = nsamples & 3;
#endif #endif


while (nsamples--) { while (nsamples--) {
@@ -547,6 +787,30 @@ void sample_move_dS_s24 (jack_default_audio_sample_t *dst, char *src, unsigned l


void sample_move_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) void sample_move_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{ {
#ifdef __ARM_NEON__
unsigned long unrolled = nsamples / 4;
nsamples = nsamples & 3;

while (unrolled--) {
float32x4_t samples = vld1q_f32(src);
int16x4_t converted = float_16_neon(samples);
converted = vreinterpret_s16_u8(vrev16_u8(vreinterpret_u8_s16(converted)));

switch(dst_skip) {
case 2:
vst1_s16((int16_t*)dst, converted);
break;
default:
vst1_lane_s16((int16_t*)(dst), converted, 0);
vst1_lane_s16((int16_t*)(dst+dst_skip), converted, 1);
vst1_lane_s16((int16_t*)(dst+2*dst_skip), converted, 2);
vst1_lane_s16((int16_t*)(dst+3*dst_skip), converted, 3);
break;
}
dst += 4*dst_skip;
src+= 4;
}
#endif
int16_t tmp; int16_t tmp;


while (nsamples--) { while (nsamples--) {
@@ -574,6 +838,29 @@ void sample_move_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned


void sample_move_d16_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) void sample_move_d16_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{ {
#ifdef __ARM_NEON__
unsigned long unrolled = nsamples / 4;
nsamples = nsamples & 3;

while (unrolled--) {
float32x4_t samples = vld1q_f32(src);
int16x4_t converted = float_16_neon(samples);

switch(dst_skip) {
case 2:
vst1_s16((int16_t*)dst, converted);
break;
default:
vst1_lane_s16((int16_t*)(dst), converted, 0);
vst1_lane_s16((int16_t*)(dst+dst_skip), converted, 1);
vst1_lane_s16((int16_t*)(dst+2*dst_skip), converted, 2);
vst1_lane_s16((int16_t*)(dst+3*dst_skip), converted, 3);
break;
}
dst += 4*dst_skip;
src+= 4;
}
#endif
while (nsamples--) { while (nsamples--) {
float_16 (*src, *((int16_t*) dst)); float_16 (*src, *((int16_t*) dst));
dst += dst_skip; dst += dst_skip;
@@ -730,6 +1017,36 @@ void sample_move_dS_s16s (jack_default_audio_sample_t *dst, char *src, unsigned
{ {
short z; short z;
const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING; const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING;
#ifdef __ARM_NEON__
const float32x4_t vscaling = vdupq_n_f32(scaling);
unsigned long unrolled = nsamples / 4;
while (unrolled--) {
int16x4_t source16x4;
switch(src_skip) {
case 2:
source16x4 = vld1_s16((int16_t*)src);
break;
case 4:
source16x4 = vld2_s16((int16_t*)src).val[0];
break;
default:
source16x4 = vld1_lane_s16((int16_t*)src, source16x4, 0);
source16x4 = vld1_lane_s16((int16_t*)(src+src_skip), source16x4, 1);
source16x4 = vld1_lane_s16((int16_t*)(src+2*src_skip), source16x4, 2);
source16x4 = vld1_lane_s16((int16_t*)(src+3*src_skip), source16x4, 3);
break;
}
source16x4 = vreinterpret_s16_u8(vrev16_u8(vreinterpret_u8_s16(source16x4)));
int32x4_t source32x4 = vmovl_s16(source16x4);
src += 4 * src_skip;

float32x4_t converted = vcvtq_f32_s32(source32x4);
float32x4_t scaled = vmulq_f32(converted, vscaling);
vst1q_f32(dst, scaled);
dst += 4;
}
nsamples = nsamples & 3;
#endif


/* ALERT: signed sign-extension portability !!! */ /* ALERT: signed sign-extension portability !!! */
while (nsamples--) { while (nsamples--) {
@@ -752,6 +1069,36 @@ void sample_move_dS_s16 (jack_default_audio_sample_t *dst, char *src, unsigned l
{ {
/* ALERT: signed sign-extension portability !!! */ /* ALERT: signed sign-extension portability !!! */
const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING; const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING;
#ifdef __ARM_NEON__
const float32x4_t vscaling = vdupq_n_f32(scaling);
unsigned long unrolled = nsamples / 4;
while (unrolled--) {
int16x4_t source16x4;
switch(src_skip) {
case 2:
source16x4 = vld1_s16((int16_t*)src);
break;
case 4:
source16x4 = vld2_s16((int16_t*)src).val[0];
break;
default:
source16x4 = vld1_lane_s16((int16_t*)src, source16x4, 0);
source16x4 = vld1_lane_s16((int16_t*)(src+src_skip), source16x4, 1);
source16x4 = vld1_lane_s16((int16_t*)(src+2*src_skip), source16x4, 2);
source16x4 = vld1_lane_s16((int16_t*)(src+3*src_skip), source16x4, 3);
break;
}
int32x4_t source32x4 = vmovl_s16(source16x4);
src += 4 * src_skip;

float32x4_t converted = vcvtq_f32_s32(source32x4);
float32x4_t scaled = vmulq_f32(converted, vscaling);
vst1q_f32(dst, scaled);
dst += 4;
}
nsamples = nsamples & 3;
#endif

while (nsamples--) { while (nsamples--) {
*dst = (*((short *) src)) * scaling; *dst = (*((short *) src)) * scaling;
dst++; dst++;


+ 390
- 0
example-clients/simdtests.cpp View File

@@ -0,0 +1,390 @@
/*
* simdtests.cpp -- test accuracy and performance of SIMD optimizations
*
* Copyright (C) 2017 Andreas Mueller.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* We must include all headers that memops.c includes, to avoid trouble
* with our namespace game below.
*/
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <memory.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
#ifdef __linux__
#include <endian.h>
#endif
#include "memops.h"

#if defined (__SSE2__) && !defined (__sun__)
#include <emmintrin.h>
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
#endif

#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif

// our additional headers
#include <time.h>

/* Dirty: include memops.c twice, the second time with SIMD disabled,
* so we can compare the accelerated against the non-accelerated code.
*/
// First copy: compiled with whatever SIMD feature macros the compiler defines.
namespace accelerated {
#include "../common/memops.c"
}

// Second copy: the SIMD feature macros are undefined before the inclusion so
// that the #ifdef'ed SSE2 / NEON sections of memops.c are not compiled in.
// The macros stay undefined for the rest of this translation unit.
namespace origerated {
#ifdef __SSE2__
#undef __SSE2__
#endif

#ifdef __ARM_NEON__
#undef __ARM_NEON__
#endif

#include "../common/memops.c"
}

// Conversion function pointer types; they are used for the function
// members of test_case_data below.

// float -> integer: converts nsamples floats from src into dst, advancing
// dst by dst_skip bytes per sample; state is the dither state
// (main() passes the test case's ditherstate member here).
typedef void (*t_jack_to_integer)(
char *dst,
jack_default_audio_sample_t *src,
unsigned long nsamples,
unsigned long dst_skip,
dither_state_t *state);

// integer -> float: converts nsamples integer samples from src into dst,
// advancing src by src_skip bytes per sample.
typedef void (*t_integer_to_jack)(
jack_default_audio_sample_t *dst,
char *src,
unsigned long nsamples,
unsigned long src_skip);

// Description of one test case: a sample format plus the accelerated and
// the original implementation of both conversion directions.
// NOTE: test_cases below is initialized positionally, so the member order
// must not change.
typedef struct test_case_data {
// bytes per sample of one channel in the integer buffer; main() multiplies
// it by the channel count to get the skip passed to the conversions
uint32_t frame_size;
// number of bytes extract_integer() reads per sample; a value of 2 selects
// SAMPLE_16BIT_SCALING in main(), anything else SAMPLE_24BIT_SCALING
uint32_t sample_size;
// selects the byte order main() passes to extract_integer()
// (inverted when __BYTE_ORDER == __BIG_ENDIAN)
bool reverse;
// float -> integer conversion, SIMD build
t_jack_to_integer jack_to_integer_accel;
// float -> integer conversion, build with SIMD macros undefined
t_jack_to_integer jack_to_integer_orig;
// integer -> float conversion, SIMD build
t_integer_to_jack integer_to_jack_accel;
// integer -> float conversion, build with SIMD macros undefined
t_integer_to_jack integer_to_jack_orig;
// passed unchanged as the state argument of the float -> integer functions
dither_state_t *ditherstate;
// label printed in the result lines
const char *name;
} test_case_data_t;

// Table of all tested conversions. Every entry lists, in member order:
// frame_size, sample_size, reverse, float->integer (accel, orig),
// integer->float (accel, orig), ditherstate, name.
// All entries use a NULL dither state.
test_case_data_t test_cases[] = {
{
4,
3,
true,
accelerated::sample_move_d32u24_sSs,
origerated::sample_move_d32u24_sSs,
accelerated::sample_move_dS_s32u24s,
origerated::sample_move_dS_s32u24s,
NULL,
"32u24s" },
{
4,
3,
false,
accelerated::sample_move_d32u24_sS,
origerated::sample_move_d32u24_sS,
accelerated::sample_move_dS_s32u24,
origerated::sample_move_dS_s32u24,
NULL,
"32u24" },
{
3,
3,
true,
accelerated::sample_move_d24_sSs,
origerated::sample_move_d24_sSs,
accelerated::sample_move_dS_s24s,
origerated::sample_move_dS_s24s,
NULL,
"24s" },
{
3,
3,
false,
accelerated::sample_move_d24_sS,
origerated::sample_move_d24_sS,
accelerated::sample_move_dS_s24,
origerated::sample_move_dS_s24,
NULL,
"24" },
{
2,
2,
true,
accelerated::sample_move_d16_sSs,
origerated::sample_move_d16_sSs,
accelerated::sample_move_dS_s16s,
origerated::sample_move_dS_s16s,
NULL,
"16s" },
{
2,
2,
false,
accelerated::sample_move_d16_sS,
origerated::sample_move_d16_sS,
accelerated::sample_move_dS_s16,
origerated::sample_move_dS_s16,
NULL,
"16" },
};

// every conversion is repeated this many times so that the measured
// time is long enough to be compared
const uint32_t retry_per_case = 1000;

// test buffers; TESTBUFF_SIZE is the number of samples converted per call
#define TESTBUFF_SIZE 1024
// float input of the float -> integer conversions (filled in main())
jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE];
// integer buffers: at most 4 bytes per sample, times 2 for stereo
char integerbuffer_accel[TESTBUFF_SIZE*4*2];
char integerbuffer_orig[TESTBUFF_SIZE*4*2];
// float outputs of the integer -> float conversions
jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE];
jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE];

// Assemble one sample stored in buff as an unsigned integer (unsigned
// values are easier to compare).
//
//   buff         buffer holding the converted samples
//   offset       byte offset of the sample's frame inside buff
//   frame_size   number of bytes of the frame
//   sample_size  number of bytes that make up the value
//   big_endian   true:  the value occupies the first sample_size bytes of
//                       the frame, most significant byte first
//                false: the value occupies the last sample_size bytes of
//                       the frame, least significant byte first
//
// Returns the assembled value.
uint32_t extract_integer(
    char* buff,
    uint32_t offset,
    uint32_t frame_size,
    uint32_t sample_size,
    bool big_endian)
{
    const unsigned char* frame = (const unsigned char*)buff + offset;

    // Start at the least significant byte and walk towards the most
    // significant one; only position and direction depend on the byte order.
    const unsigned char* cursor;
    int step;
    if(big_endian) {
        cursor = frame + sample_size - 1;
        step = -1;
    }
    else {
        cursor = frame + frame_size - sample_size;
        step = 1;
    }

    uint32_t value = 0;
    uint32_t weight = 1;
    for(uint32_t remaining = sample_size; remaining != 0; --remaining) {
        value += *cursor * weight;
        cursor += step;
        weight *= 256;
    }
    return value;
}

int main(int argc, char *argv[])
{
// parse_arguments(argc, argv);
uint32_t maxerr_displayed = 10;

// fill jackbuffer
for(int i=0; i<TESTBUFF_SIZE; i++) {
// ramp
jack_default_audio_sample_t value =
((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2);
// force clipping
value *= 1.02;
jackbuffer_source[i] = value;
}

for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) {
// test mono/stereo
for(uint32_t channels=1; channels<=2; channels++) {
//////////////////////////////////////////////////////////////////////////////
// jackfloat -> integer

// clean target buffers
memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel));
memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig));
// accel
clock_t time_to_integer_accel = clock();
for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
{
test_cases[testcase].jack_to_integer_accel(
integerbuffer_accel,
jackbuffer_source,
TESTBUFF_SIZE,
test_cases[testcase].frame_size*channels,
test_cases[testcase].ditherstate);
}
float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC;
// orig
clock_t time_to_integer_orig = clock();
for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
{
test_cases[testcase].jack_to_integer_orig(
integerbuffer_orig,
jackbuffer_source,
TESTBUFF_SIZE,
test_cases[testcase].frame_size*channels,
test_cases[testcase].ditherstate);
}
float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC;
// output performance results
printf(
"JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
test_cases[testcase].name,
channels,
timediff_to_integer_orig,
timediff_to_integer_accel,
(timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0);
uint32_t int_deviation_max = 0;
uint32_t int_error_count = 0;
// output error (avoid spam -> limit error lines per test case)
for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels;
// compare both results
uint32_t intval_accel=extract_integer(
integerbuffer_accel,
sample_offset,
test_cases[testcase].frame_size,
test_cases[testcase].sample_size,
#if __BYTE_ORDER == __BIG_ENDIAN
!test_cases[testcase].reverse);
#else
test_cases[testcase].reverse);
#endif
uint32_t intval_orig=extract_integer(
integerbuffer_orig,
sample_offset,
test_cases[testcase].frame_size,
test_cases[testcase].sample_size,
#if __BYTE_ORDER == __BIG_ENDIAN
!test_cases[testcase].reverse);
#else
test_cases[testcase].reverse);
#endif
if(intval_accel != intval_orig) {
if(int_error_count<maxerr_displayed) {
printf("Value error sample %u:", sample);
printf(" Orig 0x");
char formatstr[10];
sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2);
printf(formatstr, intval_orig);
printf(" Accel 0x");
printf(formatstr, intval_accel);
printf("\n");
}
int_error_count++;
uint32_t int_deviation;
if(intval_accel > intval_orig)
int_deviation = intval_accel-intval_orig;
else
int_deviation = intval_orig-intval_accel;
if(int_deviation > int_deviation_max)
int_deviation_max = int_deviation;
}
}
printf(
"JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
test_cases[testcase].name,
channels,
int_error_count,
int_deviation_max);

//////////////////////////////////////////////////////////////////////////////
// integer -> jackfloat

// clean target buffers
memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel));
memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig));
// accel
clock_t time_to_float_accel = clock();
for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
{
test_cases[testcase].integer_to_jack_accel(
jackfloatbuffer_accel,
integerbuffer_orig,
TESTBUFF_SIZE,
test_cases[testcase].frame_size*channels);
}
float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC;
// orig
clock_t time_to_float_orig = clock();
for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
{
test_cases[testcase].integer_to_jack_orig(
jackfloatbuffer_orig,
integerbuffer_orig,
TESTBUFF_SIZE,
test_cases[testcase].frame_size*channels);
}
float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC;
// output performance results
printf(
"Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
test_cases[testcase].name,
channels,
timediff_to_float_orig,
timediff_to_float_accel,
(timediff_to_float_orig/timediff_to_float_accel-1)*100.0);
jack_default_audio_sample_t float_deviation_max = 0.0;
uint32_t float_error_count = 0;
// output error (avoid spam -> limit error lines per test case)
for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
// For easier estimation/readabilty we scale floats back to integer
jack_default_audio_sample_t sample_scaling;
switch(test_cases[testcase].sample_size) {
case 2:
sample_scaling = SAMPLE_16BIT_SCALING;
break;
default:
sample_scaling = SAMPLE_24BIT_SCALING;
break;
}
jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling;
jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling;
// compare both results
jack_default_audio_sample_t float_deviation;
if(floatval_accel > floatval_orig)
float_deviation = floatval_accel-floatval_orig;
else
float_deviation = floatval_orig-floatval_accel;
if(float_deviation > float_deviation_max)
float_deviation_max = float_deviation;
// deviation > half bit => error
if(float_deviation > 0.5) {
if(float_error_count<maxerr_displayed) {
printf("Value error sample %u:", sample);
printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel);
}
float_error_count++;
}
}
printf(
"Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
test_cases[testcase].name,
channels,
float_error_count,
float_deviation_max);

printf("\n");
}
}
return 0;
}

+ 2
- 1
example-clients/wscript View File

@@ -28,7 +28,8 @@ example_programs = {
'jack_net_master' : 'netmaster.c', 'jack_net_master' : 'netmaster.c',
'jack_latent_client' : 'latent_client.c', 'jack_latent_client' : 'latent_client.c',
'jack_midi_dump' : 'midi_dump.c', 'jack_midi_dump' : 'midi_dump.c',
'jack_midi_latency_test' : 'midi_latency_test.c'
'jack_midi_latency_test' : 'midi_latency_test.c',
'jack_simdtests' : 'simdtests.cpp'
} }


example_libs = { example_libs = {


Loading…
Cancel
Save