| 
							- /*
 -  *  simdtests.c -- test accuracy and performance of simd optimizations
 -  *
 -  *  Copyright (C) 2017 Andreas Mueller.
 -  *
 -  *  This program is free software; you can redistribute it and/or modify
 -  *  it under the terms of the GNU General Public License as published by
 -  *  the Free Software Foundation; either version 2 of the License, or
 -  *  (at your option) any later version.
 -  *
 -  *  This program is distributed in the hope that it will be useful,
 -  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 -  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 -  *  GNU General Public License for more details.
 -  *
 -  *  You should have received a copy of the GNU General Public License
 -  *  along with this program; if not, write to the Free Software
 -  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 -  */
 - 
 - /* We must include all headers memops.c includes to avoid trouble with
 -  * our namespace game below.
 -  */
 - #include <stdio.h>
 - #include <string.h>
 - #include <math.h>
 - #include <memory.h>
 - #include <stdlib.h>
 - #include <stdint.h>
 - #include <limits.h>
 - #ifdef __linux__
 - #include <endian.h>
 - #endif
 - #include "memops.h"
 - 
 - #if defined (__SSE2__) && !defined (__sun__)
 - #include <emmintrin.h>
 - #ifdef __SSE4_1__
 - #include <smmintrin.h>
 - #endif
 - #endif
 - 
 - #if defined (__ARM_NEON__) || defined (__ARM_NEON)
 - #include <arm_neon.h>
 - #endif
 - 
 - // our additional headers
 - #include <time.h>
 - 
 - /* Dirty: include memops.c twice, the second time with SIMD disabled,
 -  * so we can compare accelerated and non-accelerated results.
 -  */
/* First copy of memops.c, placed in namespace "accelerated". It is included
 * before any of the SIMD feature macros are undefined further down, so it is
 * compiled with whatever SIMD macros are set at this point in the file.
 */
namespace accelerated {
#include "../common/memops.c"
}
 - 
 - namespace origerated {
 - #ifdef __SSE2__
 - #undef __SSE2__
 - #endif
 - 
 - #ifdef __ARM_NEON__
 - #undef __ARM_NEON__
 - #endif
 - 
 - #ifdef __ARM_NEON
 - #undef __ARM_NEON
 - #endif
 - 
 - #include "../common/memops.c"
 - }
 - 
// define conversion function types

/* Signature of the float -> integer conversion routines stored in the test
 * table.
 *
 *   dst      - byte buffer receiving the integer samples
 *   src      - JACK float samples to convert
 *   nsamples - number of samples to convert
 *   dst_skip - main() passes frame_size*channels here and later reads the
 *              results back at that same byte distance per sample
 *   state    - dither state; every entry of the test table passes NULL
 */
typedef void (*t_jack_to_integer)(
	char *dst,
	jack_default_audio_sample_t *src,
	unsigned long nsamples,
	unsigned long dst_skip,
	dither_state_t *state);
 - 
/* Signature of the integer -> float conversion routines stored in the test
 * table.
 *
 *   dst      - buffer receiving the JACK float samples
 *   src      - byte buffer holding the integer samples
 *   nsamples - number of samples to convert
 *   src_skip - main() passes frame_size*channels here, matching the byte
 *              distance used when the integer buffer was filled
 */
typedef void (*t_integer_to_jack)(
	jack_default_audio_sample_t *dst,
	char *src,
	unsigned long nsamples,
	unsigned long src_skip);
 - 
// define/setup test case data

/* One row of the test table: a float->integer routine and an integer->float
 * routine, each present as an accelerated:: and an origerated:: variant,
 * plus the layout information main() needs to read the results back.
 */
typedef struct test_case_data {
	// bytes per sample and channel in the integer buffer; main() multiplies
	// it by the channel count to get the skip value and the sample offset
	uint32_t frame_size;
	// number of bytes extract_integer() assembles per sample; also selects
	// the 16 bit vs. 24 bit scaling and the hex print width in main()
	uint32_t sample_size;
	// handed to extract_integer() as big_endian (negated when
	// __BYTE_ORDER == __BIG_ENDIAN)
	bool reverse;
	// float -> integer conversion, accelerated and reference variant
	t_jack_to_integer jack_to_integer_accel;
	t_jack_to_integer jack_to_integer_orig;
	// integer -> float conversion, accelerated and reference variant
	t_integer_to_jack integer_to_jack_accel;
	t_integer_to_jack integer_to_jack_orig;
	// forwarded as the state argument of the float -> integer routines
	dither_state_t *ditherstate;
	// label printed in the result lines
	const char *name;
} test_case_data_t;
 - 
 - test_case_data_t test_cases[] = {
 - 	{
 - 		4,
 - 		3,
 - 		true,
 - 		accelerated::sample_move_d32u24_sSs,
 - 		origerated::sample_move_d32u24_sSs,
 - 		accelerated::sample_move_dS_s32u24s,
 - 		origerated::sample_move_dS_s32u24s,
 - 		NULL,
 - 		"32u24s" },
 - 	{
 - 		4,
 - 		3,
 - 		false,
 - 		accelerated::sample_move_d32u24_sS,
 - 		origerated::sample_move_d32u24_sS,
 - 		accelerated::sample_move_dS_s32u24,
 - 		origerated::sample_move_dS_s32u24,
 - 		NULL,
 - 		"32u24" },
 - 	{
 - 		4,
 - 		3,
 - 		true,
 - 		accelerated::sample_move_d32l24_sSs,
 - 		origerated::sample_move_d32l24_sSs,
 - 		accelerated::sample_move_dS_s32l24s,
 - 		origerated::sample_move_dS_s32l24s,
 - 		NULL,
 - 		"32l24s" },
 - 	{
 - 		4,
 - 		3,
 - 		false,
 - 		accelerated::sample_move_d32l24_sS,
 - 		origerated::sample_move_d32l24_sS,
 - 		accelerated::sample_move_dS_s32l24,
 - 		origerated::sample_move_dS_s32l24,
 - 		NULL,
 - 		"32l24" },
 - 	{
 - 		3,
 - 		3,
 - 		true,
 - 		accelerated::sample_move_d24_sSs,
 - 		origerated::sample_move_d24_sSs,
 - 		accelerated::sample_move_dS_s24s,
 - 		origerated::sample_move_dS_s24s,
 - 		NULL,
 - 		"24s" },
 - 	{
 - 		3,
 - 		3,
 - 		false,
 - 		accelerated::sample_move_d24_sS,
 - 		origerated::sample_move_d24_sS,
 - 		accelerated::sample_move_dS_s24,
 - 		origerated::sample_move_dS_s24,
 - 		NULL,
 - 		"24" },
 - 	{
 - 		2,
 - 		2,
 - 		true,
 - 		accelerated::sample_move_d16_sSs,
 - 		origerated::sample_move_d16_sSs,
 - 		accelerated::sample_move_dS_s16s,
 - 		origerated::sample_move_dS_s16s,
 - 		NULL,
 - 		"16s" },
 - 	{
 - 		2,
 - 		2,
 - 		false,
 - 		accelerated::sample_move_d16_sS,
 - 		origerated::sample_move_d16_sS,
 - 		accelerated::sample_move_dS_s16,
 - 		origerated::sample_move_dS_s16,
 - 		NULL,
 - 		"16" },
 - };
 - 
// Every conversion is run this many times inside one timing measurement,
// because a single run is too short to time accurately.
const uint32_t retry_per_case = 1000;

// Test buffers; TESTBUFF_SIZE is the number of samples converted per run.
#define TESTBUFF_SIZE 1024
// float source data, filled once at the start of main()
jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE];
// integer buffers: at most 4 bytes per sample, times 2 for stereo
char integerbuffer_accel[TESTBUFF_SIZE*4*2];
char integerbuffer_orig[TESTBUFF_SIZE*4*2];
// float result buffers of the integer -> float conversions
jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE];
jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE];
 - 
 - // comparing unsigned makes life easier
 - uint32_t extract_integer(
 - 	char* buff,
 - 	uint32_t offset,
 - 	uint32_t frame_size,
 - 	uint32_t sample_size,
 - 	bool big_endian)
 - {
 - 	uint32_t retval = 0;
 - 	unsigned char* curr;
 - 	uint32_t mult = 1;
 - 	if(big_endian) {
 - 		curr = (unsigned char*)buff + offset + sample_size-1;
 - 		for(uint32_t i=0; i<sample_size; i++) {
 - 			retval += *(curr--) * mult;
 - 			mult*=256;
 - 		}
 - 	}
 - 	else {
 - 		curr = (unsigned char*)buff + offset + frame_size-sample_size;
 - 		for(uint32_t i=0; i<sample_size; i++) {
 - 			retval += *(curr++) * mult;
 - 			mult*=256;
 - 		}
 - 	}
 - 	return retval;
 - }
 - 
 - int main(int argc, char *argv[])
 - {
 - //	parse_arguments(argc, argv);
 - 	uint32_t maxerr_displayed = 10;
 - 
 - 	// fill jackbuffer
 - 	for(int i=0; i<TESTBUFF_SIZE; i++) {
 - 		// ramp
 - 		jack_default_audio_sample_t value =
 - 			((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2);
 - 		// force clipping
 - 		value *= 1.02;
 - 		jackbuffer_source[i] = value;
 - 	}
 - 
 - 	for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) {
 - 		// test mono/stereo
 - 		for(uint32_t channels=1; channels<=2; channels++) {
 - 			//////////////////////////////////////////////////////////////////////////////
 - 			// jackfloat -> integer
 - 
 - 			// clean target buffers
 - 			memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel));
 - 			memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig));
 - 			// accel
 - 			clock_t time_to_integer_accel = clock();
 - 			for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
 - 			{
 - 				test_cases[testcase].jack_to_integer_accel(
 - 					integerbuffer_accel,
 - 					jackbuffer_source,
 - 					TESTBUFF_SIZE,
 - 					test_cases[testcase].frame_size*channels,
 - 					test_cases[testcase].ditherstate);
 - 			}
 - 			float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC;
 - 			// orig
 - 			clock_t time_to_integer_orig = clock();
 - 			for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
 - 			{
 - 				test_cases[testcase].jack_to_integer_orig(
 - 					integerbuffer_orig,
 - 					jackbuffer_source,
 - 					TESTBUFF_SIZE,
 - 					test_cases[testcase].frame_size*channels,
 - 					test_cases[testcase].ditherstate);
 - 			}
 - 			float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC;
 - 			// output performance results
 - 			printf(
 - 				"JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
 - 				test_cases[testcase].name,
 - 				channels,
 - 				timediff_to_integer_orig,
 - 				timediff_to_integer_accel,
 - 				(timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0);
 - 			uint32_t int_deviation_max = 0;
 - 			uint32_t int_error_count = 0;
 - 			// output error (avoid spam -> limit error lines per test case)
 - 			for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
 - 				uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels;
 - 				// compare both results
 - 				uint32_t intval_accel=extract_integer(
 - 					integerbuffer_accel,
 - 					sample_offset,
 - 					test_cases[testcase].frame_size,
 - 					test_cases[testcase].sample_size,
 - #if __BYTE_ORDER == __BIG_ENDIAN
 - 					!test_cases[testcase].reverse);
 - #else
 - 					test_cases[testcase].reverse);
 - #endif
 - 				uint32_t intval_orig=extract_integer(
 - 					integerbuffer_orig,
 - 					sample_offset,
 - 					test_cases[testcase].frame_size,
 - 					test_cases[testcase].sample_size,
 - #if __BYTE_ORDER == __BIG_ENDIAN
 - 					!test_cases[testcase].reverse);
 - #else
 - 					test_cases[testcase].reverse);
 - #endif
 - 				// allow a deviation of 1
 - 				if(intval_accel>intval_orig+1 || intval_orig>intval_accel+1) {
 - 					if(int_error_count<maxerr_displayed) {
 - 						printf("Value error sample %u:", sample);
 - 						printf(" Orig 0x");
 - 						char formatstr[10];
 - 						sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2);
 - 						printf(formatstr, intval_orig);
 - 						printf(" Accel 0x");
 - 						printf(formatstr, intval_accel);
 - 						printf("\n");
 - 					}
 - 					int_error_count++;
 - 					uint32_t int_deviation;
 - 					if(intval_accel > intval_orig)
 - 						int_deviation = intval_accel-intval_orig;
 - 					else
 - 						int_deviation = intval_orig-intval_accel;
 - 					if(int_deviation > int_deviation_max)
 - 						int_deviation_max = int_deviation;
 - 				}
 - 			}
 - 			printf(
 - 				"JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
 - 				test_cases[testcase].name,
 - 				channels,
 - 				int_error_count,
 - 				int_deviation_max);
 - 
 - 			//////////////////////////////////////////////////////////////////////////////
 - 			// integer -> jackfloat
 - 
 - 			// clean target buffers
 - 			memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel));
 - 			memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig));
 - 			// accel
 - 			clock_t time_to_float_accel = clock();
 - 			for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
 - 			{
 - 				test_cases[testcase].integer_to_jack_accel(
 - 					jackfloatbuffer_accel,
 - 					integerbuffer_orig,
 - 					TESTBUFF_SIZE,
 - 					test_cases[testcase].frame_size*channels);
 - 			}
 - 			float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC;
 - 			// orig
 - 			clock_t time_to_float_orig = clock();
 - 			for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
 - 			{
 - 				test_cases[testcase].integer_to_jack_orig(
 - 					jackfloatbuffer_orig,
 - 					integerbuffer_orig,
 - 					TESTBUFF_SIZE,
 - 					test_cases[testcase].frame_size*channels);
 - 			}
 - 			float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC;
 - 			// output performance results
 - 			printf(
 - 				"Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
 - 				test_cases[testcase].name,
 - 				channels,
 - 				timediff_to_float_orig,
 - 				timediff_to_float_accel,
 - 				(timediff_to_float_orig/timediff_to_float_accel-1)*100.0);
 - 			jack_default_audio_sample_t float_deviation_max = 0.0;
 - 			uint32_t float_error_count = 0;
 - 			// output error (avoid spam -> limit error lines per test case)
 - 			for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
 - 				// For easier estimation/readability we scale floats back to integer
 - 				jack_default_audio_sample_t sample_scaling;
 - 				switch(test_cases[testcase].sample_size) {
 - 					case 2:
 - 						sample_scaling = SAMPLE_16BIT_SCALING;
 - 						break;
 - 					default:
 - 						sample_scaling = SAMPLE_24BIT_SCALING;
 - 						break;
 - 				}
 - 				jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling;
 - 				jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling;
 - 				// compare both results
 - 				jack_default_audio_sample_t float_deviation;
 - 				if(floatval_accel > floatval_orig)
 - 					float_deviation = floatval_accel-floatval_orig;
 - 				else
 - 					float_deviation = floatval_orig-floatval_accel;
 - 				if(float_deviation > float_deviation_max)
 - 					float_deviation_max = float_deviation;
 - 				// deviation > half bit => error
 - 				if(float_deviation > 0.5) {
 - 					if(float_error_count<maxerr_displayed) {
 - 						printf("Value error sample %u:", sample);
 - 						printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel);
 - 					}
 - 					float_error_count++;
 - 				}
 - 			}
 - 			printf(
 - 				"Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
 - 				test_cases[testcase].name,
 - 				channels,
 - 				float_error_count,
 - 				float_deviation_max);
 - 
 - 			printf("\n");
 - 		}
 - 	}
 - 	return 0;
 - }
 
 
  |