jack2 codebase
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

395 lines
11KB

  1. /*
  2. * simdtests.c -- test accuracy and performance of simd optimizations
  3. *
  4. * Copyright (C) 2017 Andreas Mueller.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19. */
  20. /* We must include all headers memops.c includes to avoid trouble with
  21. * our namespace game below.
  22. */
  23. #include <stdio.h>
  24. #include <string.h>
  25. #include <math.h>
  26. #include <memory.h>
  27. #include <stdlib.h>
  28. #include <stdint.h>
  29. #include <limits.h>
  30. #ifdef __linux__
  31. #include <endian.h>
  32. #endif
  33. #include "memops.h"
  34. #if defined (__SSE2__) && !defined (__sun__)
  35. #include <emmintrin.h>
  36. #ifdef __SSE4_1__
  37. #include <smmintrin.h>
  38. #endif
  39. #endif
  40. #if defined (__ARM_NEON__) || defined (__ARM_NEON)
  41. #include <arm_neon.h>
  42. #endif
  43. // our additional headers
  44. #include <time.h>
  45. /* Dirty: include mempos.c twice the second time with SIMD disabled
  46. * so we can compare aceelerated non accelerated
  47. */
  48. namespace accelerated {
  49. #include "../common/memops.c"
  50. }
  51. namespace origerated {
  52. #ifdef __SSE2__
  53. #undef __SSE2__
  54. #endif
  55. #ifdef __ARM_NEON__
  56. #undef __ARM_NEON__
  57. #endif
  58. #ifdef __ARM_NEON
  59. #undef __ARM_NEON
  60. #endif
  61. #include "../common/memops.c"
  62. }
  63. // define conversion function types
  64. typedef void (*t_jack_to_integer)(
  65. char *dst,
  66. jack_default_audio_sample_t *src,
  67. unsigned long nsamples,
  68. unsigned long dst_skip,
  69. dither_state_t *state);
  70. typedef void (*t_integer_to_jack)(
  71. jack_default_audio_sample_t *dst,
  72. char *src,
  73. unsigned long nsamples,
  74. unsigned long src_skip);
  75. // define/setup test case data
  76. typedef struct test_case_data {
  77. uint32_t frame_size;
  78. uint32_t sample_size;
  79. bool reverse;
  80. t_jack_to_integer jack_to_integer_accel;
  81. t_jack_to_integer jack_to_integer_orig;
  82. t_integer_to_jack integer_to_jack_accel;
  83. t_integer_to_jack integer_to_jack_orig;
  84. dither_state_t *ditherstate;
  85. const char *name;
  86. } test_case_data_t;
  87. test_case_data_t test_cases[] = {
  88. {
  89. 4,
  90. 3,
  91. true,
  92. accelerated::sample_move_d32u24_sSs,
  93. origerated::sample_move_d32u24_sSs,
  94. accelerated::sample_move_dS_s32u24s,
  95. origerated::sample_move_dS_s32u24s,
  96. NULL,
  97. "32u24s" },
  98. {
  99. 4,
  100. 3,
  101. false,
  102. accelerated::sample_move_d32u24_sS,
  103. origerated::sample_move_d32u24_sS,
  104. accelerated::sample_move_dS_s32u24,
  105. origerated::sample_move_dS_s32u24,
  106. NULL,
  107. "32u24" },
  108. {
  109. 3,
  110. 3,
  111. true,
  112. accelerated::sample_move_d24_sSs,
  113. origerated::sample_move_d24_sSs,
  114. accelerated::sample_move_dS_s24s,
  115. origerated::sample_move_dS_s24s,
  116. NULL,
  117. "24s" },
  118. {
  119. 3,
  120. 3,
  121. false,
  122. accelerated::sample_move_d24_sS,
  123. origerated::sample_move_d24_sS,
  124. accelerated::sample_move_dS_s24,
  125. origerated::sample_move_dS_s24,
  126. NULL,
  127. "24" },
  128. {
  129. 2,
  130. 2,
  131. true,
  132. accelerated::sample_move_d16_sSs,
  133. origerated::sample_move_d16_sSs,
  134. accelerated::sample_move_dS_s16s,
  135. origerated::sample_move_dS_s16s,
  136. NULL,
  137. "16s" },
  138. {
  139. 2,
  140. 2,
  141. false,
  142. accelerated::sample_move_d16_sS,
  143. origerated::sample_move_d16_sS,
  144. accelerated::sample_move_dS_s16,
  145. origerated::sample_move_dS_s16,
  146. NULL,
  147. "16" },
  148. };
  149. // we need to repeat for better accuracy at time measurement
  150. const uint32_t retry_per_case = 1000;
  151. // setup test buffers
  152. #define TESTBUFF_SIZE 1024
  153. jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE];
  154. // integer buffers: max 4 bytes per value / * 2 for stereo
  155. char integerbuffer_accel[TESTBUFF_SIZE*4*2];
  156. char integerbuffer_orig[TESTBUFF_SIZE*4*2];
  157. // float buffers
  158. jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE];
  159. jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE];
  /* Read one sample out of a raw byte buffer as an unsigned value
   * (unsigned values keep the later comparison simple).
   *
   * buff        - buffer holding the converted integer data
   * offset      - byte offset of the frame inside buff
   * frame_size  - number of bytes the frame occupies
   * sample_size - number of bytes that make up the value
   * big_endian  - true:  the value is the first sample_size bytes of the
   *                      frame, most significant byte first
   *               false: the value is the last sample_size bytes of the
   *                      frame, least significant byte first
   *
   * Returns the assembled value.
   */
  uint32_t extract_integer(
      char* buff,
      uint32_t offset,
      uint32_t frame_size,
      uint32_t sample_size,
      bool big_endian)
  {
      const unsigned char* cursor;
      int step;

      // Always start at the least significant byte and walk towards the
      // most significant one; only the walking direction differs.
      if (big_endian) {
          cursor = (unsigned char*)buff + offset + sample_size - 1;
          step = -1;
      } else {
          cursor = (unsigned char*)buff + offset + frame_size - sample_size;
          step = 1;
      }

      uint32_t value = 0;
      uint32_t weight = 1;
      for (uint32_t remaining = sample_size; remaining > 0; --remaining) {
          value += *cursor * weight;
          weight <<= 8;
          cursor += step;
      }
      return value;
  }
  187. int main(int argc, char *argv[])
  188. {
  189. // parse_arguments(argc, argv);
  190. uint32_t maxerr_displayed = 10;
  191. // fill jackbuffer
  192. for(int i=0; i<TESTBUFF_SIZE; i++) {
  193. // ramp
  194. jack_default_audio_sample_t value =
  195. ((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2);
  196. // force clipping
  197. value *= 1.02;
  198. jackbuffer_source[i] = value;
  199. }
  200. for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) {
  201. // test mono/stereo
  202. for(uint32_t channels=1; channels<=2; channels++) {
  203. //////////////////////////////////////////////////////////////////////////////
  204. // jackfloat -> integer
  205. // clean target buffers
  206. memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel));
  207. memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig));
  208. // accel
  209. clock_t time_to_integer_accel = clock();
  210. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  211. {
  212. test_cases[testcase].jack_to_integer_accel(
  213. integerbuffer_accel,
  214. jackbuffer_source,
  215. TESTBUFF_SIZE,
  216. test_cases[testcase].frame_size*channels,
  217. test_cases[testcase].ditherstate);
  218. }
  219. float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC;
  220. // orig
  221. clock_t time_to_integer_orig = clock();
  222. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  223. {
  224. test_cases[testcase].jack_to_integer_orig(
  225. integerbuffer_orig,
  226. jackbuffer_source,
  227. TESTBUFF_SIZE,
  228. test_cases[testcase].frame_size*channels,
  229. test_cases[testcase].ditherstate);
  230. }
  231. float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC;
  232. // output performance results
  233. printf(
  234. "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
  235. test_cases[testcase].name,
  236. channels,
  237. timediff_to_integer_orig,
  238. timediff_to_integer_accel,
  239. (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0);
  240. uint32_t int_deviation_max = 0;
  241. uint32_t int_error_count = 0;
  242. // output error (avoid spam -> limit error lines per test case)
  243. for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
  244. uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels;
  245. // compare both results
  246. uint32_t intval_accel=extract_integer(
  247. integerbuffer_accel,
  248. sample_offset,
  249. test_cases[testcase].frame_size,
  250. test_cases[testcase].sample_size,
  251. #if __BYTE_ORDER == __BIG_ENDIAN
  252. !test_cases[testcase].reverse);
  253. #else
  254. test_cases[testcase].reverse);
  255. #endif
  256. uint32_t intval_orig=extract_integer(
  257. integerbuffer_orig,
  258. sample_offset,
  259. test_cases[testcase].frame_size,
  260. test_cases[testcase].sample_size,
  261. #if __BYTE_ORDER == __BIG_ENDIAN
  262. !test_cases[testcase].reverse);
  263. #else
  264. test_cases[testcase].reverse);
  265. #endif
  266. if(intval_accel != intval_orig) {
  267. if(int_error_count<maxerr_displayed) {
  268. printf("Value error sample %u:", sample);
  269. printf(" Orig 0x");
  270. char formatstr[10];
  271. sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2);
  272. printf(formatstr, intval_orig);
  273. printf(" Accel 0x");
  274. printf(formatstr, intval_accel);
  275. printf("\n");
  276. }
  277. int_error_count++;
  278. uint32_t int_deviation;
  279. if(intval_accel > intval_orig)
  280. int_deviation = intval_accel-intval_orig;
  281. else
  282. int_deviation = intval_orig-intval_accel;
  283. if(int_deviation > int_deviation_max)
  284. int_deviation_max = int_deviation;
  285. }
  286. }
  287. printf(
  288. "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
  289. test_cases[testcase].name,
  290. channels,
  291. int_error_count,
  292. int_deviation_max);
  293. //////////////////////////////////////////////////////////////////////////////
  294. // integer -> jackfloat
  295. // clean target buffers
  296. memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel));
  297. memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig));
  298. // accel
  299. clock_t time_to_float_accel = clock();
  300. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  301. {
  302. test_cases[testcase].integer_to_jack_accel(
  303. jackfloatbuffer_accel,
  304. integerbuffer_orig,
  305. TESTBUFF_SIZE,
  306. test_cases[testcase].frame_size*channels);
  307. }
  308. float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC;
  309. // orig
  310. clock_t time_to_float_orig = clock();
  311. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  312. {
  313. test_cases[testcase].integer_to_jack_orig(
  314. jackfloatbuffer_orig,
  315. integerbuffer_orig,
  316. TESTBUFF_SIZE,
  317. test_cases[testcase].frame_size*channels);
  318. }
  319. float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC;
  320. // output performance results
  321. printf(
  322. "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
  323. test_cases[testcase].name,
  324. channels,
  325. timediff_to_float_orig,
  326. timediff_to_float_accel,
  327. (timediff_to_float_orig/timediff_to_float_accel-1)*100.0);
  328. jack_default_audio_sample_t float_deviation_max = 0.0;
  329. uint32_t float_error_count = 0;
  330. // output error (avoid spam -> limit error lines per test case)
  331. for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
  332. // For easier estimation/readability we scale floats back to integer
  333. jack_default_audio_sample_t sample_scaling;
  334. switch(test_cases[testcase].sample_size) {
  335. case 2:
  336. sample_scaling = SAMPLE_16BIT_SCALING;
  337. break;
  338. default:
  339. sample_scaling = SAMPLE_24BIT_SCALING;
  340. break;
  341. }
  342. jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling;
  343. jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling;
  344. // compare both results
  345. jack_default_audio_sample_t float_deviation;
  346. if(floatval_accel > floatval_orig)
  347. float_deviation = floatval_accel-floatval_orig;
  348. else
  349. float_deviation = floatval_orig-floatval_accel;
  350. if(float_deviation > float_deviation_max)
  351. float_deviation_max = float_deviation;
  352. // deviation > half bit => error
  353. if(float_deviation > 0.5) {
  354. if(float_error_count<maxerr_displayed) {
  355. printf("Value error sample %u:", sample);
  356. printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel);
  357. }
  358. float_error_count++;
  359. }
  360. }
  361. printf(
  362. "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
  363. test_cases[testcase].name,
  364. channels,
  365. float_error_count,
  366. float_deviation_max);
  367. printf("\n");
  368. }
  369. }
  370. return 0;
  371. }