jack2 codebase
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

416 lines
11KB

  1. /*
  2. * simdtests.c -- test accuracy and performance of simd optimizations
  3. *
  4. * Copyright (C) 2017 Andreas Mueller.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19. */
  20. /* We must include all headers memops.c includes to avoid trouble with
  21. * our namespace game below.
  22. */
  23. #include <stdio.h>
  24. #include <string.h>
  25. #include <math.h>
  26. #include <memory.h>
  27. #include <stdlib.h>
  28. #include <stdint.h>
  29. #include <limits.h>
  30. #ifdef __linux__
  31. #include <endian.h>
  32. #endif
  33. #include "memops.h"
  34. #if defined (__SSE2__) && !defined (__sun__)
  35. #include <emmintrin.h>
  36. #ifdef __SSE4_1__
  37. #include <smmintrin.h>
  38. #endif
  39. #endif
  40. #if defined (__ARM_NEON__) || defined (__ARM_NEON)
  41. #include <arm_neon.h>
  42. #endif
  43. // our additional headers
  44. #include <time.h>
  45. /* Dirty: include memops.c twice, the second time with SIMD disabled,
  46. * so we can compare accelerated and non-accelerated results
  47. */
  48. namespace accelerated {
  49. #include "../common/memops.c"
  50. }
  51. namespace origerated {
  52. #ifdef __SSE2__
  53. #undef __SSE2__
  54. #endif
  55. #ifdef __ARM_NEON__
  56. #undef __ARM_NEON__
  57. #endif
  58. #ifdef __ARM_NEON
  59. #undef __ARM_NEON
  60. #endif
  61. #include "../common/memops.c"
  62. }
  63. // define conversion function types
  64. typedef void (*t_jack_to_integer)(
  65. char *dst,
  66. jack_default_audio_sample_t *src,
  67. unsigned long nsamples,
  68. unsigned long dst_skip,
  69. dither_state_t *state);
  70. typedef void (*t_integer_to_jack)(
  71. jack_default_audio_sample_t *dst,
  72. char *src,
  73. unsigned long nsamples,
  74. unsigned long src_skip);
  75. // define/setup test case data
  76. typedef struct test_case_data {
  77. uint32_t frame_size;
  78. uint32_t sample_size;
  79. bool reverse;
  80. t_jack_to_integer jack_to_integer_accel;
  81. t_jack_to_integer jack_to_integer_orig;
  82. t_integer_to_jack integer_to_jack_accel;
  83. t_integer_to_jack integer_to_jack_orig;
  84. dither_state_t *ditherstate;
  85. const char *name;
  86. } test_case_data_t;
  87. test_case_data_t test_cases[] = {
  88. {
  89. 4,
  90. 3,
  91. true,
  92. accelerated::sample_move_d32u24_sSs,
  93. origerated::sample_move_d32u24_sSs,
  94. accelerated::sample_move_dS_s32u24s,
  95. origerated::sample_move_dS_s32u24s,
  96. NULL,
  97. "32u24s" },
  98. {
  99. 4,
  100. 3,
  101. false,
  102. accelerated::sample_move_d32u24_sS,
  103. origerated::sample_move_d32u24_sS,
  104. accelerated::sample_move_dS_s32u24,
  105. origerated::sample_move_dS_s32u24,
  106. NULL,
  107. "32u24" },
  108. {
  109. 4,
  110. 3,
  111. true,
  112. accelerated::sample_move_d32l24_sSs,
  113. origerated::sample_move_d32l24_sSs,
  114. accelerated::sample_move_dS_s32l24s,
  115. origerated::sample_move_dS_s32l24s,
  116. NULL,
  117. "32l24s" },
  118. {
  119. 4,
  120. 3,
  121. false,
  122. accelerated::sample_move_d32l24_sS,
  123. origerated::sample_move_d32l24_sS,
  124. accelerated::sample_move_dS_s32l24,
  125. origerated::sample_move_dS_s32l24,
  126. NULL,
  127. "32l24" },
  128. {
  129. 3,
  130. 3,
  131. true,
  132. accelerated::sample_move_d24_sSs,
  133. origerated::sample_move_d24_sSs,
  134. accelerated::sample_move_dS_s24s,
  135. origerated::sample_move_dS_s24s,
  136. NULL,
  137. "24s" },
  138. {
  139. 3,
  140. 3,
  141. false,
  142. accelerated::sample_move_d24_sS,
  143. origerated::sample_move_d24_sS,
  144. accelerated::sample_move_dS_s24,
  145. origerated::sample_move_dS_s24,
  146. NULL,
  147. "24" },
  148. {
  149. 2,
  150. 2,
  151. true,
  152. accelerated::sample_move_d16_sSs,
  153. origerated::sample_move_d16_sSs,
  154. accelerated::sample_move_dS_s16s,
  155. origerated::sample_move_dS_s16s,
  156. NULL,
  157. "16s" },
  158. {
  159. 2,
  160. 2,
  161. false,
  162. accelerated::sample_move_d16_sS,
  163. origerated::sample_move_d16_sS,
  164. accelerated::sample_move_dS_s16,
  165. origerated::sample_move_dS_s16,
  166. NULL,
  167. "16" },
  168. };
  169. // we need to repeat for better accuracy at time measurement
  170. const uint32_t retry_per_case = 1000;
  171. // setup test buffers
  172. #define TESTBUFF_SIZE 1024
  173. jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE];
  174. // integer buffers: max 4 bytes per value / * 2 for stereo
  175. char integerbuffer_accel[TESTBUFF_SIZE*4*2];
  176. char integerbuffer_orig[TESTBUFF_SIZE*4*2];
  177. // float buffers
  178. jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE];
  179. jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE];
  // Read one sample out of an integer buffer as an unsigned value (comparing
  // unsigned makes life easier).
  //
  //   buff        - buffer holding the converted samples
  //   offset      - byte offset of the frame to read
  //   frame_size  - bytes the frame occupies
  //   sample_size - number of bytes to read
  //   big_endian  - true:  the first sample_size bytes of the frame are read,
  //                        most significant byte first
  //                 false: the last sample_size bytes of the frame are read,
  //                        least significant byte first
  //
  // Returns the bytes combined into one value. If sample_size exceeds 4 only
  // the low 32 bits of the result are kept.
  //
  // Only indices inside [offset, offset + frame_size) are touched and no
  // pointer outside that range is ever formed, so reading the very first
  // frame of a buffer (offset == 0) is well defined.
  uint32_t extract_integer(
      char* buff,
      uint32_t offset,
      uint32_t frame_size,
      uint32_t sample_size,
      bool big_endian)
  {
      const unsigned char *bytes = (const unsigned char *)buff + offset;
      uint32_t retval = 0;

      if (big_endian) {
          // walk from the most significant byte to the least significant one
          for (uint32_t i = 0; i < sample_size; i++) {
              retval = (retval << 8) | bytes[i];
          }
      }
      else {
          // the sample sits at the end of the frame; its last byte is the
          // most significant one, so walk backwards by index
          bytes = bytes + frame_size - sample_size;
          for (uint32_t i = sample_size; i > 0; i--) {
              retval = (retval << 8) | bytes[i - 1];
          }
      }
      return retval;
  }
  207. int main(int argc, char *argv[])
  208. {
  209. // parse_arguments(argc, argv);
  210. uint32_t maxerr_displayed = 10;
  211. // fill jackbuffer
  212. for(int i=0; i<TESTBUFF_SIZE; i++) {
  213. // ramp
  214. jack_default_audio_sample_t value =
  215. ((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2);
  216. // force clipping
  217. value *= 1.02;
  218. jackbuffer_source[i] = value;
  219. }
  220. for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) {
  221. // test mono/stereo
  222. for(uint32_t channels=1; channels<=2; channels++) {
  223. //////////////////////////////////////////////////////////////////////////////
  224. // jackfloat -> integer
  225. // clean target buffers
  226. memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel));
  227. memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig));
  228. // accel
  229. clock_t time_to_integer_accel = clock();
  230. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  231. {
  232. test_cases[testcase].jack_to_integer_accel(
  233. integerbuffer_accel,
  234. jackbuffer_source,
  235. TESTBUFF_SIZE,
  236. test_cases[testcase].frame_size*channels,
  237. test_cases[testcase].ditherstate);
  238. }
  239. float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC;
  240. // orig
  241. clock_t time_to_integer_orig = clock();
  242. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  243. {
  244. test_cases[testcase].jack_to_integer_orig(
  245. integerbuffer_orig,
  246. jackbuffer_source,
  247. TESTBUFF_SIZE,
  248. test_cases[testcase].frame_size*channels,
  249. test_cases[testcase].ditherstate);
  250. }
  251. float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC;
  252. // output performance results
  253. printf(
  254. "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
  255. test_cases[testcase].name,
  256. channels,
  257. timediff_to_integer_orig,
  258. timediff_to_integer_accel,
  259. (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0);
  260. uint32_t int_deviation_max = 0;
  261. uint32_t int_error_count = 0;
  262. // output error (avoid spam -> limit error lines per test case)
  263. for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
  264. uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels;
  265. // compare both results
  266. uint32_t intval_accel=extract_integer(
  267. integerbuffer_accel,
  268. sample_offset,
  269. test_cases[testcase].frame_size,
  270. test_cases[testcase].sample_size,
  271. #if __BYTE_ORDER == __BIG_ENDIAN
  272. !test_cases[testcase].reverse);
  273. #else
  274. test_cases[testcase].reverse);
  275. #endif
  276. uint32_t intval_orig=extract_integer(
  277. integerbuffer_orig,
  278. sample_offset,
  279. test_cases[testcase].frame_size,
  280. test_cases[testcase].sample_size,
  281. #if __BYTE_ORDER == __BIG_ENDIAN
  282. !test_cases[testcase].reverse);
  283. #else
  284. test_cases[testcase].reverse);
  285. #endif
  286. // allow a deviation of 1
  287. if(intval_accel>intval_orig+1 || intval_orig>intval_accel+1) {
  288. if(int_error_count<maxerr_displayed) {
  289. printf("Value error sample %u:", sample);
  290. printf(" Orig 0x");
  291. char formatstr[10];
  292. sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2);
  293. printf(formatstr, intval_orig);
  294. printf(" Accel 0x");
  295. printf(formatstr, intval_accel);
  296. printf("\n");
  297. }
  298. int_error_count++;
  299. uint32_t int_deviation;
  300. if(intval_accel > intval_orig)
  301. int_deviation = intval_accel-intval_orig;
  302. else
  303. int_deviation = intval_orig-intval_accel;
  304. if(int_deviation > int_deviation_max)
  305. int_deviation_max = int_deviation;
  306. }
  307. }
  308. printf(
  309. "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
  310. test_cases[testcase].name,
  311. channels,
  312. int_error_count,
  313. int_deviation_max);
  314. //////////////////////////////////////////////////////////////////////////////
  315. // integer -> jackfloat
  316. // clean target buffers
  317. memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel));
  318. memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig));
  319. // accel
  320. clock_t time_to_float_accel = clock();
  321. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  322. {
  323. test_cases[testcase].integer_to_jack_accel(
  324. jackfloatbuffer_accel,
  325. integerbuffer_orig,
  326. TESTBUFF_SIZE,
  327. test_cases[testcase].frame_size*channels);
  328. }
  329. float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC;
  330. // orig
  331. clock_t time_to_float_orig = clock();
  332. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  333. {
  334. test_cases[testcase].integer_to_jack_orig(
  335. jackfloatbuffer_orig,
  336. integerbuffer_orig,
  337. TESTBUFF_SIZE,
  338. test_cases[testcase].frame_size*channels);
  339. }
  340. float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC;
  341. // output performance results
  342. printf(
  343. "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
  344. test_cases[testcase].name,
  345. channels,
  346. timediff_to_float_orig,
  347. timediff_to_float_accel,
  348. (timediff_to_float_orig/timediff_to_float_accel-1)*100.0);
  349. jack_default_audio_sample_t float_deviation_max = 0.0;
  350. uint32_t float_error_count = 0;
  351. // output error (avoid spam -> limit error lines per test case)
  352. for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
  353. // For easier estimation/readability we scale floats back to integer
  354. jack_default_audio_sample_t sample_scaling;
  355. switch(test_cases[testcase].sample_size) {
  356. case 2:
  357. sample_scaling = SAMPLE_16BIT_SCALING;
  358. break;
  359. default:
  360. sample_scaling = SAMPLE_24BIT_SCALING;
  361. break;
  362. }
  363. jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling;
  364. jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling;
  365. // compare both results
  366. jack_default_audio_sample_t float_deviation;
  367. if(floatval_accel > floatval_orig)
  368. float_deviation = floatval_accel-floatval_orig;
  369. else
  370. float_deviation = floatval_orig-floatval_accel;
  371. if(float_deviation > float_deviation_max)
  372. float_deviation_max = float_deviation;
  373. // deviation > half bit => error
  374. if(float_deviation > 0.5) {
  375. if(float_error_count<maxerr_displayed) {
  376. printf("Value error sample %u:", sample);
  377. printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel);
  378. }
  379. float_error_count++;
  380. }
  381. }
  382. printf(
  383. "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
  384. test_cases[testcase].name,
  385. channels,
  386. float_error_count,
  387. float_deviation_max);
  388. printf("\n");
  389. }
  390. }
  391. return 0;
  392. }