jack2 codebase
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

391 lines
11KB

  1. /*
  2. * simdtests.c -- test accuracy and performance of simd optimizations
  3. *
  4. * Copyright (C) 2017 Andreas Mueller.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19. */
  20. /* We must include all headers memops.c includes to avoid trouble with
  21. * our namespace game below.
  22. */
  23. #include <stdio.h>
  24. #include <string.h>
  25. #include <math.h>
  26. #include <memory.h>
  27. #include <stdlib.h>
  28. #include <stdint.h>
  29. #include <limits.h>
  30. #ifdef __linux__
  31. #include <endian.h>
  32. #endif
  33. #include "memops.h"
  34. #if defined (__SSE2__) && !defined (__sun__)
  35. #include <emmintrin.h>
  36. #ifdef __SSE4_1__
  37. #include <smmintrin.h>
  38. #endif
  39. #endif
  40. #ifdef __ARM_NEON__
  41. #include <arm_neon.h>
  42. #endif
  43. // our additional headers
  44. #include <time.h>
  45. /* Dirty: include memops.c twice, the second time with SIMD disabled,
  46. * so we can compare accelerated and non-accelerated code
  47. */
  48. namespace accelerated {
  49. #include "../common/memops.c"
  50. }
  51. namespace origerated {
  52. #ifdef __SSE2__
  53. #undef __SSE2__
  54. #endif
  55. #ifdef __ARM_NEON__
  56. #undef __ARM_NEON__
  57. #endif
  58. #include "../common/memops.c"
  59. }
  60. // define conversion function types
  61. typedef void (*t_jack_to_integer)(
  62. char *dst,
  63. jack_default_audio_sample_t *src,
  64. unsigned long nsamples,
  65. unsigned long dst_skip,
  66. dither_state_t *state);
  67. typedef void (*t_integer_to_jack)(
  68. jack_default_audio_sample_t *dst,
  69. char *src,
  70. unsigned long nsamples,
  71. unsigned long src_skip);
  72. // define/setup test case data
  73. typedef struct test_case_data {
  74. uint32_t frame_size;
  75. uint32_t sample_size;
  76. bool reverse;
  77. t_jack_to_integer jack_to_integer_accel;
  78. t_jack_to_integer jack_to_integer_orig;
  79. t_integer_to_jack integer_to_jack_accel;
  80. t_integer_to_jack integer_to_jack_orig;
  81. dither_state_t *ditherstate;
  82. const char *name;
  83. } test_case_data_t;
  84. test_case_data_t test_cases[] = {
  85. {
  86. 4,
  87. 3,
  88. true,
  89. accelerated::sample_move_d32u24_sSs,
  90. origerated::sample_move_d32u24_sSs,
  91. accelerated::sample_move_dS_s32u24s,
  92. origerated::sample_move_dS_s32u24s,
  93. NULL,
  94. "32u24s" },
  95. {
  96. 4,
  97. 3,
  98. false,
  99. accelerated::sample_move_d32u24_sS,
  100. origerated::sample_move_d32u24_sS,
  101. accelerated::sample_move_dS_s32u24,
  102. origerated::sample_move_dS_s32u24,
  103. NULL,
  104. "32u24" },
  105. {
  106. 3,
  107. 3,
  108. true,
  109. accelerated::sample_move_d24_sSs,
  110. origerated::sample_move_d24_sSs,
  111. accelerated::sample_move_dS_s24s,
  112. origerated::sample_move_dS_s24s,
  113. NULL,
  114. "24s" },
  115. {
  116. 3,
  117. 3,
  118. false,
  119. accelerated::sample_move_d24_sS,
  120. origerated::sample_move_d24_sS,
  121. accelerated::sample_move_dS_s24,
  122. origerated::sample_move_dS_s24,
  123. NULL,
  124. "24" },
  125. {
  126. 2,
  127. 2,
  128. true,
  129. accelerated::sample_move_d16_sSs,
  130. origerated::sample_move_d16_sSs,
  131. accelerated::sample_move_dS_s16s,
  132. origerated::sample_move_dS_s16s,
  133. NULL,
  134. "16s" },
  135. {
  136. 2,
  137. 2,
  138. false,
  139. accelerated::sample_move_d16_sS,
  140. origerated::sample_move_d16_sS,
  141. accelerated::sample_move_dS_s16,
  142. origerated::sample_move_dS_s16,
  143. NULL,
  144. "16" },
  145. };
  146. // we need to repeat for better accuracy at time measurement
  147. const uint32_t retry_per_case = 1000;
  148. // setup test buffers
  149. #define TESTBUFF_SIZE 1024
  150. jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE];
  151. // integer buffers: max 4 bytes per value / * 2 for stereo
  152. char integerbuffer_accel[TESTBUFF_SIZE*4*2];
  153. char integerbuffer_orig[TESTBUFF_SIZE*4*2];
  154. // float buffers
  155. jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE];
  156. jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE];
  157. // comparing unsigned makes life easier
  158. uint32_t extract_integer(
  159. char* buff,
  160. uint32_t offset,
  161. uint32_t frame_size,
  162. uint32_t sample_size,
  163. bool big_endian)
  164. {
  165. uint32_t retval = 0;
  166. unsigned char* curr;
  167. uint32_t mult = 1;
  168. if(big_endian) {
  169. curr = (unsigned char*)buff + offset + sample_size-1;
  170. for(uint32_t i=0; i<sample_size; i++) {
  171. retval += *(curr--) * mult;
  172. mult*=256;
  173. }
  174. }
  175. else {
  176. curr = (unsigned char*)buff + offset + frame_size-sample_size;
  177. for(uint32_t i=0; i<sample_size; i++) {
  178. retval += *(curr++) * mult;
  179. mult*=256;
  180. }
  181. }
  182. return retval;
  183. }
  184. int main(int argc, char *argv[])
  185. {
  186. // parse_arguments(argc, argv);
  187. uint32_t maxerr_displayed = 10;
  188. // fill jackbuffer
  189. for(int i=0; i<TESTBUFF_SIZE; i++) {
  190. // ramp
  191. jack_default_audio_sample_t value =
  192. ((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2);
  193. // force clipping
  194. value *= 1.02;
  195. jackbuffer_source[i] = value;
  196. }
  197. for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) {
  198. // test mono/stereo
  199. for(uint32_t channels=1; channels<=2; channels++) {
  200. //////////////////////////////////////////////////////////////////////////////
  201. // jackfloat -> integer
  202. // clean target buffers
  203. memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel));
  204. memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig));
  205. // accel
  206. clock_t time_to_integer_accel = clock();
  207. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  208. {
  209. test_cases[testcase].jack_to_integer_accel(
  210. integerbuffer_accel,
  211. jackbuffer_source,
  212. TESTBUFF_SIZE,
  213. test_cases[testcase].frame_size*channels,
  214. test_cases[testcase].ditherstate);
  215. }
  216. float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC;
  217. // orig
  218. clock_t time_to_integer_orig = clock();
  219. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  220. {
  221. test_cases[testcase].jack_to_integer_orig(
  222. integerbuffer_orig,
  223. jackbuffer_source,
  224. TESTBUFF_SIZE,
  225. test_cases[testcase].frame_size*channels,
  226. test_cases[testcase].ditherstate);
  227. }
  228. float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC;
  229. // output performance results
  230. printf(
  231. "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
  232. test_cases[testcase].name,
  233. channels,
  234. timediff_to_integer_orig,
  235. timediff_to_integer_accel,
  236. (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0);
  237. uint32_t int_deviation_max = 0;
  238. uint32_t int_error_count = 0;
  239. // output error (avoid spam -> limit error lines per test case)
  240. for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
  241. uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels;
  242. // compare both results
  243. uint32_t intval_accel=extract_integer(
  244. integerbuffer_accel,
  245. sample_offset,
  246. test_cases[testcase].frame_size,
  247. test_cases[testcase].sample_size,
  248. #if __BYTE_ORDER == __BIG_ENDIAN
  249. !test_cases[testcase].reverse);
  250. #else
  251. test_cases[testcase].reverse);
  252. #endif
  253. uint32_t intval_orig=extract_integer(
  254. integerbuffer_orig,
  255. sample_offset,
  256. test_cases[testcase].frame_size,
  257. test_cases[testcase].sample_size,
  258. #if __BYTE_ORDER == __BIG_ENDIAN
  259. !test_cases[testcase].reverse);
  260. #else
  261. test_cases[testcase].reverse);
  262. #endif
  263. if(intval_accel != intval_orig) {
  264. if(int_error_count<maxerr_displayed) {
  265. printf("Value error sample %u:", sample);
  266. printf(" Orig 0x");
  267. char formatstr[10];
  268. sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2);
  269. printf(formatstr, intval_orig);
  270. printf(" Accel 0x");
  271. printf(formatstr, intval_accel);
  272. printf("\n");
  273. }
  274. int_error_count++;
  275. uint32_t int_deviation;
  276. if(intval_accel > intval_orig)
  277. int_deviation = intval_accel-intval_orig;
  278. else
  279. int_deviation = intval_orig-intval_accel;
  280. if(int_deviation > int_deviation_max)
  281. int_deviation_max = int_deviation;
  282. }
  283. }
  284. printf(
  285. "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
  286. test_cases[testcase].name,
  287. channels,
  288. int_error_count,
  289. int_deviation_max);
  290. //////////////////////////////////////////////////////////////////////////////
  291. // integer -> jackfloat
  292. // clean target buffers
  293. memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel));
  294. memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig));
  295. // accel
  296. clock_t time_to_float_accel = clock();
  297. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  298. {
  299. test_cases[testcase].integer_to_jack_accel(
  300. jackfloatbuffer_accel,
  301. integerbuffer_orig,
  302. TESTBUFF_SIZE,
  303. test_cases[testcase].frame_size*channels);
  304. }
  305. float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC;
  306. // orig
  307. clock_t time_to_float_orig = clock();
  308. for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
  309. {
  310. test_cases[testcase].integer_to_jack_orig(
  311. jackfloatbuffer_orig,
  312. integerbuffer_orig,
  313. TESTBUFF_SIZE,
  314. test_cases[testcase].frame_size*channels);
  315. }
  316. float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC;
  317. // output performance results
  318. printf(
  319. "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
  320. test_cases[testcase].name,
  321. channels,
  322. timediff_to_float_orig,
  323. timediff_to_float_accel,
  324. (timediff_to_float_orig/timediff_to_float_accel-1)*100.0);
  325. jack_default_audio_sample_t float_deviation_max = 0.0;
  326. uint32_t float_error_count = 0;
  327. // output error (avoid spam -> limit error lines per test case)
  328. for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
  329. // For easier estimation/readabilty we scale floats back to integer
  330. jack_default_audio_sample_t sample_scaling;
  331. switch(test_cases[testcase].sample_size) {
  332. case 2:
  333. sample_scaling = SAMPLE_16BIT_SCALING;
  334. break;
  335. default:
  336. sample_scaling = SAMPLE_24BIT_SCALING;
  337. break;
  338. }
  339. jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling;
  340. jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling;
  341. // compare both results
  342. jack_default_audio_sample_t float_deviation;
  343. if(floatval_accel > floatval_orig)
  344. float_deviation = floatval_accel-floatval_orig;
  345. else
  346. float_deviation = floatval_orig-floatval_accel;
  347. if(float_deviation > float_deviation_max)
  348. float_deviation_max = float_deviation;
  349. // deviation > half bit => error
  350. if(float_deviation > 0.5) {
  351. if(float_error_count<maxerr_displayed) {
  352. printf("Value error sample %u:", sample);
  353. printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel);
  354. }
  355. float_error_count++;
  356. }
  357. }
  358. printf(
  359. "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
  360. test_cases[testcase].name,
  361. channels,
  362. float_error_count,
  363. float_deviation_max);
  364. printf("\n");
  365. }
  366. }
  367. return 0;
  368. }