You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1193 lines
30KB

  1. /* Copyright (C) 2007 Hong Zhiqian */
  2. /**
  3. @file mdf_tm.h
  4. @author Hong Zhiqian
  5. @brief Various compatibility routines for Speex (TriMedia version)
  6. */
  7. /*
  8. Redistribution and use in source and binary forms, with or without
  9. modification, are permitted provided that the following conditions
  10. are met:
  11. - Redistributions of source code must retain the above copyright
  12. notice, this list of conditions and the following disclaimer.
  13. - Redistributions in binary form must reproduce the above copyright
  14. notice, this list of conditions and the following disclaimer in the
  15. documentation and/or other materials provided with the distribution.
  16. - Neither the name of the Xiph.org Foundation nor the names of its
  17. contributors may be used to endorse or promote products derived from
  18. this software without specific prior written permission.
  19. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
  23. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  24. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  25. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  26. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  27. LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  28. NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. */
  31. #include <ops/custom_defs.h>
  32. #include "profile_tm.h"
  33. // shifted power spectrum to fftwrap.c so that optimisation can be shared between mdf.c and preprocess.c
  34. #define OVERRIDE_POWER_SPECTRUM
  35. #ifdef FIXED_POINT
  36. #else
  37. #define OVERRIDE_FILTER_DC_NOTCH16
  38. void filter_dc_notch16(
  39. const spx_int16_t * restrict in,
  40. float radius,
  41. float * restrict out,
  42. int len,
  43. float * restrict mem
  44. )
  45. {
  46. register int i;
  47. register float den2, r1;
  48. register float mem0, mem1;
  49. FILTERDCNOTCH16_START();
  50. r1 = 1 - radius;
  51. den2 = (radius * radius) + (0.7 * r1 * r1);
  52. mem0 = mem[0];
  53. mem1 = mem[1];
  54. #if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16)
  55. #pragma TCS_unroll=4
  56. #pragma TCS_unrollexact=1
  57. #endif
  58. for ( i=0 ; i<len ; ++i )
  59. {
  60. register float vin = in[i];
  61. register float vout = mem0 + vin;
  62. register float rvout = radius * vout;
  63. mem0 = mem1 + 2 * (-vin + rvout);
  64. mem1 = vin - (den2 * vout);
  65. out[i] = rvout;
  66. }
  67. #if (TM_UNROLL && TM_UNROLL_FILTERDCNOTCH16)
  68. #pragma TCS_unrollexact=0
  69. #pragma TCS_unroll=0
  70. #endif
  71. mem[0] = mem0;
  72. mem[1] = mem1;
  73. FILTERDCNOTCH16_STOP();
  74. }
  75. #define OVERRIDE_MDF_INNER_PROD
  76. float mdf_inner_prod(
  77. const float * restrict x,
  78. const float * restrict y,
  79. int len
  80. )
  81. {
  82. register float sum = 0;
  83. MDFINNERPROD_START();
  84. len >>= 1;
  85. #if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT)
  86. #pragma TCS_unroll=4
  87. #pragma TCS_unrollexact=1
  88. #endif
  89. while(len--)
  90. {
  91. register float acc0, acc1;
  92. acc0 = (*x++) * (*y++);
  93. acc1 = (*x++) * (*y++);
  94. sum += acc0 + acc1;
  95. }
  96. #if (TM_UNROLL && TM_UNROLL_MDFINNERPRODUCT)
  97. #pragma TCS_unrollexact=0
  98. #pragma TCS_unroll=0
  99. #endif
  100. MDFINNERPROD_STOP();
  101. return sum;
  102. }
  103. #define OVERRIDE_SPECTRAL_MUL_ACCUM
  104. void spectral_mul_accum(
  105. const float * restrict X,
  106. const float * restrict Y,
  107. float * restrict acc,
  108. int N, int M
  109. )
  110. {
  111. register int i, j;
  112. register float Xi, Yi, Xii, Yii;
  113. register int _N;
  114. SPECTRALMULACCUM_START();
  115. acc[0] = X[0] * Y[0];
  116. _N = N-1;
  117. for ( i=1 ; i<_N ; i+=2 )
  118. {
  119. Xi = X[i];
  120. Yi = Y[i];
  121. Xii = X[i+1];
  122. Yii = Y[i+1];
  123. acc[i] = (Xi * Yi - Xii * Yii);
  124. acc[i+1]= (Xii * Yi + Xi * Yii);
  125. }
  126. acc[_N] = X[_N] * Y[_N];
  127. for ( j=1,X+=N,Y+=N ; j<M ; j++ )
  128. {
  129. acc[0] += X[0] * Y[0];
  130. for ( i=1 ; i<N-1 ; i+=2 )
  131. {
  132. Xi = X[i];
  133. Yi = Y[i];
  134. Xii = X[i+1];
  135. Yii = Y[i+1];
  136. acc[i] += (Xi * Yi - Xii * Yii);
  137. acc[i+1]+= (Xii * Yi + Xi * Yii);
  138. }
  139. acc[_N] += X[_N] * Y[_N];
  140. X += N;
  141. Y += N;
  142. }
  143. SPECTRALMULACCUM_STOP();
  144. }
  145. #define OVERRIDE_WEIGHTED_SPECTRAL_MUL_CONJ
  146. void weighted_spectral_mul_conj(
  147. const float * restrict w,
  148. const float p,
  149. const float * restrict X,
  150. const float * restrict Y,
  151. float * restrict prod,
  152. int N
  153. )
  154. {
  155. register int i, j;
  156. register int _N;
  157. WEIGHTEDSPECTRALMULCONJ_START();
  158. prod[0] = p * w[0] * X[0] * Y[0];
  159. _N = N-1;
  160. for (i=1,j=1;i<_N;i+=2,j++)
  161. {
  162. register float W;
  163. register float Xi, Yi, Xii, Yii;
  164. Xi = X[i];
  165. Yi = Y[i];
  166. Xii = X[i+1];
  167. Yii = Y[i+1];
  168. W = p * w[j];
  169. prod[i] = W * (Xi * Yi + Xii * Yii);
  170. prod[i+1]= W * (Xi * Yii - Xii * Yi);
  171. }
  172. prod[_N] = p * w[j] * X[_N] * Y[_N];
  173. WEIGHTEDSPECTRALMULCONJ_STOP();
  174. }
  175. #define OVERRIDE_MDF_ADJUST_PROP
  176. void mdf_adjust_prop(
  177. const float * restrict W,
  178. int N,
  179. int M,
  180. float * restrict prop
  181. )
  182. {
  183. register int i, j;
  184. register float max_sum = 1;
  185. register float prop_sum = 1;
  186. MDFADJUSTPROP_START();
  187. for ( i=0 ; i<M ; ++i )
  188. {
  189. register float tmp = 1;
  190. register int k = i * N;
  191. register int l = k + N;
  192. register float propi;
  193. #if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP)
  194. #pragma TCS_unroll=4
  195. #pragma TCS_unrollexact=1
  196. #endif
  197. for ( j=k ; j<l ; ++j )
  198. {
  199. register float wi = W[j];
  200. tmp += wi * wi;
  201. }
  202. #if (TM_UNROLL && TM_UNROLL_MDFADJUSTPROP)
  203. #pragma TCS_unrollexact=0
  204. #pragma TCS_unroll=0
  205. #endif
  206. propi = spx_sqrt(tmp);
  207. prop[i]= propi;
  208. max_sum= fmux(propi > max_sum, propi, max_sum);
  209. }
  210. for ( i=0 ; i<M ; ++i )
  211. {
  212. register float propi = prop[i];
  213. propi += .1f * max_sum;
  214. prop_sum += propi;
  215. prop[i] = propi;
  216. }
  217. prop_sum = 0.99f / prop_sum;
  218. for ( i=0 ; i<M ; ++i )
  219. { prop[i] = prop_sum * prop[i];
  220. }
  221. MDFADJUSTPROP_STOP();
  222. }
  223. #define OVERRIDE_SPEEX_ECHO_GET_RESIDUAL
  224. void speex_echo_get_residual(
  225. SpeexEchoState * restrict st,
  226. float * restrict residual_echo,
  227. int len
  228. )
  229. {
  230. register int i;
  231. register float leak2, leake;
  232. register int N;
  233. register float * restrict window;
  234. register float * restrict last_y;
  235. register float * restrict y;
  236. SPEEXECHOGETRESIDUAL_START();
  237. window = st->window;
  238. last_y = st->last_y;
  239. y = st->y;
  240. N = st->window_size;
  241. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
  242. #pragma TCS_unroll=4
  243. #pragma TCS_unrollexact=1
  244. #endif
  245. for (i=0;i<N;i++)
  246. { y[i] = window[i] * last_y[i];
  247. }
  248. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
  249. #pragma TCS_unrollexact=0
  250. #pragma TCS_unroll=0
  251. #endif
  252. spx_fft(st->fft_table, st->y, st->Y);
  253. power_spectrum(st->Y, residual_echo, N);
  254. leake = st->leak_estimate;
  255. leak2 = fmux(leake > .5, 1, 2 * leake);
  256. N = st->frame_size;
  257. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
  258. #pragma TCS_unroll=4
  259. #pragma TCS_unrollexact=1
  260. #endif
  261. for ( i=0 ; i<N ; ++i )
  262. { residual_echo[i] *= leak2;
  263. }
  264. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOGETRESIDUAL)
  265. #pragma TCS_unrollexact=0
  266. #pragma TCS_unroll=0
  267. #endif
  268. residual_echo[N] *= leak2;
  269. #ifndef NO_REMARK
  270. (void)len;
  271. #endif
  272. SPEEXECHOGETRESIDUAL_STOP();
  273. }
  274. #endif
  275. void mdf_preemph(
  276. SpeexEchoState * restrict st,
  277. spx_word16_t * restrict x,
  278. const spx_int16_t * restrict far_end,
  279. int framesize
  280. )
  281. {
  282. register spx_word16_t preemph = st->preemph;
  283. register spx_word16_t memX = st->memX;
  284. register spx_word16_t memD = st->memD;
  285. register spx_word16_t * restrict input = st->input;
  286. register int i;
  287. #ifdef FIXED_POINT
  288. register int saturated = st->saturated;
  289. #endif
  290. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  291. #pragma TCS_unroll=4
  292. #pragma TCS_unrollexact=1
  293. #endif
  294. for ( i=0 ; i<framesize ; ++i )
  295. {
  296. register spx_int16_t far_endi = far_end[i];
  297. register spx_word32_t tmp32;
  298. register spx_word16_t inputi = input[i];
  299. tmp32 = SUB32(EXTEND32(far_endi), EXTEND32(MULT16_16_P15(preemph,memX)));
  300. #ifdef FIXED_POINT
  301. saturated = mux(iabs(tmp32) > 32767, M+1, saturated);
  302. tmp32 = iclipi(tmp32,32767);
  303. #endif
  304. x[i] = EXTRACT16(tmp32);
  305. memX = far_endi;
  306. tmp32 = SUB32(EXTEND32(inputi), EXTEND32(MULT16_16_P15(preemph, memD)));
  307. #ifdef FIXED_POINT
  308. saturated = mux( ((tmp32 > 32767) && (saturated == 0)), 1,
  309. mux( ((tmp32 <-32767) && (saturated == 0)), 1, saturated ));
  310. tmp32 = iclipi(tmp32,32767);
  311. #endif
  312. memD = inputi;
  313. input[i] = tmp32;
  314. }
  315. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  316. #pragma TCS_unrollexact=0
  317. #pragma TCS_unroll=0
  318. #endif
  319. st->memD = memD;
  320. st->memX = memX;
  321. #ifdef FIXED_POINT
  322. st->saturated = saturated;
  323. #endif
  324. }
  325. void mdf_sub(
  326. spx_word16_t * restrict dest,
  327. const spx_word16_t * restrict src1,
  328. const spx_word16_t * restrict src2,
  329. int framesize
  330. )
  331. {
  332. register int i;
  333. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  334. #pragma TCS_unroll=4
  335. #pragma TCS_unrollexact=1
  336. #endif
  337. #ifdef FIXED_POINT
  338. for ( i=0,framesize<<=1 ; i<framesize ; i+=4 )
  339. { register int src1i, src2i, desti;
  340. src1i = ld32d(src1,i);
  341. src2i = ld32d(src2,i);
  342. desti = dspidualsub(src1i,src2i);
  343. st32d(i, dest, desti);
  344. }
  345. #else
  346. for ( i=0 ; i<framesize ; ++i )
  347. { dest[i] = src1[i] - src2[i];
  348. }
  349. #endif
  350. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  351. #pragma TCS_unrollexact=0
  352. #pragma TCS_unroll=0
  353. #endif
  354. }
  355. void mdf_sub_int(
  356. spx_word16_t * restrict dest,
  357. const spx_int16_t * restrict src1,
  358. const spx_int16_t * restrict src2,
  359. int framesize
  360. )
  361. {
  362. register int i, j;
  363. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  364. #pragma TCS_unroll=4
  365. #pragma TCS_unrollexact=1
  366. #endif
  367. #ifdef FIXED_POINT
  368. for ( i=0,framesize<<=1 ; i<framesize ; i+=4 )
  369. { register int src1i, src2i, desti;
  370. src1i = ld32d(src1,i);
  371. src2i = ld32d(src2,i);
  372. desti = dspidualsub(src1i,src2i);
  373. st32d(i, dest, desti);
  374. }
  375. #else
  376. for ( i=0,j=0 ; i<framesize ; i+=2,++j )
  377. { register int src1i, src2i, desti;
  378. src1i = ld32d(src1,j);
  379. src2i = ld32d(src2,j);
  380. desti = dspidualsub(src1i,src2i);
  381. dest[i] = sex16(desti);
  382. dest[i+1] = asri(16,desti);
  383. }
  384. #endif
  385. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  386. #pragma TCS_unrollexact=0
  387. #pragma TCS_unroll=0
  388. #endif
  389. }
  390. void mdf_compute_weight_gradient(
  391. SpeexEchoState * restrict st,
  392. spx_word16_t * restrict X,
  393. int N,
  394. int M
  395. )
  396. {
  397. register int i, j;
  398. register spx_word32_t * restrict PHI = st->PHI;
  399. for (j=M-1;j>=0;j--)
  400. {
  401. register spx_word32_t * restrict W = &(st->W[j*N]);
  402. weighted_spectral_mul_conj(
  403. st->power_1,
  404. FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15),
  405. &X[(j+1)*N],
  406. st->E,
  407. st->PHI,
  408. N);
  409. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  410. #pragma TCS_unroll=4
  411. #pragma TCS_unrollexact=1
  412. #endif
  413. for (i=0;i<N;i++)
  414. { W[i] = ADD32(W[i],PHI[i]);
  415. }
  416. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  417. #pragma TCS_unrollexact=0
  418. #pragma TCS_unroll=0
  419. #endif
  420. }
  421. }
  422. void mdf_update_weight(
  423. SpeexEchoState * restrict st,
  424. int N,
  425. int M,
  426. int framesize
  427. )
  428. {
  429. register int j;
  430. register int cancel_count = st->cancel_count;
  431. register spx_word16_t * restrict wtmp = st->wtmp;
  432. #ifdef FIXED_POINT
  433. register spx_word16_t * restrict wtmp2 = st->wtmp2;
  434. register int i;
  435. #endif
  436. for ( j=0 ; j<M ; j++ )
  437. {
  438. register spx_word32_t * restrict W = &(st->W[j*N]);
  439. if (j==0 || cancel_count%(M-1) == j-1)
  440. {
  441. #ifdef FIXED_POINT
  442. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  443. #pragma TCS_unroll=4
  444. #pragma TCS_unrollexact=1
  445. #endif
  446. for ( i=0 ; i<N ; i++ )
  447. wtmp2[i] = EXTRACT16(PSHR32(W[i],NORMALIZE_SCALEDOWN+16));
  448. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  449. #pragma TCS_unrollexact=0
  450. #pragma TCS_unroll=0
  451. #endif
  452. spx_ifft(st->fft_table, wtmp2, wtmp);
  453. memset(wtmp, 0, framesize * sizeof(spx_word16_t));
  454. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  455. #pragma TCS_unroll=4
  456. #pragma TCS_unrollexact=1
  457. #endif
  458. for (j=framesize; j<N ; ++j)
  459. { wtmp[j]=SHL16(wtmp[j],NORMALIZE_SCALEUP);
  460. }
  461. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  462. #pragma TCS_unrollexact=0
  463. #pragma TCS_unroll=0
  464. #endif
  465. spx_fft(st->fft_table, wtmp, wtmp2);
  466. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  467. #pragma TCS_unroll=4
  468. #pragma TCS_unrollexact=1
  469. #endif
  470. for (i=0;i<N;i++)
  471. { W[i] -= SHL32(EXTEND32(wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
  472. }
  473. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  474. #pragma TCS_unrollexact=0
  475. #pragma TCS_unroll=0
  476. #endif
  477. #else
  478. spx_ifft(st->fft_table, W, wtmp);
  479. memset(&wtmp[framesize], 0, (N-framesize) * sizeof(spx_word16_t));
  480. spx_fft(st->fft_table, wtmp, W);
  481. #endif
  482. }
  483. }
  484. }
  485. #ifdef TWO_PATH
  486. // first four parameters is passed by registers
  487. // generate faster performance with 4 parameters functions
  488. spx_word32_t mdf_update_foreground(
  489. SpeexEchoState * restrict st,
  490. spx_word32_t Dbf,
  491. spx_word32_t Sff,
  492. spx_word32_t See
  493. )
  494. {
  495. register spx_word32_t Davg1 = st->Davg1;
  496. register spx_word32_t Davg2 = st->Davg2;
  497. register spx_word32_t Dvar1 = st->Dvar1;
  498. register spx_word32_t Dvar2 = st->Dvar2;
  499. register spx_word16_t * restrict input = st->input;
  500. register int framesize = st->frame_size;
  501. register spx_word16_t * restrict xx = st->x + framesize;
  502. register spx_word16_t * restrict y = st->y + framesize;
  503. register spx_word16_t * restrict ee = st->e + framesize;
  504. register int update_foreground;
  505. register int i;
  506. register int N = st->window_size;
  507. register int M = st->M;
  508. #ifdef FIXED_POINT
  509. register spx_word32_t sc0 = SUB32(Sff,See);
  510. register spx_float_t sc1 = FLOAT_MUL32U(Sff,Dbf);
  511. Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),Davg1), MULT16_32_Q15(QCONST16(.4f,15),sc0));
  512. Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),Davg2), MULT16_32_Q15(QCONST16(.15f,15),sc0));
  513. Dvar1 = FLOAT_ADD(
  514. FLOAT_MULT(VAR1_SMOOTH,Dvar1),
  515. FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff),
  516. MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
  517. Dvar2 = FLOAT_ADD(
  518. FLOAT_MULT(VAR2_SMOOTH,Dvar2),
  519. FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff),
  520. MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
  521. #else
  522. register spx_word32_t sc0 = Sff - See;
  523. register spx_word32_t sc1 = Sff * Dbf;
  524. Davg1 = .6*Davg1 + .4*sc0;
  525. Davg2 = .85*Davg2 + .15*sc0;
  526. Dvar1 = VAR1_SMOOTH*Dvar1 + .16*sc1;
  527. Dvar2 = VAR2_SMOOTH*Dvar2 + .0225*sc1;
  528. #endif
  529. update_foreground =
  530. mux( FLOAT_GT(FLOAT_MUL32U(sc0, VABS(sc0)), sc1), 1,
  531. mux( FLOAT_GT(FLOAT_MUL32U(Davg1, VABS(Davg1)), FLOAT_MULT(VAR1_UPDATE,(Dvar1))), 1,
  532. mux( FLOAT_GT(FLOAT_MUL32U(Davg2, VABS(Davg2)), FLOAT_MULT(VAR2_UPDATE,(Dvar2))), 1, 0)));
  533. if ( update_foreground )
  534. {
  535. register spx_word16_t * restrict windowf = st->window + framesize;
  536. register spx_word16_t * restrict window = st->window;
  537. st->Davg1 = st->Davg2 = 0;
  538. st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
  539. memcpy(st->foreground, st->W, N*M*sizeof(spx_word32_t));
  540. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  541. #pragma TCS_unroll=4
  542. #pragma TCS_unrollexact=1
  543. #endif
  544. for ( i=0 ; i<framesize ; ++i)
  545. { register spx_word16_t wi = window[i];
  546. register spx_word16_t wfi = windowf[i];
  547. register spx_word16_t ei = ee[i];
  548. register spx_word16_t yi = y[i];
  549. ee[i] = MULT16_16_Q15(wfi,ei) + MULT16_16_Q15(wi,yi);
  550. }
  551. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  552. #pragma TCS_unrollexact=0
  553. #pragma TCS_unroll=0
  554. #endif
  555. } else
  556. {
  557. register int reset_background;
  558. reset_background =
  559. mux( FLOAT_GT(FLOAT_MUL32U(-(sc0),VABS(sc0)), FLOAT_MULT(VAR_BACKTRACK,sc1)), 1,
  560. mux( FLOAT_GT(FLOAT_MUL32U(-(Davg1), VABS(Davg1)), FLOAT_MULT(VAR_BACKTRACK,Dvar1)), 1,
  561. mux( FLOAT_GT(FLOAT_MUL32U(-(Davg2), VABS(Davg2)), FLOAT_MULT(VAR_BACKTRACK,Dvar2)), 1, 0)));
  562. if ( reset_background )
  563. {
  564. memcpy(st->W, st->foreground, N*M*sizeof(spx_word32_t));
  565. memcpy(y, ee, framesize * sizeof(spx_word16_t));
  566. mdf_sub(xx,input,y,framesize);
  567. See = Sff;
  568. st->Davg1 = st->Davg2 = 0;
  569. st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
  570. } else
  571. {
  572. st->Davg1 = Davg1;
  573. st->Davg2 = Davg2;
  574. st->Dvar1 = Dvar1;
  575. st->Dvar2 = Dvar2;
  576. }
  577. }
  578. return See;
  579. }
  580. #endif
  581. void mdf_compute_error_signal(
  582. SpeexEchoState * restrict st,
  583. const spx_int16_t * restrict in,
  584. spx_int16_t * restrict out,
  585. int framesize
  586. )
  587. {
  588. register spx_word16_t preemph = st->preemph;
  589. register spx_word16_t memE = st->memE;
  590. register int saturated = st->saturated;
  591. register spx_word16_t * restrict e = st->e;
  592. register spx_word16_t * restrict ee = st->e + framesize;
  593. register spx_word16_t * restrict input = st->input;
  594. register spx_word16_t * restrict xx = st->x + framesize;
  595. register int i;
  596. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  597. #pragma TCS_unroll=4
  598. #pragma TCS_unrollexact=1
  599. #endif
  600. for ( i=0 ; i<framesize ; ++i )
  601. {
  602. register spx_word32_t tmp_out;
  603. register spx_int16_t ini = in[i];
  604. register int flg;
  605. #ifdef FIXED_POINT
  606. #ifdef TWO_PATH
  607. tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i]));
  608. tmp_out = iclipi(tmp_out,32767);
  609. #else
  610. tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(y[i]));
  611. tmp_out = iclipi(tmp_out,32767);
  612. #endif
  613. #else
  614. #ifdef TWO_PATH
  615. tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(ee[i]));
  616. #else
  617. tmp_out = SUB32(EXTEND32(input[i]), EXTEND32(y[i]));
  618. #endif
  619. tmp_out =
  620. fmux( tmp_out > 32767, 32767,
  621. fmux( tmp_out < -32768, -32768, tmp_out));
  622. #endif
  623. tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(preemph,memE)));
  624. flg = iabs(ini) >= 32000;
  625. tmp_out = VMUX( flg, 0, tmp_out);
  626. saturated = mux( flg && (saturated == 0), 1, saturated);
  627. out[i] = (spx_int16_t)tmp_out;
  628. memE = tmp_out;
  629. }
  630. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  631. #pragma TCS_unrollexact=0
  632. #pragma TCS_unroll=0
  633. #endif
  634. st->memE = memE;
  635. st->saturated = saturated;
  636. memset(e, 0, framesize * sizeof(spx_word16_t));
  637. memcpy(ee, xx, framesize * sizeof(spx_word16_t));
  638. }
  639. inline int mdf_check(
  640. SpeexEchoState * restrict st,
  641. spx_int16_t * out,
  642. spx_word32_t Syy,
  643. spx_word32_t Sxx,
  644. spx_word32_t See,
  645. spx_word32_t Sff,
  646. spx_word32_t Sdd
  647. )
  648. {
  649. register int N = st->window_size;
  650. register spx_word32_t N1e9 = N * 1e9;
  651. register int screwed_up = st->screwed_up;
  652. register int framesize = st->frame_size;
  653. if (!(Syy>=0 && Sxx>=0 && See >= 0)
  654. #ifndef FIXED_POINT
  655. || !(Sff < N1e9 && Syy < N1e9 && Sxx < N1e9 )
  656. #endif
  657. )
  658. {
  659. screwed_up += 50;
  660. memset(out, 0, framesize * sizeof(spx_int16_t));
  661. } else
  662. { screwed_up = mux( SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)), screwed_up+1, 0);
  663. }
  664. st->screwed_up = screwed_up;
  665. return screwed_up;
  666. }
  667. void mdf_smooth(
  668. spx_word32_t * restrict power,
  669. spx_word32_t * restrict Xf,
  670. int framesize,
  671. int M
  672. )
  673. {
  674. register spx_word16_t ss, ss_1, pf, xff;
  675. register int j;
  676. #ifdef FIXED_POINT
  677. ss=DIV32_16(11469,M);
  678. ss_1 = SUB16(32767,ss);
  679. #else
  680. ss=.35/M;
  681. ss_1 = 1-ss;
  682. #endif
  683. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  684. #pragma TCS_unroll=4
  685. #pragma TCS_unrollexact=1
  686. #endif
  687. for ( j=0 ; j<framesize ; ++j )
  688. { register spx_word32_t pi = power[j];
  689. register spx_word32_t xfi = Xf[j];
  690. power[j] = MULT16_32_Q15(ss_1,pi) + 1 + MULT16_32_Q15(ss,xfi);
  691. }
  692. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  693. #pragma TCS_unrollexact=0
  694. #pragma TCS_unroll=0
  695. #endif
  696. pf = power[framesize];
  697. xff = Xf[framesize];
  698. power[framesize] = MULT16_32_Q15(ss_1,pf) + 1 + MULT16_32_Q15(ss,xff);
  699. }
  700. void mdf_compute_filtered_spectra_crosscorrelations(
  701. SpeexEchoState * restrict st,
  702. spx_word32_t Syy,
  703. spx_word32_t See,
  704. int framesize
  705. )
  706. {
  707. register spx_float_t Pey = FLOAT_ONE;
  708. register spx_float_t Pyy = FLOAT_ONE;
  709. register spx_word16_t spec_average = st->spec_average;
  710. register spx_word32_t * restrict pRf = st->Rf;
  711. register spx_word32_t * restrict pYf = st->Yf;
  712. register spx_word32_t * restrict pEh = st->Eh;
  713. register spx_word32_t * restrict pYh = st->Yh;
  714. register spx_word16_t beta0 = st->beta0;
  715. register spx_word16_t beta_max = st->beta_max;
  716. register spx_float_t alpha, alpha_1;
  717. register spx_word32_t tmp32, tmpx;
  718. register spx_float_t sPey = st->Pey;
  719. register spx_float_t sPyy = st->Pyy;
  720. register spx_float_t tmp;
  721. register spx_word16_t leak_estimate;
  722. register int j;
  723. register spx_float_t Eh, Yh;
  724. register spx_word32_t _Ehj, _Rfj, _Yfj, _Yhj;
  725. #ifdef FIXED_POINT
  726. register spx_word16_t spec_average1 = SUB16(32767,spec_average);
  727. #else
  728. register spx_word16_t spec_average1 = 1 - spec_average;
  729. #endif
  730. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  731. #pragma TCS_unroll=4
  732. #pragma TCS_unrollexact=1
  733. #endif
  734. for (j=framesize; j>0 ; --j)
  735. {
  736. _Ehj = pEh[j];
  737. _Rfj = pRf[j];
  738. _Yfj = pYf[j];
  739. _Yhj = pYh[j];
  740. Eh = PSEUDOFLOAT(_Rfj - _Ehj);
  741. Yh = PSEUDOFLOAT(_Yfj - _Yhj);
  742. Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
  743. Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
  744. pEh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj);
  745. pYh[j] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj);
  746. }
  747. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  748. #pragma TCS_unrollexact=0
  749. #pragma TCS_unroll=0
  750. #endif
  751. _Ehj = pEh[0];
  752. _Rfj = pRf[0];
  753. _Yfj = pYf[0];
  754. _Yhj = pYh[0];
  755. Eh = PSEUDOFLOAT(_Rfj - _Ehj);
  756. Yh = PSEUDOFLOAT(_Yfj - _Yhj);
  757. Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
  758. Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
  759. pEh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Ehj), spec_average, _Rfj);
  760. pYh[0] = MAC16_32_Q15(MULT16_32_Q15(spec_average1, _Yhj), spec_average, _Yfj);
  761. Pyy = FLOAT_SQRT(Pyy);
  762. Pey = FLOAT_DIVU(Pey,Pyy);
  763. tmp32 = MULT16_32_Q15(beta0,Syy);
  764. tmpx = MULT16_32_Q15(beta_max,See);
  765. tmp32 = VMUX(tmp32 > tmpx, tmpx, tmp32);
  766. alpha = FLOAT_DIV32(tmp32, See);
  767. alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
  768. sPey = FLOAT_ADD(FLOAT_MULT(alpha_1,sPey) , FLOAT_MULT(alpha,Pey));
  769. sPyy = FLOAT_ADD(FLOAT_MULT(alpha_1,sPyy) , FLOAT_MULT(alpha,Pyy));
  770. tmp = FLOAT_MULT(MIN_LEAK,sPyy);
  771. #ifndef FIXED_POINT
  772. sPyy = VMUX(FLOAT_LT(sPyy, FLOAT_ONE), FLOAT_ONE, sPyy);
  773. sPey = VMUX(FLOAT_LT(sPey, tmp), tmp, sPey);
  774. sPey = VMUX(FLOAT_LT(sPey, sPyy), sPey, sPyy);
  775. #else
  776. sPyy = FLOAT_LT(sPyy, FLOAT_ONE) ? FLOAT_ONE : sPyy;
  777. sPey = FLOAT_LT(sPey, tmp) ? tmp : sPey;
  778. sPey = FLOAT_LT(sPey, sPyy) ? sPey : sPyy;
  779. #endif
  780. leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(sPey, sPyy),14));
  781. leak_estimate = VMUX( leak_estimate > 16383, 32767, SHL16(leak_estimate,1));
  782. st->Pey = sPey;
  783. st->Pyy = sPyy;
  784. st->leak_estimate = leak_estimate;
  785. }
  786. inline spx_word16_t mdf_compute_RER(
  787. spx_word32_t See,
  788. spx_word32_t Syy,
  789. spx_word32_t Sey,
  790. spx_word32_t Sxx,
  791. spx_word16_t leake
  792. )
  793. {
  794. register spx_word16_t RER;
  795. #ifdef FIXED_POINT
  796. register spx_word32_t tmp32;
  797. register spx_word32_t tmp;
  798. spx_float_t bound = PSEUDOFLOAT(Sey);
  799. tmp32 = MULT16_32_Q15(leake,Syy);
  800. tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
  801. bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
  802. tmp = FLOAT_EXTRACT32(bound);
  803. tmp32 = imux( FLOAT_GT(bound, PSEUDOFLOAT(See)), See,
  804. imux( tmp32 < tmp, tmp, tmp32));
  805. tmp = SHR32(See,1);
  806. tmp32 = imux(tmp32 > tmp, tmp, tmp32);
  807. RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
  808. #else
  809. register spx_word32_t r0;
  810. r0 = (Sey * Sey)/(1 + See * Syy);
  811. RER = (.0001*Sxx + 3.* MULT16_32_Q15(leake,Syy)) / See;
  812. RER = fmux( RER < r0, r0, RER);
  813. RER = fmux( RER > .5, .5, RER);
  814. #endif
  815. return RER;
  816. }
  817. void mdf_adapt(
  818. SpeexEchoState * restrict st,
  819. spx_word16_t RER,
  820. spx_word32_t Syy,
  821. spx_word32_t See,
  822. spx_word32_t Sxx
  823. )
  824. {
  825. register spx_float_t * restrict power_1 = st->power_1;
  826. register spx_word32_t * restrict power = st->power;
  827. register int adapted = st->adapted;
  828. register spx_word32_t sum_adapt = st->sum_adapt;
  829. register spx_word16_t leake = st->leak_estimate;
  830. register int framesize = st->frame_size;
  831. register int i;
  832. register int M = st->M;
  833. adapted = mux( !adapted && sum_adapt > QCONST32(M,15) &&
  834. MULT16_32_Q15(leake,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy), 1, adapted);
  835. if ( adapted )
  836. { register spx_word32_t * restrict Yf = st->Yf;
  837. register spx_word32_t * restrict Rf = st->Rf;
  838. register spx_word32_t r, e, e2;
  839. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  840. #pragma TCS_unroll=4
  841. #pragma TCS_unrollexact=1
  842. #endif
  843. for ( i=0 ; i<framesize ; ++i )
  844. {
  845. r = SHL32(Yf[i],3);
  846. r = MULT16_32_Q15(leake,r);
  847. e = SHL32(Rf[i],3)+1;
  848. #ifdef FIXED_POINT
  849. e2 = SHR32(e,1);
  850. r = mux( r > e2, e2, r);
  851. #else
  852. e2 = e * .5;
  853. r = fmux( r > e2, e2, r);
  854. #endif
  855. r = MULT16_32_Q15(QCONST16(.7,15),r) +
  856. MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
  857. power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[i]+10)),WEIGHT_SHIFT+16);
  858. }
  859. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  860. #pragma TCS_unrollexact=0
  861. #pragma TCS_unroll=0
  862. #endif
  863. r = SHL32(Yf[framesize],3);
  864. r = MULT16_32_Q15(leake,r);
  865. e = SHL32(Rf[framesize],3)+1;
  866. #ifdef FIXED_POINT
  867. e2 = SHR32(e,1);
  868. r = mux( r > e2, e2, r);
  869. #else
  870. e2 = e * .5;
  871. r = fmux( r > e2, e2, r);
  872. #endif
  873. r = MULT16_32_Q15(QCONST16(.7,15),r) +
  874. MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
  875. power_1[framesize] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,power[framesize]+10)),WEIGHT_SHIFT+16);
  876. } else
  877. {
  878. register spx_word16_t adapt_rate=0;
  879. register int N = st->window_size;
  880. if ( Sxx > SHR32(MULT16_16(N, 1000),6) )
  881. { register spx_word32_t tmp32, tmp32q;
  882. tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
  883. #ifdef FIXED_POINT
  884. tmp32q = SHR32(See,2);
  885. tmp32 = mux(tmp32 > tmp32q, tmp32q, tmp32);
  886. #else
  887. tmp32q = 0.25 * See;
  888. tmp32 = fmux(tmp32 > tmp32q, tmp32q, tmp32);
  889. #endif
  890. adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
  891. }
  892. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  893. #pragma TCS_unroll=4
  894. #pragma TCS_unrollexact=1
  895. #endif
  896. for (i=0;i<framesize;i++)
  897. power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[i],10)),WEIGHT_SHIFT+1);
  898. #if (TM_UNROLL && TM_UNROLL_SPEEXECHOCANCELLATION)
  899. #pragma TCS_unrollexact=0
  900. #pragma TCS_unroll=0
  901. #endif
  902. power_1[framesize] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(power[framesize],10)),WEIGHT_SHIFT+1);
  903. sum_adapt = ADD32(sum_adapt,adapt_rate);
  904. }
  905. st->sum_adapt = sum_adapt;
  906. st->adapted = adapted;
  907. }
  908. #define OVERRIDE_ECHO_CANCELLATION
  909. void speex_echo_cancellation(
  910. SpeexEchoState * restrict st,
  911. const spx_int16_t * restrict in,
  912. const spx_int16_t * restrict far_end,
  913. spx_int16_t * restrict out
  914. )
  915. {
  916. register int framesize = st->frame_size;
  917. register spx_word16_t * restrict x = st->x;
  918. register spx_word16_t * restrict xx = st->x + framesize;
  919. register spx_word16_t * restrict yy = st->y + framesize;
  920. register spx_word16_t * restrict ee = st->e + framesize;
  921. register spx_word32_t Syy, See, Sxx, Sdd, Sff;
  922. register spx_word16_t RER;
  923. register spx_word32_t Sey;
  924. register int j;
  925. register int N,M;
  926. #ifdef TWO_PATH
  927. register spx_word32_t Dbf;
  928. #endif
  929. N = st->window_size;
  930. M = st->M;
  931. st->cancel_count++;
  932. filter_dc_notch16(in, st->notch_radius, st->input, framesize, st->notch_mem);
  933. mdf_preemph(st, xx, far_end, framesize);
  934. {
  935. register spx_word16_t * restrict X = st->X;
  936. for ( j=M-1 ; j>=0 ; j-- )
  937. { register spx_word16_t * restrict Xdes = &(X[(j+1)*N]);
  938. register spx_word16_t * restrict Xsrc = &(X[j*N]);
  939. memcpy(Xdes, Xsrc, N * sizeof(spx_word16_t));
  940. }
  941. spx_fft(st->fft_table, x, X);
  942. memcpy(st->last_y, st->x, N * sizeof(spx_word16_t));
  943. Sxx = mdf_inner_prod(xx, xx, framesize);
  944. memcpy(x, xx, framesize * sizeof(spx_word16_t));
  945. #ifdef TWO_PATH
  946. spectral_mul_accum(st->X, st->foreground, st->Y, N, M);
  947. spx_ifft(st->fft_table, st->Y, st->e);
  948. mdf_sub(xx, st->input, ee, framesize);
  949. Sff = mdf_inner_prod(xx, xx, framesize);
  950. #endif
  951. mdf_adjust_prop (st->W, N, M, st->prop);
  952. if (st->saturated == 0)
  953. { mdf_compute_weight_gradient(st, X, N, M);
  954. } else
  955. { st->saturated--;
  956. }
  957. }
  958. mdf_update_weight(st, N, M, framesize);
  959. spectral_mul_accum(st->X, st->W, st->Y, N, M);
  960. spx_ifft(st->fft_table, st->Y, st->y);
  961. #ifdef TWO_PATH
  962. mdf_sub(xx, ee, yy, framesize);
  963. Dbf = 10+mdf_inner_prod(xx, xx, framesize);
  964. #endif
  965. mdf_sub(xx, st->input, yy, framesize);
  966. See = mdf_inner_prod(xx, xx, framesize);
  967. #ifndef TWO_PATH
  968. Sff = See;
  969. #else
  970. See = mdf_update_foreground(st,Dbf,Sff,See);
  971. #endif
  972. mdf_compute_error_signal(st, in, out, framesize);
  973. Sey = mdf_inner_prod(ee, yy, framesize);
  974. Syy = mdf_inner_prod(yy, yy, framesize);
  975. Sdd = mdf_inner_prod(st->input, st->input, framesize);
  976. if ( mdf_check(st,out,Syy,Sxx,See,Sff,Sdd) >= 50 )
  977. { speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
  978. speex_echo_state_reset(st);
  979. return;
  980. }
  981. See = MAX32(See, SHR32(MULT16_16(N, 100),6));
  982. spx_fft(st->fft_table, st->e, st->E);
  983. memset(st->y, 0, framesize * sizeof(spx_word16_t));
  984. spx_fft(st->fft_table, st->y, st->Y);
  985. power_spectrum(st->E, st->Rf, N);
  986. power_spectrum(st->Y, st->Yf, N);
  987. power_spectrum(st->X, st->Xf, N);
  988. mdf_smooth(st->power,st->Xf,framesize, M);
  989. mdf_compute_filtered_spectra_crosscorrelations(st,Syy,See,framesize);
  990. RER = mdf_compute_RER(See,Syy,Sey,Sxx,st->leak_estimate);
  991. mdf_adapt(st, RER, Syy, See, Sxx);
  992. if ( st->adapted )
  993. { register spx_word16_t * restrict last_yy = st->last_y + framesize;
  994. memcpy(st->last_y, last_yy, framesize * sizeof(spx_word16_t));
  995. mdf_sub_int(last_yy, in, out, framesize);
  996. }
  997. }