/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Authors:  Djordje Pesut   (djordje@mips.com)
 *           Mirjana Vulin   (mvulin@mips.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Reference: libavcodec/aacsbr.c
 */
#include "libavcodec/aac.h"
#include "libavcodec/aacsbr.h"
#include "libavutil/mem_internal.h"
#include "libavutil/mips/asmdefs.h"

#define ENVELOPE_ADJUSTMENT_OFFSET 2

#if HAVE_INLINE_ASM
#if HAVE_MIPSFPU
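
/*
 * MIPS-optimized counterpart of sbr_lf_gen() (see libavcodec/aacsbr.c):
 * clears X_low, then copies the low-band QMF samples from the current and
 * previous analysis windows W into X_low, replacing the reference memcpy
 * loops with word-sized loads/stores in inline assembly.
 */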
static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
                           float X_low[32][40][2], const float W[2][32][32][2],
                           int buf_idx)
{
    int i, k;
    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
    float *p_x_low  = &X_low[0][8][0];
    float *p_w      = (float*)&W[buf_idx][0][0][0];
    float *p_x1_low = &X_low[0][0][0];
    float *p_w1     = (float*)&W[1-buf_idx][24][0][0];
    float *loop_end = p_x1_low + 2560;

    /* loop unrolled 8 times */
    __asm__ volatile (
        "1:                                             \n\t"
        "sw       $0,           0(%[p_x1_low])          \n\t"
        "sw       $0,           4(%[p_x1_low])          \n\t"
        "sw       $0,           8(%[p_x1_low])          \n\t"
        "sw       $0,           12(%[p_x1_low])         \n\t"
        "sw       $0,           16(%[p_x1_low])         \n\t"
        "sw       $0,           20(%[p_x1_low])         \n\t"
        "sw       $0,           24(%[p_x1_low])         \n\t"
        "sw       $0,           28(%[p_x1_low])         \n\t"
        PTR_ADDIU "%[p_x1_low], %[p_x1_low],    32      \n\t"
        "bne      %[p_x1_low],  %[loop_end],    1b      \n\t"
        PTR_ADDIU "%[p_x1_low], %[p_x1_low],    -10240  \n\t"

        : [p_x1_low]"+r"(p_x1_low)
        : [loop_end]"r"(loop_end)
        : "memory"
    );

    for (k = 0; k < sbr->kx[1]; k++) {
        for (i = 0; i < 32; i += 4) {
            /* loop unrolled 4 times */
            __asm__ volatile (
                "lw       %[temp0],    0(%[p_w])              \n\t"
                "lw       %[temp1],    4(%[p_w])              \n\t"
                "lw       %[temp2],    256(%[p_w])            \n\t"
                "lw       %[temp3],    260(%[p_w])            \n\t"
                "lw       %[temp4],    512(%[p_w])            \n\t"
                "lw       %[temp5],    516(%[p_w])            \n\t"
                "lw       %[temp6],    768(%[p_w])            \n\t"
                "lw       %[temp7],    772(%[p_w])            \n\t"
                "sw       %[temp0],    0(%[p_x_low])          \n\t"
                "sw       %[temp1],    4(%[p_x_low])          \n\t"
                "sw       %[temp2],    8(%[p_x_low])          \n\t"
                "sw       %[temp3],    12(%[p_x_low])         \n\t"
                "sw       %[temp4],    16(%[p_x_low])         \n\t"
                "sw       %[temp5],    20(%[p_x_low])         \n\t"
                "sw       %[temp6],    24(%[p_x_low])         \n\t"
                "sw       %[temp7],    28(%[p_x_low])         \n\t"
                PTR_ADDIU "%[p_x_low], %[p_x_low],    32      \n\t"
                PTR_ADDIU "%[p_w],     %[p_w],        1024    \n\t"

                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
                  [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
                :
                : "memory"
            );
        }
        p_x_low += 16;
        p_w     -= 2046;
    }

    for (k = 0; k < sbr->kx[0]; k++) {
        for (i = 0; i < 2; i++) {
            /* loop unrolled 4 times */
            __asm__ volatile (
                "lw       %[temp0],     0(%[p_w1])             \n\t"
                "lw       %[temp1],     4(%[p_w1])             \n\t"
                "lw       %[temp2],     256(%[p_w1])           \n\t"
                "lw       %[temp3],     260(%[p_w1])           \n\t"
                "lw       %[temp4],     512(%[p_w1])           \n\t"
                "lw       %[temp5],     516(%[p_w1])           \n\t"
                "lw       %[temp6],     768(%[p_w1])           \n\t"
                "lw       %[temp7],     772(%[p_w1])           \n\t"
                "sw       %[temp0],     0(%[p_x1_low])         \n\t"
                "sw       %[temp1],     4(%[p_x1_low])         \n\t"
                "sw       %[temp2],     8(%[p_x1_low])         \n\t"
                "sw       %[temp3],     12(%[p_x1_low])        \n\t"
                "sw       %[temp4],     16(%[p_x1_low])        \n\t"
                "sw       %[temp5],     20(%[p_x1_low])        \n\t"
                "sw       %[temp6],     24(%[p_x1_low])        \n\t"
                "sw       %[temp7],     28(%[p_x1_low])        \n\t"
                PTR_ADDIU "%[p_x1_low], %[p_x1_low],   32      \n\t"
                PTR_ADDIU "%[p_w1],     %[p_w1],       1024    \n\t"

                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
                  [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
                :
                : "memory"
            );
        }
        p_x1_low += 64;
        p_w1     -= 510;
    }
    return 0;
}
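
/*
 * MIPS-optimized counterpart of sbr_x_gen() (see libavcodec/aacsbr.c):
 * zeroes X, then rebuilds it from the low-band samples in X_low and the
 * high-band samples of the previous (Y0) and current (Y1) frame, writing
 * real parts to the X[0] plane and imaginary parts to the X[1] plane.
 */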
static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
                          const float Y0[38][64][2], const float Y1[38][64][2],
                          const float X_low[32][40][2], int ch)
{
    int k, i;
    const int i_f = 32;
    int temp0, temp1, temp2, temp3;
    const float *X_low1, *Y01, *Y11;
    float *x1 = &X[0][0][0];
    float *j  = x1 + 4864;
    const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);

    /* loop unrolled 8 times */
    __asm__ volatile (
        "1:                                     \n\t"
        "sw       $0,      0(%[x1])             \n\t"
        "sw       $0,      4(%[x1])             \n\t"
        "sw       $0,      8(%[x1])             \n\t"
        "sw       $0,      12(%[x1])            \n\t"
        "sw       $0,      16(%[x1])            \n\t"
        "sw       $0,      20(%[x1])            \n\t"
        "sw       $0,      24(%[x1])            \n\t"
        "sw       $0,      28(%[x1])            \n\t"
        PTR_ADDIU "%[x1],  %[x1],     32        \n\t"
        "bne      %[x1],   %[j],      1b        \n\t"
        PTR_ADDIU "%[x1],  %[x1],     -19456    \n\t"

        : [x1]"+r"(x1)
        : [j]"r"(j)
        : "memory"
    );

    if (i_Temp != 0) {
        X_low1 = &X_low[0][2][0];
        for (k = 0; k < sbr->kx[0]; k++) {
            __asm__ volatile (
                "move     %[i],        $zero             \n\t"
                "2:                                      \n\t"
                "lw       %[temp0],    0(%[X_low1])      \n\t"
                "lw       %[temp1],    4(%[X_low1])      \n\t"
                "sw       %[temp0],    0(%[x1])          \n\t"
                "sw       %[temp1],    9728(%[x1])       \n\t"
                PTR_ADDIU "%[x1],      %[x1],     256    \n\t"
                PTR_ADDIU "%[X_low1],  %[X_low1], 8      \n\t"
                "addiu    %[i],        %[i],      1      \n\t"
                "bne      %[i],        %[i_Temp], 2b     \n\t"

                : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
                : [i_Temp]"r"(i_Temp)
                : "memory"
            );
            x1     -= (i_Temp << 6) - 1;
            X_low1 -= (i_Temp << 1) - 80;
        }

        x1  = &X[0][0][k];
        Y01 = (float*)&Y0[32][k][0];
        for (; k < sbr->kx[0] + sbr->m[0]; k++) {
            __asm__ volatile (
                "move     %[i],      $zero             \n\t"
                "3:                                    \n\t"
                "lw       %[temp0],  0(%[Y01])         \n\t"
                "lw       %[temp1],  4(%[Y01])         \n\t"
                "sw       %[temp0],  0(%[x1])          \n\t"
                "sw       %[temp1],  9728(%[x1])       \n\t"
                PTR_ADDIU "%[x1],    %[x1],     256    \n\t"
                PTR_ADDIU "%[Y01],   %[Y01],    512    \n\t"
                "addiu    %[i],      %[i],      1      \n\t"
                "bne      %[i],      %[i_Temp], 3b     \n\t"

                : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
                : [i_Temp]"r"(i_Temp)
                : "memory"
            );
            x1  -= (i_Temp << 6) - 1;
            Y01 -= (i_Temp << 7) - 2;
        }
    }

    x1     = &X[0][i_Temp][0];
    X_low1 = &X_low[0][i_Temp+2][0];
    temp3  = 38;
    for (k = 0; k < sbr->kx[1]; k++) {
        __asm__ volatile (
            "move     %[i],        %[i_Temp]         \n\t"
            "4:                                      \n\t"
            "lw       %[temp0],    0(%[X_low1])      \n\t"
            "lw       %[temp1],    4(%[X_low1])      \n\t"
            "sw       %[temp0],    0(%[x1])          \n\t"
            "sw       %[temp1],    9728(%[x1])       \n\t"
            PTR_ADDIU "%[x1],      %[x1],     256    \n\t"
            PTR_ADDIU "%[X_low1],  %[X_low1], 8      \n\t"
            "addiu    %[i],        %[i],      1      \n\t"
            "bne      %[i],        %[temp3],  4b     \n\t"

            : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
              [temp2]"=&r"(temp2)
            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
            : "memory"
        );
        x1     -= ((38 - i_Temp) << 6) - 1;
        X_low1 -= ((38 - i_Temp) << 1) - 80;
    }

    x1    = &X[0][i_Temp][k];
    Y11   = &Y1[i_Temp][k][0];
    temp2 = 32;
    for (; k < sbr->kx[1] + sbr->m[1]; k++) {
        __asm__ volatile (
            "move     %[i],      %[i_Temp]         \n\t"
            "5:                                    \n\t"
            "lw       %[temp0],  0(%[Y11])         \n\t"
            "lw       %[temp1],  4(%[Y11])         \n\t"
            "sw       %[temp0],  0(%[x1])          \n\t"
            "sw       %[temp1],  9728(%[x1])       \n\t"
            PTR_ADDIU "%[x1],    %[x1],     256    \n\t"
            PTR_ADDIU "%[Y11],   %[Y11],    512    \n\t"
            "addiu    %[i],      %[i],      1      \n\t"
            "bne      %[i],      %[temp2],  5b     \n\t"

            : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
              [temp2]"r"(temp2)
            : "memory"
        );
        x1  -= ((32 - i_Temp) << 6) - 1;
        Y11 -= ((32 - i_Temp) << 7) - 2;
    }
    return 0;
}
#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
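/*
 * MIPS-optimized counterpart of sbr_hf_assemble() (see libavcodec/aacsbr.c):
 * buffers the per-envelope gains and noise levels, smooths them with
 * h_smooth when smoothing is enabled, scales the high band through
 * sbr->dsp.hf_g_filt() and adds the noise/sinusoid components. The fused
 * madd.s/nmsub.s forms used here and in the inverse filter below are
 * pre-release-6 instructions, hence the guard above.
 */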
static void sbr_hf_assemble_mips(float Y1[38][64][2],
                                 const float X_high[64][40][2],
                                 SpectralBandReplication *sbr, SBRData *ch_data,
                                 const int e_a[2])
{
    int e, i, j, m;
    const int h_SL = 4 * !sbr->bs_smoothing_mode;
    const int kx = sbr->kx[1];
    const int m_max = sbr->m[1];
    static const float h_smooth[5] = {
        0.33333333333333,
        0.30150283239582,
        0.21816949906249,
        0.11516383427084,
        0.03183050093751,
    };

    float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
    int indexnoise = ch_data->f_indexnoise;
    int indexsine  = ch_data->f_indexsine;
    float *g_temp1, *q_temp1, *pok, *pok1;
    uint32_t temp1, temp2, temp3, temp4;
    int size = m_max;

    if (sbr->reset) {
        for (i = 0; i < h_SL; i++) {
            memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
            memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0],  m_max * sizeof(sbr->q_m[0][0]));
        }
    } else if (h_SL) {
        memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
        memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
    }

    for (e = 0; e < ch_data->bs_num_env; e++) {
        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
            g_temp1 = g_temp[h_SL + i];
            pok     = sbr->gain[e];
            q_temp1 = q_temp[h_SL + i];
            pok1    = sbr->q_m[e];

            /* loop unrolled 4 times */
            for (j = 0; j < (size >> 2); j++) {
                __asm__ volatile (
                    "lw       %[temp1],    0(%[pok])            \n\t"
                    "lw       %[temp2],    4(%[pok])            \n\t"
                    "lw       %[temp3],    8(%[pok])            \n\t"
                    "lw       %[temp4],    12(%[pok])           \n\t"
                    "sw       %[temp1],    0(%[g_temp1])        \n\t"
                    "sw       %[temp2],    4(%[g_temp1])        \n\t"
                    "sw       %[temp3],    8(%[g_temp1])        \n\t"
                    "sw       %[temp4],    12(%[g_temp1])       \n\t"
                    "lw       %[temp1],    0(%[pok1])           \n\t"
                    "lw       %[temp2],    4(%[pok1])           \n\t"
                    "lw       %[temp3],    8(%[pok1])           \n\t"
                    "lw       %[temp4],    12(%[pok1])          \n\t"
                    "sw       %[temp1],    0(%[q_temp1])        \n\t"
                    "sw       %[temp2],    4(%[q_temp1])        \n\t"
                    "sw       %[temp3],    8(%[q_temp1])        \n\t"
                    "sw       %[temp4],    12(%[q_temp1])       \n\t"
                    PTR_ADDIU "%[pok],     %[pok],       16     \n\t"
                    PTR_ADDIU "%[g_temp1], %[g_temp1],   16     \n\t"
                    PTR_ADDIU "%[pok1],    %[pok1],      16     \n\t"
                    PTR_ADDIU "%[q_temp1], %[q_temp1],   16     \n\t"

                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
                    :
                    : "memory"
                );
            }

            for (j = 0; j < (size & 3); j++) {
                __asm__ volatile (
                    "lw       %[temp1],    0(%[pok])            \n\t"
                    "lw       %[temp2],    0(%[pok1])           \n\t"
                    "sw       %[temp1],    0(%[g_temp1])        \n\t"
                    "sw       %[temp2],    0(%[q_temp1])        \n\t"
                    PTR_ADDIU "%[pok],     %[pok],       4      \n\t"
                    PTR_ADDIU "%[g_temp1], %[g_temp1],   4      \n\t"
                    PTR_ADDIU "%[pok1],    %[pok1],      4      \n\t"
                    PTR_ADDIU "%[q_temp1], %[q_temp1],   4      \n\t"

                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
                    :
                    : "memory"
                );
            }
        }
    }

    for (e = 0; e < ch_data->bs_num_env; e++) {
        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
            LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
            LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
            float *g_filt, *q_filt;

            if (h_SL && e != e_a[0] && e != e_a[1]) {
                g_filt = g_filt_tab;
                q_filt = q_filt_tab;
                for (m = 0; m < m_max; m++) {
                    const int idx1 = i + h_SL;
                    g_filt[m] = 0.0f;
                    q_filt[m] = 0.0f;
                    for (j = 0; j <= h_SL; j++) {
                        g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
                        q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
                    }
                }
            } else {
                g_filt = g_temp[i + h_SL];
                q_filt = q_temp[i];
            }

            sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
                               i + ENVELOPE_ADJUSTMENT_OFFSET);

            if (e != e_a[0] && e != e_a[1]) {
                sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
                                                   q_filt, indexnoise,
                                                   kx, m_max);
            } else {
                int idx = indexsine & 1;
                int A = (1 - ((indexsine + (kx & 1)) & 2));
                int B = (A ^ (-idx)) + idx;
                float *out = &Y1[i][kx][idx];
                float *in  = sbr->s_m[e];
                float temp0, temp1, temp2, temp3, temp4, temp5;
                float A_f = (float)A;
                float B_f = (float)B;

                for (m = 0; m + 1 < m_max; m += 2) {
                    temp2 = out[0];
                    temp3 = out[2];
                    __asm__ volatile(
                        "lwc1     %[temp0],  0(%[in])                           \n\t"
                        "lwc1     %[temp1],  4(%[in])                           \n\t"
                        "madd.s   %[temp4],  %[temp2],  %[temp0],   %[A_f]      \n\t"
                        "madd.s   %[temp5],  %[temp3],  %[temp1],   %[B_f]      \n\t"
                        "swc1     %[temp4],  0(%[out])                          \n\t"
                        "swc1     %[temp5],  8(%[out])                          \n\t"
                        PTR_ADDIU "%[in],    %[in],     8                       \n\t"
                        PTR_ADDIU "%[out],   %[out],    16                      \n\t"

                        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
                          [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
                          [in]"+r"(in), [out]"+r"(out)
                        : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
                          [temp3]"f"(temp3)
                        : "memory"
                    );
                }
                if (m_max & 1)
                    out[2*m] += in[m] * A;
            }
            indexnoise = (indexnoise + m_max) & 0x1ff;
            indexsine  = (indexsine + 1) & 3;
        }
    }
    ch_data->f_indexnoise = indexnoise;
    ch_data->f_indexsine  = indexsine;
}
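
/*
 * MIPS-optimized counterpart of sbr_hf_inverse_filter() (see
 * libavcodec/aacsbr.c): derives the LPC prediction coefficients
 * alpha0/alpha1 for each of the k0 low-band channels from the
 * autocorrelation phi, zeroing them when the determinant dk is zero or
 * their squared magnitude reaches 16.
 */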
static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
                                       float (*alpha0)[2], float (*alpha1)[2],
                                       const float X_low[32][40][2], int k0)
{
    int k;
    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
    float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;

    c = 1.000001f;

    for (k = 0; k < k0; k++) {
        LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
        float dk;
        phi1    = &phi[0][0][0];
        alpha_1 = &alpha1[k][0];
        alpha_0 = &alpha0[k][0];
        dsp->autocorrelate(X_low[k], phi);

        __asm__ volatile (
            "lwc1    %[temp0],  40(%[phi1])                        \n\t"
            "lwc1    %[temp1],  16(%[phi1])                        \n\t"
            "lwc1    %[temp2],  24(%[phi1])                        \n\t"
            "lwc1    %[temp3],  28(%[phi1])                        \n\t"
            "mul.s   %[dk],     %[temp0],   %[temp1]               \n\t"
            "lwc1    %[temp4],  0(%[phi1])                         \n\t"
            "mul.s   %[res2],   %[temp2],   %[temp2]               \n\t"
            "lwc1    %[temp5],  4(%[phi1])                         \n\t"
            "madd.s  %[res2],   %[res2],    %[temp3],   %[temp3]   \n\t"
            "lwc1    %[temp6],  8(%[phi1])                         \n\t"
            "div.s   %[res2],   %[res2],    %[c]                   \n\t"
            "lwc1    %[temp0],  12(%[phi1])                        \n\t"
            "sub.s   %[dk],     %[dk],      %[res2]                \n\t"

            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
              [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
            : [phi1]"r"(phi1), [c]"f"(c)
            : "memory"
        );

        if (!dk) {
            alpha_1[0] = 0;
            alpha_1[1] = 0;
        } else {
            __asm__ volatile (
                "mul.s    %[temp_real],  %[temp4],      %[temp2]              \n\t"
                "nmsub.s  %[temp_real],  %[temp_real],  %[temp5],  %[temp3]   \n\t"
                "nmsub.s  %[temp_real],  %[temp_real],  %[temp6],  %[temp1]   \n\t"
                "mul.s    %[temp_im],    %[temp4],      %[temp3]              \n\t"
                "madd.s   %[temp_im],    %[temp_im],    %[temp5],  %[temp2]   \n\t"
                "nmsub.s  %[temp_im],    %[temp_im],    %[temp0],  %[temp1]   \n\t"
                "div.s    %[temp_real],  %[temp_real],  %[dk]                 \n\t"
                "div.s    %[temp_im],    %[temp_im],    %[dk]                 \n\t"
                "swc1     %[temp_real],  0(%[alpha_1])                        \n\t"
                "swc1     %[temp_im],    4(%[alpha_1])                        \n\t"

                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im)
                : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
                  [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
                  [temp5]"f"(temp5), [temp6]"f"(temp6),
                  [alpha_1]"r"(alpha_1), [dk]"f"(dk)
                : "memory"
            );
        }

        if (!phi1[4]) {
            alpha_0[0] = 0;
            alpha_0[1] = 0;
        } else {
            __asm__ volatile (
                "lwc1     %[temp6],      0(%[alpha_1])                        \n\t"
                "lwc1     %[temp7],      4(%[alpha_1])                        \n\t"
                "mul.s    %[temp_real],  %[temp6],      %[temp2]              \n\t"
                "add.s    %[temp_real],  %[temp_real],  %[temp4]              \n\t"
                "madd.s   %[temp_real],  %[temp_real],  %[temp7],  %[temp3]   \n\t"
                "mul.s    %[temp_im],    %[temp7],      %[temp2]              \n\t"
                "add.s    %[temp_im],    %[temp_im],    %[temp5]              \n\t"
                "nmsub.s  %[temp_im],    %[temp_im],    %[temp6],  %[temp3]   \n\t"
                "div.s    %[temp_real],  %[temp_real],  %[temp1]              \n\t"
                "div.s    %[temp_im],    %[temp_im],    %[temp1]              \n\t"
                "neg.s    %[temp_real],  %[temp_real]                         \n\t"
                "neg.s    %[temp_im],    %[temp_im]                           \n\t"
                "swc1     %[temp_real],  0(%[alpha_0])                        \n\t"
                "swc1     %[temp_im],    4(%[alpha_0])                        \n\t"

                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
                  [res1]"=&f"(res1), [res2]"=&f"(res2)
                : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
                  [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
                  [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
                : "memory"
            );
        }

        __asm__ volatile (
            "lwc1     %[temp1],      0(%[alpha_1])                            \n\t"
            "lwc1     %[temp2],      4(%[alpha_1])                            \n\t"
            "lwc1     %[temp_real],  0(%[alpha_0])                            \n\t"
            "lwc1     %[temp_im],    4(%[alpha_0])                            \n\t"
            "mul.s    %[res1],       %[temp1],      %[temp1]                  \n\t"
            "madd.s   %[res1],       %[res1],       %[temp2],    %[temp2]     \n\t"
            "mul.s    %[res2],       %[temp_real],  %[temp_real]              \n\t"
            "madd.s   %[res2],       %[res2],       %[temp_im],  %[temp_im]   \n\t"

            : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
              [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
              [res1]"=&f"(res1), [res2]"=&f"(res2)
            : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
            : "memory"
        );

        if (res1 >= 16.0f || res2 >= 16.0f) {
            alpha_1[0] = 0;
            alpha_1[1] = 0;
            alpha_0[0] = 0;
            alpha_0[1] = 0;
        }
    }
}
#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM */
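
/*
 * Installs the MIPS-optimized SBR routines into the AACSBRContext: the
 * low-band and full-band generators whenever inline assembly and an FPU
 * are available, plus the HF inverse filter and assembly routines on
 * pre-release-6 cores only.
 */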
void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
{
#if HAVE_INLINE_ASM
#if HAVE_MIPSFPU
    c->sbr_lf_gen            = sbr_lf_gen_mips;
    c->sbr_x_gen             = sbr_x_gen_mips;
#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
    c->sbr_hf_assemble       = sbr_hf_assemble_mips;
#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM */
}