You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2527 lines
104KB

  1. /*
  2. * Copyright (c) 2012
  3. * MIPS Technologies, Inc., California.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
  14. * contributors may be used to endorse or promote products derived from
  15. * this software without specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. *
  29. * Author: Stanislav Ocovaj (socovaj@mips.com)
  30. * Szabolcs Pal (sabolc@mips.com)
  31. *
  32. * AAC coefficients encoder optimized for MIPS floating-point architecture
  33. *
  34. * This file is part of FFmpeg.
  35. *
  36. * FFmpeg is free software; you can redistribute it and/or
  37. * modify it under the terms of the GNU Lesser General Public
  38. * License as published by the Free Software Foundation; either
  39. * version 2.1 of the License, or (at your option) any later version.
  40. *
  41. * FFmpeg is distributed in the hope that it will be useful,
  42. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  43. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  44. * Lesser General Public License for more details.
  45. *
  46. * You should have received a copy of the GNU Lesser General Public
  47. * License along with FFmpeg; if not, write to the Free Software
  48. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  49. */
  50. /**
  51. * @file
  52. * Reference: libavcodec/aaccoder.c
  53. */
  54. #include "libavutil/libm.h"
  55. #include <float.h>
  56. #include "libavutil/mathematics.h"
  57. #include "libavcodec/avcodec.h"
  58. #include "libavcodec/put_bits.h"
  59. #include "libavcodec/aac.h"
  60. #include "libavcodec/aacenc.h"
  61. #include "libavcodec/aactab.h"
  62. #if HAVE_INLINE_ASM
  63. typedef struct BandCodingPath {
  64. int prev_idx;
  65. float cost;
  66. int run;
  67. } BandCodingPath;
  68. static const uint8_t run_value_bits_long[64] = {
  69. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  70. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
  71. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
  72. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
  73. };
  74. static const uint8_t run_value_bits_short[16] = {
  75. 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
  76. };
  77. static const uint8_t * const run_value_bits[2] = {
  78. run_value_bits_long, run_value_bits_short
  79. };
  80. static const uint8_t uquad_sign_bits[81] = {
  81. 0, 1, 1, 1, 2, 2, 1, 2, 2,
  82. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  83. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  84. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  85. 2, 3, 3, 3, 4, 4, 3, 4, 4,
  86. 2, 3, 3, 3, 4, 4, 3, 4, 4,
  87. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  88. 2, 3, 3, 3, 4, 4, 3, 4, 4,
  89. 2, 3, 3, 3, 4, 4, 3, 4, 4
  90. };
  91. static const uint8_t upair7_sign_bits[64] = {
  92. 0, 1, 1, 1, 1, 1, 1, 1,
  93. 1, 2, 2, 2, 2, 2, 2, 2,
  94. 1, 2, 2, 2, 2, 2, 2, 2,
  95. 1, 2, 2, 2, 2, 2, 2, 2,
  96. 1, 2, 2, 2, 2, 2, 2, 2,
  97. 1, 2, 2, 2, 2, 2, 2, 2,
  98. 1, 2, 2, 2, 2, 2, 2, 2,
  99. 1, 2, 2, 2, 2, 2, 2, 2,
  100. };
  101. static const uint8_t upair12_sign_bits[169] = {
  102. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  103. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  104. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  105. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  106. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  107. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  108. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  109. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  110. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  111. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  112. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  113. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  114. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
  115. };
  116. static const uint8_t esc_sign_bits[289] = {
  117. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  118. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  119. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  120. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  121. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  122. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  123. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  124. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  125. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  126. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  127. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  128. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  129. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  130. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  131. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  132. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  133. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
  134. };
  135. #define ROUND_STANDARD 0.4054f
  136. #define ROUND_TO_ZERO 0.1054f
  137. static void abs_pow34_v(float *out, const float *in, const int size) {
  138. #ifndef USE_REALLY_FULL_SEARCH
  139. int i;
  140. float a, b, c, d;
  141. float ax, bx, cx, dx;
  142. for (i = 0; i < size; i += 4) {
  143. a = fabsf(in[i ]);
  144. b = fabsf(in[i+1]);
  145. c = fabsf(in[i+2]);
  146. d = fabsf(in[i+3]);
  147. ax = sqrtf(a);
  148. bx = sqrtf(b);
  149. cx = sqrtf(c);
  150. dx = sqrtf(d);
  151. a = a * ax;
  152. b = b * bx;
  153. c = c * cx;
  154. d = d * dx;
  155. out[i ] = sqrtf(a);
  156. out[i+1] = sqrtf(b);
  157. out[i+2] = sqrtf(c);
  158. out[i+3] = sqrtf(d);
  159. }
  160. #endif /* USE_REALLY_FULL_SEARCH */
  161. }
  162. static float find_max_val(int group_len, int swb_size, const float *scaled) {
  163. float maxval = 0.0f;
  164. int w2, i;
  165. for (w2 = 0; w2 < group_len; w2++) {
  166. for (i = 0; i < swb_size; i++) {
  167. maxval = FFMAX(maxval, scaled[w2*128+i]);
  168. }
  169. }
  170. return maxval;
  171. }
  172. static int find_min_book(float maxval, int sf) {
  173. float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
  174. float Q34 = sqrtf(Q * sqrtf(Q));
  175. int qmaxval, cb;
  176. qmaxval = maxval * Q34 + 0.4054f;
  177. if (qmaxval == 0) cb = 0;
  178. else if (qmaxval == 1) cb = 1;
  179. else if (qmaxval == 2) cb = 3;
  180. else if (qmaxval <= 4) cb = 5;
  181. else if (qmaxval <= 7) cb = 7;
  182. else if (qmaxval <= 12) cb = 9;
  183. else cb = 11;
  184. return cb;
  185. }
  186. /**
  187. * Functions developed from template function and optimized for quantizing and encoding band
  188. */
  189. static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
  190. PutBitContext *pb, const float *in, float *out,
  191. const float *scaled, int size, int scale_idx,
  192. int cb, const float lambda, const float uplim,
  193. int *bits, const float ROUNDING)
  194. {
  195. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  196. int i;
  197. int qc1, qc2, qc3, qc4;
  198. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  199. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  200. abs_pow34_v(s->scoefs, in, size);
  201. scaled = s->scoefs;
  202. for (i = 0; i < size; i += 4) {
  203. int curidx;
  204. int *in_int = (int *)&in[i];
  205. int t0, t1, t2, t3, t4, t5, t6, t7;
  206. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  207. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  208. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  209. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  210. __asm__ volatile (
  211. ".set push \n\t"
  212. ".set noreorder \n\t"
  213. "slt %[qc1], $zero, %[qc1] \n\t"
  214. "slt %[qc2], $zero, %[qc2] \n\t"
  215. "slt %[qc3], $zero, %[qc3] \n\t"
  216. "slt %[qc4], $zero, %[qc4] \n\t"
  217. "lw %[t0], 0(%[in_int]) \n\t"
  218. "lw %[t1], 4(%[in_int]) \n\t"
  219. "lw %[t2], 8(%[in_int]) \n\t"
  220. "lw %[t3], 12(%[in_int]) \n\t"
  221. "srl %[t0], %[t0], 31 \n\t"
  222. "srl %[t1], %[t1], 31 \n\t"
  223. "srl %[t2], %[t2], 31 \n\t"
  224. "srl %[t3], %[t3], 31 \n\t"
  225. "subu %[t4], $zero, %[qc1] \n\t"
  226. "subu %[t5], $zero, %[qc2] \n\t"
  227. "subu %[t6], $zero, %[qc3] \n\t"
  228. "subu %[t7], $zero, %[qc4] \n\t"
  229. "movn %[qc1], %[t4], %[t0] \n\t"
  230. "movn %[qc2], %[t5], %[t1] \n\t"
  231. "movn %[qc3], %[t6], %[t2] \n\t"
  232. "movn %[qc4], %[t7], %[t3] \n\t"
  233. ".set pop \n\t"
  234. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  235. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  236. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  237. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  238. : [in_int]"r"(in_int)
  239. : "memory"
  240. );
  241. curidx = qc1;
  242. curidx *= 3;
  243. curidx += qc2;
  244. curidx *= 3;
  245. curidx += qc3;
  246. curidx *= 3;
  247. curidx += qc4;
  248. curidx += 40;
  249. put_bits(pb, p_bits[curidx], p_codes[curidx]);
  250. }
  251. }
  252. static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
  253. PutBitContext *pb, const float *in, float *out,
  254. const float *scaled, int size, int scale_idx,
  255. int cb, const float lambda, const float uplim,
  256. int *bits, const float ROUNDING)
  257. {
  258. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  259. int i;
  260. int qc1, qc2, qc3, qc4;
  261. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  262. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  263. abs_pow34_v(s->scoefs, in, size);
  264. scaled = s->scoefs;
  265. for (i = 0; i < size; i += 4) {
  266. int curidx, sign, count;
  267. int *in_int = (int *)&in[i];
  268. uint8_t v_bits;
  269. unsigned int v_codes;
  270. int t0, t1, t2, t3, t4;
  271. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  272. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  273. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  274. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  275. __asm__ volatile (
  276. ".set push \n\t"
  277. ".set noreorder \n\t"
  278. "ori %[t4], $zero, 2 \n\t"
  279. "ori %[sign], $zero, 0 \n\t"
  280. "slt %[t0], %[t4], %[qc1] \n\t"
  281. "slt %[t1], %[t4], %[qc2] \n\t"
  282. "slt %[t2], %[t4], %[qc3] \n\t"
  283. "slt %[t3], %[t4], %[qc4] \n\t"
  284. "movn %[qc1], %[t4], %[t0] \n\t"
  285. "movn %[qc2], %[t4], %[t1] \n\t"
  286. "movn %[qc3], %[t4], %[t2] \n\t"
  287. "movn %[qc4], %[t4], %[t3] \n\t"
  288. "lw %[t0], 0(%[in_int]) \n\t"
  289. "lw %[t1], 4(%[in_int]) \n\t"
  290. "lw %[t2], 8(%[in_int]) \n\t"
  291. "lw %[t3], 12(%[in_int]) \n\t"
  292. "slt %[t0], %[t0], $zero \n\t"
  293. "movn %[sign], %[t0], %[qc1] \n\t"
  294. "slt %[t1], %[t1], $zero \n\t"
  295. "slt %[t2], %[t2], $zero \n\t"
  296. "slt %[t3], %[t3], $zero \n\t"
  297. "sll %[t0], %[sign], 1 \n\t"
  298. "or %[t0], %[t0], %[t1] \n\t"
  299. "movn %[sign], %[t0], %[qc2] \n\t"
  300. "slt %[t4], $zero, %[qc1] \n\t"
  301. "slt %[t1], $zero, %[qc2] \n\t"
  302. "slt %[count], $zero, %[qc3] \n\t"
  303. "sll %[t0], %[sign], 1 \n\t"
  304. "or %[t0], %[t0], %[t2] \n\t"
  305. "movn %[sign], %[t0], %[qc3] \n\t"
  306. "slt %[t2], $zero, %[qc4] \n\t"
  307. "addu %[count], %[count], %[t4] \n\t"
  308. "addu %[count], %[count], %[t1] \n\t"
  309. "sll %[t0], %[sign], 1 \n\t"
  310. "or %[t0], %[t0], %[t3] \n\t"
  311. "movn %[sign], %[t0], %[qc4] \n\t"
  312. "addu %[count], %[count], %[t2] \n\t"
  313. ".set pop \n\t"
  314. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  315. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  316. [sign]"=&r"(sign), [count]"=&r"(count),
  317. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  318. [t4]"=&r"(t4)
  319. : [in_int]"r"(in_int)
  320. : "memory"
  321. );
  322. curidx = qc1;
  323. curidx *= 3;
  324. curidx += qc2;
  325. curidx *= 3;
  326. curidx += qc3;
  327. curidx *= 3;
  328. curidx += qc4;
  329. v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
  330. v_bits = p_bits[curidx] + count;
  331. put_bits(pb, v_bits, v_codes);
  332. }
  333. }
  334. static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
  335. PutBitContext *pb, const float *in, float *out,
  336. const float *scaled, int size, int scale_idx,
  337. int cb, const float lambda, const float uplim,
  338. int *bits, const float ROUNDING)
  339. {
  340. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  341. int i;
  342. int qc1, qc2, qc3, qc4;
  343. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  344. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  345. abs_pow34_v(s->scoefs, in, size);
  346. scaled = s->scoefs;
  347. for (i = 0; i < size; i += 4) {
  348. int curidx, curidx2;
  349. int *in_int = (int *)&in[i];
  350. uint8_t v_bits;
  351. unsigned int v_codes;
  352. int t0, t1, t2, t3, t4, t5, t6, t7;
  353. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  354. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  355. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  356. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  357. __asm__ volatile (
  358. ".set push \n\t"
  359. ".set noreorder \n\t"
  360. "ori %[t4], $zero, 4 \n\t"
  361. "slt %[t0], %[t4], %[qc1] \n\t"
  362. "slt %[t1], %[t4], %[qc2] \n\t"
  363. "slt %[t2], %[t4], %[qc3] \n\t"
  364. "slt %[t3], %[t4], %[qc4] \n\t"
  365. "movn %[qc1], %[t4], %[t0] \n\t"
  366. "movn %[qc2], %[t4], %[t1] \n\t"
  367. "movn %[qc3], %[t4], %[t2] \n\t"
  368. "movn %[qc4], %[t4], %[t3] \n\t"
  369. "lw %[t0], 0(%[in_int]) \n\t"
  370. "lw %[t1], 4(%[in_int]) \n\t"
  371. "lw %[t2], 8(%[in_int]) \n\t"
  372. "lw %[t3], 12(%[in_int]) \n\t"
  373. "srl %[t0], %[t0], 31 \n\t"
  374. "srl %[t1], %[t1], 31 \n\t"
  375. "srl %[t2], %[t2], 31 \n\t"
  376. "srl %[t3], %[t3], 31 \n\t"
  377. "subu %[t4], $zero, %[qc1] \n\t"
  378. "subu %[t5], $zero, %[qc2] \n\t"
  379. "subu %[t6], $zero, %[qc3] \n\t"
  380. "subu %[t7], $zero, %[qc4] \n\t"
  381. "movn %[qc1], %[t4], %[t0] \n\t"
  382. "movn %[qc2], %[t5], %[t1] \n\t"
  383. "movn %[qc3], %[t6], %[t2] \n\t"
  384. "movn %[qc4], %[t7], %[t3] \n\t"
  385. ".set pop \n\t"
  386. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  387. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  388. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  389. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  390. : [in_int]"r"(in_int)
  391. : "memory"
  392. );
  393. curidx = 9 * qc1;
  394. curidx += qc2 + 40;
  395. curidx2 = 9 * qc3;
  396. curidx2 += qc4 + 40;
  397. v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
  398. v_bits = p_bits[curidx] + p_bits[curidx2];
  399. put_bits(pb, v_bits, v_codes);
  400. }
  401. }
  402. static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
  403. PutBitContext *pb, const float *in, float *out,
  404. const float *scaled, int size, int scale_idx,
  405. int cb, const float lambda, const float uplim,
  406. int *bits, const float ROUNDING)
  407. {
  408. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  409. int i;
  410. int qc1, qc2, qc3, qc4;
  411. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  412. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  413. abs_pow34_v(s->scoefs, in, size);
  414. scaled = s->scoefs;
  415. for (i = 0; i < size; i += 4) {
  416. int curidx, sign1, count1, sign2, count2;
  417. int *in_int = (int *)&in[i];
  418. uint8_t v_bits;
  419. unsigned int v_codes;
  420. int t0, t1, t2, t3, t4;
  421. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  422. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  423. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  424. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  425. __asm__ volatile (
  426. ".set push \n\t"
  427. ".set noreorder \n\t"
  428. "ori %[t4], $zero, 7 \n\t"
  429. "ori %[sign1], $zero, 0 \n\t"
  430. "ori %[sign2], $zero, 0 \n\t"
  431. "slt %[t0], %[t4], %[qc1] \n\t"
  432. "slt %[t1], %[t4], %[qc2] \n\t"
  433. "slt %[t2], %[t4], %[qc3] \n\t"
  434. "slt %[t3], %[t4], %[qc4] \n\t"
  435. "movn %[qc1], %[t4], %[t0] \n\t"
  436. "movn %[qc2], %[t4], %[t1] \n\t"
  437. "movn %[qc3], %[t4], %[t2] \n\t"
  438. "movn %[qc4], %[t4], %[t3] \n\t"
  439. "lw %[t0], 0(%[in_int]) \n\t"
  440. "lw %[t1], 4(%[in_int]) \n\t"
  441. "lw %[t2], 8(%[in_int]) \n\t"
  442. "lw %[t3], 12(%[in_int]) \n\t"
  443. "slt %[t0], %[t0], $zero \n\t"
  444. "movn %[sign1], %[t0], %[qc1] \n\t"
  445. "slt %[t2], %[t2], $zero \n\t"
  446. "movn %[sign2], %[t2], %[qc3] \n\t"
  447. "slt %[t1], %[t1], $zero \n\t"
  448. "sll %[t0], %[sign1], 1 \n\t"
  449. "or %[t0], %[t0], %[t1] \n\t"
  450. "movn %[sign1], %[t0], %[qc2] \n\t"
  451. "slt %[t3], %[t3], $zero \n\t"
  452. "sll %[t0], %[sign2], 1 \n\t"
  453. "or %[t0], %[t0], %[t3] \n\t"
  454. "movn %[sign2], %[t0], %[qc4] \n\t"
  455. "slt %[count1], $zero, %[qc1] \n\t"
  456. "slt %[t1], $zero, %[qc2] \n\t"
  457. "slt %[count2], $zero, %[qc3] \n\t"
  458. "slt %[t2], $zero, %[qc4] \n\t"
  459. "addu %[count1], %[count1], %[t1] \n\t"
  460. "addu %[count2], %[count2], %[t2] \n\t"
  461. ".set pop \n\t"
  462. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  463. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  464. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  465. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  466. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  467. [t4]"=&r"(t4)
  468. : [in_int]"r"(in_int)
  469. : "t0", "t1", "t2", "t3", "t4",
  470. "memory"
  471. );
  472. curidx = 8 * qc1;
  473. curidx += qc2;
  474. v_codes = (p_codes[curidx] << count1) | sign1;
  475. v_bits = p_bits[curidx] + count1;
  476. put_bits(pb, v_bits, v_codes);
  477. curidx = 8 * qc3;
  478. curidx += qc4;
  479. v_codes = (p_codes[curidx] << count2) | sign2;
  480. v_bits = p_bits[curidx] + count2;
  481. put_bits(pb, v_bits, v_codes);
  482. }
  483. }
  484. static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
  485. PutBitContext *pb, const float *in, float *out,
  486. const float *scaled, int size, int scale_idx,
  487. int cb, const float lambda, const float uplim,
  488. int *bits, const float ROUNDING)
  489. {
  490. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  491. int i;
  492. int qc1, qc2, qc3, qc4;
  493. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  494. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  495. abs_pow34_v(s->scoefs, in, size);
  496. scaled = s->scoefs;
  497. for (i = 0; i < size; i += 4) {
  498. int curidx, sign1, count1, sign2, count2;
  499. int *in_int = (int *)&in[i];
  500. uint8_t v_bits;
  501. unsigned int v_codes;
  502. int t0, t1, t2, t3, t4;
  503. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  504. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  505. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  506. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  507. __asm__ volatile (
  508. ".set push \n\t"
  509. ".set noreorder \n\t"
  510. "ori %[t4], $zero, 12 \n\t"
  511. "ori %[sign1], $zero, 0 \n\t"
  512. "ori %[sign2], $zero, 0 \n\t"
  513. "slt %[t0], %[t4], %[qc1] \n\t"
  514. "slt %[t1], %[t4], %[qc2] \n\t"
  515. "slt %[t2], %[t4], %[qc3] \n\t"
  516. "slt %[t3], %[t4], %[qc4] \n\t"
  517. "movn %[qc1], %[t4], %[t0] \n\t"
  518. "movn %[qc2], %[t4], %[t1] \n\t"
  519. "movn %[qc3], %[t4], %[t2] \n\t"
  520. "movn %[qc4], %[t4], %[t3] \n\t"
  521. "lw %[t0], 0(%[in_int]) \n\t"
  522. "lw %[t1], 4(%[in_int]) \n\t"
  523. "lw %[t2], 8(%[in_int]) \n\t"
  524. "lw %[t3], 12(%[in_int]) \n\t"
  525. "slt %[t0], %[t0], $zero \n\t"
  526. "movn %[sign1], %[t0], %[qc1] \n\t"
  527. "slt %[t2], %[t2], $zero \n\t"
  528. "movn %[sign2], %[t2], %[qc3] \n\t"
  529. "slt %[t1], %[t1], $zero \n\t"
  530. "sll %[t0], %[sign1], 1 \n\t"
  531. "or %[t0], %[t0], %[t1] \n\t"
  532. "movn %[sign1], %[t0], %[qc2] \n\t"
  533. "slt %[t3], %[t3], $zero \n\t"
  534. "sll %[t0], %[sign2], 1 \n\t"
  535. "or %[t0], %[t0], %[t3] \n\t"
  536. "movn %[sign2], %[t0], %[qc4] \n\t"
  537. "slt %[count1], $zero, %[qc1] \n\t"
  538. "slt %[t1], $zero, %[qc2] \n\t"
  539. "slt %[count2], $zero, %[qc3] \n\t"
  540. "slt %[t2], $zero, %[qc4] \n\t"
  541. "addu %[count1], %[count1], %[t1] \n\t"
  542. "addu %[count2], %[count2], %[t2] \n\t"
  543. ".set pop \n\t"
  544. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  545. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  546. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  547. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  548. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  549. [t4]"=&r"(t4)
  550. : [in_int]"r"(in_int)
  551. : "memory"
  552. );
  553. curidx = 13 * qc1;
  554. curidx += qc2;
  555. v_codes = (p_codes[curidx] << count1) | sign1;
  556. v_bits = p_bits[curidx] + count1;
  557. put_bits(pb, v_bits, v_codes);
  558. curidx = 13 * qc3;
  559. curidx += qc4;
  560. v_codes = (p_codes[curidx] << count2) | sign2;
  561. v_bits = p_bits[curidx] + count2;
  562. put_bits(pb, v_bits, v_codes);
  563. }
  564. }
  565. static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
  566. PutBitContext *pb, const float *in, float *out,
  567. const float *scaled, int size, int scale_idx,
  568. int cb, const float lambda, const float uplim,
  569. int *bits, const float ROUNDING)
  570. {
  571. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  572. int i;
  573. int qc1, qc2, qc3, qc4;
  574. uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
  575. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  576. float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
  577. abs_pow34_v(s->scoefs, in, size);
  578. scaled = s->scoefs;
  579. if (cb < 11) {
  580. for (i = 0; i < size; i += 4) {
  581. int curidx, curidx2, sign1, count1, sign2, count2;
  582. int *in_int = (int *)&in[i];
  583. uint8_t v_bits;
  584. unsigned int v_codes;
  585. int t0, t1, t2, t3, t4;
  586. qc1 = scaled[i ] * Q34 + ROUNDING;
  587. qc2 = scaled[i+1] * Q34 + ROUNDING;
  588. qc3 = scaled[i+2] * Q34 + ROUNDING;
  589. qc4 = scaled[i+3] * Q34 + ROUNDING;
  590. __asm__ volatile (
  591. ".set push \n\t"
  592. ".set noreorder \n\t"
  593. "ori %[t4], $zero, 16 \n\t"
  594. "ori %[sign1], $zero, 0 \n\t"
  595. "ori %[sign2], $zero, 0 \n\t"
  596. "slt %[t0], %[t4], %[qc1] \n\t"
  597. "slt %[t1], %[t4], %[qc2] \n\t"
  598. "slt %[t2], %[t4], %[qc3] \n\t"
  599. "slt %[t3], %[t4], %[qc4] \n\t"
  600. "movn %[qc1], %[t4], %[t0] \n\t"
  601. "movn %[qc2], %[t4], %[t1] \n\t"
  602. "movn %[qc3], %[t4], %[t2] \n\t"
  603. "movn %[qc4], %[t4], %[t3] \n\t"
  604. "lw %[t0], 0(%[in_int]) \n\t"
  605. "lw %[t1], 4(%[in_int]) \n\t"
  606. "lw %[t2], 8(%[in_int]) \n\t"
  607. "lw %[t3], 12(%[in_int]) \n\t"
  608. "slt %[t0], %[t0], $zero \n\t"
  609. "movn %[sign1], %[t0], %[qc1] \n\t"
  610. "slt %[t2], %[t2], $zero \n\t"
  611. "movn %[sign2], %[t2], %[qc3] \n\t"
  612. "slt %[t1], %[t1], $zero \n\t"
  613. "sll %[t0], %[sign1], 1 \n\t"
  614. "or %[t0], %[t0], %[t1] \n\t"
  615. "movn %[sign1], %[t0], %[qc2] \n\t"
  616. "slt %[t3], %[t3], $zero \n\t"
  617. "sll %[t0], %[sign2], 1 \n\t"
  618. "or %[t0], %[t0], %[t3] \n\t"
  619. "movn %[sign2], %[t0], %[qc4] \n\t"
  620. "slt %[count1], $zero, %[qc1] \n\t"
  621. "slt %[t1], $zero, %[qc2] \n\t"
  622. "slt %[count2], $zero, %[qc3] \n\t"
  623. "slt %[t2], $zero, %[qc4] \n\t"
  624. "addu %[count1], %[count1], %[t1] \n\t"
  625. "addu %[count2], %[count2], %[t2] \n\t"
  626. ".set pop \n\t"
  627. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  628. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  629. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  630. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  631. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  632. [t4]"=&r"(t4)
  633. : [in_int]"r"(in_int)
  634. : "memory"
  635. );
  636. curidx = 17 * qc1;
  637. curidx += qc2;
  638. curidx2 = 17 * qc3;
  639. curidx2 += qc4;
  640. v_codes = (p_codes[curidx] << count1) | sign1;
  641. v_bits = p_bits[curidx] + count1;
  642. put_bits(pb, v_bits, v_codes);
  643. v_codes = (p_codes[curidx2] << count2) | sign2;
  644. v_bits = p_bits[curidx2] + count2;
  645. put_bits(pb, v_bits, v_codes);
  646. }
  647. } else {
  648. for (i = 0; i < size; i += 4) {
  649. int curidx, curidx2, sign1, count1, sign2, count2;
  650. int *in_int = (int *)&in[i];
  651. uint8_t v_bits;
  652. unsigned int v_codes;
  653. int c1, c2, c3, c4;
  654. int t0, t1, t2, t3, t4;
  655. qc1 = scaled[i ] * Q34 + ROUNDING;
  656. qc2 = scaled[i+1] * Q34 + ROUNDING;
  657. qc3 = scaled[i+2] * Q34 + ROUNDING;
  658. qc4 = scaled[i+3] * Q34 + ROUNDING;
  659. __asm__ volatile (
  660. ".set push \n\t"
  661. ".set noreorder \n\t"
  662. "ori %[t4], $zero, 16 \n\t"
  663. "ori %[sign1], $zero, 0 \n\t"
  664. "ori %[sign2], $zero, 0 \n\t"
  665. "shll_s.w %[c1], %[qc1], 18 \n\t"
  666. "shll_s.w %[c2], %[qc2], 18 \n\t"
  667. "shll_s.w %[c3], %[qc3], 18 \n\t"
  668. "shll_s.w %[c4], %[qc4], 18 \n\t"
  669. "srl %[c1], %[c1], 18 \n\t"
  670. "srl %[c2], %[c2], 18 \n\t"
  671. "srl %[c3], %[c3], 18 \n\t"
  672. "srl %[c4], %[c4], 18 \n\t"
  673. "slt %[t0], %[t4], %[qc1] \n\t"
  674. "slt %[t1], %[t4], %[qc2] \n\t"
  675. "slt %[t2], %[t4], %[qc3] \n\t"
  676. "slt %[t3], %[t4], %[qc4] \n\t"
  677. "movn %[qc1], %[t4], %[t0] \n\t"
  678. "movn %[qc2], %[t4], %[t1] \n\t"
  679. "movn %[qc3], %[t4], %[t2] \n\t"
  680. "movn %[qc4], %[t4], %[t3] \n\t"
  681. "lw %[t0], 0(%[in_int]) \n\t"
  682. "lw %[t1], 4(%[in_int]) \n\t"
  683. "lw %[t2], 8(%[in_int]) \n\t"
  684. "lw %[t3], 12(%[in_int]) \n\t"
  685. "slt %[t0], %[t0], $zero \n\t"
  686. "movn %[sign1], %[t0], %[qc1] \n\t"
  687. "slt %[t2], %[t2], $zero \n\t"
  688. "movn %[sign2], %[t2], %[qc3] \n\t"
  689. "slt %[t1], %[t1], $zero \n\t"
  690. "sll %[t0], %[sign1], 1 \n\t"
  691. "or %[t0], %[t0], %[t1] \n\t"
  692. "movn %[sign1], %[t0], %[qc2] \n\t"
  693. "slt %[t3], %[t3], $zero \n\t"
  694. "sll %[t0], %[sign2], 1 \n\t"
  695. "or %[t0], %[t0], %[t3] \n\t"
  696. "movn %[sign2], %[t0], %[qc4] \n\t"
  697. "slt %[count1], $zero, %[qc1] \n\t"
  698. "slt %[t1], $zero, %[qc2] \n\t"
  699. "slt %[count2], $zero, %[qc3] \n\t"
  700. "slt %[t2], $zero, %[qc4] \n\t"
  701. "addu %[count1], %[count1], %[t1] \n\t"
  702. "addu %[count2], %[count2], %[t2] \n\t"
  703. ".set pop \n\t"
  704. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  705. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  706. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  707. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  708. [c1]"=&r"(c1), [c2]"=&r"(c2),
  709. [c3]"=&r"(c3), [c4]"=&r"(c4),
  710. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  711. [t4]"=&r"(t4)
  712. : [in_int]"r"(in_int)
  713. : "memory"
  714. );
  715. curidx = 17 * qc1;
  716. curidx += qc2;
  717. curidx2 = 17 * qc3;
  718. curidx2 += qc4;
  719. v_codes = (p_codes[curidx] << count1) | sign1;
  720. v_bits = p_bits[curidx] + count1;
  721. put_bits(pb, v_bits, v_codes);
  722. if (p_vectors[curidx*2 ] == 64.0f) {
  723. int len = av_log2(c1);
  724. v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
  725. put_bits(pb, len * 2 - 3, v_codes);
  726. }
  727. if (p_vectors[curidx*2+1] == 64.0f) {
  728. int len = av_log2(c2);
  729. v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
  730. put_bits(pb, len*2-3, v_codes);
  731. }
  732. v_codes = (p_codes[curidx2] << count2) | sign2;
  733. v_bits = p_bits[curidx2] + count2;
  734. put_bits(pb, v_bits, v_codes);
  735. if (p_vectors[curidx2*2 ] == 64.0f) {
  736. int len = av_log2(c3);
  737. v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
  738. put_bits(pb, len* 2 - 3, v_codes);
  739. }
  740. if (p_vectors[curidx2*2+1] == 64.0f) {
  741. int len = av_log2(c4);
  742. v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
  743. put_bits(pb, len * 2 - 3, v_codes);
  744. }
  745. }
  746. }
  747. }
  748. static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
  749. PutBitContext *pb, const float *in, float *out,
  750. const float *scaled, int size, int scale_idx,
  751. int cb, const float lambda, const float uplim,
  752. int *bits, const float ROUNDING) = {
  753. NULL,
  754. quantize_and_encode_band_cost_SQUAD_mips,
  755. quantize_and_encode_band_cost_SQUAD_mips,
  756. quantize_and_encode_band_cost_UQUAD_mips,
  757. quantize_and_encode_band_cost_UQUAD_mips,
  758. quantize_and_encode_band_cost_SPAIR_mips,
  759. quantize_and_encode_band_cost_SPAIR_mips,
  760. quantize_and_encode_band_cost_UPAIR7_mips,
  761. quantize_and_encode_band_cost_UPAIR7_mips,
  762. quantize_and_encode_band_cost_UPAIR12_mips,
  763. quantize_and_encode_band_cost_UPAIR12_mips,
  764. quantize_and_encode_band_cost_ESC_mips,
  765. };
  766. #define quantize_and_encode_band_cost( \
  767. s, pb, in, out, scaled, size, scale_idx, cb, \
  768. lambda, uplim, bits, ROUNDING) \
  769. quantize_and_encode_band_cost_arr[cb]( \
  770. s, pb, in, out, scaled, size, scale_idx, cb, \
  771. lambda, uplim, bits, ROUNDING)
  772. static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
  773. const float *in, float *out, int size, int scale_idx,
  774. int cb, const float lambda, int rtz)
  775. {
  776. quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
  777. INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
  778. }
  779. /**
  780. * Functions developed from template function and optimized for getting the number of bits
  781. */
  782. static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
  783. PutBitContext *pb, const float *in,
  784. const float *scaled, int size, int scale_idx,
  785. int cb, const float lambda, const float uplim,
  786. int *bits)
  787. {
  788. return 0;
  789. }
  790. static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
  791. PutBitContext *pb, const float *in,
  792. const float *scaled, int size, int scale_idx,
  793. int cb, const float lambda, const float uplim,
  794. int *bits)
  795. {
  796. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  797. int i;
  798. int qc1, qc2, qc3, qc4;
  799. int curbits = 0;
  800. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  801. for (i = 0; i < size; i += 4) {
  802. int curidx;
  803. int *in_int = (int *)&in[i];
  804. int t0, t1, t2, t3, t4, t5, t6, t7;
  805. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  806. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  807. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  808. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  809. __asm__ volatile (
  810. ".set push \n\t"
  811. ".set noreorder \n\t"
  812. "slt %[qc1], $zero, %[qc1] \n\t"
  813. "slt %[qc2], $zero, %[qc2] \n\t"
  814. "slt %[qc3], $zero, %[qc3] \n\t"
  815. "slt %[qc4], $zero, %[qc4] \n\t"
  816. "lw %[t0], 0(%[in_int]) \n\t"
  817. "lw %[t1], 4(%[in_int]) \n\t"
  818. "lw %[t2], 8(%[in_int]) \n\t"
  819. "lw %[t3], 12(%[in_int]) \n\t"
  820. "srl %[t0], %[t0], 31 \n\t"
  821. "srl %[t1], %[t1], 31 \n\t"
  822. "srl %[t2], %[t2], 31 \n\t"
  823. "srl %[t3], %[t3], 31 \n\t"
  824. "subu %[t4], $zero, %[qc1] \n\t"
  825. "subu %[t5], $zero, %[qc2] \n\t"
  826. "subu %[t6], $zero, %[qc3] \n\t"
  827. "subu %[t7], $zero, %[qc4] \n\t"
  828. "movn %[qc1], %[t4], %[t0] \n\t"
  829. "movn %[qc2], %[t5], %[t1] \n\t"
  830. "movn %[qc3], %[t6], %[t2] \n\t"
  831. "movn %[qc4], %[t7], %[t3] \n\t"
  832. ".set pop \n\t"
  833. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  834. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  835. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  836. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  837. : [in_int]"r"(in_int)
  838. : "memory"
  839. );
  840. curidx = qc1;
  841. curidx *= 3;
  842. curidx += qc2;
  843. curidx *= 3;
  844. curidx += qc3;
  845. curidx *= 3;
  846. curidx += qc4;
  847. curidx += 40;
  848. curbits += p_bits[curidx];
  849. }
  850. return curbits;
  851. }
  852. static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
  853. PutBitContext *pb, const float *in,
  854. const float *scaled, int size, int scale_idx,
  855. int cb, const float lambda, const float uplim,
  856. int *bits)
  857. {
  858. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  859. int i;
  860. int curbits = 0;
  861. int qc1, qc2, qc3, qc4;
  862. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  863. for (i = 0; i < size; i += 4) {
  864. int curidx;
  865. int t0, t1, t2, t3, t4;
  866. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  867. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  868. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  869. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  870. __asm__ volatile (
  871. ".set push \n\t"
  872. ".set noreorder \n\t"
  873. "ori %[t4], $zero, 2 \n\t"
  874. "slt %[t0], %[t4], %[qc1] \n\t"
  875. "slt %[t1], %[t4], %[qc2] \n\t"
  876. "slt %[t2], %[t4], %[qc3] \n\t"
  877. "slt %[t3], %[t4], %[qc4] \n\t"
  878. "movn %[qc1], %[t4], %[t0] \n\t"
  879. "movn %[qc2], %[t4], %[t1] \n\t"
  880. "movn %[qc3], %[t4], %[t2] \n\t"
  881. "movn %[qc4], %[t4], %[t3] \n\t"
  882. ".set pop \n\t"
  883. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  884. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  885. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  886. [t4]"=&r"(t4)
  887. );
  888. curidx = qc1;
  889. curidx *= 3;
  890. curidx += qc2;
  891. curidx *= 3;
  892. curidx += qc3;
  893. curidx *= 3;
  894. curidx += qc4;
  895. curbits += p_bits[curidx];
  896. curbits += uquad_sign_bits[curidx];
  897. }
  898. return curbits;
  899. }
  900. static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
  901. PutBitContext *pb, const float *in,
  902. const float *scaled, int size, int scale_idx,
  903. int cb, const float lambda, const float uplim,
  904. int *bits)
  905. {
  906. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  907. int i;
  908. int qc1, qc2, qc3, qc4;
  909. int curbits = 0;
  910. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  911. for (i = 0; i < size; i += 4) {
  912. int curidx, curidx2;
  913. int *in_int = (int *)&in[i];
  914. int t0, t1, t2, t3, t4, t5, t6, t7;
  915. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  916. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  917. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  918. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  919. __asm__ volatile (
  920. ".set push \n\t"
  921. ".set noreorder \n\t"
  922. "ori %[t4], $zero, 4 \n\t"
  923. "slt %[t0], %[t4], %[qc1] \n\t"
  924. "slt %[t1], %[t4], %[qc2] \n\t"
  925. "slt %[t2], %[t4], %[qc3] \n\t"
  926. "slt %[t3], %[t4], %[qc4] \n\t"
  927. "movn %[qc1], %[t4], %[t0] \n\t"
  928. "movn %[qc2], %[t4], %[t1] \n\t"
  929. "movn %[qc3], %[t4], %[t2] \n\t"
  930. "movn %[qc4], %[t4], %[t3] \n\t"
  931. "lw %[t0], 0(%[in_int]) \n\t"
  932. "lw %[t1], 4(%[in_int]) \n\t"
  933. "lw %[t2], 8(%[in_int]) \n\t"
  934. "lw %[t3], 12(%[in_int]) \n\t"
  935. "srl %[t0], %[t0], 31 \n\t"
  936. "srl %[t1], %[t1], 31 \n\t"
  937. "srl %[t2], %[t2], 31 \n\t"
  938. "srl %[t3], %[t3], 31 \n\t"
  939. "subu %[t4], $zero, %[qc1] \n\t"
  940. "subu %[t5], $zero, %[qc2] \n\t"
  941. "subu %[t6], $zero, %[qc3] \n\t"
  942. "subu %[t7], $zero, %[qc4] \n\t"
  943. "movn %[qc1], %[t4], %[t0] \n\t"
  944. "movn %[qc2], %[t5], %[t1] \n\t"
  945. "movn %[qc3], %[t6], %[t2] \n\t"
  946. "movn %[qc4], %[t7], %[t3] \n\t"
  947. ".set pop \n\t"
  948. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  949. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  950. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  951. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  952. : [in_int]"r"(in_int)
  953. : "memory"
  954. );
  955. curidx = 9 * qc1;
  956. curidx += qc2 + 40;
  957. curidx2 = 9 * qc3;
  958. curidx2 += qc4 + 40;
  959. curbits += p_bits[curidx] + p_bits[curidx2];
  960. }
  961. return curbits;
  962. }
  963. static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
  964. PutBitContext *pb, const float *in,
  965. const float *scaled, int size, int scale_idx,
  966. int cb, const float lambda, const float uplim,
  967. int *bits)
  968. {
  969. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  970. int i;
  971. int qc1, qc2, qc3, qc4;
  972. int curbits = 0;
  973. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  974. for (i = 0; i < size; i += 4) {
  975. int curidx, curidx2;
  976. int t0, t1, t2, t3, t4;
  977. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  978. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  979. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  980. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  981. __asm__ volatile (
  982. ".set push \n\t"
  983. ".set noreorder \n\t"
  984. "ori %[t4], $zero, 7 \n\t"
  985. "slt %[t0], %[t4], %[qc1] \n\t"
  986. "slt %[t1], %[t4], %[qc2] \n\t"
  987. "slt %[t2], %[t4], %[qc3] \n\t"
  988. "slt %[t3], %[t4], %[qc4] \n\t"
  989. "movn %[qc1], %[t4], %[t0] \n\t"
  990. "movn %[qc2], %[t4], %[t1] \n\t"
  991. "movn %[qc3], %[t4], %[t2] \n\t"
  992. "movn %[qc4], %[t4], %[t3] \n\t"
  993. ".set pop \n\t"
  994. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  995. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  996. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  997. [t4]"=&r"(t4)
  998. );
  999. curidx = 8 * qc1;
  1000. curidx += qc2;
  1001. curidx2 = 8 * qc3;
  1002. curidx2 += qc4;
  1003. curbits += p_bits[curidx] +
  1004. upair7_sign_bits[curidx] +
  1005. p_bits[curidx2] +
  1006. upair7_sign_bits[curidx2];
  1007. }
  1008. return curbits;
  1009. }
  1010. static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
  1011. PutBitContext *pb, const float *in,
  1012. const float *scaled, int size, int scale_idx,
  1013. int cb, const float lambda, const float uplim,
  1014. int *bits)
  1015. {
  1016. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1017. int i;
  1018. int qc1, qc2, qc3, qc4;
  1019. int curbits = 0;
  1020. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1021. for (i = 0; i < size; i += 4) {
  1022. int curidx, curidx2;
  1023. int t0, t1, t2, t3, t4;
  1024. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1025. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1026. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1027. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1028. __asm__ volatile (
  1029. ".set push \n\t"
  1030. ".set noreorder \n\t"
  1031. "ori %[t4], $zero, 12 \n\t"
  1032. "slt %[t0], %[t4], %[qc1] \n\t"
  1033. "slt %[t1], %[t4], %[qc2] \n\t"
  1034. "slt %[t2], %[t4], %[qc3] \n\t"
  1035. "slt %[t3], %[t4], %[qc4] \n\t"
  1036. "movn %[qc1], %[t4], %[t0] \n\t"
  1037. "movn %[qc2], %[t4], %[t1] \n\t"
  1038. "movn %[qc3], %[t4], %[t2] \n\t"
  1039. "movn %[qc4], %[t4], %[t3] \n\t"
  1040. ".set pop \n\t"
  1041. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1042. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1043. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1044. [t4]"=&r"(t4)
  1045. );
  1046. curidx = 13 * qc1;
  1047. curidx += qc2;
  1048. curidx2 = 13 * qc3;
  1049. curidx2 += qc4;
  1050. curbits += p_bits[curidx] +
  1051. p_bits[curidx2] +
  1052. upair12_sign_bits[curidx] +
  1053. upair12_sign_bits[curidx2];
  1054. }
  1055. return curbits;
  1056. }
  1057. static float get_band_numbits_ESC_mips(struct AACEncContext *s,
  1058. PutBitContext *pb, const float *in,
  1059. const float *scaled, int size, int scale_idx,
  1060. int cb, const float lambda, const float uplim,
  1061. int *bits)
  1062. {
  1063. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1064. int i;
  1065. int qc1, qc2, qc3, qc4;
  1066. int curbits = 0;
  1067. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  1068. for (i = 0; i < size; i += 4) {
  1069. int curidx, curidx2;
  1070. int cond0, cond1, cond2, cond3;
  1071. int c1, c2, c3, c4;
  1072. int t4, t5;
  1073. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1074. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1075. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1076. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1077. __asm__ volatile (
  1078. ".set push \n\t"
  1079. ".set noreorder \n\t"
  1080. "ori %[t4], $zero, 15 \n\t"
  1081. "ori %[t5], $zero, 16 \n\t"
  1082. "shll_s.w %[c1], %[qc1], 18 \n\t"
  1083. "shll_s.w %[c2], %[qc2], 18 \n\t"
  1084. "shll_s.w %[c3], %[qc3], 18 \n\t"
  1085. "shll_s.w %[c4], %[qc4], 18 \n\t"
  1086. "srl %[c1], %[c1], 18 \n\t"
  1087. "srl %[c2], %[c2], 18 \n\t"
  1088. "srl %[c3], %[c3], 18 \n\t"
  1089. "srl %[c4], %[c4], 18 \n\t"
  1090. "slt %[cond0], %[t4], %[qc1] \n\t"
  1091. "slt %[cond1], %[t4], %[qc2] \n\t"
  1092. "slt %[cond2], %[t4], %[qc3] \n\t"
  1093. "slt %[cond3], %[t4], %[qc4] \n\t"
  1094. "movn %[qc1], %[t5], %[cond0] \n\t"
  1095. "movn %[qc2], %[t5], %[cond1] \n\t"
  1096. "movn %[qc3], %[t5], %[cond2] \n\t"
  1097. "movn %[qc4], %[t5], %[cond3] \n\t"
  1098. "ori %[t5], $zero, 31 \n\t"
  1099. "clz %[c1], %[c1] \n\t"
  1100. "clz %[c2], %[c2] \n\t"
  1101. "clz %[c3], %[c3] \n\t"
  1102. "clz %[c4], %[c4] \n\t"
  1103. "subu %[c1], %[t5], %[c1] \n\t"
  1104. "subu %[c2], %[t5], %[c2] \n\t"
  1105. "subu %[c3], %[t5], %[c3] \n\t"
  1106. "subu %[c4], %[t5], %[c4] \n\t"
  1107. "sll %[c1], %[c1], 1 \n\t"
  1108. "sll %[c2], %[c2], 1 \n\t"
  1109. "sll %[c3], %[c3], 1 \n\t"
  1110. "sll %[c4], %[c4], 1 \n\t"
  1111. "addiu %[c1], %[c1], -3 \n\t"
  1112. "addiu %[c2], %[c2], -3 \n\t"
  1113. "addiu %[c3], %[c3], -3 \n\t"
  1114. "addiu %[c4], %[c4], -3 \n\t"
  1115. "subu %[cond0], $zero, %[cond0] \n\t"
  1116. "subu %[cond1], $zero, %[cond1] \n\t"
  1117. "subu %[cond2], $zero, %[cond2] \n\t"
  1118. "subu %[cond3], $zero, %[cond3] \n\t"
  1119. "and %[c1], %[c1], %[cond0] \n\t"
  1120. "and %[c2], %[c2], %[cond1] \n\t"
  1121. "and %[c3], %[c3], %[cond2] \n\t"
  1122. "and %[c4], %[c4], %[cond3] \n\t"
  1123. ".set pop \n\t"
  1124. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1125. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1126. [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
  1127. [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
  1128. [c1]"=&r"(c1), [c2]"=&r"(c2),
  1129. [c3]"=&r"(c3), [c4]"=&r"(c4),
  1130. [t4]"=&r"(t4), [t5]"=&r"(t5)
  1131. );
  1132. curidx = 17 * qc1;
  1133. curidx += qc2;
  1134. curidx2 = 17 * qc3;
  1135. curidx2 += qc4;
  1136. curbits += p_bits[curidx];
  1137. curbits += esc_sign_bits[curidx];
  1138. curbits += p_bits[curidx2];
  1139. curbits += esc_sign_bits[curidx2];
  1140. curbits += c1;
  1141. curbits += c2;
  1142. curbits += c3;
  1143. curbits += c4;
  1144. }
  1145. return curbits;
  1146. }
  1147. static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
  1148. PutBitContext *pb, const float *in,
  1149. const float *scaled, int size, int scale_idx,
  1150. int cb, const float lambda, const float uplim,
  1151. int *bits) = {
  1152. get_band_numbits_ZERO_mips,
  1153. get_band_numbits_SQUAD_mips,
  1154. get_band_numbits_SQUAD_mips,
  1155. get_band_numbits_UQUAD_mips,
  1156. get_band_numbits_UQUAD_mips,
  1157. get_band_numbits_SPAIR_mips,
  1158. get_band_numbits_SPAIR_mips,
  1159. get_band_numbits_UPAIR7_mips,
  1160. get_band_numbits_UPAIR7_mips,
  1161. get_band_numbits_UPAIR12_mips,
  1162. get_band_numbits_UPAIR12_mips,
  1163. get_band_numbits_ESC_mips,
  1164. };
  1165. #define get_band_numbits( \
  1166. s, pb, in, scaled, size, scale_idx, cb, \
  1167. lambda, uplim, bits) \
  1168. get_band_numbits_arr[cb]( \
  1169. s, pb, in, scaled, size, scale_idx, cb, \
  1170. lambda, uplim, bits)
  1171. static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
  1172. const float *scaled, int size, int scale_idx,
  1173. int cb, const float lambda, const float uplim,
  1174. int *bits)
  1175. {
  1176. return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
  1177. }
  1178. /**
  1179. * Functions developed from template function and optimized for getting the band cost
  1180. */
  1181. #if HAVE_MIPSFPU
  1182. static float get_band_cost_ZERO_mips(struct AACEncContext *s,
  1183. PutBitContext *pb, const float *in,
  1184. const float *scaled, int size, int scale_idx,
  1185. int cb, const float lambda, const float uplim,
  1186. int *bits)
  1187. {
  1188. int i;
  1189. float cost = 0;
  1190. for (i = 0; i < size; i += 4) {
  1191. cost += in[i ] * in[i ];
  1192. cost += in[i+1] * in[i+1];
  1193. cost += in[i+2] * in[i+2];
  1194. cost += in[i+3] * in[i+3];
  1195. }
  1196. if (bits)
  1197. *bits = 0;
  1198. return cost * lambda;
  1199. }
  1200. static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
  1201. PutBitContext *pb, const float *in,
  1202. const float *scaled, int size, int scale_idx,
  1203. int cb, const float lambda, const float uplim,
  1204. int *bits)
  1205. {
  1206. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1207. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1208. int i;
  1209. float cost = 0;
  1210. int qc1, qc2, qc3, qc4;
  1211. int curbits = 0;
  1212. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1213. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1214. for (i = 0; i < size; i += 4) {
  1215. const float *vec;
  1216. int curidx;
  1217. int *in_int = (int *)&in[i];
  1218. float *in_pos = (float *)&in[i];
  1219. float di0, di1, di2, di3;
  1220. int t0, t1, t2, t3, t4, t5, t6, t7;
  1221. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1222. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1223. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1224. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1225. __asm__ volatile (
  1226. ".set push \n\t"
  1227. ".set noreorder \n\t"
  1228. "slt %[qc1], $zero, %[qc1] \n\t"
  1229. "slt %[qc2], $zero, %[qc2] \n\t"
  1230. "slt %[qc3], $zero, %[qc3] \n\t"
  1231. "slt %[qc4], $zero, %[qc4] \n\t"
  1232. "lw %[t0], 0(%[in_int]) \n\t"
  1233. "lw %[t1], 4(%[in_int]) \n\t"
  1234. "lw %[t2], 8(%[in_int]) \n\t"
  1235. "lw %[t3], 12(%[in_int]) \n\t"
  1236. "srl %[t0], %[t0], 31 \n\t"
  1237. "srl %[t1], %[t1], 31 \n\t"
  1238. "srl %[t2], %[t2], 31 \n\t"
  1239. "srl %[t3], %[t3], 31 \n\t"
  1240. "subu %[t4], $zero, %[qc1] \n\t"
  1241. "subu %[t5], $zero, %[qc2] \n\t"
  1242. "subu %[t6], $zero, %[qc3] \n\t"
  1243. "subu %[t7], $zero, %[qc4] \n\t"
  1244. "movn %[qc1], %[t4], %[t0] \n\t"
  1245. "movn %[qc2], %[t5], %[t1] \n\t"
  1246. "movn %[qc3], %[t6], %[t2] \n\t"
  1247. "movn %[qc4], %[t7], %[t3] \n\t"
  1248. ".set pop \n\t"
  1249. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1250. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1251. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1252. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  1253. : [in_int]"r"(in_int)
  1254. : "memory"
  1255. );
  1256. curidx = qc1;
  1257. curidx *= 3;
  1258. curidx += qc2;
  1259. curidx *= 3;
  1260. curidx += qc3;
  1261. curidx *= 3;
  1262. curidx += qc4;
  1263. curidx += 40;
  1264. curbits += p_bits[curidx];
  1265. vec = &p_codes[curidx*4];
  1266. __asm__ volatile (
  1267. ".set push \n\t"
  1268. ".set noreorder \n\t"
  1269. "lwc1 $f0, 0(%[in_pos]) \n\t"
  1270. "lwc1 $f1, 0(%[vec]) \n\t"
  1271. "lwc1 $f2, 4(%[in_pos]) \n\t"
  1272. "lwc1 $f3, 4(%[vec]) \n\t"
  1273. "lwc1 $f4, 8(%[in_pos]) \n\t"
  1274. "lwc1 $f5, 8(%[vec]) \n\t"
  1275. "lwc1 $f6, 12(%[in_pos]) \n\t"
  1276. "lwc1 $f7, 12(%[vec]) \n\t"
  1277. "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
  1278. "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
  1279. "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
  1280. "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
  1281. ".set pop \n\t"
  1282. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1283. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1284. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1285. [IQ]"f"(IQ)
  1286. : "$f0", "$f1", "$f2", "$f3",
  1287. "$f4", "$f5", "$f6", "$f7",
  1288. "memory"
  1289. );
  1290. cost += di0 * di0 + di1 * di1
  1291. + di2 * di2 + di3 * di3;
  1292. }
  1293. if (bits)
  1294. *bits = curbits;
  1295. return cost * lambda + curbits;
  1296. }
  1297. static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
  1298. PutBitContext *pb, const float *in,
  1299. const float *scaled, int size, int scale_idx,
  1300. int cb, const float lambda, const float uplim,
  1301. int *bits)
  1302. {
  1303. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1304. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1305. int i;
  1306. float cost = 0;
  1307. int curbits = 0;
  1308. int qc1, qc2, qc3, qc4;
  1309. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  1310. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1311. for (i = 0; i < size; i += 4) {
  1312. const float *vec;
  1313. int curidx;
  1314. float *in_pos = (float *)&in[i];
  1315. float di0, di1, di2, di3;
  1316. int t0, t1, t2, t3, t4;
  1317. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1318. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1319. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1320. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1321. __asm__ volatile (
  1322. ".set push \n\t"
  1323. ".set noreorder \n\t"
  1324. "ori %[t4], $zero, 2 \n\t"
  1325. "slt %[t0], %[t4], %[qc1] \n\t"
  1326. "slt %[t1], %[t4], %[qc2] \n\t"
  1327. "slt %[t2], %[t4], %[qc3] \n\t"
  1328. "slt %[t3], %[t4], %[qc4] \n\t"
  1329. "movn %[qc1], %[t4], %[t0] \n\t"
  1330. "movn %[qc2], %[t4], %[t1] \n\t"
  1331. "movn %[qc3], %[t4], %[t2] \n\t"
  1332. "movn %[qc4], %[t4], %[t3] \n\t"
  1333. ".set pop \n\t"
  1334. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1335. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1336. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1337. [t4]"=&r"(t4)
  1338. );
  1339. curidx = qc1;
  1340. curidx *= 3;
  1341. curidx += qc2;
  1342. curidx *= 3;
  1343. curidx += qc3;
  1344. curidx *= 3;
  1345. curidx += qc4;
  1346. curbits += p_bits[curidx];
  1347. curbits += uquad_sign_bits[curidx];
  1348. vec = &p_codes[curidx*4];
  1349. __asm__ volatile (
  1350. ".set push \n\t"
  1351. ".set noreorder \n\t"
  1352. "lwc1 %[di0], 0(%[in_pos]) \n\t"
  1353. "lwc1 %[di1], 4(%[in_pos]) \n\t"
  1354. "lwc1 %[di2], 8(%[in_pos]) \n\t"
  1355. "lwc1 %[di3], 12(%[in_pos]) \n\t"
  1356. "abs.s %[di0], %[di0] \n\t"
  1357. "abs.s %[di1], %[di1] \n\t"
  1358. "abs.s %[di2], %[di2] \n\t"
  1359. "abs.s %[di3], %[di3] \n\t"
  1360. "lwc1 $f0, 0(%[vec]) \n\t"
  1361. "lwc1 $f1, 4(%[vec]) \n\t"
  1362. "lwc1 $f2, 8(%[vec]) \n\t"
  1363. "lwc1 $f3, 12(%[vec]) \n\t"
  1364. "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
  1365. "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
  1366. "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
  1367. "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
  1368. ".set pop \n\t"
  1369. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1370. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1371. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1372. [IQ]"f"(IQ)
  1373. : "$f0", "$f1", "$f2", "$f3",
  1374. "memory"
  1375. );
  1376. cost += di0 * di0 + di1 * di1
  1377. + di2 * di2 + di3 * di3;
  1378. }
  1379. if (bits)
  1380. *bits = curbits;
  1381. return cost * lambda + curbits;
  1382. }
  1383. static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
  1384. PutBitContext *pb, const float *in,
  1385. const float *scaled, int size, int scale_idx,
  1386. int cb, const float lambda, const float uplim,
  1387. int *bits)
  1388. {
  1389. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1390. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1391. int i;
  1392. float cost = 0;
  1393. int qc1, qc2, qc3, qc4;
  1394. int curbits = 0;
  1395. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1396. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1397. for (i = 0; i < size; i += 4) {
  1398. const float *vec, *vec2;
  1399. int curidx, curidx2;
  1400. int *in_int = (int *)&in[i];
  1401. float *in_pos = (float *)&in[i];
  1402. float di0, di1, di2, di3;
  1403. int t0, t1, t2, t3, t4, t5, t6, t7;
  1404. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1405. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1406. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1407. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1408. __asm__ volatile (
  1409. ".set push \n\t"
  1410. ".set noreorder \n\t"
  1411. "ori %[t4], $zero, 4 \n\t"
  1412. "slt %[t0], %[t4], %[qc1] \n\t"
  1413. "slt %[t1], %[t4], %[qc2] \n\t"
  1414. "slt %[t2], %[t4], %[qc3] \n\t"
  1415. "slt %[t3], %[t4], %[qc4] \n\t"
  1416. "movn %[qc1], %[t4], %[t0] \n\t"
  1417. "movn %[qc2], %[t4], %[t1] \n\t"
  1418. "movn %[qc3], %[t4], %[t2] \n\t"
  1419. "movn %[qc4], %[t4], %[t3] \n\t"
  1420. "lw %[t0], 0(%[in_int]) \n\t"
  1421. "lw %[t1], 4(%[in_int]) \n\t"
  1422. "lw %[t2], 8(%[in_int]) \n\t"
  1423. "lw %[t3], 12(%[in_int]) \n\t"
  1424. "srl %[t0], %[t0], 31 \n\t"
  1425. "srl %[t1], %[t1], 31 \n\t"
  1426. "srl %[t2], %[t2], 31 \n\t"
  1427. "srl %[t3], %[t3], 31 \n\t"
  1428. "subu %[t4], $zero, %[qc1] \n\t"
  1429. "subu %[t5], $zero, %[qc2] \n\t"
  1430. "subu %[t6], $zero, %[qc3] \n\t"
  1431. "subu %[t7], $zero, %[qc4] \n\t"
  1432. "movn %[qc1], %[t4], %[t0] \n\t"
  1433. "movn %[qc2], %[t5], %[t1] \n\t"
  1434. "movn %[qc3], %[t6], %[t2] \n\t"
  1435. "movn %[qc4], %[t7], %[t3] \n\t"
  1436. ".set pop \n\t"
  1437. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1438. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1439. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1440. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  1441. : [in_int]"r"(in_int)
  1442. : "memory"
  1443. );
  1444. curidx = 9 * qc1;
  1445. curidx += qc2 + 40;
  1446. curidx2 = 9 * qc3;
  1447. curidx2 += qc4 + 40;
  1448. curbits += p_bits[curidx];
  1449. curbits += p_bits[curidx2];
  1450. vec = &p_codes[curidx*2];
  1451. vec2 = &p_codes[curidx2*2];
  1452. __asm__ volatile (
  1453. ".set push \n\t"
  1454. ".set noreorder \n\t"
  1455. "lwc1 $f0, 0(%[in_pos]) \n\t"
  1456. "lwc1 $f1, 0(%[vec]) \n\t"
  1457. "lwc1 $f2, 4(%[in_pos]) \n\t"
  1458. "lwc1 $f3, 4(%[vec]) \n\t"
  1459. "lwc1 $f4, 8(%[in_pos]) \n\t"
  1460. "lwc1 $f5, 0(%[vec2]) \n\t"
  1461. "lwc1 $f6, 12(%[in_pos]) \n\t"
  1462. "lwc1 $f7, 4(%[vec2]) \n\t"
  1463. "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
  1464. "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
  1465. "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
  1466. "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
  1467. ".set pop \n\t"
  1468. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1469. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1470. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1471. [vec2]"r"(vec2), [IQ]"f"(IQ)
  1472. : "$f0", "$f1", "$f2", "$f3",
  1473. "$f4", "$f5", "$f6", "$f7",
  1474. "memory"
  1475. );
  1476. cost += di0 * di0 + di1 * di1
  1477. + di2 * di2 + di3 * di3;
  1478. }
  1479. if (bits)
  1480. *bits = curbits;
  1481. return cost * lambda + curbits;
  1482. }
  1483. static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
  1484. PutBitContext *pb, const float *in,
  1485. const float *scaled, int size, int scale_idx,
  1486. int cb, const float lambda, const float uplim,
  1487. int *bits)
  1488. {
  1489. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1490. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1491. int i;
  1492. float cost = 0;
  1493. int qc1, qc2, qc3, qc4;
  1494. int curbits = 0;
  1495. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1496. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1497. for (i = 0; i < size; i += 4) {
  1498. const float *vec, *vec2;
  1499. int curidx, curidx2, sign1, count1, sign2, count2;
  1500. int *in_int = (int *)&in[i];
  1501. float *in_pos = (float *)&in[i];
  1502. float di0, di1, di2, di3;
  1503. int t0, t1, t2, t3, t4;
  1504. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1505. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1506. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1507. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1508. __asm__ volatile (
  1509. ".set push \n\t"
  1510. ".set noreorder \n\t"
  1511. "ori %[t4], $zero, 7 \n\t"
  1512. "ori %[sign1], $zero, 0 \n\t"
  1513. "ori %[sign2], $zero, 0 \n\t"
  1514. "slt %[t0], %[t4], %[qc1] \n\t"
  1515. "slt %[t1], %[t4], %[qc2] \n\t"
  1516. "slt %[t2], %[t4], %[qc3] \n\t"
  1517. "slt %[t3], %[t4], %[qc4] \n\t"
  1518. "movn %[qc1], %[t4], %[t0] \n\t"
  1519. "movn %[qc2], %[t4], %[t1] \n\t"
  1520. "movn %[qc3], %[t4], %[t2] \n\t"
  1521. "movn %[qc4], %[t4], %[t3] \n\t"
  1522. "lw %[t0], 0(%[in_int]) \n\t"
  1523. "lw %[t1], 4(%[in_int]) \n\t"
  1524. "lw %[t2], 8(%[in_int]) \n\t"
  1525. "lw %[t3], 12(%[in_int]) \n\t"
  1526. "slt %[t0], %[t0], $zero \n\t"
  1527. "movn %[sign1], %[t0], %[qc1] \n\t"
  1528. "slt %[t2], %[t2], $zero \n\t"
  1529. "movn %[sign2], %[t2], %[qc3] \n\t"
  1530. "slt %[t1], %[t1], $zero \n\t"
  1531. "sll %[t0], %[sign1], 1 \n\t"
  1532. "or %[t0], %[t0], %[t1] \n\t"
  1533. "movn %[sign1], %[t0], %[qc2] \n\t"
  1534. "slt %[t3], %[t3], $zero \n\t"
  1535. "sll %[t0], %[sign2], 1 \n\t"
  1536. "or %[t0], %[t0], %[t3] \n\t"
  1537. "movn %[sign2], %[t0], %[qc4] \n\t"
  1538. "slt %[count1], $zero, %[qc1] \n\t"
  1539. "slt %[t1], $zero, %[qc2] \n\t"
  1540. "slt %[count2], $zero, %[qc3] \n\t"
  1541. "slt %[t2], $zero, %[qc4] \n\t"
  1542. "addu %[count1], %[count1], %[t1] \n\t"
  1543. "addu %[count2], %[count2], %[t2] \n\t"
  1544. ".set pop \n\t"
  1545. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1546. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1547. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  1548. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  1549. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1550. [t4]"=&r"(t4)
  1551. : [in_int]"r"(in_int)
  1552. : "memory"
  1553. );
  1554. curidx = 8 * qc1;
  1555. curidx += qc2;
  1556. curidx2 = 8 * qc3;
  1557. curidx2 += qc4;
  1558. curbits += p_bits[curidx];
  1559. curbits += upair7_sign_bits[curidx];
  1560. vec = &p_codes[curidx*2];
  1561. curbits += p_bits[curidx2];
  1562. curbits += upair7_sign_bits[curidx2];
  1563. vec2 = &p_codes[curidx2*2];
  1564. __asm__ volatile (
  1565. ".set push \n\t"
  1566. ".set noreorder \n\t"
  1567. "lwc1 %[di0], 0(%[in_pos]) \n\t"
  1568. "lwc1 %[di1], 4(%[in_pos]) \n\t"
  1569. "lwc1 %[di2], 8(%[in_pos]) \n\t"
  1570. "lwc1 %[di3], 12(%[in_pos]) \n\t"
  1571. "abs.s %[di0], %[di0] \n\t"
  1572. "abs.s %[di1], %[di1] \n\t"
  1573. "abs.s %[di2], %[di2] \n\t"
  1574. "abs.s %[di3], %[di3] \n\t"
  1575. "lwc1 $f0, 0(%[vec]) \n\t"
  1576. "lwc1 $f1, 4(%[vec]) \n\t"
  1577. "lwc1 $f2, 0(%[vec2]) \n\t"
  1578. "lwc1 $f3, 4(%[vec2]) \n\t"
  1579. "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
  1580. "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
  1581. "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
  1582. "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
  1583. ".set pop \n\t"
  1584. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1585. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1586. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1587. [vec2]"r"(vec2), [IQ]"f"(IQ)
  1588. : "$f0", "$f1", "$f2", "$f3",
  1589. "memory"
  1590. );
  1591. cost += di0 * di0 + di1 * di1
  1592. + di2 * di2 + di3 * di3;
  1593. }
  1594. if (bits)
  1595. *bits = curbits;
  1596. return cost * lambda + curbits;
  1597. }
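/**
 * Same as get_band_cost_UPAIR7_mips but for the unsigned-pair codebooks of
 * maximum magnitude 12 (cb 9-10): pair indices are 13*qc1 + qc2 and the sign
 * bits come from upair12_sign_bits.
 */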
  1598. static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
  1599. PutBitContext *pb, const float *in,
  1600. const float *scaled, int size, int scale_idx,
  1601. int cb, const float lambda, const float uplim,
  1602. int *bits)
  1603. {
  1604. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1605. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1606. int i;
  1607. float cost = 0;
  1608. int qc1, qc2, qc3, qc4;
  1609. int curbits = 0;
  1610. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1611. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1612. for (i = 0; i < size; i += 4) {
  1613. const float *vec, *vec2;
  1614. int curidx, curidx2;
  1615. int sign1, count1, sign2, count2;
  1616. int *in_int = (int *)&in[i];
  1617. float *in_pos = (float *)&in[i];
  1618. float di0, di1, di2, di3;
  1619. int t0, t1, t2, t3, t4;
  1620. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1621. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1622. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1623. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1624. __asm__ volatile (
  1625. ".set push \n\t"
  1626. ".set noreorder \n\t"
  1627. "ori %[t4], $zero, 12 \n\t"
  1628. "ori %[sign1], $zero, 0 \n\t"
  1629. "ori %[sign2], $zero, 0 \n\t"
  1630. "slt %[t0], %[t4], %[qc1] \n\t"
  1631. "slt %[t1], %[t4], %[qc2] \n\t"
  1632. "slt %[t2], %[t4], %[qc3] \n\t"
  1633. "slt %[t3], %[t4], %[qc4] \n\t"
  1634. "movn %[qc1], %[t4], %[t0] \n\t"
  1635. "movn %[qc2], %[t4], %[t1] \n\t"
  1636. "movn %[qc3], %[t4], %[t2] \n\t"
  1637. "movn %[qc4], %[t4], %[t3] \n\t"
  1638. "lw %[t0], 0(%[in_int]) \n\t"
  1639. "lw %[t1], 4(%[in_int]) \n\t"
  1640. "lw %[t2], 8(%[in_int]) \n\t"
  1641. "lw %[t3], 12(%[in_int]) \n\t"
  1642. "slt %[t0], %[t0], $zero \n\t"
  1643. "movn %[sign1], %[t0], %[qc1] \n\t"
  1644. "slt %[t2], %[t2], $zero \n\t"
  1645. "movn %[sign2], %[t2], %[qc3] \n\t"
  1646. "slt %[t1], %[t1], $zero \n\t"
  1647. "sll %[t0], %[sign1], 1 \n\t"
  1648. "or %[t0], %[t0], %[t1] \n\t"
  1649. "movn %[sign1], %[t0], %[qc2] \n\t"
  1650. "slt %[t3], %[t3], $zero \n\t"
  1651. "sll %[t0], %[sign2], 1 \n\t"
  1652. "or %[t0], %[t0], %[t3] \n\t"
  1653. "movn %[sign2], %[t0], %[qc4] \n\t"
  1654. "slt %[count1], $zero, %[qc1] \n\t"
  1655. "slt %[t1], $zero, %[qc2] \n\t"
  1656. "slt %[count2], $zero, %[qc3] \n\t"
  1657. "slt %[t2], $zero, %[qc4] \n\t"
  1658. "addu %[count1], %[count1], %[t1] \n\t"
  1659. "addu %[count2], %[count2], %[t2] \n\t"
  1660. ".set pop \n\t"
  1661. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1662. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1663. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  1664. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  1665. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1666. [t4]"=&r"(t4)
  1667. : [in_int]"r"(in_int)
  1668. : "memory"
  1669. );
  1670. curidx = 13 * qc1;
  1671. curidx += qc2;
  1672. curidx2 = 13 * qc3;
  1673. curidx2 += qc4;
  1674. curbits += p_bits[curidx];
  1675. curbits += p_bits[curidx2];
  1676. curbits += upair12_sign_bits[curidx];
  1677. curbits += upair12_sign_bits[curidx2];
  1678. vec = &p_codes[curidx*2];
  1679. vec2 = &p_codes[curidx2*2];
  1680. __asm__ volatile (
  1681. ".set push \n\t"
  1682. ".set noreorder \n\t"
  1683. "lwc1 %[di0], 0(%[in_pos]) \n\t"
  1684. "lwc1 %[di1], 4(%[in_pos]) \n\t"
  1685. "lwc1 %[di2], 8(%[in_pos]) \n\t"
  1686. "lwc1 %[di3], 12(%[in_pos]) \n\t"
  1687. "abs.s %[di0], %[di0] \n\t"
  1688. "abs.s %[di1], %[di1] \n\t"
  1689. "abs.s %[di2], %[di2] \n\t"
  1690. "abs.s %[di3], %[di3] \n\t"
  1691. "lwc1 $f0, 0(%[vec]) \n\t"
  1692. "lwc1 $f1, 4(%[vec]) \n\t"
  1693. "lwc1 $f2, 0(%[vec2]) \n\t"
  1694. "lwc1 $f3, 4(%[vec2]) \n\t"
  1695. "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
  1696. "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
  1697. "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
  1698. "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
  1699. ".set pop \n\t"
  1700. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1701. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1702. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1703. [vec2]"r"(vec2), [IQ]"f"(IQ)
  1704. : "$f0", "$f1", "$f2", "$f3",
  1705. "memory"
  1706. );
  1707. cost += di0 * di0 + di1 * di1
  1708. + di2 * di2 + di3 * di3;
  1709. }
  1710. if (bits)
  1711. *bits = curbits;
  1712. return cost * lambda + curbits;
  1713. }
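/**
 * Cost of coding one band with the escape codebook (cb 11). Magnitudes above
 * 15 select the escape symbol 16; c1..c4 hold the magnitudes saturated to the
 * 13-bit escape range by the shll_s.w/srl pairs. Each escaped value adds
 * av_log2(c) * 2 - 3 bits (masked by the per-coefficient escape condition)
 * and is reconstructed as c * cbrtf(c) * IQ, clipped at CLIPPED_ESCAPE.
 */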
  1714. static float get_band_cost_ESC_mips(struct AACEncContext *s,
  1715. PutBitContext *pb, const float *in,
  1716. const float *scaled, int size, int scale_idx,
  1717. int cb, const float lambda, const float uplim,
  1718. int *bits)
  1719. {
  1720. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1721. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1722. const float CLIPPED_ESCAPE = 165140.0f * IQ;
  1723. int i;
  1724. float cost = 0;
  1725. int qc1, qc2, qc3, qc4;
  1726. int curbits = 0;
1727. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1728. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1729. for (i = 0; i < size; i += 4) {
  1730. const float *vec, *vec2;
  1731. int curidx, curidx2;
  1732. float t1, t2, t3, t4;
  1733. float di1, di2, di3, di4;
  1734. int cond0, cond1, cond2, cond3;
  1735. int c1, c2, c3, c4;
  1736. int t6, t7;
  1737. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1738. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1739. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1740. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1741. __asm__ volatile (
  1742. ".set push \n\t"
  1743. ".set noreorder \n\t"
  1744. "ori %[t6], $zero, 15 \n\t"
  1745. "ori %[t7], $zero, 16 \n\t"
  1746. "shll_s.w %[c1], %[qc1], 18 \n\t"
  1747. "shll_s.w %[c2], %[qc2], 18 \n\t"
  1748. "shll_s.w %[c3], %[qc3], 18 \n\t"
  1749. "shll_s.w %[c4], %[qc4], 18 \n\t"
  1750. "srl %[c1], %[c1], 18 \n\t"
  1751. "srl %[c2], %[c2], 18 \n\t"
  1752. "srl %[c3], %[c3], 18 \n\t"
  1753. "srl %[c4], %[c4], 18 \n\t"
  1754. "slt %[cond0], %[t6], %[qc1] \n\t"
  1755. "slt %[cond1], %[t6], %[qc2] \n\t"
  1756. "slt %[cond2], %[t6], %[qc3] \n\t"
  1757. "slt %[cond3], %[t6], %[qc4] \n\t"
  1758. "movn %[qc1], %[t7], %[cond0] \n\t"
  1759. "movn %[qc2], %[t7], %[cond1] \n\t"
  1760. "movn %[qc3], %[t7], %[cond2] \n\t"
  1761. "movn %[qc4], %[t7], %[cond3] \n\t"
  1762. ".set pop \n\t"
  1763. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1764. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1765. [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
  1766. [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
  1767. [c1]"=&r"(c1), [c2]"=&r"(c2),
  1768. [c3]"=&r"(c3), [c4]"=&r"(c4),
  1769. [t6]"=&r"(t6), [t7]"=&r"(t7)
  1770. );
  1771. curidx = 17 * qc1;
  1772. curidx += qc2;
  1773. curidx2 = 17 * qc3;
  1774. curidx2 += qc4;
  1775. curbits += p_bits[curidx];
  1776. curbits += esc_sign_bits[curidx];
  1777. vec = &p_codes[curidx*2];
  1778. curbits += p_bits[curidx2];
  1779. curbits += esc_sign_bits[curidx2];
  1780. vec2 = &p_codes[curidx2*2];
  1781. curbits += (av_log2(c1) * 2 - 3) & (-cond0);
  1782. curbits += (av_log2(c2) * 2 - 3) & (-cond1);
  1783. curbits += (av_log2(c3) * 2 - 3) & (-cond2);
  1784. curbits += (av_log2(c4) * 2 - 3) & (-cond3);
  1785. t1 = fabsf(in[i ]);
  1786. t2 = fabsf(in[i+1]);
  1787. t3 = fabsf(in[i+2]);
  1788. t4 = fabsf(in[i+3]);
  1789. if (cond0) {
  1790. if (t1 >= CLIPPED_ESCAPE) {
  1791. di1 = t1 - CLIPPED_ESCAPE;
  1792. } else {
  1793. di1 = t1 - c1 * cbrtf(c1) * IQ;
  1794. }
  1795. } else
  1796. di1 = t1 - vec[0] * IQ;
  1797. if (cond1) {
  1798. if (t2 >= CLIPPED_ESCAPE) {
  1799. di2 = t2 - CLIPPED_ESCAPE;
  1800. } else {
  1801. di2 = t2 - c2 * cbrtf(c2) * IQ;
  1802. }
  1803. } else
  1804. di2 = t2 - vec[1] * IQ;
  1805. if (cond2) {
  1806. if (t3 >= CLIPPED_ESCAPE) {
  1807. di3 = t3 - CLIPPED_ESCAPE;
  1808. } else {
  1809. di3 = t3 - c3 * cbrtf(c3) * IQ;
  1810. }
  1811. } else
  1812. di3 = t3 - vec2[0] * IQ;
  1813. if (cond3) {
  1814. if (t4 >= CLIPPED_ESCAPE) {
  1815. di4 = t4 - CLIPPED_ESCAPE;
  1816. } else {
  1817. di4 = t4 - c4 * cbrtf(c4) * IQ;
  1818. }
  1819. } else
  1820. di4 = t4 - vec2[1]*IQ;
  1821. cost += di1 * di1 + di2 * di2
  1822. + di3 * di3 + di4 * di4;
  1823. }
  1824. if (bits)
  1825. *bits = curbits;
  1826. return cost * lambda + curbits;
  1827. }
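/*
 * Codebook-indexed dispatch table used by the get_band_cost() macro below:
 * cb 0 is the zero book, 1-2 the signed quads, 3-4 the unsigned quads,
 * 5-6 the signed pairs, 7-8 the unsigned pairs up to 7, 9-10 the unsigned
 * pairs up to 12 and 11 the escape book.
 */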
  1828. static float (*const get_band_cost_arr[])(struct AACEncContext *s,
  1829. PutBitContext *pb, const float *in,
  1830. const float *scaled, int size, int scale_idx,
  1831. int cb, const float lambda, const float uplim,
  1832. int *bits) = {
  1833. get_band_cost_ZERO_mips,
  1834. get_band_cost_SQUAD_mips,
  1835. get_band_cost_SQUAD_mips,
  1836. get_band_cost_UQUAD_mips,
  1837. get_band_cost_UQUAD_mips,
  1838. get_band_cost_SPAIR_mips,
  1839. get_band_cost_SPAIR_mips,
  1840. get_band_cost_UPAIR7_mips,
  1841. get_band_cost_UPAIR7_mips,
  1842. get_band_cost_UPAIR12_mips,
  1843. get_band_cost_UPAIR12_mips,
  1844. get_band_cost_ESC_mips,
  1845. };
  1846. #define get_band_cost( \
  1847. s, pb, in, scaled, size, scale_idx, cb, \
  1848. lambda, uplim, bits) \
  1849. get_band_cost_arr[cb]( \
  1850. s, pb, in, scaled, size, scale_idx, cb, \
  1851. lambda, uplim, bits)
  1852. static float quantize_band_cost(struct AACEncContext *s, const float *in,
  1853. const float *scaled, int size, int scale_idx,
  1854. int cb, const float lambda, const float uplim,
  1855. int *bits)
  1856. {
  1857. return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
  1858. }
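/**
 * Two-loop scalefactor search (MIPS counterpart of the generic twoloop
 * coder): an inner loop rate-controls the frame against destbits by moving
 * all scalefactors in steps of qstep, an outer loop then relaxes individual
 * bands whose distortion still exceeds the threshold stored in uplims[].
 */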
  1859. static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
  1860. AACEncContext *s,
  1861. SingleChannelElement *sce,
  1862. const float lambda)
  1863. {
  1864. int start = 0, i, w, w2, g;
  1865. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
  1866. float dists[128] = { 0 }, uplims[128];
  1867. float maxvals[128];
  1868. int fflag, minscaler;
  1869. int its = 0;
  1870. int allz = 0;
  1871. float minthr = INFINITY;
  1872. destbits = FFMIN(destbits, 5800);
  1873. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1874. for (g = 0; g < sce->ics.num_swb; g++) {
  1875. int nz = 0;
  1876. float uplim = 0.0f;
  1877. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  1878. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  1879. uplim += band->threshold;
  1880. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  1881. sce->zeroes[(w+w2)*16+g] = 1;
  1882. continue;
  1883. }
  1884. nz = 1;
  1885. }
1886. uplims[w*16+g] = uplim * 512;
  1887. sce->zeroes[w*16+g] = !nz;
  1888. if (nz)
  1889. minthr = FFMIN(minthr, uplim);
  1890. allz |= nz;
  1891. }
  1892. }
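/* Initial guess: offset each band from SCALE_ONE_POS by four times the log2
 * ratio of its summed threshold to the smallest nonzero one, capped at 59. */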
  1893. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1894. for (g = 0; g < sce->ics.num_swb; g++) {
  1895. if (sce->zeroes[w*16+g]) {
  1896. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  1897. continue;
  1898. }
  1899. sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
  1900. }
  1901. }
  1902. if (!allz)
  1903. return;
  1904. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  1905. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1906. start = w*128;
  1907. for (g = 0; g < sce->ics.num_swb; g++) {
  1908. const float *scaled = s->scoefs + start;
  1909. maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  1910. start += sce->ics.swb_sizes[g];
  1911. }
  1912. }
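/*
 * Outer loop: iterate until no scalefactor changes or 10 passes are done.
 * Inner loop: estimate the frame bit count (bits only while qstep > 1,
 * bits plus per-band distortion once qstep reaches 1) and shift the
 * scalefactors up or down by qstep depending on whether the estimate
 * overshoots destbits, halving qstep each pass.
 */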
  1913. do {
  1914. int tbits, qstep;
  1915. minscaler = sce->sf_idx[0];
  1916. qstep = its ? 1 : 32;
  1917. do {
  1918. int prev = -1;
  1919. tbits = 0;
  1920. fflag = 0;
  1921. if (qstep > 1) {
  1922. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1923. start = w*128;
  1924. for (g = 0; g < sce->ics.num_swb; g++) {
  1925. const float *coefs = sce->coeffs + start;
  1926. const float *scaled = s->scoefs + start;
  1927. int bits = 0;
  1928. int cb;
  1929. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  1930. start += sce->ics.swb_sizes[g];
  1931. continue;
  1932. }
  1933. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  1934. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  1935. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  1936. int b;
  1937. bits += quantize_band_cost_bits(s, coefs + w2*128,
  1938. scaled + w2*128,
  1939. sce->ics.swb_sizes[g],
  1940. sce->sf_idx[w*16+g],
  1941. cb,
  1942. 1.0f,
  1943. INFINITY,
  1944. &b);
  1945. }
  1946. if (prev != -1) {
  1947. bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  1948. }
  1949. tbits += bits;
  1950. start += sce->ics.swb_sizes[g];
  1951. prev = sce->sf_idx[w*16+g];
  1952. }
  1953. }
  1954. }
  1955. else {
  1956. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1957. start = w*128;
  1958. for (g = 0; g < sce->ics.num_swb; g++) {
  1959. const float *coefs = sce->coeffs + start;
  1960. const float *scaled = s->scoefs + start;
  1961. int bits = 0;
  1962. int cb;
  1963. float dist = 0.0f;
  1964. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  1965. start += sce->ics.swb_sizes[g];
  1966. continue;
  1967. }
  1968. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  1969. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  1970. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  1971. int b;
  1972. dist += quantize_band_cost(s, coefs + w2*128,
  1973. scaled + w2*128,
  1974. sce->ics.swb_sizes[g],
  1975. sce->sf_idx[w*16+g],
  1976. cb,
  1977. 1.0f,
  1978. INFINITY,
  1979. &b);
  1980. bits += b;
  1981. }
  1982. dists[w*16+g] = dist - bits;
  1983. if (prev != -1) {
  1984. bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  1985. }
  1986. tbits += bits;
  1987. start += sce->ics.swb_sizes[g];
  1988. prev = sce->sf_idx[w*16+g];
  1989. }
  1990. }
  1991. }
  1992. if (tbits > destbits) {
  1993. for (i = 0; i < 128; i++)
  1994. if (sce->sf_idx[i] < 218 - qstep)
  1995. sce->sf_idx[i] += qstep;
  1996. } else {
  1997. for (i = 0; i < 128; i++)
  1998. if (sce->sf_idx[i] > 60 - qstep)
  1999. sce->sf_idx[i] -= qstep;
  2000. }
  2001. qstep >>= 1;
  2002. if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
  2003. qstep = 1;
  2004. } while (qstep);
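/* Where the band distortion still exceeds its threshold, lower the
 * scalefactor by one (or by two when no codebook fits the reduced index),
 * then clamp all indices around minscaler; any change sets fflag and forces
 * another outer iteration. */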
  2005. fflag = 0;
  2006. minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
  2007. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  2008. for (g = 0; g < sce->ics.num_swb; g++) {
  2009. int prevsc = sce->sf_idx[w*16+g];
  2010. if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
  2011. if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
  2012. sce->sf_idx[w*16+g]--;
  2013. else
  2014. sce->sf_idx[w*16+g]-=2;
  2015. }
  2016. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
  2017. sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
  2018. if (sce->sf_idx[w*16+g] != prevsc)
  2019. fflag = 1;
  2020. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  2021. }
  2022. }
  2023. its++;
  2024. } while (fflag && its < 10);
  2025. }
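/**
 * Mid/side decision: for every band coded in both channels of a common
 * window, compare the quantization cost of the plain L/R spectra with that
 * of M = (L+R)/2 and S = M - R, weighted by the band thresholds, and set
 * ms_mask where M/S is cheaper.
 */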
  2026. static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
  2027. {
  2028. int start = 0, i, w, w2, g;
  2029. float M[128], S[128];
  2030. float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
  2031. SingleChannelElement *sce0 = &cpe->ch[0];
  2032. SingleChannelElement *sce1 = &cpe->ch[1];
  2033. if (!cpe->common_window)
  2034. return;
  2035. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  2036. for (g = 0; g < sce0->ics.num_swb; g++) {
  2037. if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
  2038. float dist1 = 0.0f, dist2 = 0.0f;
  2039. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  2040. FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
  2041. FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
  2042. float minthr = FFMIN(band0->threshold, band1->threshold);
  2043. float maxthr = FFMAX(band0->threshold, band1->threshold);
  2044. for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
  2045. M[i ] = (sce0->coeffs[start+w2*128+i ]
  2046. + sce1->coeffs[start+w2*128+i ]) * 0.5;
  2047. M[i+1] = (sce0->coeffs[start+w2*128+i+1]
  2048. + sce1->coeffs[start+w2*128+i+1]) * 0.5;
  2049. M[i+2] = (sce0->coeffs[start+w2*128+i+2]
  2050. + sce1->coeffs[start+w2*128+i+2]) * 0.5;
  2051. M[i+3] = (sce0->coeffs[start+w2*128+i+3]
  2052. + sce1->coeffs[start+w2*128+i+3]) * 0.5;
  2053. S[i ] = M[i ]
  2054. - sce1->coeffs[start+w2*128+i ];
  2055. S[i+1] = M[i+1]
  2056. - sce1->coeffs[start+w2*128+i+1];
  2057. S[i+2] = M[i+2]
  2058. - sce1->coeffs[start+w2*128+i+2];
  2059. S[i+3] = M[i+3]
  2060. - sce1->coeffs[start+w2*128+i+3];
  2061. }
  2062. abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  2063. abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  2064. abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  2065. abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  2066. dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
  2067. L34,
  2068. sce0->ics.swb_sizes[g],
  2069. sce0->sf_idx[(w+w2)*16+g],
  2070. sce0->band_type[(w+w2)*16+g],
  2071. s->lambda / band0->threshold, INFINITY, NULL);
  2072. dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
  2073. R34,
  2074. sce1->ics.swb_sizes[g],
  2075. sce1->sf_idx[(w+w2)*16+g],
  2076. sce1->band_type[(w+w2)*16+g],
  2077. s->lambda / band1->threshold, INFINITY, NULL);
  2078. dist2 += quantize_band_cost(s, M,
  2079. M34,
  2080. sce0->ics.swb_sizes[g],
  2081. sce0->sf_idx[(w+w2)*16+g],
  2082. sce0->band_type[(w+w2)*16+g],
  2083. s->lambda / maxthr, INFINITY, NULL);
  2084. dist2 += quantize_band_cost(s, S,
  2085. S34,
  2086. sce1->ics.swb_sizes[g],
  2087. sce1->sf_idx[(w+w2)*16+g],
  2088. sce1->band_type[(w+w2)*16+g],
  2089. s->lambda / minthr, INFINITY, NULL);
  2090. }
  2091. cpe->ms_mask[w*16+g] = dist2 < dist1;
  2092. }
  2093. start += sce0->ics.swb_sizes[g];
  2094. }
  2095. }
  2096. }
2097. #endif /* HAVE_MIPSFPU */
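/**
 * Trellis search over the section codebooks: path[swb][cb] is, roughly, the
 * cheapest bit cost of the bands before swb when the current section uses
 * codebook cb, with .run counting the section length. The forward pass fills
 * the trellis, the backtracking pass below it emits the section data.
 */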
  2098. static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
  2099. int win, int group_len, const float lambda)
  2100. {
  2101. BandCodingPath path[120][12];
  2102. int w, swb, cb, start, size;
  2103. int i, j;
  2104. const int max_sfb = sce->ics.max_sfb;
  2105. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  2106. const int run_esc = (1 << run_bits) - 1;
  2107. int idx, ppos, count;
  2108. int stackrun[120], stackcb[120], stack_len;
  2109. float next_minbits = INFINITY;
  2110. int next_mincb = 0;
  2111. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  2112. start = win*128;
  2113. for (cb = 0; cb < 12; cb++) {
  2114. path[0][cb].cost = run_bits+4;
  2115. path[0][cb].prev_idx = -1;
  2116. path[0][cb].run = 0;
  2117. }
  2118. for (swb = 0; swb < max_sfb; swb++) {
  2119. size = sce->ics.swb_sizes[swb];
  2120. if (sce->zeroes[win*16 + swb]) {
  2121. float cost_stay_here = path[swb][0].cost;
  2122. float cost_get_here = next_minbits + run_bits + 4;
  2123. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
  2124. != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
  2125. cost_stay_here += run_bits;
  2126. if (cost_get_here < cost_stay_here) {
  2127. path[swb+1][0].prev_idx = next_mincb;
  2128. path[swb+1][0].cost = cost_get_here;
  2129. path[swb+1][0].run = 1;
  2130. } else {
  2131. path[swb+1][0].prev_idx = 0;
  2132. path[swb+1][0].cost = cost_stay_here;
  2133. path[swb+1][0].run = path[swb][0].run + 1;
  2134. }
  2135. next_minbits = path[swb+1][0].cost;
  2136. next_mincb = 0;
  2137. for (cb = 1; cb < 12; cb++) {
  2138. path[swb+1][cb].cost = 61450;
  2139. path[swb+1][cb].prev_idx = -1;
  2140. path[swb+1][cb].run = 0;
  2141. }
  2142. } else {
  2143. float minbits = next_minbits;
  2144. int mincb = next_mincb;
  2145. int startcb = sce->band_type[win*16+swb];
  2146. next_minbits = INFINITY;
  2147. next_mincb = 0;
  2148. for (cb = 0; cb < startcb; cb++) {
  2149. path[swb+1][cb].cost = 61450;
  2150. path[swb+1][cb].prev_idx = -1;
  2151. path[swb+1][cb].run = 0;
  2152. }
  2153. for (cb = startcb; cb < 12; cb++) {
  2154. float cost_stay_here, cost_get_here;
  2155. float bits = 0.0f;
  2156. for (w = 0; w < group_len; w++) {
  2157. bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
  2158. s->scoefs + start + w*128, size,
  2159. sce->sf_idx[(win+w)*16+swb], cb,
  2160. 0, INFINITY, NULL);
  2161. }
  2162. cost_stay_here = path[swb][cb].cost + bits;
  2163. cost_get_here = minbits + bits + run_bits + 4;
  2164. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  2165. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  2166. cost_stay_here += run_bits;
  2167. if (cost_get_here < cost_stay_here) {
  2168. path[swb+1][cb].prev_idx = mincb;
  2169. path[swb+1][cb].cost = cost_get_here;
  2170. path[swb+1][cb].run = 1;
  2171. } else {
  2172. path[swb+1][cb].prev_idx = cb;
  2173. path[swb+1][cb].cost = cost_stay_here;
  2174. path[swb+1][cb].run = path[swb][cb].run + 1;
  2175. }
  2176. if (path[swb+1][cb].cost < next_minbits) {
  2177. next_minbits = path[swb+1][cb].cost;
  2178. next_mincb = cb;
  2179. }
  2180. }
  2181. }
  2182. start += sce->ics.swb_sizes[swb];
  2183. }
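/* Trace the cheapest final codebook back through the trellis, collecting
 * (codebook, run) pairs, then emit them in order: a 4-bit codebook id
 * followed by the run length, using run_esc escapes for long runs. */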
  2184. stack_len = 0;
  2185. idx = 0;
  2186. for (cb = 1; cb < 12; cb++)
  2187. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  2188. idx = cb;
  2189. ppos = max_sfb;
  2190. while (ppos > 0) {
  2191. av_assert1(idx >= 0);
  2192. cb = idx;
  2193. stackrun[stack_len] = path[ppos][cb].run;
  2194. stackcb [stack_len] = cb;
  2195. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  2196. ppos -= path[ppos][cb].run;
  2197. stack_len++;
  2198. }
  2199. start = 0;
  2200. for (i = stack_len - 1; i >= 0; i--) {
  2201. put_bits(&s->pb, 4, stackcb[i]);
  2202. count = stackrun[i];
  2203. memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  2204. for (j = 0; j < count; j++) {
  2205. sce->band_type[win*16 + start] = stackcb[i];
  2206. start++;
  2207. }
  2208. while (count >= run_esc) {
  2209. put_bits(&s->pb, run_bits, run_esc);
  2210. count -= run_esc;
  2211. }
  2212. put_bits(&s->pb, run_bits, count);
  2213. }
  2214. }
  2215. #endif /* HAVE_INLINE_ASM */
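/*
 * Hook the MIPS implementations into the AACCoefficientsEncoder function
 * table when the two-loop coder (option 2) is selected; the trellis and
 * quantize-and-encode overrides stay commented out because they break the
 * fate-aac-pns-encode test.
 */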
  2216. void ff_aac_coder_init_mips(AACEncContext *c) {
  2217. #if HAVE_INLINE_ASM
  2218. AACCoefficientsEncoder *e = c->coder;
  2219. int option = c->options.aac_coder;
  2220. if (option == 2) {
  2221. // Disabled due to failure with fate-aac-pns-encode
  2222. // e->quantize_and_encode_band = quantize_and_encode_band_mips;
  2223. // e->encode_window_bands_info = codebook_trellis_rate_mips;
  2224. #if HAVE_MIPSFPU
  2225. e->search_for_quantizers = search_for_quantizers_twoloop_mips;
  2226. e->search_for_ms = search_for_ms_mips;
  2227. #endif /* HAVE_MIPSFPU */
  2228. }
  2229. #endif /* HAVE_INLINE_ASM */
  2230. }