You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2585 lines
106KB

  1. /*
  2. * Copyright (c) 2012
  3. * MIPS Technologies, Inc., California.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
  14. * contributors may be used to endorse or promote products derived from
  15. * this software without specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. *
  29. * Author: Stanislav Ocovaj (socovaj@mips.com)
  30. * Szabolcs Pal (sabolc@mips.com)
  31. *
  32. * AAC coefficients encoder optimized for MIPS floating-point architecture
  33. *
  34. * This file is part of FFmpeg.
  35. *
  36. * FFmpeg is free software; you can redistribute it and/or
  37. * modify it under the terms of the GNU Lesser General Public
  38. * License as published by the Free Software Foundation; either
  39. * version 2.1 of the License, or (at your option) any later version.
  40. *
  41. * FFmpeg is distributed in the hope that it will be useful,
  42. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  43. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  44. * Lesser General Public License for more details.
  45. *
  46. * You should have received a copy of the GNU Lesser General Public
  47. * License along with FFmpeg; if not, write to the Free Software
  48. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  49. */
  50. /**
  51. * @file
  52. * Reference: libavcodec/aaccoder.c
  53. */
  54. #include "libavutil/libm.h"
  55. #include <float.h>
  56. #include "libavutil/mathematics.h"
  57. #include "libavcodec/avcodec.h"
  58. #include "libavcodec/put_bits.h"
  59. #include "libavcodec/aac.h"
  60. #include "libavcodec/aacenc.h"
  61. #include "libavcodec/aactab.h"
  62. #if HAVE_INLINE_ASM
  /**
   * One entry of a band coding path.
   *
   * NOTE(review): no user of this struct is visible in this part of the file;
   * the per-field notes below are inferred from the field names only and
   * should be confirmed against the code that fills the structure in.
   */
  typedef struct BandCodingPath {
      int   prev_idx; /* presumably the index of the preceding path entry */
      float cost;     /* presumably the cost accumulated along the path */
      int   run;      /* presumably the run length attached to this entry */
  } BandCodingPath;
  /*
   * Per-run-length bit counts (NOTE(review): the exact use is not visible in
   * this part of the file; the description below is of the data only).
   *
   * Long table:  5 for indices 0..30, 10 for indices 31..62, 15 for index 63.
   */
  static const uint8_t run_value_bits_long[64] = {
       5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, /*  0..15 */
       5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10, /* 16..31 */
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, /* 32..47 */
      10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15  /* 48..63 */
  };

  /* Short table: 3 for indices 0..6, 6 for indices 7..14, 9 for index 15. */
  static const uint8_t run_value_bits_short[16] = {
      3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
  };

  /* Selector over the two tables above: [0] is the long one, [1] the short one. */
  static const uint8_t * const run_value_bits[2] = {
      run_value_bits_long,
      run_value_bits_short
  };
  /*
   * 81 = 3^4 entries.  Writing the index as four base-3 digits, each entry is
   * the number of digits that are nonzero.  Each row below covers one value of
   * the two leading digits (shown in the trailing comment).
   * NOTE(review): going by the name, this is the count of sign bits for an
   * unsigned 4-tuple codebook index; the users are outside the visible code.
   */
  static const uint8_t uquad_sign_bits[81] = {
      0, 1, 1, 1, 2, 2, 1, 2, 2, /* 0 0 x x */
      1, 2, 2, 2, 3, 3, 2, 3, 3, /* 0 1 x x */
      1, 2, 2, 2, 3, 3, 2, 3, 3, /* 0 2 x x */
      1, 2, 2, 2, 3, 3, 2, 3, 3, /* 1 0 x x */
      2, 3, 3, 3, 4, 4, 3, 4, 4, /* 1 1 x x */
      2, 3, 3, 3, 4, 4, 3, 4, 4, /* 1 2 x x */
      1, 2, 2, 2, 3, 3, 2, 3, 3, /* 2 0 x x */
      2, 3, 3, 3, 4, 4, 3, 4, 4, /* 2 1 x x */
      2, 3, 3, 3, 4, 4, 3, 4, 4  /* 2 2 x x */
  };
  /*
   * 64 = 8 * 8 entries.  For index i, the entry is the number of nonzero
   * values among (i / 8) and (i % 8).  One row per value of i / 8.
   * NOTE(review): going by the name, this is the sign-bit count for an
   * unsigned pair codebook with magnitudes 0..7; users are not visible here.
   */
  static const uint8_t upair7_sign_bits[64] = {
      0, 1, 1, 1, 1, 1, 1, 1, /* first = 0 */
      1, 2, 2, 2, 2, 2, 2, 2, /* first = 1 */
      1, 2, 2, 2, 2, 2, 2, 2, /* first = 2 */
      1, 2, 2, 2, 2, 2, 2, 2, /* first = 3 */
      1, 2, 2, 2, 2, 2, 2, 2, /* first = 4 */
      1, 2, 2, 2, 2, 2, 2, 2, /* first = 5 */
      1, 2, 2, 2, 2, 2, 2, 2, /* first = 6 */
      1, 2, 2, 2, 2, 2, 2, 2, /* first = 7 */
  };
  /*
   * 169 = 13 * 13 entries.  For index i, the entry is the number of nonzero
   * values among (i / 13) and (i % 13).  One row per value of i / 13.
   * NOTE(review): going by the name, this is the sign-bit count for an
   * unsigned pair codebook with magnitudes 0..12; users are not visible here.
   */
  static const uint8_t upair12_sign_bits[169] = {
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* first =  0 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  1 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  2 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  3 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  4 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  5 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  6 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  7 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  8 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  9 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first = 10 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first = 11 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2  /* first = 12 */
  };
  /*
   * 289 = 17 * 17 entries.  For index i, the entry is the number of nonzero
   * values among (i / 17) and (i % 17).  One row per value of i / 17.
   * NOTE(review): going by the name, this is the sign-bit count for the
   * escape codebook (magnitudes 0..16); users are not visible here.
   */
  static const uint8_t esc_sign_bits[289] = {
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* first =  0 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  1 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  2 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  3 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  4 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  5 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  6 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  7 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  8 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first =  9 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first = 10 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first = 11 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first = 12 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first = 13 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first = 14 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* first = 15 */
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2  /* first = 16 */
  };
  /*
   * Offsets added to a scaled coefficient just before it is truncated to int
   * by the quantizers in this file.
   */
  #define ROUND_STANDARD 0.4054f /* default rounding offset */
  #define ROUND_TO_ZERO  0.1054f /* smaller offset, selected when the caller asks for rtz */
  137. static void abs_pow34_v(float *out, const float *in, const int size) {
  138. #ifndef USE_REALLY_FULL_SEARCH
  139. int i;
  140. float a, b, c, d;
  141. float ax, bx, cx, dx;
  142. for (i = 0; i < size; i += 4) {
  143. a = fabsf(in[i ]);
  144. b = fabsf(in[i+1]);
  145. c = fabsf(in[i+2]);
  146. d = fabsf(in[i+3]);
  147. ax = sqrtf(a);
  148. bx = sqrtf(b);
  149. cx = sqrtf(c);
  150. dx = sqrtf(d);
  151. a = a * ax;
  152. b = b * bx;
  153. c = c * cx;
  154. d = d * dx;
  155. out[i ] = sqrtf(a);
  156. out[i+1] = sqrtf(b);
  157. out[i+2] = sqrtf(c);
  158. out[i+3] = sqrtf(d);
  159. }
  160. #endif /* USE_REALLY_FULL_SEARCH */
  161. }
  /**
   * Find the largest value in the first swb_size entries of each of
   * group_len consecutive windows, where windows are 128 floats apart.
   *
   * The running maximum starts at 0.0f, so the result is never negative.
   *
   * @param group_len number of windows to scan
   * @param swb_size  number of entries scanned per window
   * @param scaled    data, window w starting at scaled[w * 128]
   * @return the maximum found, or 0.0f if nothing exceeds it
   */
  static float find_max_val(int group_len, int swb_size, const float *scaled) {
      float peak = 0.0f;
      int win, k;

      for (win = 0; win < group_len; win++) {
          const int base = win * 128;

          for (k = 0; k < swb_size; k++) {
              const float v = scaled[base + k];

              peak = peak > v ? peak : v;
          }
      }
      return peak;
  }
  172. static int find_min_book(float maxval, int sf) {
  173. float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
  174. float Q34 = sqrtf(Q * sqrtf(Q));
  175. int qmaxval, cb;
  176. qmaxval = maxval * Q34 + 0.4054f;
  177. if (qmaxval == 0) cb = 0;
  178. else if (qmaxval == 1) cb = 1;
  179. else if (qmaxval == 2) cb = 3;
  180. else if (qmaxval <= 4) cb = 5;
  181. else if (qmaxval <= 7) cb = 7;
  182. else if (qmaxval <= 12) cb = 9;
  183. else cb = 11;
  184. return cb;
  185. }
  186. /**
  187. * Functions developed from template function and optimized for quantizing and encoding band
  188. */
  189. static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
  190. PutBitContext *pb, const float *in, float *out,
  191. const float *scaled, int size, int scale_idx,
  192. int cb, const float lambda, const float uplim,
  193. int *bits, const float ROUNDING)
  194. {
  195. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  196. int i;
  197. int qc1, qc2, qc3, qc4;
  198. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  199. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  200. abs_pow34_v(s->scoefs, in, size);
  201. scaled = s->scoefs;
  202. for (i = 0; i < size; i += 4) {
  203. int curidx;
  204. int *in_int = (int *)&in[i];
  205. int t0, t1, t2, t3, t4, t5, t6, t7;
  206. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  207. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  208. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  209. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  210. __asm__ volatile (
  211. ".set push \n\t"
  212. ".set noreorder \n\t"
  213. "slt %[qc1], $zero, %[qc1] \n\t"
  214. "slt %[qc2], $zero, %[qc2] \n\t"
  215. "slt %[qc3], $zero, %[qc3] \n\t"
  216. "slt %[qc4], $zero, %[qc4] \n\t"
  217. "lw %[t0], 0(%[in_int]) \n\t"
  218. "lw %[t1], 4(%[in_int]) \n\t"
  219. "lw %[t2], 8(%[in_int]) \n\t"
  220. "lw %[t3], 12(%[in_int]) \n\t"
  221. "srl %[t0], %[t0], 31 \n\t"
  222. "srl %[t1], %[t1], 31 \n\t"
  223. "srl %[t2], %[t2], 31 \n\t"
  224. "srl %[t3], %[t3], 31 \n\t"
  225. "subu %[t4], $zero, %[qc1] \n\t"
  226. "subu %[t5], $zero, %[qc2] \n\t"
  227. "subu %[t6], $zero, %[qc3] \n\t"
  228. "subu %[t7], $zero, %[qc4] \n\t"
  229. "movn %[qc1], %[t4], %[t0] \n\t"
  230. "movn %[qc2], %[t5], %[t1] \n\t"
  231. "movn %[qc3], %[t6], %[t2] \n\t"
  232. "movn %[qc4], %[t7], %[t3] \n\t"
  233. ".set pop \n\t"
  234. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  235. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  236. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  237. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  238. : [in_int]"r"(in_int)
  239. : "memory"
  240. );
  241. curidx = qc1;
  242. curidx *= 3;
  243. curidx += qc2;
  244. curidx *= 3;
  245. curidx += qc3;
  246. curidx *= 3;
  247. curidx += qc4;
  248. curidx += 40;
  249. put_bits(pb, p_bits[curidx], p_codes[curidx]);
  250. }
  251. }
  252. static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
  253. PutBitContext *pb, const float *in, float *out,
  254. const float *scaled, int size, int scale_idx,
  255. int cb, const float lambda, const float uplim,
  256. int *bits, const float ROUNDING)
  257. {
  258. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  259. int i;
  260. int qc1, qc2, qc3, qc4;
  261. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  262. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  263. abs_pow34_v(s->scoefs, in, size);
  264. scaled = s->scoefs;
  265. for (i = 0; i < size; i += 4) {
  266. int curidx, sign, count;
  267. int *in_int = (int *)&in[i];
  268. uint8_t v_bits;
  269. unsigned int v_codes;
  270. int t0, t1, t2, t3, t4;
  271. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  272. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  273. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  274. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  275. __asm__ volatile (
  276. ".set push \n\t"
  277. ".set noreorder \n\t"
  278. "ori %[t4], $zero, 2 \n\t"
  279. "ori %[sign], $zero, 0 \n\t"
  280. "slt %[t0], %[t4], %[qc1] \n\t"
  281. "slt %[t1], %[t4], %[qc2] \n\t"
  282. "slt %[t2], %[t4], %[qc3] \n\t"
  283. "slt %[t3], %[t4], %[qc4] \n\t"
  284. "movn %[qc1], %[t4], %[t0] \n\t"
  285. "movn %[qc2], %[t4], %[t1] \n\t"
  286. "movn %[qc3], %[t4], %[t2] \n\t"
  287. "movn %[qc4], %[t4], %[t3] \n\t"
  288. "lw %[t0], 0(%[in_int]) \n\t"
  289. "lw %[t1], 4(%[in_int]) \n\t"
  290. "lw %[t2], 8(%[in_int]) \n\t"
  291. "lw %[t3], 12(%[in_int]) \n\t"
  292. "slt %[t0], %[t0], $zero \n\t"
  293. "movn %[sign], %[t0], %[qc1] \n\t"
  294. "slt %[t1], %[t1], $zero \n\t"
  295. "slt %[t2], %[t2], $zero \n\t"
  296. "slt %[t3], %[t3], $zero \n\t"
  297. "sll %[t0], %[sign], 1 \n\t"
  298. "or %[t0], %[t0], %[t1] \n\t"
  299. "movn %[sign], %[t0], %[qc2] \n\t"
  300. "slt %[t4], $zero, %[qc1] \n\t"
  301. "slt %[t1], $zero, %[qc2] \n\t"
  302. "slt %[count], $zero, %[qc3] \n\t"
  303. "sll %[t0], %[sign], 1 \n\t"
  304. "or %[t0], %[t0], %[t2] \n\t"
  305. "movn %[sign], %[t0], %[qc3] \n\t"
  306. "slt %[t2], $zero, %[qc4] \n\t"
  307. "addu %[count], %[count], %[t4] \n\t"
  308. "addu %[count], %[count], %[t1] \n\t"
  309. "sll %[t0], %[sign], 1 \n\t"
  310. "or %[t0], %[t0], %[t3] \n\t"
  311. "movn %[sign], %[t0], %[qc4] \n\t"
  312. "addu %[count], %[count], %[t2] \n\t"
  313. ".set pop \n\t"
  314. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  315. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  316. [sign]"=&r"(sign), [count]"=&r"(count),
  317. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  318. [t4]"=&r"(t4)
  319. : [in_int]"r"(in_int)
  320. : "memory"
  321. );
  322. curidx = qc1;
  323. curidx *= 3;
  324. curidx += qc2;
  325. curidx *= 3;
  326. curidx += qc3;
  327. curidx *= 3;
  328. curidx += qc4;
  329. v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
  330. v_bits = p_bits[curidx] + count;
  331. put_bits(pb, v_bits, v_codes);
  332. }
  333. }
  334. static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
  335. PutBitContext *pb, const float *in, float *out,
  336. const float *scaled, int size, int scale_idx,
  337. int cb, const float lambda, const float uplim,
  338. int *bits, const float ROUNDING)
  339. {
  340. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  341. int i;
  342. int qc1, qc2, qc3, qc4;
  343. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  344. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  345. abs_pow34_v(s->scoefs, in, size);
  346. scaled = s->scoefs;
  347. for (i = 0; i < size; i += 4) {
  348. int curidx, curidx2;
  349. int *in_int = (int *)&in[i];
  350. uint8_t v_bits;
  351. unsigned int v_codes;
  352. int t0, t1, t2, t3, t4, t5, t6, t7;
  353. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  354. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  355. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  356. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  357. __asm__ volatile (
  358. ".set push \n\t"
  359. ".set noreorder \n\t"
  360. "ori %[t4], $zero, 4 \n\t"
  361. "slt %[t0], %[t4], %[qc1] \n\t"
  362. "slt %[t1], %[t4], %[qc2] \n\t"
  363. "slt %[t2], %[t4], %[qc3] \n\t"
  364. "slt %[t3], %[t4], %[qc4] \n\t"
  365. "movn %[qc1], %[t4], %[t0] \n\t"
  366. "movn %[qc2], %[t4], %[t1] \n\t"
  367. "movn %[qc3], %[t4], %[t2] \n\t"
  368. "movn %[qc4], %[t4], %[t3] \n\t"
  369. "lw %[t0], 0(%[in_int]) \n\t"
  370. "lw %[t1], 4(%[in_int]) \n\t"
  371. "lw %[t2], 8(%[in_int]) \n\t"
  372. "lw %[t3], 12(%[in_int]) \n\t"
  373. "srl %[t0], %[t0], 31 \n\t"
  374. "srl %[t1], %[t1], 31 \n\t"
  375. "srl %[t2], %[t2], 31 \n\t"
  376. "srl %[t3], %[t3], 31 \n\t"
  377. "subu %[t4], $zero, %[qc1] \n\t"
  378. "subu %[t5], $zero, %[qc2] \n\t"
  379. "subu %[t6], $zero, %[qc3] \n\t"
  380. "subu %[t7], $zero, %[qc4] \n\t"
  381. "movn %[qc1], %[t4], %[t0] \n\t"
  382. "movn %[qc2], %[t5], %[t1] \n\t"
  383. "movn %[qc3], %[t6], %[t2] \n\t"
  384. "movn %[qc4], %[t7], %[t3] \n\t"
  385. ".set pop \n\t"
  386. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  387. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  388. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  389. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  390. : [in_int]"r"(in_int)
  391. : "memory"
  392. );
  393. curidx = 9 * qc1;
  394. curidx += qc2 + 40;
  395. curidx2 = 9 * qc3;
  396. curidx2 += qc4 + 40;
  397. v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
  398. v_bits = p_bits[curidx] + p_bits[curidx2];
  399. put_bits(pb, v_bits, v_codes);
  400. }
  401. }
  402. static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
  403. PutBitContext *pb, const float *in, float *out,
  404. const float *scaled, int size, int scale_idx,
  405. int cb, const float lambda, const float uplim,
  406. int *bits, const float ROUNDING)
  407. {
  408. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  409. int i;
  410. int qc1, qc2, qc3, qc4;
  411. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  412. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  413. abs_pow34_v(s->scoefs, in, size);
  414. scaled = s->scoefs;
  415. for (i = 0; i < size; i += 4) {
  416. int curidx, sign1, count1, sign2, count2;
  417. int *in_int = (int *)&in[i];
  418. uint8_t v_bits;
  419. unsigned int v_codes;
  420. int t0, t1, t2, t3, t4;
  421. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  422. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  423. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  424. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  425. __asm__ volatile (
  426. ".set push \n\t"
  427. ".set noreorder \n\t"
  428. "ori %[t4], $zero, 7 \n\t"
  429. "ori %[sign1], $zero, 0 \n\t"
  430. "ori %[sign2], $zero, 0 \n\t"
  431. "slt %[t0], %[t4], %[qc1] \n\t"
  432. "slt %[t1], %[t4], %[qc2] \n\t"
  433. "slt %[t2], %[t4], %[qc3] \n\t"
  434. "slt %[t3], %[t4], %[qc4] \n\t"
  435. "movn %[qc1], %[t4], %[t0] \n\t"
  436. "movn %[qc2], %[t4], %[t1] \n\t"
  437. "movn %[qc3], %[t4], %[t2] \n\t"
  438. "movn %[qc4], %[t4], %[t3] \n\t"
  439. "lw %[t0], 0(%[in_int]) \n\t"
  440. "lw %[t1], 4(%[in_int]) \n\t"
  441. "lw %[t2], 8(%[in_int]) \n\t"
  442. "lw %[t3], 12(%[in_int]) \n\t"
  443. "slt %[t0], %[t0], $zero \n\t"
  444. "movn %[sign1], %[t0], %[qc1] \n\t"
  445. "slt %[t2], %[t2], $zero \n\t"
  446. "movn %[sign2], %[t2], %[qc3] \n\t"
  447. "slt %[t1], %[t1], $zero \n\t"
  448. "sll %[t0], %[sign1], 1 \n\t"
  449. "or %[t0], %[t0], %[t1] \n\t"
  450. "movn %[sign1], %[t0], %[qc2] \n\t"
  451. "slt %[t3], %[t3], $zero \n\t"
  452. "sll %[t0], %[sign2], 1 \n\t"
  453. "or %[t0], %[t0], %[t3] \n\t"
  454. "movn %[sign2], %[t0], %[qc4] \n\t"
  455. "slt %[count1], $zero, %[qc1] \n\t"
  456. "slt %[t1], $zero, %[qc2] \n\t"
  457. "slt %[count2], $zero, %[qc3] \n\t"
  458. "slt %[t2], $zero, %[qc4] \n\t"
  459. "addu %[count1], %[count1], %[t1] \n\t"
  460. "addu %[count2], %[count2], %[t2] \n\t"
  461. ".set pop \n\t"
  462. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  463. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  464. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  465. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  466. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  467. [t4]"=&r"(t4)
  468. : [in_int]"r"(in_int)
  469. : "t0", "t1", "t2", "t3", "t4",
  470. "memory"
  471. );
  472. curidx = 8 * qc1;
  473. curidx += qc2;
  474. v_codes = (p_codes[curidx] << count1) | sign1;
  475. v_bits = p_bits[curidx] + count1;
  476. put_bits(pb, v_bits, v_codes);
  477. curidx = 8 * qc3;
  478. curidx += qc4;
  479. v_codes = (p_codes[curidx] << count2) | sign2;
  480. v_bits = p_bits[curidx] + count2;
  481. put_bits(pb, v_bits, v_codes);
  482. }
  483. }
  484. static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
  485. PutBitContext *pb, const float *in, float *out,
  486. const float *scaled, int size, int scale_idx,
  487. int cb, const float lambda, const float uplim,
  488. int *bits, const float ROUNDING)
  489. {
  490. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  491. int i;
  492. int qc1, qc2, qc3, qc4;
  493. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  494. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  495. abs_pow34_v(s->scoefs, in, size);
  496. scaled = s->scoefs;
  497. for (i = 0; i < size; i += 4) {
  498. int curidx, sign1, count1, sign2, count2;
  499. int *in_int = (int *)&in[i];
  500. uint8_t v_bits;
  501. unsigned int v_codes;
  502. int t0, t1, t2, t3, t4;
  503. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  504. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  505. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  506. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  507. __asm__ volatile (
  508. ".set push \n\t"
  509. ".set noreorder \n\t"
  510. "ori %[t4], $zero, 12 \n\t"
  511. "ori %[sign1], $zero, 0 \n\t"
  512. "ori %[sign2], $zero, 0 \n\t"
  513. "slt %[t0], %[t4], %[qc1] \n\t"
  514. "slt %[t1], %[t4], %[qc2] \n\t"
  515. "slt %[t2], %[t4], %[qc3] \n\t"
  516. "slt %[t3], %[t4], %[qc4] \n\t"
  517. "movn %[qc1], %[t4], %[t0] \n\t"
  518. "movn %[qc2], %[t4], %[t1] \n\t"
  519. "movn %[qc3], %[t4], %[t2] \n\t"
  520. "movn %[qc4], %[t4], %[t3] \n\t"
  521. "lw %[t0], 0(%[in_int]) \n\t"
  522. "lw %[t1], 4(%[in_int]) \n\t"
  523. "lw %[t2], 8(%[in_int]) \n\t"
  524. "lw %[t3], 12(%[in_int]) \n\t"
  525. "slt %[t0], %[t0], $zero \n\t"
  526. "movn %[sign1], %[t0], %[qc1] \n\t"
  527. "slt %[t2], %[t2], $zero \n\t"
  528. "movn %[sign2], %[t2], %[qc3] \n\t"
  529. "slt %[t1], %[t1], $zero \n\t"
  530. "sll %[t0], %[sign1], 1 \n\t"
  531. "or %[t0], %[t0], %[t1] \n\t"
  532. "movn %[sign1], %[t0], %[qc2] \n\t"
  533. "slt %[t3], %[t3], $zero \n\t"
  534. "sll %[t0], %[sign2], 1 \n\t"
  535. "or %[t0], %[t0], %[t3] \n\t"
  536. "movn %[sign2], %[t0], %[qc4] \n\t"
  537. "slt %[count1], $zero, %[qc1] \n\t"
  538. "slt %[t1], $zero, %[qc2] \n\t"
  539. "slt %[count2], $zero, %[qc3] \n\t"
  540. "slt %[t2], $zero, %[qc4] \n\t"
  541. "addu %[count1], %[count1], %[t1] \n\t"
  542. "addu %[count2], %[count2], %[t2] \n\t"
  543. ".set pop \n\t"
  544. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  545. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  546. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  547. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  548. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  549. [t4]"=&r"(t4)
  550. : [in_int]"r"(in_int)
  551. : "memory"
  552. );
  553. curidx = 13 * qc1;
  554. curidx += qc2;
  555. v_codes = (p_codes[curidx] << count1) | sign1;
  556. v_bits = p_bits[curidx] + count1;
  557. put_bits(pb, v_bits, v_codes);
  558. curidx = 13 * qc3;
  559. curidx += qc4;
  560. v_codes = (p_codes[curidx] << count2) | sign2;
  561. v_bits = p_bits[curidx] + count2;
  562. put_bits(pb, v_bits, v_codes);
  563. }
  564. }
  565. static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
  566. PutBitContext *pb, const float *in, float *out,
  567. const float *scaled, int size, int scale_idx,
  568. int cb, const float lambda, const float uplim,
  569. int *bits, const float ROUNDING)
  570. {
  571. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  572. int i;
  573. int qc1, qc2, qc3, qc4;
  574. uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
  575. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  576. float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
  577. abs_pow34_v(s->scoefs, in, size);
  578. scaled = s->scoefs;
  579. if (cb < 11) {
  580. for (i = 0; i < size; i += 4) {
  581. int curidx, curidx2, sign1, count1, sign2, count2;
  582. int *in_int = (int *)&in[i];
  583. uint8_t v_bits;
  584. unsigned int v_codes;
  585. int t0, t1, t2, t3, t4;
  586. qc1 = scaled[i ] * Q34 + ROUNDING;
  587. qc2 = scaled[i+1] * Q34 + ROUNDING;
  588. qc3 = scaled[i+2] * Q34 + ROUNDING;
  589. qc4 = scaled[i+3] * Q34 + ROUNDING;
  590. __asm__ volatile (
  591. ".set push \n\t"
  592. ".set noreorder \n\t"
  593. "ori %[t4], $zero, 16 \n\t"
  594. "ori %[sign1], $zero, 0 \n\t"
  595. "ori %[sign2], $zero, 0 \n\t"
  596. "slt %[t0], %[t4], %[qc1] \n\t"
  597. "slt %[t1], %[t4], %[qc2] \n\t"
  598. "slt %[t2], %[t4], %[qc3] \n\t"
  599. "slt %[t3], %[t4], %[qc4] \n\t"
  600. "movn %[qc1], %[t4], %[t0] \n\t"
  601. "movn %[qc2], %[t4], %[t1] \n\t"
  602. "movn %[qc3], %[t4], %[t2] \n\t"
  603. "movn %[qc4], %[t4], %[t3] \n\t"
  604. "lw %[t0], 0(%[in_int]) \n\t"
  605. "lw %[t1], 4(%[in_int]) \n\t"
  606. "lw %[t2], 8(%[in_int]) \n\t"
  607. "lw %[t3], 12(%[in_int]) \n\t"
  608. "slt %[t0], %[t0], $zero \n\t"
  609. "movn %[sign1], %[t0], %[qc1] \n\t"
  610. "slt %[t2], %[t2], $zero \n\t"
  611. "movn %[sign2], %[t2], %[qc3] \n\t"
  612. "slt %[t1], %[t1], $zero \n\t"
  613. "sll %[t0], %[sign1], 1 \n\t"
  614. "or %[t0], %[t0], %[t1] \n\t"
  615. "movn %[sign1], %[t0], %[qc2] \n\t"
  616. "slt %[t3], %[t3], $zero \n\t"
  617. "sll %[t0], %[sign2], 1 \n\t"
  618. "or %[t0], %[t0], %[t3] \n\t"
  619. "movn %[sign2], %[t0], %[qc4] \n\t"
  620. "slt %[count1], $zero, %[qc1] \n\t"
  621. "slt %[t1], $zero, %[qc2] \n\t"
  622. "slt %[count2], $zero, %[qc3] \n\t"
  623. "slt %[t2], $zero, %[qc4] \n\t"
  624. "addu %[count1], %[count1], %[t1] \n\t"
  625. "addu %[count2], %[count2], %[t2] \n\t"
  626. ".set pop \n\t"
  627. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  628. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  629. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  630. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  631. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  632. [t4]"=&r"(t4)
  633. : [in_int]"r"(in_int)
  634. : "memory"
  635. );
  636. curidx = 17 * qc1;
  637. curidx += qc2;
  638. curidx2 = 17 * qc3;
  639. curidx2 += qc4;
  640. v_codes = (p_codes[curidx] << count1) | sign1;
  641. v_bits = p_bits[curidx] + count1;
  642. put_bits(pb, v_bits, v_codes);
  643. v_codes = (p_codes[curidx2] << count2) | sign2;
  644. v_bits = p_bits[curidx2] + count2;
  645. put_bits(pb, v_bits, v_codes);
  646. }
  647. } else {
  648. for (i = 0; i < size; i += 4) {
  649. int curidx, curidx2, sign1, count1, sign2, count2;
  650. int *in_int = (int *)&in[i];
  651. uint8_t v_bits;
  652. unsigned int v_codes;
  653. int c1, c2, c3, c4;
  654. int t0, t1, t2, t3, t4;
  655. qc1 = scaled[i ] * Q34 + ROUNDING;
  656. qc2 = scaled[i+1] * Q34 + ROUNDING;
  657. qc3 = scaled[i+2] * Q34 + ROUNDING;
  658. qc4 = scaled[i+3] * Q34 + ROUNDING;
  659. __asm__ volatile (
  660. ".set push \n\t"
  661. ".set noreorder \n\t"
  662. "ori %[t4], $zero, 16 \n\t"
  663. "ori %[sign1], $zero, 0 \n\t"
  664. "ori %[sign2], $zero, 0 \n\t"
  665. "shll_s.w %[c1], %[qc1], 18 \n\t"
  666. "shll_s.w %[c2], %[qc2], 18 \n\t"
  667. "shll_s.w %[c3], %[qc3], 18 \n\t"
  668. "shll_s.w %[c4], %[qc4], 18 \n\t"
  669. "srl %[c1], %[c1], 18 \n\t"
  670. "srl %[c2], %[c2], 18 \n\t"
  671. "srl %[c3], %[c3], 18 \n\t"
  672. "srl %[c4], %[c4], 18 \n\t"
  673. "slt %[t0], %[t4], %[qc1] \n\t"
  674. "slt %[t1], %[t4], %[qc2] \n\t"
  675. "slt %[t2], %[t4], %[qc3] \n\t"
  676. "slt %[t3], %[t4], %[qc4] \n\t"
  677. "movn %[qc1], %[t4], %[t0] \n\t"
  678. "movn %[qc2], %[t4], %[t1] \n\t"
  679. "movn %[qc3], %[t4], %[t2] \n\t"
  680. "movn %[qc4], %[t4], %[t3] \n\t"
  681. "lw %[t0], 0(%[in_int]) \n\t"
  682. "lw %[t1], 4(%[in_int]) \n\t"
  683. "lw %[t2], 8(%[in_int]) \n\t"
  684. "lw %[t3], 12(%[in_int]) \n\t"
  685. "slt %[t0], %[t0], $zero \n\t"
  686. "movn %[sign1], %[t0], %[qc1] \n\t"
  687. "slt %[t2], %[t2], $zero \n\t"
  688. "movn %[sign2], %[t2], %[qc3] \n\t"
  689. "slt %[t1], %[t1], $zero \n\t"
  690. "sll %[t0], %[sign1], 1 \n\t"
  691. "or %[t0], %[t0], %[t1] \n\t"
  692. "movn %[sign1], %[t0], %[qc2] \n\t"
  693. "slt %[t3], %[t3], $zero \n\t"
  694. "sll %[t0], %[sign2], 1 \n\t"
  695. "or %[t0], %[t0], %[t3] \n\t"
  696. "movn %[sign2], %[t0], %[qc4] \n\t"
  697. "slt %[count1], $zero, %[qc1] \n\t"
  698. "slt %[t1], $zero, %[qc2] \n\t"
  699. "slt %[count2], $zero, %[qc3] \n\t"
  700. "slt %[t2], $zero, %[qc4] \n\t"
  701. "addu %[count1], %[count1], %[t1] \n\t"
  702. "addu %[count2], %[count2], %[t2] \n\t"
  703. ".set pop \n\t"
  704. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  705. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  706. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  707. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  708. [c1]"=&r"(c1), [c2]"=&r"(c2),
  709. [c3]"=&r"(c3), [c4]"=&r"(c4),
  710. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  711. [t4]"=&r"(t4)
  712. : [in_int]"r"(in_int)
  713. : "memory"
  714. );
  715. curidx = 17 * qc1;
  716. curidx += qc2;
  717. curidx2 = 17 * qc3;
  718. curidx2 += qc4;
  719. v_codes = (p_codes[curidx] << count1) | sign1;
  720. v_bits = p_bits[curidx] + count1;
  721. put_bits(pb, v_bits, v_codes);
  722. if (p_vectors[curidx*2 ] == 64.0f) {
  723. int len = av_log2(c1);
  724. v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
  725. put_bits(pb, len * 2 - 3, v_codes);
  726. }
  727. if (p_vectors[curidx*2+1] == 64.0f) {
  728. int len = av_log2(c2);
  729. v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
  730. put_bits(pb, len*2-3, v_codes);
  731. }
  732. v_codes = (p_codes[curidx2] << count2) | sign2;
  733. v_bits = p_bits[curidx2] + count2;
  734. put_bits(pb, v_bits, v_codes);
  735. if (p_vectors[curidx2*2 ] == 64.0f) {
  736. int len = av_log2(c3);
  737. v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
  738. put_bits(pb, len* 2 - 3, v_codes);
  739. }
  740. if (p_vectors[curidx2*2+1] == 64.0f) {
  741. int len = av_log2(c4);
  742. v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
  743. put_bits(pb, len * 2 - 3, v_codes);
  744. }
  745. }
  746. }
  747. }
  748. static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
  749. PutBitContext *pb, const float *in, float *out,
  750. const float *scaled, int size, int scale_idx,
  751. int cb, const float lambda, const float uplim,
  752. int *bits, const float ROUNDING) {
  753. av_assert0(0);
  754. }
  755. static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
  756. PutBitContext *pb, const float *in, float *out,
  757. const float *scaled, int size, int scale_idx,
  758. int cb, const float lambda, const float uplim,
  759. int *bits, const float ROUNDING) {
  760. int i;
  761. if (bits)
  762. *bits = 0;
  763. if (out) {
  764. for (i = 0; i < size; i += 4) {
  765. out[i ] = 0.0f;
  766. out[i+1] = 0.0f;
  767. out[i+2] = 0.0f;
  768. out[i+3] = 0.0f;
  769. }
  770. }
  771. }
  772. static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
  773. PutBitContext *pb, const float *in, float *out,
  774. const float *scaled, int size, int scale_idx,
  775. int cb, const float lambda, const float uplim,
  776. int *bits, const float ROUNDING) = {
  777. quantize_and_encode_band_cost_ZERO_mips,
  778. quantize_and_encode_band_cost_SQUAD_mips,
  779. quantize_and_encode_band_cost_SQUAD_mips,
  780. quantize_and_encode_band_cost_UQUAD_mips,
  781. quantize_and_encode_band_cost_UQUAD_mips,
  782. quantize_and_encode_band_cost_SPAIR_mips,
  783. quantize_and_encode_band_cost_SPAIR_mips,
  784. quantize_and_encode_band_cost_UPAIR7_mips,
  785. quantize_and_encode_band_cost_UPAIR7_mips,
  786. quantize_and_encode_band_cost_UPAIR12_mips,
  787. quantize_and_encode_band_cost_UPAIR12_mips,
  788. quantize_and_encode_band_cost_ESC_mips,
  789. quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
  790. quantize_and_encode_band_cost_ZERO_mips,
  791. quantize_and_encode_band_cost_ZERO_mips,
  792. quantize_and_encode_band_cost_ZERO_mips,
  793. };
  /*
   * Call the encoder registered for codebook `cb` in
   * quantize_and_encode_band_cost_arr, forwarding every argument unchanged.
   * Note that `cb` is expanded twice (table index and argument).
   */
  #define quantize_and_encode_band_cost(                       \
              s, pb, in, out, scaled, size, scale_idx, cb,     \
              lambda, uplim, bits, ROUNDING)                   \
      quantize_and_encode_band_cost_arr[cb](                   \
              s, pb, in, out, scaled, size, scale_idx, cb,     \
              lambda, uplim, bits, ROUNDING)
  800. static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
  801. const float *in, float *out, int size, int scale_idx,
  802. int cb, const float lambda, int rtz)
  803. {
  804. quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
  805. INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
  806. }
  807. /**
  808. * Functions developed from template function and optimized for getting the number of bits
  809. */
  810. static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
  811. PutBitContext *pb, const float *in,
  812. const float *scaled, int size, int scale_idx,
  813. int cb, const float lambda, const float uplim,
  814. int *bits)
  815. {
  816. return 0;
  817. }
  818. static float get_band_numbits_NONE_mips(struct AACEncContext *s,
  819. PutBitContext *pb, const float *in,
  820. const float *scaled, int size, int scale_idx,
  821. int cb, const float lambda, const float uplim,
  822. int *bits)
  823. {
  824. av_assert0(0);
  825. return 0;
  826. }
  827. static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
  828. PutBitContext *pb, const float *in,
  829. const float *scaled, int size, int scale_idx,
  830. int cb, const float lambda, const float uplim,
  831. int *bits)
  832. {
  833. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  834. int i;
  835. int qc1, qc2, qc3, qc4;
  836. int curbits = 0;
  837. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  838. for (i = 0; i < size; i += 4) {
  839. int curidx;
  840. int *in_int = (int *)&in[i];
  841. int t0, t1, t2, t3, t4, t5, t6, t7;
  842. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  843. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  844. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  845. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  846. __asm__ volatile (
  847. ".set push \n\t"
  848. ".set noreorder \n\t"
  849. "slt %[qc1], $zero, %[qc1] \n\t"
  850. "slt %[qc2], $zero, %[qc2] \n\t"
  851. "slt %[qc3], $zero, %[qc3] \n\t"
  852. "slt %[qc4], $zero, %[qc4] \n\t"
  853. "lw %[t0], 0(%[in_int]) \n\t"
  854. "lw %[t1], 4(%[in_int]) \n\t"
  855. "lw %[t2], 8(%[in_int]) \n\t"
  856. "lw %[t3], 12(%[in_int]) \n\t"
  857. "srl %[t0], %[t0], 31 \n\t"
  858. "srl %[t1], %[t1], 31 \n\t"
  859. "srl %[t2], %[t2], 31 \n\t"
  860. "srl %[t3], %[t3], 31 \n\t"
  861. "subu %[t4], $zero, %[qc1] \n\t"
  862. "subu %[t5], $zero, %[qc2] \n\t"
  863. "subu %[t6], $zero, %[qc3] \n\t"
  864. "subu %[t7], $zero, %[qc4] \n\t"
  865. "movn %[qc1], %[t4], %[t0] \n\t"
  866. "movn %[qc2], %[t5], %[t1] \n\t"
  867. "movn %[qc3], %[t6], %[t2] \n\t"
  868. "movn %[qc4], %[t7], %[t3] \n\t"
  869. ".set pop \n\t"
  870. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  871. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  872. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  873. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  874. : [in_int]"r"(in_int)
  875. : "memory"
  876. );
  877. curidx = qc1;
  878. curidx *= 3;
  879. curidx += qc2;
  880. curidx *= 3;
  881. curidx += qc3;
  882. curidx *= 3;
  883. curidx += qc4;
  884. curidx += 40;
  885. curbits += p_bits[curidx];
  886. }
  887. return curbits;
  888. }
  889. static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
  890. PutBitContext *pb, const float *in,
  891. const float *scaled, int size, int scale_idx,
  892. int cb, const float lambda, const float uplim,
  893. int *bits)
  894. {
  895. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  896. int i;
  897. int curbits = 0;
  898. int qc1, qc2, qc3, qc4;
  899. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  900. for (i = 0; i < size; i += 4) {
  901. int curidx;
  902. int t0, t1, t2, t3, t4;
  903. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  904. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  905. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  906. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  907. __asm__ volatile (
  908. ".set push \n\t"
  909. ".set noreorder \n\t"
  910. "ori %[t4], $zero, 2 \n\t"
  911. "slt %[t0], %[t4], %[qc1] \n\t"
  912. "slt %[t1], %[t4], %[qc2] \n\t"
  913. "slt %[t2], %[t4], %[qc3] \n\t"
  914. "slt %[t3], %[t4], %[qc4] \n\t"
  915. "movn %[qc1], %[t4], %[t0] \n\t"
  916. "movn %[qc2], %[t4], %[t1] \n\t"
  917. "movn %[qc3], %[t4], %[t2] \n\t"
  918. "movn %[qc4], %[t4], %[t3] \n\t"
  919. ".set pop \n\t"
  920. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  921. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  922. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  923. [t4]"=&r"(t4)
  924. );
  925. curidx = qc1;
  926. curidx *= 3;
  927. curidx += qc2;
  928. curidx *= 3;
  929. curidx += qc3;
  930. curidx *= 3;
  931. curidx += qc4;
  932. curbits += p_bits[curidx];
  933. curbits += uquad_sign_bits[curidx];
  934. }
  935. return curbits;
  936. }
  937. static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
  938. PutBitContext *pb, const float *in,
  939. const float *scaled, int size, int scale_idx,
  940. int cb, const float lambda, const float uplim,
  941. int *bits)
  942. {
  943. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  944. int i;
  945. int qc1, qc2, qc3, qc4;
  946. int curbits = 0;
  947. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  948. for (i = 0; i < size; i += 4) {
  949. int curidx, curidx2;
  950. int *in_int = (int *)&in[i];
  951. int t0, t1, t2, t3, t4, t5, t6, t7;
  952. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  953. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  954. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  955. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  956. __asm__ volatile (
  957. ".set push \n\t"
  958. ".set noreorder \n\t"
  959. "ori %[t4], $zero, 4 \n\t"
  960. "slt %[t0], %[t4], %[qc1] \n\t"
  961. "slt %[t1], %[t4], %[qc2] \n\t"
  962. "slt %[t2], %[t4], %[qc3] \n\t"
  963. "slt %[t3], %[t4], %[qc4] \n\t"
  964. "movn %[qc1], %[t4], %[t0] \n\t"
  965. "movn %[qc2], %[t4], %[t1] \n\t"
  966. "movn %[qc3], %[t4], %[t2] \n\t"
  967. "movn %[qc4], %[t4], %[t3] \n\t"
  968. "lw %[t0], 0(%[in_int]) \n\t"
  969. "lw %[t1], 4(%[in_int]) \n\t"
  970. "lw %[t2], 8(%[in_int]) \n\t"
  971. "lw %[t3], 12(%[in_int]) \n\t"
  972. "srl %[t0], %[t0], 31 \n\t"
  973. "srl %[t1], %[t1], 31 \n\t"
  974. "srl %[t2], %[t2], 31 \n\t"
  975. "srl %[t3], %[t3], 31 \n\t"
  976. "subu %[t4], $zero, %[qc1] \n\t"
  977. "subu %[t5], $zero, %[qc2] \n\t"
  978. "subu %[t6], $zero, %[qc3] \n\t"
  979. "subu %[t7], $zero, %[qc4] \n\t"
  980. "movn %[qc1], %[t4], %[t0] \n\t"
  981. "movn %[qc2], %[t5], %[t1] \n\t"
  982. "movn %[qc3], %[t6], %[t2] \n\t"
  983. "movn %[qc4], %[t7], %[t3] \n\t"
  984. ".set pop \n\t"
  985. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  986. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  987. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  988. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  989. : [in_int]"r"(in_int)
  990. : "memory"
  991. );
  992. curidx = 9 * qc1;
  993. curidx += qc2 + 40;
  994. curidx2 = 9 * qc3;
  995. curidx2 += qc4 + 40;
  996. curbits += p_bits[curidx] + p_bits[curidx2];
  997. }
  998. return curbits;
  999. }
  1000. static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
  1001. PutBitContext *pb, const float *in,
  1002. const float *scaled, int size, int scale_idx,
  1003. int cb, const float lambda, const float uplim,
  1004. int *bits)
  1005. {
  1006. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1007. int i;
  1008. int qc1, qc2, qc3, qc4;
  1009. int curbits = 0;
  1010. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1011. for (i = 0; i < size; i += 4) {
  1012. int curidx, curidx2;
  1013. int t0, t1, t2, t3, t4;
  1014. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1015. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1016. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1017. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1018. __asm__ volatile (
  1019. ".set push \n\t"
  1020. ".set noreorder \n\t"
  1021. "ori %[t4], $zero, 7 \n\t"
  1022. "slt %[t0], %[t4], %[qc1] \n\t"
  1023. "slt %[t1], %[t4], %[qc2] \n\t"
  1024. "slt %[t2], %[t4], %[qc3] \n\t"
  1025. "slt %[t3], %[t4], %[qc4] \n\t"
  1026. "movn %[qc1], %[t4], %[t0] \n\t"
  1027. "movn %[qc2], %[t4], %[t1] \n\t"
  1028. "movn %[qc3], %[t4], %[t2] \n\t"
  1029. "movn %[qc4], %[t4], %[t3] \n\t"
  1030. ".set pop \n\t"
  1031. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1032. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1033. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1034. [t4]"=&r"(t4)
  1035. );
  1036. curidx = 8 * qc1;
  1037. curidx += qc2;
  1038. curidx2 = 8 * qc3;
  1039. curidx2 += qc4;
  1040. curbits += p_bits[curidx] +
  1041. upair7_sign_bits[curidx] +
  1042. p_bits[curidx2] +
  1043. upair7_sign_bits[curidx2];
  1044. }
  1045. return curbits;
  1046. }
  1047. static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
  1048. PutBitContext *pb, const float *in,
  1049. const float *scaled, int size, int scale_idx,
  1050. int cb, const float lambda, const float uplim,
  1051. int *bits)
  1052. {
  1053. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1054. int i;
  1055. int qc1, qc2, qc3, qc4;
  1056. int curbits = 0;
  1057. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1058. for (i = 0; i < size; i += 4) {
  1059. int curidx, curidx2;
  1060. int t0, t1, t2, t3, t4;
  1061. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1062. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1063. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1064. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1065. __asm__ volatile (
  1066. ".set push \n\t"
  1067. ".set noreorder \n\t"
  1068. "ori %[t4], $zero, 12 \n\t"
  1069. "slt %[t0], %[t4], %[qc1] \n\t"
  1070. "slt %[t1], %[t4], %[qc2] \n\t"
  1071. "slt %[t2], %[t4], %[qc3] \n\t"
  1072. "slt %[t3], %[t4], %[qc4] \n\t"
  1073. "movn %[qc1], %[t4], %[t0] \n\t"
  1074. "movn %[qc2], %[t4], %[t1] \n\t"
  1075. "movn %[qc3], %[t4], %[t2] \n\t"
  1076. "movn %[qc4], %[t4], %[t3] \n\t"
  1077. ".set pop \n\t"
  1078. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1079. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1080. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1081. [t4]"=&r"(t4)
  1082. );
  1083. curidx = 13 * qc1;
  1084. curidx += qc2;
  1085. curidx2 = 13 * qc3;
  1086. curidx2 += qc4;
  1087. curbits += p_bits[curidx] +
  1088. p_bits[curidx2] +
  1089. upair12_sign_bits[curidx] +
  1090. upair12_sign_bits[curidx2];
  1091. }
  1092. return curbits;
  1093. }
  1094. static float get_band_numbits_ESC_mips(struct AACEncContext *s,
  1095. PutBitContext *pb, const float *in,
  1096. const float *scaled, int size, int scale_idx,
  1097. int cb, const float lambda, const float uplim,
  1098. int *bits)
  1099. {
  1100. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1101. int i;
  1102. int qc1, qc2, qc3, qc4;
  1103. int curbits = 0;
  1104. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  1105. for (i = 0; i < size; i += 4) {
  1106. int curidx, curidx2;
  1107. int cond0, cond1, cond2, cond3;
  1108. int c1, c2, c3, c4;
  1109. int t4, t5;
  1110. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1111. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1112. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1113. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1114. __asm__ volatile (
  1115. ".set push \n\t"
  1116. ".set noreorder \n\t"
  1117. "ori %[t4], $zero, 15 \n\t"
  1118. "ori %[t5], $zero, 16 \n\t"
  1119. "shll_s.w %[c1], %[qc1], 18 \n\t"
  1120. "shll_s.w %[c2], %[qc2], 18 \n\t"
  1121. "shll_s.w %[c3], %[qc3], 18 \n\t"
  1122. "shll_s.w %[c4], %[qc4], 18 \n\t"
  1123. "srl %[c1], %[c1], 18 \n\t"
  1124. "srl %[c2], %[c2], 18 \n\t"
  1125. "srl %[c3], %[c3], 18 \n\t"
  1126. "srl %[c4], %[c4], 18 \n\t"
  1127. "slt %[cond0], %[t4], %[qc1] \n\t"
  1128. "slt %[cond1], %[t4], %[qc2] \n\t"
  1129. "slt %[cond2], %[t4], %[qc3] \n\t"
  1130. "slt %[cond3], %[t4], %[qc4] \n\t"
  1131. "movn %[qc1], %[t5], %[cond0] \n\t"
  1132. "movn %[qc2], %[t5], %[cond1] \n\t"
  1133. "movn %[qc3], %[t5], %[cond2] \n\t"
  1134. "movn %[qc4], %[t5], %[cond3] \n\t"
  1135. "ori %[t5], $zero, 31 \n\t"
  1136. "clz %[c1], %[c1] \n\t"
  1137. "clz %[c2], %[c2] \n\t"
  1138. "clz %[c3], %[c3] \n\t"
  1139. "clz %[c4], %[c4] \n\t"
  1140. "subu %[c1], %[t5], %[c1] \n\t"
  1141. "subu %[c2], %[t5], %[c2] \n\t"
  1142. "subu %[c3], %[t5], %[c3] \n\t"
  1143. "subu %[c4], %[t5], %[c4] \n\t"
  1144. "sll %[c1], %[c1], 1 \n\t"
  1145. "sll %[c2], %[c2], 1 \n\t"
  1146. "sll %[c3], %[c3], 1 \n\t"
  1147. "sll %[c4], %[c4], 1 \n\t"
  1148. "addiu %[c1], %[c1], -3 \n\t"
  1149. "addiu %[c2], %[c2], -3 \n\t"
  1150. "addiu %[c3], %[c3], -3 \n\t"
  1151. "addiu %[c4], %[c4], -3 \n\t"
  1152. "subu %[cond0], $zero, %[cond0] \n\t"
  1153. "subu %[cond1], $zero, %[cond1] \n\t"
  1154. "subu %[cond2], $zero, %[cond2] \n\t"
  1155. "subu %[cond3], $zero, %[cond3] \n\t"
  1156. "and %[c1], %[c1], %[cond0] \n\t"
  1157. "and %[c2], %[c2], %[cond1] \n\t"
  1158. "and %[c3], %[c3], %[cond2] \n\t"
  1159. "and %[c4], %[c4], %[cond3] \n\t"
  1160. ".set pop \n\t"
  1161. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1162. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1163. [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
  1164. [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
  1165. [c1]"=&r"(c1), [c2]"=&r"(c2),
  1166. [c3]"=&r"(c3), [c4]"=&r"(c4),
  1167. [t4]"=&r"(t4), [t5]"=&r"(t5)
  1168. );
  1169. curidx = 17 * qc1;
  1170. curidx += qc2;
  1171. curidx2 = 17 * qc3;
  1172. curidx2 += qc4;
  1173. curbits += p_bits[curidx];
  1174. curbits += esc_sign_bits[curidx];
  1175. curbits += p_bits[curidx2];
  1176. curbits += esc_sign_bits[curidx2];
  1177. curbits += c1;
  1178. curbits += c2;
  1179. curbits += c3;
  1180. curbits += c4;
  1181. }
  1182. return curbits;
  1183. }
  1184. static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
  1185. PutBitContext *pb, const float *in,
  1186. const float *scaled, int size, int scale_idx,
  1187. int cb, const float lambda, const float uplim,
  1188. int *bits) = {
  1189. get_band_numbits_ZERO_mips,
  1190. get_band_numbits_SQUAD_mips,
  1191. get_band_numbits_SQUAD_mips,
  1192. get_band_numbits_UQUAD_mips,
  1193. get_band_numbits_UQUAD_mips,
  1194. get_band_numbits_SPAIR_mips,
  1195. get_band_numbits_SPAIR_mips,
  1196. get_band_numbits_UPAIR7_mips,
  1197. get_band_numbits_UPAIR7_mips,
  1198. get_band_numbits_UPAIR12_mips,
  1199. get_band_numbits_UPAIR12_mips,
  1200. get_band_numbits_ESC_mips,
  1201. get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
  1202. get_band_numbits_ZERO_mips,
  1203. get_band_numbits_ZERO_mips,
  1204. get_band_numbits_ZERO_mips,
  1205. };
  /*
   * Pick the routine stored at index cb of get_band_numbits_arr and call it
   * with all ten arguments passed through unchanged.
   * Note that the cb argument is expanded twice (table index and call argument).
   */
  #define get_band_numbits(                                         \
              ctx, bitbuf, coefs, coefs34, len, sf_idx, cb,         \
              lambda, uplim, bits)                                  \
      get_band_numbits_arr[(cb)](                                   \
              ctx, bitbuf, coefs, coefs34, len, sf_idx, cb,         \
              lambda, uplim, bits)
  /**
   * Return the number of bits needed to code a band with codebook cb.
   *
   * Dispatches through get_band_numbits() with pb = NULL. The
   * get_band_numbits_*_mips routines only return the count and never write
   * through bits, so the count is stored here when bits is non-NULL. This
   * matches the get_band_cost_*_mips functions, which fill *bits themselves.
   *
   * @param s         encoder context, passed through
   * @param in        band coefficients, passed through
   * @param scaled    pre-scaled coefficients, passed through
   * @param size      number of coefficients in the band
   * @param scale_idx scalefactor index
   * @param cb        codebook number used as dispatch index
   * @param lambda    passed through
   * @param uplim     passed through
   * @param bits      if non-NULL, receives the bit count as an integer
   * @return          bit count as returned by the selected routine
   */
  static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
                                       int *bits)
  {
      const float nbits = get_band_numbits(s, NULL, in, scaled, size, scale_idx,
                                           cb, lambda, uplim, bits);

      if (bits)
          *bits = (int)nbits;
      return nbits;
  }
  1219. /**
  1220. * Functions developed from template function and optimized for getting the band cost
  1221. */
  1222. #if HAVE_MIPSFPU
  1223. static float get_band_cost_ZERO_mips(struct AACEncContext *s,
  1224. PutBitContext *pb, const float *in,
  1225. const float *scaled, int size, int scale_idx,
  1226. int cb, const float lambda, const float uplim,
  1227. int *bits)
  1228. {
  1229. int i;
  1230. float cost = 0;
  1231. for (i = 0; i < size; i += 4) {
  1232. cost += in[i ] * in[i ];
  1233. cost += in[i+1] * in[i+1];
  1234. cost += in[i+2] * in[i+2];
  1235. cost += in[i+3] * in[i+3];
  1236. }
  1237. if (bits)
  1238. *bits = 0;
  1239. return cost * lambda;
  1240. }
  1241. static float get_band_cost_NONE_mips(struct AACEncContext *s,
  1242. PutBitContext *pb, const float *in,
  1243. const float *scaled, int size, int scale_idx,
  1244. int cb, const float lambda, const float uplim,
  1245. int *bits)
  1246. {
  1247. av_assert0(0);
  1248. return 0;
  1249. }
  1250. static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
  1251. PutBitContext *pb, const float *in,
  1252. const float *scaled, int size, int scale_idx,
  1253. int cb, const float lambda, const float uplim,
  1254. int *bits)
  1255. {
  1256. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1257. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1258. int i;
  1259. float cost = 0;
  1260. int qc1, qc2, qc3, qc4;
  1261. int curbits = 0;
  1262. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1263. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1264. for (i = 0; i < size; i += 4) {
  1265. const float *vec;
  1266. int curidx;
  1267. int *in_int = (int *)&in[i];
  1268. float *in_pos = (float *)&in[i];
  1269. float di0, di1, di2, di3;
  1270. int t0, t1, t2, t3, t4, t5, t6, t7;
  1271. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1272. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1273. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1274. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1275. __asm__ volatile (
  1276. ".set push \n\t"
  1277. ".set noreorder \n\t"
  1278. "slt %[qc1], $zero, %[qc1] \n\t"
  1279. "slt %[qc2], $zero, %[qc2] \n\t"
  1280. "slt %[qc3], $zero, %[qc3] \n\t"
  1281. "slt %[qc4], $zero, %[qc4] \n\t"
  1282. "lw %[t0], 0(%[in_int]) \n\t"
  1283. "lw %[t1], 4(%[in_int]) \n\t"
  1284. "lw %[t2], 8(%[in_int]) \n\t"
  1285. "lw %[t3], 12(%[in_int]) \n\t"
  1286. "srl %[t0], %[t0], 31 \n\t"
  1287. "srl %[t1], %[t1], 31 \n\t"
  1288. "srl %[t2], %[t2], 31 \n\t"
  1289. "srl %[t3], %[t3], 31 \n\t"
  1290. "subu %[t4], $zero, %[qc1] \n\t"
  1291. "subu %[t5], $zero, %[qc2] \n\t"
  1292. "subu %[t6], $zero, %[qc3] \n\t"
  1293. "subu %[t7], $zero, %[qc4] \n\t"
  1294. "movn %[qc1], %[t4], %[t0] \n\t"
  1295. "movn %[qc2], %[t5], %[t1] \n\t"
  1296. "movn %[qc3], %[t6], %[t2] \n\t"
  1297. "movn %[qc4], %[t7], %[t3] \n\t"
  1298. ".set pop \n\t"
  1299. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1300. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1301. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1302. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  1303. : [in_int]"r"(in_int)
  1304. : "memory"
  1305. );
  1306. curidx = qc1;
  1307. curidx *= 3;
  1308. curidx += qc2;
  1309. curidx *= 3;
  1310. curidx += qc3;
  1311. curidx *= 3;
  1312. curidx += qc4;
  1313. curidx += 40;
  1314. curbits += p_bits[curidx];
  1315. vec = &p_codes[curidx*4];
  1316. __asm__ volatile (
  1317. ".set push \n\t"
  1318. ".set noreorder \n\t"
  1319. "lwc1 $f0, 0(%[in_pos]) \n\t"
  1320. "lwc1 $f1, 0(%[vec]) \n\t"
  1321. "lwc1 $f2, 4(%[in_pos]) \n\t"
  1322. "lwc1 $f3, 4(%[vec]) \n\t"
  1323. "lwc1 $f4, 8(%[in_pos]) \n\t"
  1324. "lwc1 $f5, 8(%[vec]) \n\t"
  1325. "lwc1 $f6, 12(%[in_pos]) \n\t"
  1326. "lwc1 $f7, 12(%[vec]) \n\t"
  1327. "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
  1328. "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
  1329. "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
  1330. "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
  1331. ".set pop \n\t"
  1332. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1333. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1334. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1335. [IQ]"f"(IQ)
  1336. : "$f0", "$f1", "$f2", "$f3",
  1337. "$f4", "$f5", "$f6", "$f7",
  1338. "memory"
  1339. );
  1340. cost += di0 * di0 + di1 * di1
  1341. + di2 * di2 + di3 * di3;
  1342. }
  1343. if (bits)
  1344. *bits = curbits;
  1345. return cost * lambda + curbits;
  1346. }
  1347. static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
  1348. PutBitContext *pb, const float *in,
  1349. const float *scaled, int size, int scale_idx,
  1350. int cb, const float lambda, const float uplim,
  1351. int *bits)
  1352. {
  1353. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1354. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1355. int i;
  1356. float cost = 0;
  1357. int curbits = 0;
  1358. int qc1, qc2, qc3, qc4;
  1359. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  1360. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1361. for (i = 0; i < size; i += 4) {
  1362. const float *vec;
  1363. int curidx;
  1364. float *in_pos = (float *)&in[i];
  1365. float di0, di1, di2, di3;
  1366. int t0, t1, t2, t3, t4;
  1367. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1368. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1369. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1370. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1371. __asm__ volatile (
  1372. ".set push \n\t"
  1373. ".set noreorder \n\t"
  1374. "ori %[t4], $zero, 2 \n\t"
  1375. "slt %[t0], %[t4], %[qc1] \n\t"
  1376. "slt %[t1], %[t4], %[qc2] \n\t"
  1377. "slt %[t2], %[t4], %[qc3] \n\t"
  1378. "slt %[t3], %[t4], %[qc4] \n\t"
  1379. "movn %[qc1], %[t4], %[t0] \n\t"
  1380. "movn %[qc2], %[t4], %[t1] \n\t"
  1381. "movn %[qc3], %[t4], %[t2] \n\t"
  1382. "movn %[qc4], %[t4], %[t3] \n\t"
  1383. ".set pop \n\t"
  1384. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1385. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1386. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1387. [t4]"=&r"(t4)
  1388. );
  1389. curidx = qc1;
  1390. curidx *= 3;
  1391. curidx += qc2;
  1392. curidx *= 3;
  1393. curidx += qc3;
  1394. curidx *= 3;
  1395. curidx += qc4;
  1396. curbits += p_bits[curidx];
  1397. curbits += uquad_sign_bits[curidx];
  1398. vec = &p_codes[curidx*4];
  1399. __asm__ volatile (
  1400. ".set push \n\t"
  1401. ".set noreorder \n\t"
  1402. "lwc1 %[di0], 0(%[in_pos]) \n\t"
  1403. "lwc1 %[di1], 4(%[in_pos]) \n\t"
  1404. "lwc1 %[di2], 8(%[in_pos]) \n\t"
  1405. "lwc1 %[di3], 12(%[in_pos]) \n\t"
  1406. "abs.s %[di0], %[di0] \n\t"
  1407. "abs.s %[di1], %[di1] \n\t"
  1408. "abs.s %[di2], %[di2] \n\t"
  1409. "abs.s %[di3], %[di3] \n\t"
  1410. "lwc1 $f0, 0(%[vec]) \n\t"
  1411. "lwc1 $f1, 4(%[vec]) \n\t"
  1412. "lwc1 $f2, 8(%[vec]) \n\t"
  1413. "lwc1 $f3, 12(%[vec]) \n\t"
  1414. "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
  1415. "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
  1416. "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
  1417. "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
  1418. ".set pop \n\t"
  1419. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1420. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1421. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1422. [IQ]"f"(IQ)
  1423. : "$f0", "$f1", "$f2", "$f3",
  1424. "memory"
  1425. );
  1426. cost += di0 * di0 + di1 * di1
  1427. + di2 * di2 + di3 * di3;
  1428. }
  1429. if (bits)
  1430. *bits = curbits;
  1431. return cost * lambda + curbits;
  1432. }
  1433. static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
  1434. PutBitContext *pb, const float *in,
  1435. const float *scaled, int size, int scale_idx,
  1436. int cb, const float lambda, const float uplim,
  1437. int *bits)
  1438. {
  1439. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1440. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1441. int i;
  1442. float cost = 0;
  1443. int qc1, qc2, qc3, qc4;
  1444. int curbits = 0;
  1445. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1446. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1447. for (i = 0; i < size; i += 4) {
  1448. const float *vec, *vec2;
  1449. int curidx, curidx2;
  1450. int *in_int = (int *)&in[i];
  1451. float *in_pos = (float *)&in[i];
  1452. float di0, di1, di2, di3;
  1453. int t0, t1, t2, t3, t4, t5, t6, t7;
  1454. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1455. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1456. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1457. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1458. __asm__ volatile (
  1459. ".set push \n\t"
  1460. ".set noreorder \n\t"
  1461. "ori %[t4], $zero, 4 \n\t"
  1462. "slt %[t0], %[t4], %[qc1] \n\t"
  1463. "slt %[t1], %[t4], %[qc2] \n\t"
  1464. "slt %[t2], %[t4], %[qc3] \n\t"
  1465. "slt %[t3], %[t4], %[qc4] \n\t"
  1466. "movn %[qc1], %[t4], %[t0] \n\t"
  1467. "movn %[qc2], %[t4], %[t1] \n\t"
  1468. "movn %[qc3], %[t4], %[t2] \n\t"
  1469. "movn %[qc4], %[t4], %[t3] \n\t"
  1470. "lw %[t0], 0(%[in_int]) \n\t"
  1471. "lw %[t1], 4(%[in_int]) \n\t"
  1472. "lw %[t2], 8(%[in_int]) \n\t"
  1473. "lw %[t3], 12(%[in_int]) \n\t"
  1474. "srl %[t0], %[t0], 31 \n\t"
  1475. "srl %[t1], %[t1], 31 \n\t"
  1476. "srl %[t2], %[t2], 31 \n\t"
  1477. "srl %[t3], %[t3], 31 \n\t"
  1478. "subu %[t4], $zero, %[qc1] \n\t"
  1479. "subu %[t5], $zero, %[qc2] \n\t"
  1480. "subu %[t6], $zero, %[qc3] \n\t"
  1481. "subu %[t7], $zero, %[qc4] \n\t"
  1482. "movn %[qc1], %[t4], %[t0] \n\t"
  1483. "movn %[qc2], %[t5], %[t1] \n\t"
  1484. "movn %[qc3], %[t6], %[t2] \n\t"
  1485. "movn %[qc4], %[t7], %[t3] \n\t"
  1486. ".set pop \n\t"
  1487. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1488. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1489. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1490. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  1491. : [in_int]"r"(in_int)
  1492. : "memory"
  1493. );
  1494. curidx = 9 * qc1;
  1495. curidx += qc2 + 40;
  1496. curidx2 = 9 * qc3;
  1497. curidx2 += qc4 + 40;
  1498. curbits += p_bits[curidx];
  1499. curbits += p_bits[curidx2];
  1500. vec = &p_codes[curidx*2];
  1501. vec2 = &p_codes[curidx2*2];
  1502. __asm__ volatile (
  1503. ".set push \n\t"
  1504. ".set noreorder \n\t"
  1505. "lwc1 $f0, 0(%[in_pos]) \n\t"
  1506. "lwc1 $f1, 0(%[vec]) \n\t"
  1507. "lwc1 $f2, 4(%[in_pos]) \n\t"
  1508. "lwc1 $f3, 4(%[vec]) \n\t"
  1509. "lwc1 $f4, 8(%[in_pos]) \n\t"
  1510. "lwc1 $f5, 0(%[vec2]) \n\t"
  1511. "lwc1 $f6, 12(%[in_pos]) \n\t"
  1512. "lwc1 $f7, 4(%[vec2]) \n\t"
  1513. "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
  1514. "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
  1515. "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
  1516. "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
  1517. ".set pop \n\t"
  1518. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1519. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1520. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1521. [vec2]"r"(vec2), [IQ]"f"(IQ)
  1522. : "$f0", "$f1", "$f2", "$f3",
  1523. "$f4", "$f5", "$f6", "$f7",
  1524. "memory"
  1525. );
  1526. cost += di0 * di0 + di1 * di1
  1527. + di2 * di2 + di3 * di3;
  1528. }
  1529. if (bits)
  1530. *bits = curbits;
  1531. return cost * lambda + curbits;
  1532. }
  1533. static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
  1534. PutBitContext *pb, const float *in,
  1535. const float *scaled, int size, int scale_idx,
  1536. int cb, const float lambda, const float uplim,
  1537. int *bits)
  1538. {
  1539. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1540. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1541. int i;
  1542. float cost = 0;
  1543. int qc1, qc2, qc3, qc4;
  1544. int curbits = 0;
  1545. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1546. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1547. for (i = 0; i < size; i += 4) {
  1548. const float *vec, *vec2;
  1549. int curidx, curidx2, sign1, count1, sign2, count2;
  1550. int *in_int = (int *)&in[i];
  1551. float *in_pos = (float *)&in[i];
  1552. float di0, di1, di2, di3;
  1553. int t0, t1, t2, t3, t4;
  1554. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1555. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1556. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1557. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1558. __asm__ volatile (
  1559. ".set push \n\t"
  1560. ".set noreorder \n\t"
  1561. "ori %[t4], $zero, 7 \n\t"
  1562. "ori %[sign1], $zero, 0 \n\t"
  1563. "ori %[sign2], $zero, 0 \n\t"
  1564. "slt %[t0], %[t4], %[qc1] \n\t"
  1565. "slt %[t1], %[t4], %[qc2] \n\t"
  1566. "slt %[t2], %[t4], %[qc3] \n\t"
  1567. "slt %[t3], %[t4], %[qc4] \n\t"
  1568. "movn %[qc1], %[t4], %[t0] \n\t"
  1569. "movn %[qc2], %[t4], %[t1] \n\t"
  1570. "movn %[qc3], %[t4], %[t2] \n\t"
  1571. "movn %[qc4], %[t4], %[t3] \n\t"
  1572. "lw %[t0], 0(%[in_int]) \n\t"
  1573. "lw %[t1], 4(%[in_int]) \n\t"
  1574. "lw %[t2], 8(%[in_int]) \n\t"
  1575. "lw %[t3], 12(%[in_int]) \n\t"
  1576. "slt %[t0], %[t0], $zero \n\t"
  1577. "movn %[sign1], %[t0], %[qc1] \n\t"
  1578. "slt %[t2], %[t2], $zero \n\t"
  1579. "movn %[sign2], %[t2], %[qc3] \n\t"
  1580. "slt %[t1], %[t1], $zero \n\t"
  1581. "sll %[t0], %[sign1], 1 \n\t"
  1582. "or %[t0], %[t0], %[t1] \n\t"
  1583. "movn %[sign1], %[t0], %[qc2] \n\t"
  1584. "slt %[t3], %[t3], $zero \n\t"
  1585. "sll %[t0], %[sign2], 1 \n\t"
  1586. "or %[t0], %[t0], %[t3] \n\t"
  1587. "movn %[sign2], %[t0], %[qc4] \n\t"
  1588. "slt %[count1], $zero, %[qc1] \n\t"
  1589. "slt %[t1], $zero, %[qc2] \n\t"
  1590. "slt %[count2], $zero, %[qc3] \n\t"
  1591. "slt %[t2], $zero, %[qc4] \n\t"
  1592. "addu %[count1], %[count1], %[t1] \n\t"
  1593. "addu %[count2], %[count2], %[t2] \n\t"
  1594. ".set pop \n\t"
  1595. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1596. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1597. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  1598. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  1599. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1600. [t4]"=&r"(t4)
  1601. : [in_int]"r"(in_int)
  1602. : "memory"
  1603. );
  1604. curidx = 8 * qc1;
  1605. curidx += qc2;
  1606. curidx2 = 8 * qc3;
  1607. curidx2 += qc4;
  1608. curbits += p_bits[curidx];
  1609. curbits += upair7_sign_bits[curidx];
  1610. vec = &p_codes[curidx*2];
  1611. curbits += p_bits[curidx2];
  1612. curbits += upair7_sign_bits[curidx2];
  1613. vec2 = &p_codes[curidx2*2];
  1614. __asm__ volatile (
  1615. ".set push \n\t"
  1616. ".set noreorder \n\t"
  1617. "lwc1 %[di0], 0(%[in_pos]) \n\t"
  1618. "lwc1 %[di1], 4(%[in_pos]) \n\t"
  1619. "lwc1 %[di2], 8(%[in_pos]) \n\t"
  1620. "lwc1 %[di3], 12(%[in_pos]) \n\t"
  1621. "abs.s %[di0], %[di0] \n\t"
  1622. "abs.s %[di1], %[di1] \n\t"
  1623. "abs.s %[di2], %[di2] \n\t"
  1624. "abs.s %[di3], %[di3] \n\t"
  1625. "lwc1 $f0, 0(%[vec]) \n\t"
  1626. "lwc1 $f1, 4(%[vec]) \n\t"
  1627. "lwc1 $f2, 0(%[vec2]) \n\t"
  1628. "lwc1 $f3, 4(%[vec2]) \n\t"
  1629. "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
  1630. "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
  1631. "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
  1632. "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
  1633. ".set pop \n\t"
  1634. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1635. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1636. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1637. [vec2]"r"(vec2), [IQ]"f"(IQ)
  1638. : "$f0", "$f1", "$f2", "$f3",
  1639. "memory"
  1640. );
  1641. cost += di0 * di0 + di1 * di1
  1642. + di2 * di2 + di3 * di3;
  1643. }
  1644. if (bits)
  1645. *bits = curbits;
  1646. return cost * lambda + curbits;
  1647. }
  1648. static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
  1649. PutBitContext *pb, const float *in,
  1650. const float *scaled, int size, int scale_idx,
  1651. int cb, const float lambda, const float uplim,
  1652. int *bits)
  1653. {
  1654. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1655. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1656. int i;
  1657. float cost = 0;
  1658. int qc1, qc2, qc3, qc4;
  1659. int curbits = 0;
  1660. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  1661. float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
  1662. for (i = 0; i < size; i += 4) {
  1663. const float *vec, *vec2;
  1664. int curidx, curidx2;
  1665. int sign1, count1, sign2, count2;
  1666. int *in_int = (int *)&in[i];
  1667. float *in_pos = (float *)&in[i];
  1668. float di0, di1, di2, di3;
  1669. int t0, t1, t2, t3, t4;
  1670. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1671. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1672. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1673. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1674. __asm__ volatile (
  1675. ".set push \n\t"
  1676. ".set noreorder \n\t"
  1677. "ori %[t4], $zero, 12 \n\t"
  1678. "ori %[sign1], $zero, 0 \n\t"
  1679. "ori %[sign2], $zero, 0 \n\t"
  1680. "slt %[t0], %[t4], %[qc1] \n\t"
  1681. "slt %[t1], %[t4], %[qc2] \n\t"
  1682. "slt %[t2], %[t4], %[qc3] \n\t"
  1683. "slt %[t3], %[t4], %[qc4] \n\t"
  1684. "movn %[qc1], %[t4], %[t0] \n\t"
  1685. "movn %[qc2], %[t4], %[t1] \n\t"
  1686. "movn %[qc3], %[t4], %[t2] \n\t"
  1687. "movn %[qc4], %[t4], %[t3] \n\t"
  1688. "lw %[t0], 0(%[in_int]) \n\t"
  1689. "lw %[t1], 4(%[in_int]) \n\t"
  1690. "lw %[t2], 8(%[in_int]) \n\t"
  1691. "lw %[t3], 12(%[in_int]) \n\t"
  1692. "slt %[t0], %[t0], $zero \n\t"
  1693. "movn %[sign1], %[t0], %[qc1] \n\t"
  1694. "slt %[t2], %[t2], $zero \n\t"
  1695. "movn %[sign2], %[t2], %[qc3] \n\t"
  1696. "slt %[t1], %[t1], $zero \n\t"
  1697. "sll %[t0], %[sign1], 1 \n\t"
  1698. "or %[t0], %[t0], %[t1] \n\t"
  1699. "movn %[sign1], %[t0], %[qc2] \n\t"
  1700. "slt %[t3], %[t3], $zero \n\t"
  1701. "sll %[t0], %[sign2], 1 \n\t"
  1702. "or %[t0], %[t0], %[t3] \n\t"
  1703. "movn %[sign2], %[t0], %[qc4] \n\t"
  1704. "slt %[count1], $zero, %[qc1] \n\t"
  1705. "slt %[t1], $zero, %[qc2] \n\t"
  1706. "slt %[count2], $zero, %[qc3] \n\t"
  1707. "slt %[t2], $zero, %[qc4] \n\t"
  1708. "addu %[count1], %[count1], %[t1] \n\t"
  1709. "addu %[count2], %[count2], %[t2] \n\t"
  1710. ".set pop \n\t"
  1711. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1712. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1713. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  1714. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  1715. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  1716. [t4]"=&r"(t4)
  1717. : [in_int]"r"(in_int)
  1718. : "memory"
  1719. );
  1720. curidx = 13 * qc1;
  1721. curidx += qc2;
  1722. curidx2 = 13 * qc3;
  1723. curidx2 += qc4;
  1724. curbits += p_bits[curidx];
  1725. curbits += p_bits[curidx2];
  1726. curbits += upair12_sign_bits[curidx];
  1727. curbits += upair12_sign_bits[curidx2];
  1728. vec = &p_codes[curidx*2];
  1729. vec2 = &p_codes[curidx2*2];
  1730. __asm__ volatile (
  1731. ".set push \n\t"
  1732. ".set noreorder \n\t"
  1733. "lwc1 %[di0], 0(%[in_pos]) \n\t"
  1734. "lwc1 %[di1], 4(%[in_pos]) \n\t"
  1735. "lwc1 %[di2], 8(%[in_pos]) \n\t"
  1736. "lwc1 %[di3], 12(%[in_pos]) \n\t"
  1737. "abs.s %[di0], %[di0] \n\t"
  1738. "abs.s %[di1], %[di1] \n\t"
  1739. "abs.s %[di2], %[di2] \n\t"
  1740. "abs.s %[di3], %[di3] \n\t"
  1741. "lwc1 $f0, 0(%[vec]) \n\t"
  1742. "lwc1 $f1, 4(%[vec]) \n\t"
  1743. "lwc1 $f2, 0(%[vec2]) \n\t"
  1744. "lwc1 $f3, 4(%[vec2]) \n\t"
  1745. "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
  1746. "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
  1747. "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
  1748. "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
  1749. ".set pop \n\t"
  1750. : [di0]"=&f"(di0), [di1]"=&f"(di1),
  1751. [di2]"=&f"(di2), [di3]"=&f"(di3)
  1752. : [in_pos]"r"(in_pos), [vec]"r"(vec),
  1753. [vec2]"r"(vec2), [IQ]"f"(IQ)
  1754. : "$f0", "$f1", "$f2", "$f3",
  1755. "memory"
  1756. );
  1757. cost += di0 * di0 + di1 * di1
  1758. + di2 * di2 + di3 * di3;
  1759. }
  1760. if (bits)
  1761. *bits = curbits;
  1762. return cost * lambda + curbits;
  1763. }
  1764. static float get_band_cost_ESC_mips(struct AACEncContext *s,
  1765. PutBitContext *pb, const float *in,
  1766. const float *scaled, int size, int scale_idx,
  1767. int cb, const float lambda, const float uplim,
  1768. int *bits)
  1769. {
  1770. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1771. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1772. const float CLIPPED_ESCAPE = 165140.0f * IQ;
  1773. int i;
  1774. float cost = 0;
  1775. int qc1, qc2, qc3, qc4;
  1776. int curbits = 0;
  1777. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  1778. float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
  1779. for (i = 0; i < size; i += 4) {
  1780. const float *vec, *vec2;
  1781. int curidx, curidx2;
  1782. float t1, t2, t3, t4;
  1783. float di1, di2, di3, di4;
  1784. int cond0, cond1, cond2, cond3;
  1785. int c1, c2, c3, c4;
  1786. int t6, t7;
  1787. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1788. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1789. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1790. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1791. __asm__ volatile (
  1792. ".set push \n\t"
  1793. ".set noreorder \n\t"
  1794. "ori %[t6], $zero, 15 \n\t"
  1795. "ori %[t7], $zero, 16 \n\t"
  1796. "shll_s.w %[c1], %[qc1], 18 \n\t"
  1797. "shll_s.w %[c2], %[qc2], 18 \n\t"
  1798. "shll_s.w %[c3], %[qc3], 18 \n\t"
  1799. "shll_s.w %[c4], %[qc4], 18 \n\t"
  1800. "srl %[c1], %[c1], 18 \n\t"
  1801. "srl %[c2], %[c2], 18 \n\t"
  1802. "srl %[c3], %[c3], 18 \n\t"
  1803. "srl %[c4], %[c4], 18 \n\t"
  1804. "slt %[cond0], %[t6], %[qc1] \n\t"
  1805. "slt %[cond1], %[t6], %[qc2] \n\t"
  1806. "slt %[cond2], %[t6], %[qc3] \n\t"
  1807. "slt %[cond3], %[t6], %[qc4] \n\t"
  1808. "movn %[qc1], %[t7], %[cond0] \n\t"
  1809. "movn %[qc2], %[t7], %[cond1] \n\t"
  1810. "movn %[qc3], %[t7], %[cond2] \n\t"
  1811. "movn %[qc4], %[t7], %[cond3] \n\t"
  1812. ".set pop \n\t"
  1813. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1814. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1815. [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
  1816. [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
  1817. [c1]"=&r"(c1), [c2]"=&r"(c2),
  1818. [c3]"=&r"(c3), [c4]"=&r"(c4),
  1819. [t6]"=&r"(t6), [t7]"=&r"(t7)
  1820. );
  1821. curidx = 17 * qc1;
  1822. curidx += qc2;
  1823. curidx2 = 17 * qc3;
  1824. curidx2 += qc4;
  1825. curbits += p_bits[curidx];
  1826. curbits += esc_sign_bits[curidx];
  1827. vec = &p_codes[curidx*2];
  1828. curbits += p_bits[curidx2];
  1829. curbits += esc_sign_bits[curidx2];
  1830. vec2 = &p_codes[curidx2*2];
  1831. curbits += (av_log2(c1) * 2 - 3) & (-cond0);
  1832. curbits += (av_log2(c2) * 2 - 3) & (-cond1);
  1833. curbits += (av_log2(c3) * 2 - 3) & (-cond2);
  1834. curbits += (av_log2(c4) * 2 - 3) & (-cond3);
  1835. t1 = fabsf(in[i ]);
  1836. t2 = fabsf(in[i+1]);
  1837. t3 = fabsf(in[i+2]);
  1838. t4 = fabsf(in[i+3]);
  1839. if (cond0) {
  1840. if (t1 >= CLIPPED_ESCAPE) {
  1841. di1 = t1 - CLIPPED_ESCAPE;
  1842. } else {
  1843. di1 = t1 - c1 * cbrtf(c1) * IQ;
  1844. }
  1845. } else
  1846. di1 = t1 - vec[0] * IQ;
  1847. if (cond1) {
  1848. if (t2 >= CLIPPED_ESCAPE) {
  1849. di2 = t2 - CLIPPED_ESCAPE;
  1850. } else {
  1851. di2 = t2 - c2 * cbrtf(c2) * IQ;
  1852. }
  1853. } else
  1854. di2 = t2 - vec[1] * IQ;
  1855. if (cond2) {
  1856. if (t3 >= CLIPPED_ESCAPE) {
  1857. di3 = t3 - CLIPPED_ESCAPE;
  1858. } else {
  1859. di3 = t3 - c3 * cbrtf(c3) * IQ;
  1860. }
  1861. } else
  1862. di3 = t3 - vec2[0] * IQ;
  1863. if (cond3) {
  1864. if (t4 >= CLIPPED_ESCAPE) {
  1865. di4 = t4 - CLIPPED_ESCAPE;
  1866. } else {
  1867. di4 = t4 - c4 * cbrtf(c4) * IQ;
  1868. }
  1869. } else
  1870. di4 = t4 - vec2[1]*IQ;
  1871. cost += di1 * di1 + di2 * di2
  1872. + di3 * di3 + di4 * di4;
  1873. }
  1874. if (bits)
  1875. *bits = curbits;
  1876. return cost * lambda + curbits;
  1877. }
  1878. static float (*const get_band_cost_arr[])(struct AACEncContext *s,
  1879. PutBitContext *pb, const float *in,
  1880. const float *scaled, int size, int scale_idx,
  1881. int cb, const float lambda, const float uplim,
  1882. int *bits) = {
  1883. get_band_cost_ZERO_mips,
  1884. get_band_cost_SQUAD_mips,
  1885. get_band_cost_SQUAD_mips,
  1886. get_band_cost_UQUAD_mips,
  1887. get_band_cost_UQUAD_mips,
  1888. get_band_cost_SPAIR_mips,
  1889. get_band_cost_SPAIR_mips,
  1890. get_band_cost_UPAIR7_mips,
  1891. get_band_cost_UPAIR7_mips,
  1892. get_band_cost_UPAIR12_mips,
  1893. get_band_cost_UPAIR12_mips,
  1894. get_band_cost_ESC_mips,
  1895. get_band_cost_NONE_mips, /* cb 12 doesn't exist */
  1896. get_band_cost_ZERO_mips,
  1897. get_band_cost_ZERO_mips,
  1898. get_band_cost_ZERO_mips,
  1899. };
  /*
   * Call the cost estimator registered in get_band_cost_arr for codebook cb,
   * forwarding every argument unchanged. cb appears twice in the expansion
   * (table index and call argument), so it must be free of side effects.
   */
  #define get_band_cost(s, pb, in, scaled, size, scale_idx, cb, lambda, uplim, bits) \
      get_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, cb, lambda, uplim, bits)
  /**
   * Estimate the cost of coding one band with codebook cb, without writing
   * any bits: forwards to get_band_cost() with a NULL bit writer.
   *
   * @param bits passed through to the estimator; the estimators visible in
   *             this file store the bit count there when it is non-NULL
   * @return the value returned by the selected estimator
   */
  static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                  const float *scaled, int size, int scale_idx,
                                  int cb, const float lambda, const float uplim,
                                  int *bits)
  {
      const float band_cost = get_band_cost(s, NULL, in, scaled, size, scale_idx,
                                            cb, lambda, uplim, bits);

      return band_cost;
  }
  1913. static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
  1914. AACEncContext *s,
  1915. SingleChannelElement *sce,
  1916. const float lambda)
  1917. {
  1918. int start = 0, i, w, w2, g;
  1919. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
  1920. float dists[128] = { 0 }, uplims[128];
  1921. float maxvals[128];
  1922. int fflag, minscaler;
  1923. int its = 0;
  1924. int allz = 0;
  1925. float minthr = INFINITY;
  1926. destbits = FFMIN(destbits, 5800);
  1927. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1928. for (g = 0; g < sce->ics.num_swb; g++) {
  1929. int nz = 0;
  1930. float uplim = 0.0f;
  1931. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  1932. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  1933. uplim += band->threshold;
  1934. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  1935. sce->zeroes[(w+w2)*16+g] = 1;
  1936. continue;
  1937. }
  1938. nz = 1;
  1939. }
  1940. uplims[w*16+g] = uplim *512;
  1941. sce->zeroes[w*16+g] = !nz;
  1942. if (nz)
  1943. minthr = FFMIN(minthr, uplim);
  1944. allz |= nz;
  1945. }
  1946. }
  1947. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1948. for (g = 0; g < sce->ics.num_swb; g++) {
  1949. if (sce->zeroes[w*16+g]) {
  1950. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  1951. continue;
  1952. }
  1953. sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
  1954. }
  1955. }
  1956. if (!allz)
  1957. return;
  1958. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  1959. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1960. start = w*128;
  1961. for (g = 0; g < sce->ics.num_swb; g++) {
  1962. const float *scaled = s->scoefs + start;
  1963. maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  1964. start += sce->ics.swb_sizes[g];
  1965. }
  1966. }
  1967. do {
  1968. int tbits, qstep;
  1969. minscaler = sce->sf_idx[0];
  1970. qstep = its ? 1 : 32;
  1971. do {
  1972. int prev = -1;
  1973. tbits = 0;
  1974. fflag = 0;
  1975. if (qstep > 1) {
  1976. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  1977. start = w*128;
  1978. for (g = 0; g < sce->ics.num_swb; g++) {
  1979. const float *coefs = sce->coeffs + start;
  1980. const float *scaled = s->scoefs + start;
  1981. int bits = 0;
  1982. int cb;
  1983. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  1984. start += sce->ics.swb_sizes[g];
  1985. continue;
  1986. }
  1987. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  1988. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  1989. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  1990. int b;
  1991. bits += quantize_band_cost_bits(s, coefs + w2*128,
  1992. scaled + w2*128,
  1993. sce->ics.swb_sizes[g],
  1994. sce->sf_idx[w*16+g],
  1995. cb,
  1996. 1.0f,
  1997. INFINITY,
  1998. &b);
  1999. }
  2000. if (prev != -1) {
  2001. bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  2002. }
  2003. tbits += bits;
  2004. start += sce->ics.swb_sizes[g];
  2005. prev = sce->sf_idx[w*16+g];
  2006. }
  2007. }
  2008. }
  2009. else {
  2010. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  2011. start = w*128;
  2012. for (g = 0; g < sce->ics.num_swb; g++) {
  2013. const float *coefs = sce->coeffs + start;
  2014. const float *scaled = s->scoefs + start;
  2015. int bits = 0;
  2016. int cb;
  2017. float dist = 0.0f;
  2018. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  2019. start += sce->ics.swb_sizes[g];
  2020. continue;
  2021. }
  2022. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  2023. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  2024. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  2025. int b;
  2026. dist += quantize_band_cost(s, coefs + w2*128,
  2027. scaled + w2*128,
  2028. sce->ics.swb_sizes[g],
  2029. sce->sf_idx[w*16+g],
  2030. cb,
  2031. 1.0f,
  2032. INFINITY,
  2033. &b);
  2034. bits += b;
  2035. }
  2036. dists[w*16+g] = dist - bits;
  2037. if (prev != -1) {
  2038. bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  2039. }
  2040. tbits += bits;
  2041. start += sce->ics.swb_sizes[g];
  2042. prev = sce->sf_idx[w*16+g];
  2043. }
  2044. }
  2045. }
  2046. if (tbits > destbits) {
  2047. for (i = 0; i < 128; i++)
  2048. if (sce->sf_idx[i] < 218 - qstep)
  2049. sce->sf_idx[i] += qstep;
  2050. } else {
  2051. for (i = 0; i < 128; i++)
  2052. if (sce->sf_idx[i] > 60 - qstep)
  2053. sce->sf_idx[i] -= qstep;
  2054. }
  2055. qstep >>= 1;
  2056. if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
  2057. qstep = 1;
  2058. } while (qstep);
  2059. fflag = 0;
  2060. minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
  2061. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  2062. for (g = 0; g < sce->ics.num_swb; g++) {
  2063. int prevsc = sce->sf_idx[w*16+g];
  2064. if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
  2065. if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
  2066. sce->sf_idx[w*16+g]--;
  2067. else
  2068. sce->sf_idx[w*16+g]-=2;
  2069. }
  2070. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
  2071. sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
  2072. if (sce->sf_idx[w*16+g] != prevsc)
  2073. fflag = 1;
  2074. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  2075. }
  2076. }
  2077. its++;
  2078. } while (fflag && its < 10);
  2079. }
  2080. static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
  2081. {
  2082. int start = 0, i, w, w2, g;
  2083. float M[128], S[128];
  2084. float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
  2085. SingleChannelElement *sce0 = &cpe->ch[0];
  2086. SingleChannelElement *sce1 = &cpe->ch[1];
  2087. if (!cpe->common_window)
  2088. return;
  2089. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  2090. for (g = 0; g < sce0->ics.num_swb; g++) {
  2091. if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
  2092. float dist1 = 0.0f, dist2 = 0.0f;
  2093. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  2094. FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
  2095. FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
  2096. float minthr = FFMIN(band0->threshold, band1->threshold);
  2097. float maxthr = FFMAX(band0->threshold, band1->threshold);
  2098. for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
  2099. M[i ] = (sce0->coeffs[start+w2*128+i ]
  2100. + sce1->coeffs[start+w2*128+i ]) * 0.5;
  2101. M[i+1] = (sce0->coeffs[start+w2*128+i+1]
  2102. + sce1->coeffs[start+w2*128+i+1]) * 0.5;
  2103. M[i+2] = (sce0->coeffs[start+w2*128+i+2]
  2104. + sce1->coeffs[start+w2*128+i+2]) * 0.5;
  2105. M[i+3] = (sce0->coeffs[start+w2*128+i+3]
  2106. + sce1->coeffs[start+w2*128+i+3]) * 0.5;
  2107. S[i ] = M[i ]
  2108. - sce1->coeffs[start+w2*128+i ];
  2109. S[i+1] = M[i+1]
  2110. - sce1->coeffs[start+w2*128+i+1];
  2111. S[i+2] = M[i+2]
  2112. - sce1->coeffs[start+w2*128+i+2];
  2113. S[i+3] = M[i+3]
  2114. - sce1->coeffs[start+w2*128+i+3];
  2115. }
  2116. abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  2117. abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  2118. abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  2119. abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  2120. dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
  2121. L34,
  2122. sce0->ics.swb_sizes[g],
  2123. sce0->sf_idx[(w+w2)*16+g],
  2124. sce0->band_type[(w+w2)*16+g],
  2125. s->lambda / band0->threshold, INFINITY, NULL);
  2126. dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
  2127. R34,
  2128. sce1->ics.swb_sizes[g],
  2129. sce1->sf_idx[(w+w2)*16+g],
  2130. sce1->band_type[(w+w2)*16+g],
  2131. s->lambda / band1->threshold, INFINITY, NULL);
  2132. dist2 += quantize_band_cost(s, M,
  2133. M34,
  2134. sce0->ics.swb_sizes[g],
  2135. sce0->sf_idx[(w+w2)*16+g],
  2136. sce0->band_type[(w+w2)*16+g],
  2137. s->lambda / maxthr, INFINITY, NULL);
  2138. dist2 += quantize_band_cost(s, S,
  2139. S34,
  2140. sce1->ics.swb_sizes[g],
  2141. sce1->sf_idx[(w+w2)*16+g],
  2142. sce1->band_type[(w+w2)*16+g],
  2143. s->lambda / minthr, INFINITY, NULL);
  2144. }
  2145. cpe->ms_mask[w*16+g] = dist2 < dist1;
  2146. }
  2147. start += sce0->ics.swb_sizes[g];
  2148. }
  2149. }
  2150. }
  2151. #endif /*HAVE_MIPSFPU */
  2152. static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
  2153. int win, int group_len, const float lambda)
  2154. {
  2155. BandCodingPath path[120][12];
  2156. int w, swb, cb, start, size;
  2157. int i, j;
  2158. const int max_sfb = sce->ics.max_sfb;
  2159. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  2160. const int run_esc = (1 << run_bits) - 1;
  2161. int idx, ppos, count;
  2162. int stackrun[120], stackcb[120], stack_len;
  2163. float next_minbits = INFINITY;
  2164. int next_mincb = 0;
  2165. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  2166. start = win*128;
  2167. for (cb = 0; cb < 12; cb++) {
  2168. path[0][cb].cost = run_bits+4;
  2169. path[0][cb].prev_idx = -1;
  2170. path[0][cb].run = 0;
  2171. }
  2172. for (swb = 0; swb < max_sfb; swb++) {
  2173. size = sce->ics.swb_sizes[swb];
  2174. if (sce->zeroes[win*16 + swb]) {
  2175. float cost_stay_here = path[swb][0].cost;
  2176. float cost_get_here = next_minbits + run_bits + 4;
  2177. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
  2178. != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
  2179. cost_stay_here += run_bits;
  2180. if (cost_get_here < cost_stay_here) {
  2181. path[swb+1][0].prev_idx = next_mincb;
  2182. path[swb+1][0].cost = cost_get_here;
  2183. path[swb+1][0].run = 1;
  2184. } else {
  2185. path[swb+1][0].prev_idx = 0;
  2186. path[swb+1][0].cost = cost_stay_here;
  2187. path[swb+1][0].run = path[swb][0].run + 1;
  2188. }
  2189. next_minbits = path[swb+1][0].cost;
  2190. next_mincb = 0;
  2191. for (cb = 1; cb < 12; cb++) {
  2192. path[swb+1][cb].cost = 61450;
  2193. path[swb+1][cb].prev_idx = -1;
  2194. path[swb+1][cb].run = 0;
  2195. }
  2196. } else {
  2197. float minbits = next_minbits;
  2198. int mincb = next_mincb;
  2199. int startcb = sce->band_type[win*16+swb];
  2200. next_minbits = INFINITY;
  2201. next_mincb = 0;
  2202. for (cb = 0; cb < startcb; cb++) {
  2203. path[swb+1][cb].cost = 61450;
  2204. path[swb+1][cb].prev_idx = -1;
  2205. path[swb+1][cb].run = 0;
  2206. }
  2207. for (cb = startcb; cb < 12; cb++) {
  2208. float cost_stay_here, cost_get_here;
  2209. float bits = 0.0f;
  2210. for (w = 0; w < group_len; w++) {
  2211. bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
  2212. s->scoefs + start + w*128, size,
  2213. sce->sf_idx[(win+w)*16+swb], cb,
  2214. 0, INFINITY, NULL);
  2215. }
  2216. cost_stay_here = path[swb][cb].cost + bits;
  2217. cost_get_here = minbits + bits + run_bits + 4;
  2218. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  2219. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  2220. cost_stay_here += run_bits;
  2221. if (cost_get_here < cost_stay_here) {
  2222. path[swb+1][cb].prev_idx = mincb;
  2223. path[swb+1][cb].cost = cost_get_here;
  2224. path[swb+1][cb].run = 1;
  2225. } else {
  2226. path[swb+1][cb].prev_idx = cb;
  2227. path[swb+1][cb].cost = cost_stay_here;
  2228. path[swb+1][cb].run = path[swb][cb].run + 1;
  2229. }
  2230. if (path[swb+1][cb].cost < next_minbits) {
  2231. next_minbits = path[swb+1][cb].cost;
  2232. next_mincb = cb;
  2233. }
  2234. }
  2235. }
  2236. start += sce->ics.swb_sizes[swb];
  2237. }
  2238. stack_len = 0;
  2239. idx = 0;
  2240. for (cb = 1; cb < 12; cb++)
  2241. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  2242. idx = cb;
  2243. ppos = max_sfb;
  2244. while (ppos > 0) {
  2245. av_assert1(idx >= 0);
  2246. cb = idx;
  2247. stackrun[stack_len] = path[ppos][cb].run;
  2248. stackcb [stack_len] = cb;
  2249. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  2250. ppos -= path[ppos][cb].run;
  2251. stack_len++;
  2252. }
  2253. start = 0;
  2254. for (i = stack_len - 1; i >= 0; i--) {
  2255. put_bits(&s->pb, 4, stackcb[i]);
  2256. count = stackrun[i];
  2257. memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  2258. for (j = 0; j < count; j++) {
  2259. sce->band_type[win*16 + start] = stackcb[i];
  2260. start++;
  2261. }
  2262. while (count >= run_esc) {
  2263. put_bits(&s->pb, run_bits, run_esc);
  2264. count -= run_esc;
  2265. }
  2266. put_bits(&s->pb, run_bits, count);
  2267. }
  2268. }
  2269. #endif /* HAVE_INLINE_ASM */
  2270. void ff_aac_coder_init_mips(AACEncContext *c) {
  2271. #if HAVE_INLINE_ASM
  2272. AACCoefficientsEncoder *e = c->coder;
  2273. int option = c->options.aac_coder;
  2274. if (option == 2) {
  2275. // Disabled due to failure with fate-aac-pns-encode
  2276. // e->quantize_and_encode_band = quantize_and_encode_band_mips;
  2277. // e->encode_window_bands_info = codebook_trellis_rate_mips;
  2278. #if HAVE_MIPSFPU
  2279. e->search_for_quantizers = search_for_quantizers_twoloop_mips;
  2280. e->search_for_ms = search_for_ms_mips;
  2281. #endif /* HAVE_MIPSFPU */
  2282. }
  2283. #endif /* HAVE_INLINE_ASM */
  2284. }