You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2496 lines
103KB

  1. /*
  2. * Copyright (c) 2012
  3. * MIPS Technologies, Inc., California.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
  14. * contributors may be used to endorse or promote products derived from
  15. * this software without specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. *
  29. * Author: Stanislav Ocovaj (socovaj@mips.com)
  30. * Szabolcs Pal (sabolc@mips.com)
  31. *
  32. * AAC coefficients encoder optimized for MIPS floating-point architecture
  33. *
  34. * This file is part of FFmpeg.
  35. *
  36. * FFmpeg is free software; you can redistribute it and/or
  37. * modify it under the terms of the GNU Lesser General Public
  38. * License as published by the Free Software Foundation; either
  39. * version 2.1 of the License, or (at your option) any later version.
  40. *
  41. * FFmpeg is distributed in the hope that it will be useful,
  42. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  43. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  44. * Lesser General Public License for more details.
  45. *
  46. * You should have received a copy of the GNU Lesser General Public
  47. * License along with FFmpeg; if not, write to the Free Software
  48. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  49. */
  50. /**
  51. * @file
  52. * Reference: libavcodec/aaccoder.c
  53. */
  54. #include "libavutil/libm.h"
  55. #include <float.h>
  56. #include "libavutil/mathematics.h"
  57. #include "libavcodec/avcodec.h"
  58. #include "libavcodec/put_bits.h"
  59. #include "libavcodec/aac.h"
  60. #include "libavcodec/aacenc.h"
  61. #include "libavcodec/aactab.h"
  62. #include "libavcodec/aacenctab.h"
  63. #include "libavcodec/aacenc_utils.h"
  64. #if HAVE_INLINE_ASM
  65. typedef struct BandCodingPath {
  66. int prev_idx;
  67. float cost;
  68. int run;
  69. } BandCodingPath;
  70. static const uint8_t uquad_sign_bits[81] = {
  71. 0, 1, 1, 1, 2, 2, 1, 2, 2,
  72. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  73. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  74. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  75. 2, 3, 3, 3, 4, 4, 3, 4, 4,
  76. 2, 3, 3, 3, 4, 4, 3, 4, 4,
  77. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  78. 2, 3, 3, 3, 4, 4, 3, 4, 4,
  79. 2, 3, 3, 3, 4, 4, 3, 4, 4
  80. };
  81. static const uint8_t upair7_sign_bits[64] = {
  82. 0, 1, 1, 1, 1, 1, 1, 1,
  83. 1, 2, 2, 2, 2, 2, 2, 2,
  84. 1, 2, 2, 2, 2, 2, 2, 2,
  85. 1, 2, 2, 2, 2, 2, 2, 2,
  86. 1, 2, 2, 2, 2, 2, 2, 2,
  87. 1, 2, 2, 2, 2, 2, 2, 2,
  88. 1, 2, 2, 2, 2, 2, 2, 2,
  89. 1, 2, 2, 2, 2, 2, 2, 2,
  90. };
  91. static const uint8_t upair12_sign_bits[169] = {
  92. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  93. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  94. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  95. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  96. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  97. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  98. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  99. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  100. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  101. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  102. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  103. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  104. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
  105. };
  106. static const uint8_t esc_sign_bits[289] = {
  107. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  108. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  109. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  110. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  111. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  112. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  113. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  114. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  115. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  116. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  117. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  118. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  119. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  120. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  121. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  122. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  123. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
  124. };
  125. /**
  126. * Functions developed from template function and optimized for quantizing and encoding band
  127. */
  128. static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
  129. PutBitContext *pb, const float *in, float *out,
  130. const float *scaled, int size, int scale_idx,
  131. int cb, const float lambda, const float uplim,
  132. int *bits, float *energy, const float ROUNDING)
  133. {
  134. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  135. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  136. int i;
  137. int qc1, qc2, qc3, qc4;
  138. float qenergy = 0.0f;
  139. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  140. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  141. float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  142. abs_pow34_v(s->scoefs, in, size);
  143. scaled = s->scoefs;
  144. for (i = 0; i < size; i += 4) {
  145. int curidx;
  146. int *in_int = (int *)&in[i];
  147. int t0, t1, t2, t3, t4, t5, t6, t7;
  148. const float *vec;
  149. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  150. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  151. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  152. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  153. __asm__ volatile (
  154. ".set push \n\t"
  155. ".set noreorder \n\t"
  156. "slt %[qc1], $zero, %[qc1] \n\t"
  157. "slt %[qc2], $zero, %[qc2] \n\t"
  158. "slt %[qc3], $zero, %[qc3] \n\t"
  159. "slt %[qc4], $zero, %[qc4] \n\t"
  160. "lw %[t0], 0(%[in_int]) \n\t"
  161. "lw %[t1], 4(%[in_int]) \n\t"
  162. "lw %[t2], 8(%[in_int]) \n\t"
  163. "lw %[t3], 12(%[in_int]) \n\t"
  164. "srl %[t0], %[t0], 31 \n\t"
  165. "srl %[t1], %[t1], 31 \n\t"
  166. "srl %[t2], %[t2], 31 \n\t"
  167. "srl %[t3], %[t3], 31 \n\t"
  168. "subu %[t4], $zero, %[qc1] \n\t"
  169. "subu %[t5], $zero, %[qc2] \n\t"
  170. "subu %[t6], $zero, %[qc3] \n\t"
  171. "subu %[t7], $zero, %[qc4] \n\t"
  172. "movn %[qc1], %[t4], %[t0] \n\t"
  173. "movn %[qc2], %[t5], %[t1] \n\t"
  174. "movn %[qc3], %[t6], %[t2] \n\t"
  175. "movn %[qc4], %[t7], %[t3] \n\t"
  176. ".set pop \n\t"
  177. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  178. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  179. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  180. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  181. : [in_int]"r"(in_int)
  182. : "memory"
  183. );
  184. curidx = qc1;
  185. curidx *= 3;
  186. curidx += qc2;
  187. curidx *= 3;
  188. curidx += qc3;
  189. curidx *= 3;
  190. curidx += qc4;
  191. curidx += 40;
  192. put_bits(pb, p_bits[curidx], p_codes[curidx]);
  193. if (out || energy) {
  194. float e1,e2,e3,e4;
  195. vec = &p_vec[curidx*4];
  196. e1 = vec[0] * IQ;
  197. e2 = vec[1] * IQ;
  198. e3 = vec[2] * IQ;
  199. e4 = vec[3] * IQ;
  200. if (out) {
  201. out[i+0] = e1;
  202. out[i+1] = e2;
  203. out[i+2] = e3;
  204. out[i+3] = e4;
  205. }
  206. if (energy)
  207. qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  208. }
  209. }
  210. if (energy)
  211. *energy = qenergy;
  212. }
  213. static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
  214. PutBitContext *pb, const float *in, float *out,
  215. const float *scaled, int size, int scale_idx,
  216. int cb, const float lambda, const float uplim,
  217. int *bits, float *energy, const float ROUNDING)
  218. {
  219. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  220. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  221. int i;
  222. int qc1, qc2, qc3, qc4;
  223. float qenergy = 0.0f;
  224. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  225. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  226. float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  227. abs_pow34_v(s->scoefs, in, size);
  228. scaled = s->scoefs;
  229. for (i = 0; i < size; i += 4) {
  230. int curidx, sign, count;
  231. int *in_int = (int *)&in[i];
  232. uint8_t v_bits;
  233. unsigned int v_codes;
  234. int t0, t1, t2, t3, t4;
  235. const float *vec;
  236. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  237. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  238. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  239. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  240. __asm__ volatile (
  241. ".set push \n\t"
  242. ".set noreorder \n\t"
  243. "ori %[t4], $zero, 2 \n\t"
  244. "ori %[sign], $zero, 0 \n\t"
  245. "slt %[t0], %[t4], %[qc1] \n\t"
  246. "slt %[t1], %[t4], %[qc2] \n\t"
  247. "slt %[t2], %[t4], %[qc3] \n\t"
  248. "slt %[t3], %[t4], %[qc4] \n\t"
  249. "movn %[qc1], %[t4], %[t0] \n\t"
  250. "movn %[qc2], %[t4], %[t1] \n\t"
  251. "movn %[qc3], %[t4], %[t2] \n\t"
  252. "movn %[qc4], %[t4], %[t3] \n\t"
  253. "lw %[t0], 0(%[in_int]) \n\t"
  254. "lw %[t1], 4(%[in_int]) \n\t"
  255. "lw %[t2], 8(%[in_int]) \n\t"
  256. "lw %[t3], 12(%[in_int]) \n\t"
  257. "slt %[t0], %[t0], $zero \n\t"
  258. "movn %[sign], %[t0], %[qc1] \n\t"
  259. "slt %[t1], %[t1], $zero \n\t"
  260. "slt %[t2], %[t2], $zero \n\t"
  261. "slt %[t3], %[t3], $zero \n\t"
  262. "sll %[t0], %[sign], 1 \n\t"
  263. "or %[t0], %[t0], %[t1] \n\t"
  264. "movn %[sign], %[t0], %[qc2] \n\t"
  265. "slt %[t4], $zero, %[qc1] \n\t"
  266. "slt %[t1], $zero, %[qc2] \n\t"
  267. "slt %[count], $zero, %[qc3] \n\t"
  268. "sll %[t0], %[sign], 1 \n\t"
  269. "or %[t0], %[t0], %[t2] \n\t"
  270. "movn %[sign], %[t0], %[qc3] \n\t"
  271. "slt %[t2], $zero, %[qc4] \n\t"
  272. "addu %[count], %[count], %[t4] \n\t"
  273. "addu %[count], %[count], %[t1] \n\t"
  274. "sll %[t0], %[sign], 1 \n\t"
  275. "or %[t0], %[t0], %[t3] \n\t"
  276. "movn %[sign], %[t0], %[qc4] \n\t"
  277. "addu %[count], %[count], %[t2] \n\t"
  278. ".set pop \n\t"
  279. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  280. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  281. [sign]"=&r"(sign), [count]"=&r"(count),
  282. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  283. [t4]"=&r"(t4)
  284. : [in_int]"r"(in_int)
  285. : "memory"
  286. );
  287. curidx = qc1;
  288. curidx *= 3;
  289. curidx += qc2;
  290. curidx *= 3;
  291. curidx += qc3;
  292. curidx *= 3;
  293. curidx += qc4;
  294. v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
  295. v_bits = p_bits[curidx] + count;
  296. put_bits(pb, v_bits, v_codes);
  297. if (out || energy) {
  298. float e1,e2,e3,e4;
  299. vec = &p_vec[curidx*4];
  300. e1 = copysignf(vec[0] * IQ, in[i+0]);
  301. e2 = copysignf(vec[1] * IQ, in[i+1]);
  302. e3 = copysignf(vec[2] * IQ, in[i+2]);
  303. e4 = copysignf(vec[3] * IQ, in[i+3]);
  304. if (out) {
  305. out[i+0] = e1;
  306. out[i+1] = e2;
  307. out[i+2] = e3;
  308. out[i+3] = e4;
  309. }
  310. if (energy)
  311. qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  312. }
  313. }
  314. if (energy)
  315. *energy = qenergy;
  316. }
  317. static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
  318. PutBitContext *pb, const float *in, float *out,
  319. const float *scaled, int size, int scale_idx,
  320. int cb, const float lambda, const float uplim,
  321. int *bits, float *energy, const float ROUNDING)
  322. {
  323. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  324. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  325. int i;
  326. int qc1, qc2, qc3, qc4;
  327. float qenergy = 0.0f;
  328. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  329. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  330. float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  331. abs_pow34_v(s->scoefs, in, size);
  332. scaled = s->scoefs;
  333. for (i = 0; i < size; i += 4) {
  334. int curidx, curidx2;
  335. int *in_int = (int *)&in[i];
  336. uint8_t v_bits;
  337. unsigned int v_codes;
  338. int t0, t1, t2, t3, t4, t5, t6, t7;
  339. const float *vec1, *vec2;
  340. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  341. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  342. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  343. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  344. __asm__ volatile (
  345. ".set push \n\t"
  346. ".set noreorder \n\t"
  347. "ori %[t4], $zero, 4 \n\t"
  348. "slt %[t0], %[t4], %[qc1] \n\t"
  349. "slt %[t1], %[t4], %[qc2] \n\t"
  350. "slt %[t2], %[t4], %[qc3] \n\t"
  351. "slt %[t3], %[t4], %[qc4] \n\t"
  352. "movn %[qc1], %[t4], %[t0] \n\t"
  353. "movn %[qc2], %[t4], %[t1] \n\t"
  354. "movn %[qc3], %[t4], %[t2] \n\t"
  355. "movn %[qc4], %[t4], %[t3] \n\t"
  356. "lw %[t0], 0(%[in_int]) \n\t"
  357. "lw %[t1], 4(%[in_int]) \n\t"
  358. "lw %[t2], 8(%[in_int]) \n\t"
  359. "lw %[t3], 12(%[in_int]) \n\t"
  360. "srl %[t0], %[t0], 31 \n\t"
  361. "srl %[t1], %[t1], 31 \n\t"
  362. "srl %[t2], %[t2], 31 \n\t"
  363. "srl %[t3], %[t3], 31 \n\t"
  364. "subu %[t4], $zero, %[qc1] \n\t"
  365. "subu %[t5], $zero, %[qc2] \n\t"
  366. "subu %[t6], $zero, %[qc3] \n\t"
  367. "subu %[t7], $zero, %[qc4] \n\t"
  368. "movn %[qc1], %[t4], %[t0] \n\t"
  369. "movn %[qc2], %[t5], %[t1] \n\t"
  370. "movn %[qc3], %[t6], %[t2] \n\t"
  371. "movn %[qc4], %[t7], %[t3] \n\t"
  372. ".set pop \n\t"
  373. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  374. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  375. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  376. [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
  377. : [in_int]"r"(in_int)
  378. : "memory"
  379. );
  380. curidx = 9 * qc1;
  381. curidx += qc2 + 40;
  382. curidx2 = 9 * qc3;
  383. curidx2 += qc4 + 40;
  384. v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
  385. v_bits = p_bits[curidx] + p_bits[curidx2];
  386. put_bits(pb, v_bits, v_codes);
  387. if (out || energy) {
  388. float e1,e2,e3,e4;
  389. vec1 = &p_vec[curidx*2 ];
  390. vec2 = &p_vec[curidx2*2];
  391. e1 = vec1[0] * IQ;
  392. e2 = vec1[1] * IQ;
  393. e3 = vec2[0] * IQ;
  394. e4 = vec2[1] * IQ;
  395. if (out) {
  396. out[i+0] = e1;
  397. out[i+1] = e2;
  398. out[i+2] = e3;
  399. out[i+3] = e4;
  400. }
  401. if (energy)
  402. qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  403. }
  404. }
  405. if (energy)
  406. *energy = qenergy;
  407. }
  408. static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
  409. PutBitContext *pb, const float *in, float *out,
  410. const float *scaled, int size, int scale_idx,
  411. int cb, const float lambda, const float uplim,
  412. int *bits, float *energy, const float ROUNDING)
  413. {
  414. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  415. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  416. int i;
  417. int qc1, qc2, qc3, qc4;
  418. float qenergy = 0.0f;
  419. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  420. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  421. float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  422. abs_pow34_v(s->scoefs, in, size);
  423. scaled = s->scoefs;
  424. for (i = 0; i < size; i += 4) {
  425. int curidx1, curidx2, sign1, count1, sign2, count2;
  426. int *in_int = (int *)&in[i];
  427. uint8_t v_bits;
  428. unsigned int v_codes;
  429. int t0, t1, t2, t3, t4;
  430. const float *vec1, *vec2;
  431. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  432. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  433. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  434. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  435. __asm__ volatile (
  436. ".set push \n\t"
  437. ".set noreorder \n\t"
  438. "ori %[t4], $zero, 7 \n\t"
  439. "ori %[sign1], $zero, 0 \n\t"
  440. "ori %[sign2], $zero, 0 \n\t"
  441. "slt %[t0], %[t4], %[qc1] \n\t"
  442. "slt %[t1], %[t4], %[qc2] \n\t"
  443. "slt %[t2], %[t4], %[qc3] \n\t"
  444. "slt %[t3], %[t4], %[qc4] \n\t"
  445. "movn %[qc1], %[t4], %[t0] \n\t"
  446. "movn %[qc2], %[t4], %[t1] \n\t"
  447. "movn %[qc3], %[t4], %[t2] \n\t"
  448. "movn %[qc4], %[t4], %[t3] \n\t"
  449. "lw %[t0], 0(%[in_int]) \n\t"
  450. "lw %[t1], 4(%[in_int]) \n\t"
  451. "lw %[t2], 8(%[in_int]) \n\t"
  452. "lw %[t3], 12(%[in_int]) \n\t"
  453. "slt %[t0], %[t0], $zero \n\t"
  454. "movn %[sign1], %[t0], %[qc1] \n\t"
  455. "slt %[t2], %[t2], $zero \n\t"
  456. "movn %[sign2], %[t2], %[qc3] \n\t"
  457. "slt %[t1], %[t1], $zero \n\t"
  458. "sll %[t0], %[sign1], 1 \n\t"
  459. "or %[t0], %[t0], %[t1] \n\t"
  460. "movn %[sign1], %[t0], %[qc2] \n\t"
  461. "slt %[t3], %[t3], $zero \n\t"
  462. "sll %[t0], %[sign2], 1 \n\t"
  463. "or %[t0], %[t0], %[t3] \n\t"
  464. "movn %[sign2], %[t0], %[qc4] \n\t"
  465. "slt %[count1], $zero, %[qc1] \n\t"
  466. "slt %[t1], $zero, %[qc2] \n\t"
  467. "slt %[count2], $zero, %[qc3] \n\t"
  468. "slt %[t2], $zero, %[qc4] \n\t"
  469. "addu %[count1], %[count1], %[t1] \n\t"
  470. "addu %[count2], %[count2], %[t2] \n\t"
  471. ".set pop \n\t"
  472. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  473. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  474. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  475. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  476. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  477. [t4]"=&r"(t4)
  478. : [in_int]"r"(in_int)
  479. : "t0", "t1", "t2", "t3", "t4",
  480. "memory"
  481. );
  482. curidx1 = 8 * qc1;
  483. curidx1 += qc2;
  484. v_codes = (p_codes[curidx1] << count1) | sign1;
  485. v_bits = p_bits[curidx1] + count1;
  486. put_bits(pb, v_bits, v_codes);
  487. curidx2 = 8 * qc3;
  488. curidx2 += qc4;
  489. v_codes = (p_codes[curidx2] << count2) | sign2;
  490. v_bits = p_bits[curidx2] + count2;
  491. put_bits(pb, v_bits, v_codes);
  492. if (out || energy) {
  493. float e1,e2,e3,e4;
  494. vec1 = &p_vec[curidx1*2];
  495. vec2 = &p_vec[curidx2*2];
  496. e1 = copysignf(vec1[0] * IQ, in[i+0]);
  497. e2 = copysignf(vec1[1] * IQ, in[i+1]);
  498. e3 = copysignf(vec2[0] * IQ, in[i+2]);
  499. e4 = copysignf(vec2[1] * IQ, in[i+3]);
  500. if (out) {
  501. out[i+0] = e1;
  502. out[i+1] = e2;
  503. out[i+2] = e3;
  504. out[i+3] = e4;
  505. }
  506. if (energy)
  507. qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  508. }
  509. }
  510. if (energy)
  511. *energy = qenergy;
  512. }
  513. static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
  514. PutBitContext *pb, const float *in, float *out,
  515. const float *scaled, int size, int scale_idx,
  516. int cb, const float lambda, const float uplim,
  517. int *bits, float *energy, const float ROUNDING)
  518. {
  519. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  520. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  521. int i;
  522. int qc1, qc2, qc3, qc4;
  523. float qenergy = 0.0f;
  524. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  525. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  526. float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  527. abs_pow34_v(s->scoefs, in, size);
  528. scaled = s->scoefs;
  529. for (i = 0; i < size; i += 4) {
  530. int curidx1, curidx2, sign1, count1, sign2, count2;
  531. int *in_int = (int *)&in[i];
  532. uint8_t v_bits;
  533. unsigned int v_codes;
  534. int t0, t1, t2, t3, t4;
  535. const float *vec1, *vec2;
  536. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  537. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  538. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  539. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  540. __asm__ volatile (
  541. ".set push \n\t"
  542. ".set noreorder \n\t"
  543. "ori %[t4], $zero, 12 \n\t"
  544. "ori %[sign1], $zero, 0 \n\t"
  545. "ori %[sign2], $zero, 0 \n\t"
  546. "slt %[t0], %[t4], %[qc1] \n\t"
  547. "slt %[t1], %[t4], %[qc2] \n\t"
  548. "slt %[t2], %[t4], %[qc3] \n\t"
  549. "slt %[t3], %[t4], %[qc4] \n\t"
  550. "movn %[qc1], %[t4], %[t0] \n\t"
  551. "movn %[qc2], %[t4], %[t1] \n\t"
  552. "movn %[qc3], %[t4], %[t2] \n\t"
  553. "movn %[qc4], %[t4], %[t3] \n\t"
  554. "lw %[t0], 0(%[in_int]) \n\t"
  555. "lw %[t1], 4(%[in_int]) \n\t"
  556. "lw %[t2], 8(%[in_int]) \n\t"
  557. "lw %[t3], 12(%[in_int]) \n\t"
  558. "slt %[t0], %[t0], $zero \n\t"
  559. "movn %[sign1], %[t0], %[qc1] \n\t"
  560. "slt %[t2], %[t2], $zero \n\t"
  561. "movn %[sign2], %[t2], %[qc3] \n\t"
  562. "slt %[t1], %[t1], $zero \n\t"
  563. "sll %[t0], %[sign1], 1 \n\t"
  564. "or %[t0], %[t0], %[t1] \n\t"
  565. "movn %[sign1], %[t0], %[qc2] \n\t"
  566. "slt %[t3], %[t3], $zero \n\t"
  567. "sll %[t0], %[sign2], 1 \n\t"
  568. "or %[t0], %[t0], %[t3] \n\t"
  569. "movn %[sign2], %[t0], %[qc4] \n\t"
  570. "slt %[count1], $zero, %[qc1] \n\t"
  571. "slt %[t1], $zero, %[qc2] \n\t"
  572. "slt %[count2], $zero, %[qc3] \n\t"
  573. "slt %[t2], $zero, %[qc4] \n\t"
  574. "addu %[count1], %[count1], %[t1] \n\t"
  575. "addu %[count2], %[count2], %[t2] \n\t"
  576. ".set pop \n\t"
  577. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  578. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  579. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  580. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  581. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  582. [t4]"=&r"(t4)
  583. : [in_int]"r"(in_int)
  584. : "memory"
  585. );
  586. curidx1 = 13 * qc1;
  587. curidx1 += qc2;
  588. v_codes = (p_codes[curidx1] << count1) | sign1;
  589. v_bits = p_bits[curidx1] + count1;
  590. put_bits(pb, v_bits, v_codes);
  591. curidx2 = 13 * qc3;
  592. curidx2 += qc4;
  593. v_codes = (p_codes[curidx2] << count2) | sign2;
  594. v_bits = p_bits[curidx2] + count2;
  595. put_bits(pb, v_bits, v_codes);
  596. if (out || energy) {
  597. float e1,e2,e3,e4;
  598. vec1 = &p_vec[curidx1*2];
  599. vec2 = &p_vec[curidx2*2];
  600. e1 = copysignf(vec1[0] * IQ, in[i+0]);
  601. e2 = copysignf(vec1[1] * IQ, in[i+1]);
  602. e3 = copysignf(vec2[0] * IQ, in[i+2]);
  603. e4 = copysignf(vec2[1] * IQ, in[i+3]);
  604. if (out) {
  605. out[i+0] = e1;
  606. out[i+1] = e2;
  607. out[i+2] = e3;
  608. out[i+3] = e4;
  609. }
  610. if (energy)
  611. qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  612. }
  613. }
  614. if (energy)
  615. *energy = qenergy;
  616. }
  617. static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
  618. PutBitContext *pb, const float *in, float *out,
  619. const float *scaled, int size, int scale_idx,
  620. int cb, const float lambda, const float uplim,
  621. int *bits, float *energy, const float ROUNDING)
  622. {
  623. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  624. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  625. int i;
  626. int qc1, qc2, qc3, qc4;
  627. float qenergy = 0.0f;
  628. uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
  629. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  630. float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
  631. abs_pow34_v(s->scoefs, in, size);
  632. scaled = s->scoefs;
  633. if (cb < 11) {
  634. for (i = 0; i < size; i += 4) {
  635. int curidx, curidx2, sign1, count1, sign2, count2;
  636. int *in_int = (int *)&in[i];
  637. uint8_t v_bits;
  638. unsigned int v_codes;
  639. int t0, t1, t2, t3, t4;
  640. const float *vec1, *vec2;
  641. qc1 = scaled[i ] * Q34 + ROUNDING;
  642. qc2 = scaled[i+1] * Q34 + ROUNDING;
  643. qc3 = scaled[i+2] * Q34 + ROUNDING;
  644. qc4 = scaled[i+3] * Q34 + ROUNDING;
  645. __asm__ volatile (
  646. ".set push \n\t"
  647. ".set noreorder \n\t"
  648. "ori %[t4], $zero, 16 \n\t"
  649. "ori %[sign1], $zero, 0 \n\t"
  650. "ori %[sign2], $zero, 0 \n\t"
  651. "slt %[t0], %[t4], %[qc1] \n\t"
  652. "slt %[t1], %[t4], %[qc2] \n\t"
  653. "slt %[t2], %[t4], %[qc3] \n\t"
  654. "slt %[t3], %[t4], %[qc4] \n\t"
  655. "movn %[qc1], %[t4], %[t0] \n\t"
  656. "movn %[qc2], %[t4], %[t1] \n\t"
  657. "movn %[qc3], %[t4], %[t2] \n\t"
  658. "movn %[qc4], %[t4], %[t3] \n\t"
  659. "lw %[t0], 0(%[in_int]) \n\t"
  660. "lw %[t1], 4(%[in_int]) \n\t"
  661. "lw %[t2], 8(%[in_int]) \n\t"
  662. "lw %[t3], 12(%[in_int]) \n\t"
  663. "slt %[t0], %[t0], $zero \n\t"
  664. "movn %[sign1], %[t0], %[qc1] \n\t"
  665. "slt %[t2], %[t2], $zero \n\t"
  666. "movn %[sign2], %[t2], %[qc3] \n\t"
  667. "slt %[t1], %[t1], $zero \n\t"
  668. "sll %[t0], %[sign1], 1 \n\t"
  669. "or %[t0], %[t0], %[t1] \n\t"
  670. "movn %[sign1], %[t0], %[qc2] \n\t"
  671. "slt %[t3], %[t3], $zero \n\t"
  672. "sll %[t0], %[sign2], 1 \n\t"
  673. "or %[t0], %[t0], %[t3] \n\t"
  674. "movn %[sign2], %[t0], %[qc4] \n\t"
  675. "slt %[count1], $zero, %[qc1] \n\t"
  676. "slt %[t1], $zero, %[qc2] \n\t"
  677. "slt %[count2], $zero, %[qc3] \n\t"
  678. "slt %[t2], $zero, %[qc4] \n\t"
  679. "addu %[count1], %[count1], %[t1] \n\t"
  680. "addu %[count2], %[count2], %[t2] \n\t"
  681. ".set pop \n\t"
  682. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  683. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  684. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  685. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  686. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  687. [t4]"=&r"(t4)
  688. : [in_int]"r"(in_int)
  689. : "memory"
  690. );
  691. curidx = 17 * qc1;
  692. curidx += qc2;
  693. curidx2 = 17 * qc3;
  694. curidx2 += qc4;
  695. v_codes = (p_codes[curidx] << count1) | sign1;
  696. v_bits = p_bits[curidx] + count1;
  697. put_bits(pb, v_bits, v_codes);
  698. v_codes = (p_codes[curidx2] << count2) | sign2;
  699. v_bits = p_bits[curidx2] + count2;
  700. put_bits(pb, v_bits, v_codes);
  701. if (out || energy) {
  702. float e1,e2,e3,e4;
  703. vec1 = &p_vectors[curidx*2 ];
  704. vec2 = &p_vectors[curidx2*2];
  705. e1 = copysignf(vec1[0] * IQ, in[i+0]);
  706. e2 = copysignf(vec1[1] * IQ, in[i+1]);
  707. e3 = copysignf(vec2[0] * IQ, in[i+2]);
  708. e4 = copysignf(vec2[1] * IQ, in[i+3]);
  709. if (out) {
  710. out[i+0] = e1;
  711. out[i+1] = e2;
  712. out[i+2] = e3;
  713. out[i+3] = e4;
  714. }
  715. if (energy)
  716. qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  717. }
  718. }
  719. } else {
  720. for (i = 0; i < size; i += 4) {
  721. int curidx, curidx2, sign1, count1, sign2, count2;
  722. int *in_int = (int *)&in[i];
  723. uint8_t v_bits;
  724. unsigned int v_codes;
  725. int c1, c2, c3, c4;
  726. int t0, t1, t2, t3, t4;
  727. qc1 = scaled[i ] * Q34 + ROUNDING;
  728. qc2 = scaled[i+1] * Q34 + ROUNDING;
  729. qc3 = scaled[i+2] * Q34 + ROUNDING;
  730. qc4 = scaled[i+3] * Q34 + ROUNDING;
  731. __asm__ volatile (
  732. ".set push \n\t"
  733. ".set noreorder \n\t"
  734. "ori %[t4], $zero, 16 \n\t"
  735. "ori %[sign1], $zero, 0 \n\t"
  736. "ori %[sign2], $zero, 0 \n\t"
  737. "shll_s.w %[c1], %[qc1], 18 \n\t"
  738. "shll_s.w %[c2], %[qc2], 18 \n\t"
  739. "shll_s.w %[c3], %[qc3], 18 \n\t"
  740. "shll_s.w %[c4], %[qc4], 18 \n\t"
  741. "srl %[c1], %[c1], 18 \n\t"
  742. "srl %[c2], %[c2], 18 \n\t"
  743. "srl %[c3], %[c3], 18 \n\t"
  744. "srl %[c4], %[c4], 18 \n\t"
  745. "slt %[t0], %[t4], %[qc1] \n\t"
  746. "slt %[t1], %[t4], %[qc2] \n\t"
  747. "slt %[t2], %[t4], %[qc3] \n\t"
  748. "slt %[t3], %[t4], %[qc4] \n\t"
  749. "movn %[qc1], %[t4], %[t0] \n\t"
  750. "movn %[qc2], %[t4], %[t1] \n\t"
  751. "movn %[qc3], %[t4], %[t2] \n\t"
  752. "movn %[qc4], %[t4], %[t3] \n\t"
  753. "lw %[t0], 0(%[in_int]) \n\t"
  754. "lw %[t1], 4(%[in_int]) \n\t"
  755. "lw %[t2], 8(%[in_int]) \n\t"
  756. "lw %[t3], 12(%[in_int]) \n\t"
  757. "slt %[t0], %[t0], $zero \n\t"
  758. "movn %[sign1], %[t0], %[qc1] \n\t"
  759. "slt %[t2], %[t2], $zero \n\t"
  760. "movn %[sign2], %[t2], %[qc3] \n\t"
  761. "slt %[t1], %[t1], $zero \n\t"
  762. "sll %[t0], %[sign1], 1 \n\t"
  763. "or %[t0], %[t0], %[t1] \n\t"
  764. "movn %[sign1], %[t0], %[qc2] \n\t"
  765. "slt %[t3], %[t3], $zero \n\t"
  766. "sll %[t0], %[sign2], 1 \n\t"
  767. "or %[t0], %[t0], %[t3] \n\t"
  768. "movn %[sign2], %[t0], %[qc4] \n\t"
  769. "slt %[count1], $zero, %[qc1] \n\t"
  770. "slt %[t1], $zero, %[qc2] \n\t"
  771. "slt %[count2], $zero, %[qc3] \n\t"
  772. "slt %[t2], $zero, %[qc4] \n\t"
  773. "addu %[count1], %[count1], %[t1] \n\t"
  774. "addu %[count2], %[count2], %[t2] \n\t"
  775. ".set pop \n\t"
  776. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  777. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  778. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  779. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  780. [c1]"=&r"(c1), [c2]"=&r"(c2),
  781. [c3]"=&r"(c3), [c4]"=&r"(c4),
  782. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  783. [t4]"=&r"(t4)
  784. : [in_int]"r"(in_int)
  785. : "memory"
  786. );
  787. curidx = 17 * qc1;
  788. curidx += qc2;
  789. curidx2 = 17 * qc3;
  790. curidx2 += qc4;
  791. v_codes = (p_codes[curidx] << count1) | sign1;
  792. v_bits = p_bits[curidx] + count1;
  793. put_bits(pb, v_bits, v_codes);
  794. if (p_vectors[curidx*2 ] == 64.0f) {
  795. int len = av_log2(c1);
  796. v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
  797. put_bits(pb, len * 2 - 3, v_codes);
  798. }
  799. if (p_vectors[curidx*2+1] == 64.0f) {
  800. int len = av_log2(c2);
  801. v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
  802. put_bits(pb, len*2-3, v_codes);
  803. }
  804. v_codes = (p_codes[curidx2] << count2) | sign2;
  805. v_bits = p_bits[curidx2] + count2;
  806. put_bits(pb, v_bits, v_codes);
  807. if (p_vectors[curidx2*2 ] == 64.0f) {
  808. int len = av_log2(c3);
  809. v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
  810. put_bits(pb, len* 2 - 3, v_codes);
  811. }
  812. if (p_vectors[curidx2*2+1] == 64.0f) {
  813. int len = av_log2(c4);
  814. v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
  815. put_bits(pb, len * 2 - 3, v_codes);
  816. }
  817. if (out || energy) {
  818. float e1, e2, e3, e4;
  819. e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
  820. e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
  821. e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
  822. e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
  823. if (out) {
  824. out[i+0] = e1;
  825. out[i+1] = e2;
  826. out[i+2] = e3;
  827. out[i+3] = e4;
  828. }
  829. if (energy)
  830. qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  831. }
  832. }
  833. }
  834. if (energy)
  835. *energy = qenergy;
  836. }
/**
 * Stub for codebook 12, which does not exist in AAC.
 * Reaching this entry is a programming error, so abort unconditionally.
 */
static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in, float *out,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits, float *energy, const float ROUNDING) {
    av_assert0(0);
}
  844. static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
  845. PutBitContext *pb, const float *in, float *out,
  846. const float *scaled, int size, int scale_idx,
  847. int cb, const float lambda, const float uplim,
  848. int *bits, float *energy, const float ROUNDING) {
  849. int i;
  850. if (bits)
  851. *bits = 0;
  852. if (out) {
  853. for (i = 0; i < size; i += 4) {
  854. out[i ] = 0.0f;
  855. out[i+1] = 0.0f;
  856. out[i+2] = 0.0f;
  857. out[i+3] = 0.0f;
  858. }
  859. }
  860. if (energy)
  861. *energy = 0.0f;
  862. }
/**
 * Dispatch table for quantize+encode, indexed by codebook id (0..15).
 * Entry 12 is a stub because that codebook does not exist; entries 13..15
 * (noise/intensity) encode nothing, hence the ZERO implementation.
 */
static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in, float *out,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits, float *energy, const float ROUNDING) = {
    quantize_and_encode_band_cost_ZERO_mips,
    quantize_and_encode_band_cost_SQUAD_mips,
    quantize_and_encode_band_cost_SQUAD_mips,
    quantize_and_encode_band_cost_UQUAD_mips,
    quantize_and_encode_band_cost_UQUAD_mips,
    quantize_and_encode_band_cost_SPAIR_mips,
    quantize_and_encode_band_cost_SPAIR_mips,
    quantize_and_encode_band_cost_UPAIR7_mips,
    quantize_and_encode_band_cost_UPAIR7_mips,
    quantize_and_encode_band_cost_UPAIR12_mips,
    quantize_and_encode_band_cost_UPAIR12_mips,
    quantize_and_encode_band_cost_ESC_mips,
    quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
    quantize_and_encode_band_cost_ZERO_mips,
    quantize_and_encode_band_cost_ZERO_mips,
    quantize_and_encode_band_cost_ZERO_mips,
};

/* Thin macro front-end: routes a call to the implementation for codebook cb. */
#define quantize_and_encode_band_cost( \
                                s, pb, in, out, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits, energy, ROUNDING) \
    quantize_and_encode_band_cost_arr[cb]( \
                                s, pb, in, out, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits, energy, ROUNDING)
  891. static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
  892. const float *in, float *out, int size, int scale_idx,
  893. int cb, const float lambda, int rtz)
  894. {
  895. quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
  896. INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
  897. }
  898. /**
  899. * Functions developed from template function and optimized for getting the number of bits
  900. */
/** Zero codebook consumes no bits. */
static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits)
{
    return 0;
}
/**
 * Stub for codebook 12, which does not exist in AAC.
 * Reaching this entry is a programming error, so abort unconditionally.
 */
static float get_band_numbits_NONE_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits)
{
    av_assert0(0);
    return 0;
}
/**
 * Bit count for the signed quad codebooks: four coefficients per codeword,
 * each quantized value limited to {-1, 0, 1}, sign carried in the codeword.
 * Returns the total number of codebook bits for the band.
 */
static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    for (i = 0; i < size; i += 4) {
        int curidx;
        /* NOTE(review): float bit pattern read through int* to get the sign
         * bit directly (strict-aliasing unclean, but deliberate here). */
        int *in_int = (int *)&in[i];
        int t0, t1, t2, t3, t4, t5, t6, t7;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* slt clamps each qc to 0/1; srl 31 extracts the input's IEEE sign
         * bit; movn substitutes the negated value when the input was negative,
         * so qc ends up in {-1, 0, 1}. */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "slt %[qc1], $zero, %[qc1] \n\t"
            "slt %[qc2], $zero, %[qc2] \n\t"
            "slt %[qc3], $zero, %[qc3] \n\t"
            "slt %[qc4], $zero, %[qc4] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* base-3 packing of the four ternary digits; +40 recenters the
         * [-1..1]^4 cube onto the start of the table */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curidx += 40;
        curbits += p_bits[curidx];
    }
    return curbits;
}
/**
 * Bit count for the unsigned quad codebooks: four coefficients per codeword,
 * magnitudes clamped to [0, 2], signs sent as separate bits (uquad_sign_bits).
 */
static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int curbits = 0;
    int qc1, qc2, qc3, qc4;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    for (i = 0; i < size; i += 4) {
        int curidx;
        int t0, t1, t2, t3, t4;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* clamp each qc to the codebook maximum 2 (slt + movn) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 2 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );
        /* base-3 packing of the four digits (no offset: magnitudes only) */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curbits += p_bits[curidx];
        curbits += uquad_sign_bits[curidx]; /* one sign bit per nonzero digit */
    }
    return curbits;
}
/**
 * Bit count for the signed pair codebooks: two coefficients per codeword,
 * values clamped to [-4, 4], sign carried in the codeword. Each loop
 * iteration handles two codewords (four coefficients).
 */
static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        /* NOTE(review): float bit pattern read through int* to get the sign
         * bit directly (strict-aliasing unclean, but deliberate here). */
        int *in_int = (int *)&in[i];
        int t0, t1, t2, t3, t4, t5, t6, t7;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* clamp each qc to 4, then negate it when the corresponding input
         * float is negative (sign bit via srl 31, applied with movn) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 4 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* 9x9 pair table, +40 recenters the [-4..4]^2 square onto index 0 */
        curidx = 9 * qc1;
        curidx += qc2 + 40;
        curidx2 = 9 * qc3;
        curidx2 += qc4 + 40;
        curbits += p_bits[curidx] + p_bits[curidx2];
    }
    return curbits;
}
/**
 * Bit count for the unsigned pair codebooks with maximum value 7:
 * two coefficients per codeword, magnitudes clamped to [0, 7], signs sent
 * as separate bits (upair7_sign_bits). Two codewords per loop iteration.
 */
static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int t0, t1, t2, t3, t4;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* clamp each qc to the codebook maximum 7 (slt + movn) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 7 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
            [t4]"=&r"(t4)
        );
        /* 8x8 pair table index */
        curidx = 8 * qc1;
        curidx += qc2;
        curidx2 = 8 * qc3;
        curidx2 += qc4;
        curbits += p_bits[curidx] +
                   upair7_sign_bits[curidx] +
                   p_bits[curidx2] +
                   upair7_sign_bits[curidx2];
    }
    return curbits;
}
/**
 * Bit count for the unsigned pair codebooks with maximum value 12:
 * two coefficients per codeword, magnitudes clamped to [0, 12], signs sent
 * as separate bits (upair12_sign_bits). Two codewords per loop iteration.
 */
static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
                                           PutBitContext *pb, const float *in,
                                           const float *scaled, int size, int scale_idx,
                                           int cb, const float lambda, const float uplim,
                                           int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int t0, t1, t2, t3, t4;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* clamp each qc to the codebook maximum 12 (slt + movn) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 12 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );
        /* 13x13 pair table index */
        curidx = 13 * qc1;
        curidx += qc2;
        curidx2 = 13 * qc3;
        curidx2 += qc4;
        curbits += p_bits[curidx] +
                   p_bits[curidx2] +
                   upair12_sign_bits[curidx] +
                   upair12_sign_bits[curidx2];
    }
    return curbits;
}
/**
 * Bit count for the escape codebook (cb 11): two coefficients per codeword.
 * Magnitudes above 15 are coded with the escape symbol 16 plus an explicit
 * escape value; this routine adds the codeword bits, one sign bit per
 * nonzero coefficient (esc_sign_bits) and the escape-value bits.
 */
static float get_band_numbits_ESC_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
                                       int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int cond0, cond1, cond2, cond3;
        int c1, c2, c3, c4;
        int t4, t5;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* shll_s.w (saturating shift, MIPS DSP ASE) followed by srl clamps
         * each magnitude to 13 bits for the escape-length computation.
         * cond* = (qc > 15); movn then replaces those qc with the escape
         * symbol 16. 31 - clz(c) is floor(log2(c)), so c ends up as
         * 2*floor(log2(c)) - 3 = total escape prefix+value bit count,
         * finally masked to 0 (and with -cond) when no escape is needed. */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 15 \n\t"
            "ori %[t5], $zero, 16 \n\t"
            "shll_s.w %[c1], %[qc1], 18 \n\t"
            "shll_s.w %[c2], %[qc2], 18 \n\t"
            "shll_s.w %[c3], %[qc3], 18 \n\t"
            "shll_s.w %[c4], %[qc4], 18 \n\t"
            "srl %[c1], %[c1], 18 \n\t"
            "srl %[c2], %[c2], 18 \n\t"
            "srl %[c3], %[c3], 18 \n\t"
            "srl %[c4], %[c4], 18 \n\t"
            "slt %[cond0], %[t4], %[qc1] \n\t"
            "slt %[cond1], %[t4], %[qc2] \n\t"
            "slt %[cond2], %[t4], %[qc3] \n\t"
            "slt %[cond3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t5], %[cond0] \n\t"
            "movn %[qc2], %[t5], %[cond1] \n\t"
            "movn %[qc3], %[t5], %[cond2] \n\t"
            "movn %[qc4], %[t5], %[cond3] \n\t"
            "ori %[t5], $zero, 31 \n\t"
            "clz %[c1], %[c1] \n\t"
            "clz %[c2], %[c2] \n\t"
            "clz %[c3], %[c3] \n\t"
            "clz %[c4], %[c4] \n\t"
            "subu %[c1], %[t5], %[c1] \n\t"
            "subu %[c2], %[t5], %[c2] \n\t"
            "subu %[c3], %[t5], %[c3] \n\t"
            "subu %[c4], %[t5], %[c4] \n\t"
            "sll %[c1], %[c1], 1 \n\t"
            "sll %[c2], %[c2], 1 \n\t"
            "sll %[c3], %[c3], 1 \n\t"
            "sll %[c4], %[c4], 1 \n\t"
            "addiu %[c1], %[c1], -3 \n\t"
            "addiu %[c2], %[c2], -3 \n\t"
            "addiu %[c3], %[c3], -3 \n\t"
            "addiu %[c4], %[c4], -3 \n\t"
            "subu %[cond0], $zero, %[cond0] \n\t"
            "subu %[cond1], $zero, %[cond1] \n\t"
            "subu %[cond2], $zero, %[cond2] \n\t"
            "subu %[cond3], $zero, %[cond3] \n\t"
            "and %[c1], %[c1], %[cond0] \n\t"
            "and %[c2], %[c2], %[cond1] \n\t"
            "and %[c3], %[c3], %[cond2] \n\t"
            "and %[c4], %[c4], %[cond3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
              [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
              [c1]"=&r"(c1), [c2]"=&r"(c2),
              [c3]"=&r"(c3), [c4]"=&r"(c4),
              [t4]"=&r"(t4), [t5]"=&r"(t5)
        );
        /* 17x17 pair table index (values 0..16 incl. the escape symbol) */
        curidx = 17 * qc1;
        curidx += qc2;
        curidx2 = 17 * qc3;
        curidx2 += qc4;
        curbits += p_bits[curidx];
        curbits += esc_sign_bits[curidx];
        curbits += p_bits[curidx2];
        curbits += esc_sign_bits[curidx2];
        curbits += c1;
        curbits += c2;
        curbits += c3;
        curbits += c4;
    }
    return curbits;
}
/**
 * Dispatch table for bit counting, indexed by codebook id (0..15);
 * layout mirrors quantize_and_encode_band_cost_arr.
 */
static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
                                             PutBitContext *pb, const float *in,
                                             const float *scaled, int size, int scale_idx,
                                             int cb, const float lambda, const float uplim,
                                             int *bits) = {
    get_band_numbits_ZERO_mips,
    get_band_numbits_SQUAD_mips,
    get_band_numbits_SQUAD_mips,
    get_band_numbits_UQUAD_mips,
    get_band_numbits_UQUAD_mips,
    get_band_numbits_SPAIR_mips,
    get_band_numbits_SPAIR_mips,
    get_band_numbits_UPAIR7_mips,
    get_band_numbits_UPAIR7_mips,
    get_band_numbits_UPAIR12_mips,
    get_band_numbits_UPAIR12_mips,
    get_band_numbits_ESC_mips,
    get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
    get_band_numbits_ZERO_mips,
    get_band_numbits_ZERO_mips,
    get_band_numbits_ZERO_mips,
};

/* Thin macro front-end: routes a call to the implementation for codebook cb. */
#define get_band_numbits( \
                                s, pb, in, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits) \
    get_band_numbits_arr[cb]( \
                                s, pb, in, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits)
/**
 * quantize_band_cost-shaped entry that returns only the bit count for the
 * band; the distortion-related arguments (energy, rtz) are not used.
 */
static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits, float *energy, int rtz)
{
    return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
}
  1310. /**
  1311. * Functions developed from template function and optimized for getting the band cost
  1312. */
  1313. #if HAVE_MIPSFPU
  1314. static float get_band_cost_ZERO_mips(struct AACEncContext *s,
  1315. PutBitContext *pb, const float *in,
  1316. const float *scaled, int size, int scale_idx,
  1317. int cb, const float lambda, const float uplim,
  1318. int *bits, float *energy)
  1319. {
  1320. int i;
  1321. float cost = 0;
  1322. for (i = 0; i < size; i += 4) {
  1323. cost += in[i ] * in[i ];
  1324. cost += in[i+1] * in[i+1];
  1325. cost += in[i+2] * in[i+2];
  1326. cost += in[i+3] * in[i+3];
  1327. }
  1328. if (bits)
  1329. *bits = 0;
  1330. if (energy)
  1331. *energy = 0.0f;
  1332. return cost * lambda;
  1333. }
/**
 * Stub for codebook 12, which does not exist in AAC.
 * Reaching this entry is a programming error, so abort unconditionally.
 */
static float get_band_cost_NONE_mips(struct AACEncContext *s,
                                     PutBitContext *pb, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits, float *energy)
{
    av_assert0(0);
    return 0;
}
/**
 * Rate-distortion cost for the signed quad codebooks: quantizes four
 * coefficients per codeword to {-1, 0, 1}, accumulates the squared
 * dequantization error and the codeword bits, and optionally reports the
 * quantized energy. Returns cost*lambda + bits.
 */
static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
    for (i = 0; i < size; i += 4) {
        const float *vec;
        int curidx;
        /* NOTE(review): float bit pattern read through int* to get the sign
         * bit directly (strict-aliasing unclean, but deliberate here). */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4, t5, t6, t7;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* slt clamps each qc to 0/1; srl 31 extracts the input's sign bit;
         * movn negates qc when the input was negative -> qc in {-1, 0, 1} */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "slt %[qc1], $zero, %[qc1] \n\t"
            "slt %[qc2], $zero, %[qc2] \n\t"
            "slt %[qc3], $zero, %[qc3] \n\t"
            "slt %[qc4], $zero, %[qc4] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* base-3 packing of the four ternary digits, +40 table offset */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curidx += 40;
        curbits += p_bits[curidx];
        vec = &p_codes[curidx*4]; /* 4 codebook floats per quad entry */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec[2]*vec[2] + vec[3]*vec[3];
        /* nmsub.s fd, fr, fs, ft computes fr - fs*ft, so each di is the
         * dequantization error in - vec*IQ */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 $f0, 0(%[in_pos]) \n\t"
            "lwc1 $f1, 0(%[vec]) \n\t"
            "lwc1 $f2, 4(%[in_pos]) \n\t"
            "lwc1 $f3, 4(%[vec]) \n\t"
            "lwc1 $f4, 8(%[in_pos]) \n\t"
            "lwc1 $f5, 8(%[vec]) \n\t"
            "lwc1 $f6, 12(%[in_pos]) \n\t"
            "lwc1 $f7, 12(%[vec]) \n\t"
            "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
            "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
            "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
            "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
            ".set pop \n\t"
            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "$f4", "$f5", "$f6", "$f7",
              "memory"
        );
        cost += di0 * di0 + di1 * di1
             +  di2 * di2 + di3 * di3;
    }
    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ); /* codebook energy scaled to signal domain */
    return cost * lambda + curbits;
}
/**
 * Rate-distortion cost for the unsigned quad codebooks: magnitudes clamped
 * to [0, 2], sign bits counted via uquad_sign_bits, distortion measured
 * against |in| since the codebook stores magnitudes only.
 * Returns cost*lambda + bits.
 */
static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int curbits = 0;
    int qc1, qc2, qc3, qc4;
    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
    for (i = 0; i < size; i += 4) {
        const float *vec;
        int curidx;
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* clamp each qc to the codebook maximum 2 (slt + movn) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 2 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );
        /* base-3 packing of the four digits (no offset: magnitudes only) */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curbits += p_bits[curidx];
        curbits += uquad_sign_bits[curidx];
        vec = &p_codes[curidx*4]; /* 4 codebook floats per quad entry */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec[2]*vec[2] + vec[3]*vec[3];
        /* abs.s takes |in| (unsigned codebook), then nmsub.s computes the
         * dequantization error |in| - vec*IQ (fd = fr - fs*ft) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 %[di0], 0(%[in_pos]) \n\t"
            "lwc1 %[di1], 4(%[in_pos]) \n\t"
            "lwc1 %[di2], 8(%[in_pos]) \n\t"
            "lwc1 %[di3], 12(%[in_pos]) \n\t"
            "abs.s %[di0], %[di0] \n\t"
            "abs.s %[di1], %[di1] \n\t"
            "abs.s %[di2], %[di2] \n\t"
            "abs.s %[di3], %[di3] \n\t"
            "lwc1 $f0, 0(%[vec]) \n\t"
            "lwc1 $f1, 4(%[vec]) \n\t"
            "lwc1 $f2, 8(%[vec]) \n\t"
            "lwc1 $f3, 12(%[vec]) \n\t"
            "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
            "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
            "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
            "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
            ".set pop \n\t"
            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );
        cost += di0 * di0 + di1 * di1
             +  di2 * di2 + di3 * di3;
    }
    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ); /* codebook energy scaled to signal domain */
    return cost * lambda + curbits;
}
/**
 * Rate-distortion cost for the signed pair codebooks: two coefficients per
 * codeword, values clamped to [-4, 4] with the sign carried in the codeword.
 * Two codewords (four coefficients) per loop iteration.
 * Returns cost*lambda + bits.
 */
static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        int curidx, curidx2;
        /* NOTE(review): float bit pattern read through int* to get the sign
         * bit directly (strict-aliasing unclean, but deliberate here). */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4, t5, t6, t7;
        /* quantize magnitudes; scaled[] holds the |in|^(3/4) values */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* clamp each qc to 4, then negate it when the corresponding input
         * float is negative (sign bit via srl 31, applied with movn) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 4 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* 9x9 pair table, +40 recenters the [-4..4]^2 square onto index 0 */
        curidx = 9 * qc1;
        curidx += qc2 + 40;
        curidx2 = 9 * qc3;
        curidx2 += qc4 + 40;
        curbits += p_bits[curidx];
        curbits += p_bits[curidx2];
        vec = &p_codes[curidx*2];  /* 2 codebook floats per pair entry */
        vec2 = &p_codes[curidx2*2];
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
        /* nmsub.s fd, fr, fs, ft computes fr - fs*ft, so each di is the
         * dequantization error in - vec*IQ */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 $f0, 0(%[in_pos]) \n\t"
            "lwc1 $f1, 0(%[vec]) \n\t"
            "lwc1 $f2, 4(%[in_pos]) \n\t"
            "lwc1 $f3, 4(%[vec]) \n\t"
            "lwc1 $f4, 8(%[in_pos]) \n\t"
            "lwc1 $f5, 0(%[vec2]) \n\t"
            "lwc1 $f6, 12(%[in_pos]) \n\t"
            "lwc1 $f7, 4(%[vec2]) \n\t"
            "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
            "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
            "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
            "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
            ".set pop \n\t"
            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "$f4", "$f5", "$f6", "$f7",
              "memory"
        );
        cost += di0 * di0 + di1 * di1
             +  di2 * di2 + di3 * di3;
    }
    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ); /* codebook energy scaled to signal domain */
    return cost * lambda + curbits;
}
/**
 * Rate/distortion cost of a band quantized with an unsigned-pair
 * codebook of maximum magnitude 7 (AAC codebooks 7/8), MIPS-optimized.
 *
 * Four coefficients (two codebook pairs) are handled per iteration:
 * the clamp/sign/count step and the reconstruction-error step are both
 * done in inline assembly.
 *
 * Returns cost * lambda + bits; optionally stores the exact bit count
 * in *bits and the quantized energy in *energy.
 */
static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
                                       int *bits, float *energy)
{
    /* Forward quantization step (pow 3/4 table) and its inverse. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    /* Per-codebook Huffman bit lengths and dequantized codeword values. */
    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float   *p_codes = (float *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        int curidx, curidx2, sign1, count1, sign2, count2;
        /* Integer view of the input used only to test IEEE sign bits.
         * NOTE(review): type-punning via pointer cast — inherited from the
         * original; relies on the compiler tolerating the aliasing. */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;

        /* Quantize magnitudes with round-to-nearest. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /* Clamp qc1..qc4 to 7, pack the sign bits of the nonzero
         * coefficients into sign1/sign2 and count the nonzero entries of
         * each pair in count1/count2.  sign1/count1/sign2/count2 are not
         * read afterwards in this cost-only path (the layout mirrors the
         * encoding variant of this routine). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 7 \n\t"
            "ori %[sign1], $zero, 0 \n\t"
            "ori %[sign2], $zero, 0 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "slt %[t0], %[t0], $zero \n\t"
            "movn %[sign1], %[t0], %[qc1] \n\t"
            "slt %[t2], %[t2], $zero \n\t"
            "movn %[sign2], %[t2], %[qc3] \n\t"
            "slt %[t1], %[t1], $zero \n\t"
            "sll %[t0], %[sign1], 1 \n\t"
            "or %[t0], %[t0], %[t1] \n\t"
            "movn %[sign1], %[t0], %[qc2] \n\t"
            "slt %[t3], %[t3], $zero \n\t"
            "sll %[t0], %[sign2], 1 \n\t"
            "or %[t0], %[t0], %[t3] \n\t"
            "movn %[sign2], %[t0], %[qc4] \n\t"
            "slt %[count1], $zero, %[qc1] \n\t"
            "slt %[t1], $zero, %[qc2] \n\t"
            "slt %[count2], $zero, %[qc3] \n\t"
            "slt %[t2], $zero, %[qc4] \n\t"
            "addu %[count1], %[count1], %[t1] \n\t"
            "addu %[count2], %[count2], %[t2] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Pair indices: 8 entries per row for magnitude range 0..7. */
        curidx = 8 * qc1;
        curidx += qc2;

        curidx2 = 8 * qc3;
        curidx2 += qc4;

        /* Codeword bits plus one sign bit per nonzero coefficient. */
        curbits += p_bits[curidx];
        curbits += upair7_sign_bits[curidx];
        vec = &p_codes[curidx*2];

        curbits += p_bits[curidx2];
        curbits += upair7_sign_bits[curidx2];
        vec2 = &p_codes[curidx2*2];

        /* Energy of the quantized values (scaled by IQ*IQ on output). */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];

        /* di = |in| - codeword * IQ via nmsub.s (fr - fs*ft). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 %[di0], 0(%[in_pos]) \n\t"
            "lwc1 %[di1], 4(%[in_pos]) \n\t"
            "lwc1 %[di2], 8(%[in_pos]) \n\t"
            "lwc1 %[di3], 12(%[in_pos]) \n\t"
            "abs.s %[di0], %[di0] \n\t"
            "abs.s %[di1], %[di1] \n\t"
            "abs.s %[di2], %[di2] \n\t"
            "abs.s %[di3], %[di3] \n\t"
            "lwc1 $f0, 0(%[vec]) \n\t"
            "lwc1 $f1, 4(%[vec]) \n\t"
            "lwc1 $f2, 0(%[vec2]) \n\t"
            "lwc1 $f3, 4(%[vec2]) \n\t"
            "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
            "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
            "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
            "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
            ".set pop \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
             +  di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
/**
 * Rate/distortion cost of a band quantized with an unsigned-pair
 * codebook of maximum magnitude 12 (AAC codebooks 9/10), MIPS-optimized.
 *
 * Identical structure to the UPAIR7 variant, but magnitudes are clamped
 * to 12 and the pair index uses a row stride of 13.
 *
 * Returns cost * lambda + bits; optionally stores the exact bit count
 * in *bits and the quantized energy in *energy.
 */
static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits, float *energy)
{
    /* Forward quantization step (pow 3/4 table) and its inverse. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    /* Per-codebook Huffman bit lengths and dequantized codeword values. */
    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float   *p_codes = (float *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        int curidx, curidx2;
        int sign1, count1, sign2, count2;
        /* Integer view of the input used only to test IEEE sign bits.
         * NOTE(review): type-punning via pointer cast — inherited from the
         * original; relies on the compiler tolerating the aliasing. */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;

        /* Quantize magnitudes with round-to-nearest. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /* Clamp qc1..qc4 to 12, gather sign bits of nonzero coefficients
         * into sign1/sign2 and count nonzero entries per pair.  The
         * sign/count outputs are not read afterwards in this cost-only
         * path (layout mirrors the encoding variant). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 12 \n\t"
            "ori %[sign1], $zero, 0 \n\t"
            "ori %[sign2], $zero, 0 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "slt %[t0], %[t0], $zero \n\t"
            "movn %[sign1], %[t0], %[qc1] \n\t"
            "slt %[t2], %[t2], $zero \n\t"
            "movn %[sign2], %[t2], %[qc3] \n\t"
            "slt %[t1], %[t1], $zero \n\t"
            "sll %[t0], %[sign1], 1 \n\t"
            "or %[t0], %[t0], %[t1] \n\t"
            "movn %[sign1], %[t0], %[qc2] \n\t"
            "slt %[t3], %[t3], $zero \n\t"
            "sll %[t0], %[sign2], 1 \n\t"
            "or %[t0], %[t0], %[t3] \n\t"
            "movn %[sign2], %[t0], %[qc4] \n\t"
            "slt %[count1], $zero, %[qc1] \n\t"
            "slt %[t1], $zero, %[qc2] \n\t"
            "slt %[count2], $zero, %[qc3] \n\t"
            "slt %[t2], $zero, %[qc4] \n\t"
            "addu %[count1], %[count1], %[t1] \n\t"
            "addu %[count2], %[count2], %[t2] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Pair indices: 13 entries per row for magnitude range 0..12. */
        curidx = 13 * qc1;
        curidx += qc2;

        curidx2 = 13 * qc3;
        curidx2 += qc4;

        /* Codeword bits plus one sign bit per nonzero coefficient. */
        curbits += p_bits[curidx];
        curbits += p_bits[curidx2];
        curbits += upair12_sign_bits[curidx];
        curbits += upair12_sign_bits[curidx2];
        vec = &p_codes[curidx*2];
        vec2 = &p_codes[curidx2*2];

        /* Energy of the quantized values (scaled by IQ*IQ on output). */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];

        /* di = |in| - codeword * IQ via nmsub.s (fr - fs*ft). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 %[di0], 0(%[in_pos]) \n\t"
            "lwc1 %[di1], 4(%[in_pos]) \n\t"
            "lwc1 %[di2], 8(%[in_pos]) \n\t"
            "lwc1 %[di3], 12(%[in_pos]) \n\t"
            "abs.s %[di0], %[di0] \n\t"
            "abs.s %[di1], %[di1] \n\t"
            "abs.s %[di2], %[di2] \n\t"
            "abs.s %[di3], %[di3] \n\t"
            "lwc1 $f0, 0(%[vec]) \n\t"
            "lwc1 $f1, 4(%[vec]) \n\t"
            "lwc1 $f2, 0(%[vec2]) \n\t"
            "lwc1 $f3, 4(%[vec2]) \n\t"
            "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
            "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
            "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
            "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
            ".set pop \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
             +  di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
  1882. static float get_band_cost_ESC_mips(struct AACEncContext *s,
  1883. PutBitContext *pb, const float *in,
  1884. const float *scaled, int size, int scale_idx,
  1885. int cb, const float lambda, const float uplim,
  1886. int *bits, float *energy)
  1887. {
  1888. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1889. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1890. const float CLIPPED_ESCAPE = 165140.0f * IQ;
  1891. int i;
  1892. float cost = 0;
  1893. float qenergy = 0.0f;
  1894. int qc1, qc2, qc3, qc4;
  1895. int curbits = 0;
  1896. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  1897. float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
  1898. for (i = 0; i < size; i += 4) {
  1899. const float *vec, *vec2;
  1900. int curidx, curidx2;
  1901. float t1, t2, t3, t4, V;
  1902. float di1, di2, di3, di4;
  1903. int cond0, cond1, cond2, cond3;
  1904. int c1, c2, c3, c4;
  1905. int t6, t7;
  1906. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1907. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1908. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1909. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1910. __asm__ volatile (
  1911. ".set push \n\t"
  1912. ".set noreorder \n\t"
  1913. "ori %[t6], $zero, 15 \n\t"
  1914. "ori %[t7], $zero, 16 \n\t"
  1915. "shll_s.w %[c1], %[qc1], 18 \n\t"
  1916. "shll_s.w %[c2], %[qc2], 18 \n\t"
  1917. "shll_s.w %[c3], %[qc3], 18 \n\t"
  1918. "shll_s.w %[c4], %[qc4], 18 \n\t"
  1919. "srl %[c1], %[c1], 18 \n\t"
  1920. "srl %[c2], %[c2], 18 \n\t"
  1921. "srl %[c3], %[c3], 18 \n\t"
  1922. "srl %[c4], %[c4], 18 \n\t"
  1923. "slt %[cond0], %[t6], %[qc1] \n\t"
  1924. "slt %[cond1], %[t6], %[qc2] \n\t"
  1925. "slt %[cond2], %[t6], %[qc3] \n\t"
  1926. "slt %[cond3], %[t6], %[qc4] \n\t"
  1927. "movn %[qc1], %[t7], %[cond0] \n\t"
  1928. "movn %[qc2], %[t7], %[cond1] \n\t"
  1929. "movn %[qc3], %[t7], %[cond2] \n\t"
  1930. "movn %[qc4], %[t7], %[cond3] \n\t"
  1931. ".set pop \n\t"
  1932. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1933. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1934. [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
  1935. [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
  1936. [c1]"=&r"(c1), [c2]"=&r"(c2),
  1937. [c3]"=&r"(c3), [c4]"=&r"(c4),
  1938. [t6]"=&r"(t6), [t7]"=&r"(t7)
  1939. );
  1940. curidx = 17 * qc1;
  1941. curidx += qc2;
  1942. curidx2 = 17 * qc3;
  1943. curidx2 += qc4;
  1944. curbits += p_bits[curidx];
  1945. curbits += esc_sign_bits[curidx];
  1946. vec = &p_codes[curidx*2];
  1947. curbits += p_bits[curidx2];
  1948. curbits += esc_sign_bits[curidx2];
  1949. vec2 = &p_codes[curidx2*2];
  1950. curbits += (av_log2(c1) * 2 - 3) & (-cond0);
  1951. curbits += (av_log2(c2) * 2 - 3) & (-cond1);
  1952. curbits += (av_log2(c3) * 2 - 3) & (-cond2);
  1953. curbits += (av_log2(c4) * 2 - 3) & (-cond3);
  1954. t1 = fabsf(in[i ]);
  1955. t2 = fabsf(in[i+1]);
  1956. t3 = fabsf(in[i+2]);
  1957. t4 = fabsf(in[i+3]);
  1958. if (cond0) {
  1959. if (t1 >= CLIPPED_ESCAPE) {
  1960. di1 = t1 - CLIPPED_ESCAPE;
  1961. qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  1962. } else {
  1963. di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
  1964. qenergy += V*V;
  1965. }
  1966. } else {
  1967. di1 = t1 - (V = vec[0] * IQ);
  1968. qenergy += V*V;
  1969. }
  1970. if (cond1) {
  1971. if (t2 >= CLIPPED_ESCAPE) {
  1972. di2 = t2 - CLIPPED_ESCAPE;
  1973. qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  1974. } else {
  1975. di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
  1976. qenergy += V*V;
  1977. }
  1978. } else {
  1979. di2 = t2 - (V = vec[1] * IQ);
  1980. qenergy += V*V;
  1981. }
  1982. if (cond2) {
  1983. if (t3 >= CLIPPED_ESCAPE) {
  1984. di3 = t3 - CLIPPED_ESCAPE;
  1985. qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  1986. } else {
  1987. di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
  1988. qenergy += V*V;
  1989. }
  1990. } else {
  1991. di3 = t3 - (V = vec2[0] * IQ);
  1992. qenergy += V*V;
  1993. }
  1994. if (cond3) {
  1995. if (t4 >= CLIPPED_ESCAPE) {
  1996. di4 = t4 - CLIPPED_ESCAPE;
  1997. qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  1998. } else {
  1999. di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
  2000. qenergy += V*V;
  2001. }
  2002. } else {
  2003. di4 = t4 - (V = vec2[1]*IQ);
  2004. qenergy += V*V;
  2005. }
  2006. cost += di1 * di1 + di2 * di2
  2007. + di3 * di3 + di4 * di4;
  2008. }
  2009. if (bits)
  2010. *bits = curbits;
  2011. return cost * lambda + curbits;
  2012. }
/**
 * Codebook-cost dispatch table, indexed by band type (codebook number).
 * Pairs of entries share an implementation because signed/unsigned
 * codebooks of the same magnitude range have identical cost structure.
 * Entries 13..15 map to the ZERO cost (intensity/noise band types carry
 * no spectral data here).
 */
static float (*const get_band_cost_arr[])(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits, float *energy) = {
    get_band_cost_ZERO_mips,
    get_band_cost_SQUAD_mips,
    get_band_cost_SQUAD_mips,
    get_band_cost_UQUAD_mips,
    get_band_cost_UQUAD_mips,
    get_band_cost_SPAIR_mips,
    get_band_cost_SPAIR_mips,
    get_band_cost_UPAIR7_mips,
    get_band_cost_UPAIR7_mips,
    get_band_cost_UPAIR12_mips,
    get_band_cost_UPAIR12_mips,
    get_band_cost_ESC_mips,
    get_band_cost_NONE_mips, /* cb 12 doesn't exist */
    get_band_cost_ZERO_mips,
    get_band_cost_ZERO_mips,
    get_band_cost_ZERO_mips,
};
/* Dispatch the band-cost computation to the codebook-specific routine
 * selected by cb (see get_band_cost_arr above). */
#define get_band_cost(                                  \
                s, pb, in, scaled, size, scale_idx, cb, \
                lambda, uplim, bits, energy)            \
    get_band_cost_arr[cb](                              \
                s, pb, in, scaled, size, scale_idx, cb, \
                lambda, uplim, bits, energy)
  2041. static float quantize_band_cost(struct AACEncContext *s, const float *in,
  2042. const float *scaled, int size, int scale_idx,
  2043. int cb, const float lambda, const float uplim,
  2044. int *bits, float *energy, int rtz)
  2045. {
  2046. return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
  2047. }
  2048. #include "libavcodec/aacenc_quantization_misc.h"
  2049. #include "libavcodec/aaccoder_twoloop.h"
/**
 * Mid/side stereo decision for one channel-pair element.
 *
 * For every band present in both channels it builds the mid (M) and side
 * (S) signals, then compares the rate/distortion cost of L/R coding
 * against M/S coding, trying up to four extra scalefactor boosts for the
 * side channel.  Bands where M/S wins get ms_mask set and (when allowed)
 * their scalefactors/codebooks replaced by the mid/side choices.
 * Requires a common window; returns immediately otherwise.
 */
static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
{
    int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
    uint8_t nextband0[128], nextband1[128];
    float M[128], S[128];
    /* Scratch areas for the pow34-scaled L, R, M and S spectra. */
    float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
    const float lambda = s->lambda;
    const float mslambda = FFMIN(1.0f, lambda / 120.f);
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];
    if (!cpe->common_window)
        return;

    /** Scout out next nonzero bands */
    ff_init_nextband_map(sce0, nextband0);
    ff_init_nextband_map(sce1, nextband1);

    prev_mid = sce0->sf_idx[0];
    prev_side = sce1->sf_idx[0];
    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
        start = 0;
        for (g = 0; g < sce0->ics.num_swb; g++) {
            /* Frequency-dependent weight for the side-channel lambda. */
            float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
            cpe->ms_mask[w*16+g] = 0;
            if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g]) {
                float Mmax = 0.0f, Smax = 0.0f;

                /* Must compute mid/side SF and book for the whole window group */
                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                        /* M = (L+R)/2, S = M - R = (L-R)/2 */
                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
                        S[i] =  M[i]
                              - sce1->coeffs[start+(w+w2)*128+i];
                    }
                    abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
                    abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
                        Mmax = FFMAX(Mmax, M34[i]);
                        Smax = FFMAX(Smax, S34[i]);
                    }
                }

                /* Try progressively larger scalefactor boosts for the
                 * side channel (3 sf steps per boost level). */
                for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
                    float dist1 = 0.0f, dist2 = 0.0f;
                    int B0 = 0, B1 = 0;
                    int minidx;
                    int mididx, sididx;
                    int midcb, sidcb;

                    minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
                    mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
                    sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
                    if (!cpe->is_mask[w*16+g] && sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
                        && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
                            || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
                        continue;
                    }

                    midcb = find_min_book(Mmax, mididx);
                    sidcb = find_min_book(Smax, sididx);

                    /* No CB can be zero */
                    midcb = FFMAX(1,midcb);
                    sidcb = FFMAX(1,sidcb);

                    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
                        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
                        float minthr = FFMIN(band0->threshold, band1->threshold);
                        int b1,b2,b3,b4;
                        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                            M[i] = (sce0->coeffs[start+(w+w2)*128+i]
                                  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
                            S[i] =  M[i]
                                  - sce1->coeffs[start+(w+w2)*128+i];
                        }

                        abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                        abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                        abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
                        abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
                        /* dist1 = cost of coding L and R separately;
                         * dist2 = cost of coding M and S. */
                        dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
                                                    L34,
                                                    sce0->ics.swb_sizes[g],
                                                    sce0->sf_idx[(w+w2)*16+g],
                                                    sce0->band_type[(w+w2)*16+g],
                                                    lambda / band0->threshold, INFINITY, &b1, NULL, 0);
                        dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
                                                    R34,
                                                    sce1->ics.swb_sizes[g],
                                                    sce1->sf_idx[(w+w2)*16+g],
                                                    sce1->band_type[(w+w2)*16+g],
                                                    lambda / band1->threshold, INFINITY, &b2, NULL, 0);
                        dist2 += quantize_band_cost(s, M,
                                                    M34,
                                                    sce0->ics.swb_sizes[g],
                                                    sce0->sf_idx[(w+w2)*16+g],
                                                    sce0->band_type[(w+w2)*16+g],
                                                    lambda / minthr, INFINITY, &b3, NULL, 0);
                        dist2 += quantize_band_cost(s, S,
                                                    S34,
                                                    sce1->ics.swb_sizes[g],
                                                    sce1->sf_idx[(w+w2)*16+g],
                                                    sce1->band_type[(w+w2)*16+g],
                                                    mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
                        /* Remove the bit-count component from the
                         * distortion totals; bits are compared via B0/B1. */
                        B0 += b1+b2;
                        B1 += b3+b4;
                        dist1 -= B0;
                        dist2 -= B1;
                    }
                    cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
                    if (cpe->ms_mask[w*16+g]) {
                        /* Setting the M/S mask is useful with I/S or PNS, but only the flag */
                        if (!cpe->is_mask[w*16+g] && sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
                            sce0->sf_idx[w*16+g] = mididx;
                            sce1->sf_idx[w*16+g] = sididx;
                            sce0->band_type[w*16+g] = midcb;
                            sce1->band_type[w*16+g] = sidcb;
                        }
                        break;
                    } else if (B1 > B0) {
                        /* More boost won't fix this */
                        break;
                    }
                }
            }
            /* Track the last coded scalefactors for sf-delta validation. */
            if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
                prev_mid = sce0->sf_idx[w*16+g];
            if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
                prev_side = sce1->sf_idx[w*16+g];
            start += sce0->ics.swb_sizes[g];
        }
    }
}
  2177. #endif /*HAVE_MIPSFPU */
  2178. #include "libavcodec/aaccoder_trellis.h"
  2179. #endif /* HAVE_INLINE_ASM */
  2180. void ff_aac_coder_init_mips(AACEncContext *c) {
  2181. #if HAVE_INLINE_ASM
  2182. AACCoefficientsEncoder *e = c->coder;
  2183. int option = c->options.coder;
  2184. if (option == 2) {
  2185. e->quantize_and_encode_band = quantize_and_encode_band_mips;
  2186. e->encode_window_bands_info = codebook_trellis_rate;
  2187. #if HAVE_MIPSFPU
  2188. e->search_for_quantizers = search_for_quantizers_twoloop;
  2189. #endif /* HAVE_MIPSFPU */
  2190. }
  2191. #if HAVE_MIPSFPU
  2192. e->search_for_ms = search_for_ms_mips;
  2193. #endif /* HAVE_MIPSFPU */
  2194. #endif /* HAVE_INLINE_ASM */
  2195. }