You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2562 lines
104KB

  1. /*
  2. * Copyright (c) 2012
  3. * MIPS Technologies, Inc., California.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
  14. * contributors may be used to endorse or promote products derived from
  15. * this software without specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. *
  29. * Author: Stanislav Ocovaj (socovaj@mips.com)
  30. * Szabolcs Pal (sabolc@mips.com)
  31. *
  32. * AAC coefficients encoder optimized for MIPS floating-point architecture
  33. *
  34. * This file is part of FFmpeg.
  35. *
  36. * FFmpeg is free software; you can redistribute it and/or
  37. * modify it under the terms of the GNU Lesser General Public
  38. * License as published by the Free Software Foundation; either
  39. * version 2.1 of the License, or (at your option) any later version.
  40. *
  41. * FFmpeg is distributed in the hope that it will be useful,
  42. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  43. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  44. * Lesser General Public License for more details.
  45. *
  46. * You should have received a copy of the GNU Lesser General Public
  47. * License along with FFmpeg; if not, write to the Free Software
  48. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  49. */
  50. /**
  51. * @file
  52. * Reference: libavcodec/aaccoder.c
  53. */
  54. #include "libavutil/libm.h"
  55. #include <float.h>
  56. #include "libavutil/mathematics.h"
  57. #include "libavcodec/avcodec.h"
  58. #include "libavcodec/put_bits.h"
  59. #include "libavcodec/aac.h"
  60. #include "libavcodec/aacenc.h"
  61. #include "libavcodec/aactab.h"
  62. #include "libavcodec/aacenctab.h"
  63. #if HAVE_INLINE_ASM
/* One node of the per-band codebook search.
 * NOTE(review): not referenced in this chunk — presumably used by the
 * dynamic-programming codebook/run-length search later in the file; confirm. */
typedef struct BandCodingPath {
    int prev_idx;   /* index of the predecessor node in the path */
    float cost;     /* accumulated cost up to this node */
    int run;        /* length of the current run of bands sharing a codebook */
} BandCodingPath;
/*
 * Sign-bit count tables: for each packed codebook index they give the number
 * of extra sign bits that must be sent with the codeword (one bit per nonzero
 * coefficient in the vector — unsigned codebooks carry magnitudes only).
 * NOTE(review): not referenced in this chunk; presumably used by the cost
 * functions later in the file — confirm against the full source.
 */

/* 3^4 = 81 entries: unsigned quad codebook, magnitudes 0..2 per coefficient */
static const uint8_t uquad_sign_bits[81] = {
    0, 1, 1, 1, 2, 2, 1, 2, 2,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    1, 2, 2, 2, 3, 3, 2, 3, 3,
    2, 3, 3, 3, 4, 4, 3, 4, 4,
    2, 3, 3, 3, 4, 4, 3, 4, 4
};

/* 8^2 = 64 entries: unsigned pair codebook, magnitudes 0..7 per coefficient */
static const uint8_t upair7_sign_bits[64] = {
    0, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2,
};

/* 13^2 = 169 entries: unsigned pair codebook, magnitudes 0..12 per coefficient */
static const uint8_t upair12_sign_bits[169] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};

/* 17^2 = 289 entries: escape codebook pairs, clamped magnitudes 0..16 */
static const uint8_t esc_sign_bits[289] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/* Rounding constants added before truncating float->int during quantization:
 * ROUND_STANDARD is the usual AAC quantizer rounding; ROUND_TO_ZERO biases
 * the quantizer toward zero (passed in via the ROUNDING parameter). */
#define ROUND_STANDARD 0.4054f
#define ROUND_TO_ZERO 0.1054f
  126. static void abs_pow34_v(float *out, const float *in, const int size) {
  127. #ifndef USE_REALLY_FULL_SEARCH
  128. int i;
  129. float a, b, c, d;
  130. float ax, bx, cx, dx;
  131. for (i = 0; i < size; i += 4) {
  132. a = fabsf(in[i ]);
  133. b = fabsf(in[i+1]);
  134. c = fabsf(in[i+2]);
  135. d = fabsf(in[i+3]);
  136. ax = sqrtf(a);
  137. bx = sqrtf(b);
  138. cx = sqrtf(c);
  139. dx = sqrtf(d);
  140. a = a * ax;
  141. b = b * bx;
  142. c = c * cx;
  143. d = d * dx;
  144. out[i ] = sqrtf(a);
  145. out[i+1] = sqrtf(b);
  146. out[i+2] = sqrtf(c);
  147. out[i+3] = sqrtf(d);
  148. }
  149. #endif /* USE_REALLY_FULL_SEARCH */
  150. }
  151. static float find_max_val(int group_len, int swb_size, const float *scaled) {
  152. float maxval = 0.0f;
  153. int w2, i;
  154. for (w2 = 0; w2 < group_len; w2++) {
  155. for (i = 0; i < swb_size; i++) {
  156. maxval = FFMAX(maxval, scaled[w2*128+i]);
  157. }
  158. }
  159. return maxval;
  160. }
  161. static int find_min_book(float maxval, int sf) {
  162. float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
  163. float Q34 = sqrtf(Q * sqrtf(Q));
  164. int qmaxval, cb;
  165. qmaxval = maxval * Q34 + 0.4054f;
  166. if (qmaxval >= (FF_ARRAY_ELEMS(aac_maxval_cb)))
  167. cb = 11;
  168. else
  169. cb = aac_maxval_cb[qmaxval];
  170. return cb;
  171. }
/**
 * Functions derived from the template quantize-and-encode function,
 * each specialized for one codebook family.
 */
/**
 * Quantize and encode one band with a signed quad codebook: each codeword
 * covers four coefficients and already carries their signs, so quantized
 * values lie in {-1, 0, +1} and no separate sign bits are written.
 *
 * lambda, uplim, bits and ROUNDING are unused in this variant; they exist so
 * every quantize_and_encode_band_cost_*_mips() function shares one signature.
 * If out is non-NULL it receives the dequantized coefficients; if energy is
 * non-NULL it receives the total energy of the dequantized values.
 */
static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
        PutBitContext *pb, const float *in, float *out,
        const float *scaled, int size, int scale_idx,
        int cb, const float lambda, const float uplim,
        int *bits, float *energy, const float ROUNDING)
{
    /* quantizer gain (applied to |x|^(3/4)) and inverse-quantizer gain */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    float qenergy = 0.0f;   /* accumulated energy of the dequantized values */
    uint8_t  *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
    float    *p_vec   = (float *)ff_aac_codebook_vectors[cb-1];

    abs_pow34_v(s->scoefs, in, size);   /* scaled[k] = |in[k]|^(3/4) */
    scaled = s->scoefs;
    for (i = 0; i < size; i += 4) {
        int curidx;
        int *in_int = (int *)&in[i];    /* raw float bits; bit 31 is the sign */
        int t0, t1, t2, t3, t4, t5, t6, t7;
        const float *vec;
        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* Clamp each quantized magnitude to 1 (slt), then negate the values
         * whose input coefficient is negative: srl #31 extracts the float
         * sign bit, subu forms -qc, movn selects it when that bit is set. */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "slt %[qc1], $zero, %[qc1] \n\t"
            "slt %[qc2], $zero, %[qc2] \n\t"
            "slt %[qc3], $zero, %[qc3] \n\t"
            "slt %[qc4], $zero, %[qc4] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* Pack the four signed values (-1..1 each) into a base-3 index;
         * +40 recenters the all-zero vector at the middle of the 81-entry table. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curidx += 40;
        put_bits(pb, p_bits[curidx], p_codes[curidx]);
        if (out || energy) {
            float e1,e2,e3,e4;
            /* dequantize: codebook vector entry scaled by the inverse gain */
            vec = &p_vec[curidx*4];
            e1 = vec[0] * IQ;
            e2 = vec[1] * IQ;
            e3 = vec[2] * IQ;
            e4 = vec[3] * IQ;
            if (out) {
                out[i+0] = e1;
                out[i+1] = e2;
                out[i+2] = e3;
                out[i+3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }
    if (energy)
        *energy = qenergy;
}
/**
 * Quantize and encode one band with an unsigned quad codebook: codewords
 * carry four magnitudes clamped to [0,2]; one sign bit per nonzero
 * coefficient is appended after the codeword.
 *
 * lambda, uplim, bits and ROUNDING are unused in this variant; they exist so
 * every quantize_and_encode_band_cost_*_mips() function shares one signature.
 * If out is non-NULL it receives the dequantized coefficients; if energy is
 * non-NULL it receives the total energy of the dequantized values.
 */
static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
        PutBitContext *pb, const float *in, float *out,
        const float *scaled, int size, int scale_idx,
        int cb, const float lambda, const float uplim,
        int *bits, float *energy, const float ROUNDING)
{
    /* quantizer gain (applied to |x|^(3/4)) and inverse-quantizer gain */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    float qenergy = 0.0f;   /* accumulated energy of the dequantized values */
    uint8_t  *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
    float    *p_vec   = (float *)ff_aac_codebook_vectors[cb-1];

    abs_pow34_v(s->scoefs, in, size);   /* scaled[k] = |in[k]|^(3/4) */
    scaled = s->scoefs;
    for (i = 0; i < size; i += 4) {
        int curidx, sign, count;
        int *in_int = (int *)&in[i];    /* raw float bits; bit 31 is the sign */
        uint8_t v_bits;
        unsigned int v_codes;
        int t0, t1, t2, t3, t4;
        const float *vec;
        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* Clamp each magnitude to 2 (slt/movn against t4 == 2), then build:
         *   sign  — one bit per NONZERO coefficient (1 = input negative),
         *           shifted in coefficient order,
         *   count — the number of nonzero coefficients (= sign bits to send). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 2 \n\t"
            "ori %[sign], $zero, 0 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "slt %[t0], %[t0], $zero \n\t"
            "movn %[sign], %[t0], %[qc1] \n\t"
            "slt %[t1], %[t1], $zero \n\t"
            "slt %[t2], %[t2], $zero \n\t"
            "slt %[t3], %[t3], $zero \n\t"
            "sll %[t0], %[sign], 1 \n\t"
            "or %[t0], %[t0], %[t1] \n\t"
            "movn %[sign], %[t0], %[qc2] \n\t"
            "slt %[t4], $zero, %[qc1] \n\t"
            "slt %[t1], $zero, %[qc2] \n\t"
            "slt %[count], $zero, %[qc3] \n\t"
            "sll %[t0], %[sign], 1 \n\t"
            "or %[t0], %[t0], %[t2] \n\t"
            "movn %[sign], %[t0], %[qc3] \n\t"
            "slt %[t2], $zero, %[qc4] \n\t"
            "addu %[count], %[count], %[t4] \n\t"
            "addu %[count], %[count], %[t1] \n\t"
            "sll %[t0], %[sign], 1 \n\t"
            "or %[t0], %[t0], %[t3] \n\t"
            "movn %[sign], %[t0], %[qc4] \n\t"
            "addu %[count], %[count], %[t2] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign]"=&r"(sign), [count]"=&r"(count),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* Pack the four magnitudes (0..2 each) into a base-3 index
         * (unsigned table, so no recentering offset). */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        /* codeword followed by the collected sign bits (masked to count bits) */
        v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
        v_bits  = p_bits[curidx] + count;
        put_bits(pb, v_bits, v_codes);
        if (out || energy) {
            float e1,e2,e3,e4;
            /* dequantize and restore the original signs */
            vec = &p_vec[curidx*4];
            e1 = copysignf(vec[0] * IQ, in[i+0]);
            e2 = copysignf(vec[1] * IQ, in[i+1]);
            e3 = copysignf(vec[2] * IQ, in[i+2]);
            e4 = copysignf(vec[3] * IQ, in[i+3]);
            if (out) {
                out[i+0] = e1;
                out[i+1] = e2;
                out[i+2] = e3;
                out[i+3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }
    if (energy)
        *energy = qenergy;
}
/**
 * Quantize and encode one band with a signed pair codebook: codewords cover
 * two coefficients, values clamped to [-4,+4]; signs are part of the
 * codeword, so no separate sign bits are written. Two pair codewords
 * (covering four coefficients) are emitted per loop iteration.
 *
 * lambda, uplim, bits and ROUNDING are unused in this variant; they exist so
 * every quantize_and_encode_band_cost_*_mips() function shares one signature.
 * If out is non-NULL it receives the dequantized coefficients; if energy is
 * non-NULL it receives the total energy of the dequantized values.
 */
static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
        PutBitContext *pb, const float *in, float *out,
        const float *scaled, int size, int scale_idx,
        int cb, const float lambda, const float uplim,
        int *bits, float *energy, const float ROUNDING)
{
    /* quantizer gain (applied to |x|^(3/4)) and inverse-quantizer gain */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    float qenergy = 0.0f;   /* accumulated energy of the dequantized values */
    uint8_t  *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
    float    *p_vec   = (float *)ff_aac_codebook_vectors[cb-1];

    abs_pow34_v(s->scoefs, in, size);   /* scaled[k] = |in[k]|^(3/4) */
    scaled = s->scoefs;
    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int *in_int = (int *)&in[i];    /* raw float bits; bit 31 is the sign */
        uint8_t v_bits;
        unsigned int v_codes;
        int t0, t1, t2, t3, t4, t5, t6, t7;
        const float *vec1, *vec2;
        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* Clamp each magnitude to 4 (slt/movn against t4 == 4), then negate
         * values whose input coefficient is negative (sign from float bit 31). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 4 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* Pack each signed pair (-4..4 each) into a base-9 index;
         * +40 recenters the all-zero pair at the middle of the 81-entry table. */
        curidx = 9 * qc1;
        curidx += qc2 + 40;

        curidx2 = 9 * qc3;
        curidx2 += qc4 + 40;

        /* concatenate both pair codewords into a single put_bits() call */
        v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
        v_bits  = p_bits[curidx] + p_bits[curidx2];
        put_bits(pb, v_bits, v_codes);
        if (out || energy) {
            float e1,e2,e3,e4;
            /* dequantize: codebook pair entries scaled by the inverse gain */
            vec1 = &p_vec[curidx*2 ];
            vec2 = &p_vec[curidx2*2];
            e1 = vec1[0] * IQ;
            e2 = vec1[1] * IQ;
            e3 = vec2[0] * IQ;
            e4 = vec2[1] * IQ;
            if (out) {
                out[i+0] = e1;
                out[i+1] = e2;
                out[i+2] = e3;
                out[i+3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }
    if (energy)
        *energy = qenergy;
}
  455. static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
  456. PutBitContext *pb, const float *in, float *out,
  457. const float *scaled, int size, int scale_idx,
  458. int cb, const float lambda, const float uplim,
  459. int *bits, float *energy, const float ROUNDING)
  460. {
  461. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  462. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  463. int i;
  464. int qc1, qc2, qc3, qc4;
  465. float qenergy = 0.0f;
  466. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  467. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  468. float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  469. abs_pow34_v(s->scoefs, in, size);
  470. scaled = s->scoefs;
  471. for (i = 0; i < size; i += 4) {
  472. int curidx1, curidx2, sign1, count1, sign2, count2;
  473. int *in_int = (int *)&in[i];
  474. uint8_t v_bits;
  475. unsigned int v_codes;
  476. int t0, t1, t2, t3, t4;
  477. const float *vec1, *vec2;
  478. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  479. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  480. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  481. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  482. __asm__ volatile (
  483. ".set push \n\t"
  484. ".set noreorder \n\t"
  485. "ori %[t4], $zero, 7 \n\t"
  486. "ori %[sign1], $zero, 0 \n\t"
  487. "ori %[sign2], $zero, 0 \n\t"
  488. "slt %[t0], %[t4], %[qc1] \n\t"
  489. "slt %[t1], %[t4], %[qc2] \n\t"
  490. "slt %[t2], %[t4], %[qc3] \n\t"
  491. "slt %[t3], %[t4], %[qc4] \n\t"
  492. "movn %[qc1], %[t4], %[t0] \n\t"
  493. "movn %[qc2], %[t4], %[t1] \n\t"
  494. "movn %[qc3], %[t4], %[t2] \n\t"
  495. "movn %[qc4], %[t4], %[t3] \n\t"
  496. "lw %[t0], 0(%[in_int]) \n\t"
  497. "lw %[t1], 4(%[in_int]) \n\t"
  498. "lw %[t2], 8(%[in_int]) \n\t"
  499. "lw %[t3], 12(%[in_int]) \n\t"
  500. "slt %[t0], %[t0], $zero \n\t"
  501. "movn %[sign1], %[t0], %[qc1] \n\t"
  502. "slt %[t2], %[t2], $zero \n\t"
  503. "movn %[sign2], %[t2], %[qc3] \n\t"
  504. "slt %[t1], %[t1], $zero \n\t"
  505. "sll %[t0], %[sign1], 1 \n\t"
  506. "or %[t0], %[t0], %[t1] \n\t"
  507. "movn %[sign1], %[t0], %[qc2] \n\t"
  508. "slt %[t3], %[t3], $zero \n\t"
  509. "sll %[t0], %[sign2], 1 \n\t"
  510. "or %[t0], %[t0], %[t3] \n\t"
  511. "movn %[sign2], %[t0], %[qc4] \n\t"
  512. "slt %[count1], $zero, %[qc1] \n\t"
  513. "slt %[t1], $zero, %[qc2] \n\t"
  514. "slt %[count2], $zero, %[qc3] \n\t"
  515. "slt %[t2], $zero, %[qc4] \n\t"
  516. "addu %[count1], %[count1], %[t1] \n\t"
  517. "addu %[count2], %[count2], %[t2] \n\t"
  518. ".set pop \n\t"
  519. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  520. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  521. [sign1]"=&r"(sign1), [count1]"=&r"(count1),
  522. [sign2]"=&r"(sign2), [count2]"=&r"(count2),
  523. [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
  524. [t4]"=&r"(t4)
  525. : [in_int]"r"(in_int)
  526. : "t0", "t1", "t2", "t3", "t4",
  527. "memory"
  528. );
  529. curidx1 = 8 * qc1;
  530. curidx1 += qc2;
  531. v_codes = (p_codes[curidx1] << count1) | sign1;
  532. v_bits = p_bits[curidx1] + count1;
  533. put_bits(pb, v_bits, v_codes);
  534. curidx2 = 8 * qc3;
  535. curidx2 += qc4;
  536. v_codes = (p_codes[curidx2] << count2) | sign2;
  537. v_bits = p_bits[curidx2] + count2;
  538. put_bits(pb, v_bits, v_codes);
  539. if (out || energy) {
  540. float e1,e2,e3,e4;
  541. vec1 = &p_vec[curidx1*2];
  542. vec2 = &p_vec[curidx2*2];
  543. e1 = copysignf(vec1[0] * IQ, in[i+0]);
  544. e2 = copysignf(vec1[1] * IQ, in[i+1]);
  545. e3 = copysignf(vec2[0] * IQ, in[i+2]);
  546. e4 = copysignf(vec2[1] * IQ, in[i+3]);
  547. if (out) {
  548. out[i+0] = e1;
  549. out[i+1] = e2;
  550. out[i+2] = e3;
  551. out[i+3] = e4;
  552. }
  553. if (energy)
  554. qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  555. }
  556. }
  557. if (energy)
  558. *energy = qenergy;
  559. }
/**
 * Quantize and encode one band with an unsigned pair codebook, magnitudes
 * clamped to [0,12]: each codeword covers two coefficients and is followed
 * by one sign bit per nonzero coefficient. Two pair codewords (four
 * coefficients) are emitted per loop iteration.
 *
 * lambda, uplim, bits and ROUNDING are unused in this variant; they exist so
 * every quantize_and_encode_band_cost_*_mips() function shares one signature.
 * If out is non-NULL it receives the dequantized coefficients; if energy is
 * non-NULL it receives the total energy of the dequantized values.
 */
static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
        PutBitContext *pb, const float *in, float *out,
        const float *scaled, int size, int scale_idx,
        int cb, const float lambda, const float uplim,
        int *bits, float *energy, const float ROUNDING)
{
    /* quantizer gain (applied to |x|^(3/4)) and inverse-quantizer gain */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    float qenergy = 0.0f;   /* accumulated energy of the dequantized values */
    uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];
    uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
    float    *p_vec   = (float *)ff_aac_codebook_vectors[cb-1];

    abs_pow34_v(s->scoefs, in, size);   /* scaled[k] = |in[k]|^(3/4) */
    scaled = s->scoefs;
    for (i = 0; i < size; i += 4) {
        int curidx1, curidx2, sign1, count1, sign2, count2;
        int *in_int = (int *)&in[i];    /* raw float bits; bit 31 is the sign */
        uint8_t v_bits;
        unsigned int v_codes;
        int t0, t1, t2, t3, t4;
        const float *vec1, *vec2;
        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /* Clamp each magnitude to 12 (slt/movn against t4 == 12), then build
         * per-pair sign words (sign1 for qc1/qc2, sign2 for qc3/qc4 — one bit
         * per nonzero coefficient) and per-pair nonzero counts (count1/count2). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 12 \n\t"
            "ori %[sign1], $zero, 0 \n\t"
            "ori %[sign2], $zero, 0 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "slt %[t0], %[t0], $zero \n\t"
            "movn %[sign1], %[t0], %[qc1] \n\t"
            "slt %[t2], %[t2], $zero \n\t"
            "movn %[sign2], %[t2], %[qc3] \n\t"
            "slt %[t1], %[t1], $zero \n\t"
            "sll %[t0], %[sign1], 1 \n\t"
            "or %[t0], %[t0], %[t1] \n\t"
            "movn %[sign1], %[t0], %[qc2] \n\t"
            "slt %[t3], %[t3], $zero \n\t"
            "sll %[t0], %[sign2], 1 \n\t"
            "or %[t0], %[t0], %[t3] \n\t"
            "movn %[sign2], %[t0], %[qc4] \n\t"
            "slt %[count1], $zero, %[qc1] \n\t"
            "slt %[t1], $zero, %[qc2] \n\t"
            "slt %[count2], $zero, %[qc3] \n\t"
            "slt %[t2], $zero, %[qc4] \n\t"
            "addu %[count1], %[count1], %[t1] \n\t"
            "addu %[count2], %[count2], %[t2] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* first pair: 13*qc1 + qc2 indexes the 13x13 unsigned table */
        curidx1 = 13 * qc1;
        curidx1 += qc2;

        v_codes = (p_codes[curidx1] << count1) | sign1;
        v_bits  = p_bits[curidx1] + count1;
        put_bits(pb, v_bits, v_codes);

        /* second pair */
        curidx2 = 13 * qc3;
        curidx2 += qc4;

        v_codes = (p_codes[curidx2] << count2) | sign2;
        v_bits  = p_bits[curidx2] + count2;
        put_bits(pb, v_bits, v_codes);

        if (out || energy) {
            float e1,e2,e3,e4;
            /* dequantize and restore the original signs */
            vec1 = &p_vec[curidx1*2];
            vec2 = &p_vec[curidx2*2];
            e1 = copysignf(vec1[0] * IQ, in[i+0]);
            e2 = copysignf(vec1[1] * IQ, in[i+1]);
            e3 = copysignf(vec2[0] * IQ, in[i+2]);
            e4 = copysignf(vec2[1] * IQ, in[i+3]);
            if (out) {
                out[i+0] = e1;
                out[i+1] = e2;
                out[i+2] = e3;
                out[i+3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }
    if (energy)
        *energy = qenergy;
}
/**
 * Quantize and encode one band with the escape codebook: unsigned pair
 * coding with magnitudes clamped to [0,16], one sign bit appended per
 * nonzero coefficient. In the escape path (cb >= 11) a coefficient whose
 * codebook vector entry equals 64.0f (the escape marker) is additionally
 * followed by an escape sequence carrying its full magnitude.
 *
 * Unlike the other variants this one honours the ROUNDING parameter
 * (ROUND_STANDARD or ROUND_TO_ZERO); lambda, uplim and bits remain unused.
 * If out is non-NULL it receives the dequantized coefficients; if energy is
 * non-NULL it receives the total energy of the dequantized values.
 */
static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
        PutBitContext *pb, const float *in, float *out,
        const float *scaled, int size, int scale_idx,
        int cb, const float lambda, const float uplim,
        int *bits, float *energy, const float ROUNDING)
{
    /* quantizer gain (applied to |x|^(3/4)) and inverse-quantizer gain */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    float qenergy = 0.0f;   /* accumulated energy of the dequantized values */
    uint8_t  *p_bits    = (uint8_t* )ff_aac_spectral_bits[cb-1];
    uint16_t *p_codes   = (uint16_t*)ff_aac_spectral_codes[cb-1];
    float    *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];

    abs_pow34_v(s->scoefs, in, size);   /* scaled[k] = |in[k]|^(3/4) */
    scaled = s->scoefs;
    if (cb < 11) {
        /* No escape sequences: plain unsigned pair coding, magnitudes <= 16. */
        for (i = 0; i < size; i += 4) {
            int curidx, curidx2, sign1, count1, sign2, count2;
            int *in_int = (int *)&in[i];    /* raw float bits; bit 31 is the sign */
            uint8_t v_bits;
            unsigned int v_codes;
            int t0, t1, t2, t3, t4;
            const float *vec1, *vec2;
            qc1 = scaled[i  ] * Q34 + ROUNDING;
            qc2 = scaled[i+1] * Q34 + ROUNDING;
            qc3 = scaled[i+2] * Q34 + ROUNDING;
            qc4 = scaled[i+3] * Q34 + ROUNDING;
            /* Clamp each magnitude to 16 (slt/movn against t4 == 16), then
             * build per-pair sign words (one bit per nonzero coefficient)
             * and per-pair nonzero counts. */
            __asm__ volatile (
                ".set push \n\t"
                ".set noreorder \n\t"
                "ori %[t4], $zero, 16 \n\t"
                "ori %[sign1], $zero, 0 \n\t"
                "ori %[sign2], $zero, 0 \n\t"
                "slt %[t0], %[t4], %[qc1] \n\t"
                "slt %[t1], %[t4], %[qc2] \n\t"
                "slt %[t2], %[t4], %[qc3] \n\t"
                "slt %[t3], %[t4], %[qc4] \n\t"
                "movn %[qc1], %[t4], %[t0] \n\t"
                "movn %[qc2], %[t4], %[t1] \n\t"
                "movn %[qc3], %[t4], %[t2] \n\t"
                "movn %[qc4], %[t4], %[t3] \n\t"
                "lw %[t0], 0(%[in_int]) \n\t"
                "lw %[t1], 4(%[in_int]) \n\t"
                "lw %[t2], 8(%[in_int]) \n\t"
                "lw %[t3], 12(%[in_int]) \n\t"
                "slt %[t0], %[t0], $zero \n\t"
                "movn %[sign1], %[t0], %[qc1] \n\t"
                "slt %[t2], %[t2], $zero \n\t"
                "movn %[sign2], %[t2], %[qc3] \n\t"
                "slt %[t1], %[t1], $zero \n\t"
                "sll %[t0], %[sign1], 1 \n\t"
                "or %[t0], %[t0], %[t1] \n\t"
                "movn %[sign1], %[t0], %[qc2] \n\t"
                "slt %[t3], %[t3], $zero \n\t"
                "sll %[t0], %[sign2], 1 \n\t"
                "or %[t0], %[t0], %[t3] \n\t"
                "movn %[sign2], %[t0], %[qc4] \n\t"
                "slt %[count1], $zero, %[qc1] \n\t"
                "slt %[t1], $zero, %[qc2] \n\t"
                "slt %[count2], $zero, %[qc3] \n\t"
                "slt %[t2], $zero, %[qc4] \n\t"
                "addu %[count1], %[count1], %[t1] \n\t"
                "addu %[count2], %[count2], %[t2] \n\t"
                ".set pop \n\t"

                : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
                  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
                  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
                  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
                  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
                  [t4]"=&r"(t4)
                : [in_int]"r"(in_int)
                : "memory"
            );
            /* 17*q1 + q2 indexes the 17x17 escape pair table */
            curidx = 17 * qc1;
            curidx += qc2;

            curidx2 = 17 * qc3;
            curidx2 += qc4;

            v_codes = (p_codes[curidx] << count1) | sign1;
            v_bits  = p_bits[curidx] + count1;
            put_bits(pb, v_bits, v_codes);

            v_codes = (p_codes[curidx2] << count2) | sign2;
            v_bits  = p_bits[curidx2] + count2;
            put_bits(pb, v_bits, v_codes);

            if (out || energy) {
                float e1,e2,e3,e4;
                /* dequantize and restore the original signs */
                vec1 = &p_vectors[curidx*2 ];
                vec2 = &p_vectors[curidx2*2];
                e1 = copysignf(vec1[0] * IQ, in[i+0]);
                e2 = copysignf(vec1[1] * IQ, in[i+1]);
                e3 = copysignf(vec2[0] * IQ, in[i+2]);
                e4 = copysignf(vec2[1] * IQ, in[i+3]);
                if (out) {
                    out[i+0] = e1;
                    out[i+1] = e2;
                    out[i+2] = e3;
                    out[i+3] = e4;
                }
                if (energy)
                    qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
            }
        }
    } else {
        /* Escape codebook proper: magnitudes >= 16 are coded as the clamped
         * value 16 plus an escape sequence with the true magnitude. */
        for (i = 0; i < size; i += 4) {
            int curidx, curidx2, sign1, count1, sign2, count2;
            int *in_int = (int *)&in[i];    /* raw float bits; bit 31 is the sign */
            uint8_t v_bits;
            unsigned int v_codes;
            int c1, c2, c3, c4;             /* unclamped (saturated) magnitudes for escapes */
            int t0, t1, t2, t3, t4;
            const float *vec1, *vec2;
            qc1 = scaled[i  ] * Q34 + ROUNDING;
            qc2 = scaled[i+1] * Q34 + ROUNDING;
            qc3 = scaled[i+2] * Q34 + ROUNDING;
            qc4 = scaled[i+3] * Q34 + ROUNDING;
            /* Same clamping/sign/count computation as above; additionally
             * c1..c4 = min(qc, 8191): shll_s.w (MIPS DSP ASE saturating left
             * shift by 18) followed by srl 18 saturates the magnitude kept
             * for the escape sequence. */
            __asm__ volatile (
                ".set push \n\t"
                ".set noreorder \n\t"
                "ori %[t4], $zero, 16 \n\t"
                "ori %[sign1], $zero, 0 \n\t"
                "ori %[sign2], $zero, 0 \n\t"
                "shll_s.w %[c1], %[qc1], 18 \n\t"
                "shll_s.w %[c2], %[qc2], 18 \n\t"
                "shll_s.w %[c3], %[qc3], 18 \n\t"
                "shll_s.w %[c4], %[qc4], 18 \n\t"
                "srl %[c1], %[c1], 18 \n\t"
                "srl %[c2], %[c2], 18 \n\t"
                "srl %[c3], %[c3], 18 \n\t"
                "srl %[c4], %[c4], 18 \n\t"
                "slt %[t0], %[t4], %[qc1] \n\t"
                "slt %[t1], %[t4], %[qc2] \n\t"
                "slt %[t2], %[t4], %[qc3] \n\t"
                "slt %[t3], %[t4], %[qc4] \n\t"
                "movn %[qc1], %[t4], %[t0] \n\t"
                "movn %[qc2], %[t4], %[t1] \n\t"
                "movn %[qc3], %[t4], %[t2] \n\t"
                "movn %[qc4], %[t4], %[t3] \n\t"
                "lw %[t0], 0(%[in_int]) \n\t"
                "lw %[t1], 4(%[in_int]) \n\t"
                "lw %[t2], 8(%[in_int]) \n\t"
                "lw %[t3], 12(%[in_int]) \n\t"
                "slt %[t0], %[t0], $zero \n\t"
                "movn %[sign1], %[t0], %[qc1] \n\t"
                "slt %[t2], %[t2], $zero \n\t"
                "movn %[sign2], %[t2], %[qc3] \n\t"
                "slt %[t1], %[t1], $zero \n\t"
                "sll %[t0], %[sign1], 1 \n\t"
                "or %[t0], %[t0], %[t1] \n\t"
                "movn %[sign1], %[t0], %[qc2] \n\t"
                "slt %[t3], %[t3], $zero \n\t"
                "sll %[t0], %[sign2], 1 \n\t"
                "or %[t0], %[t0], %[t3] \n\t"
                "movn %[sign2], %[t0], %[qc4] \n\t"
                "slt %[count1], $zero, %[qc1] \n\t"
                "slt %[t1], $zero, %[qc2] \n\t"
                "slt %[count2], $zero, %[qc3] \n\t"
                "slt %[t2], $zero, %[qc4] \n\t"
                "addu %[count1], %[count1], %[t1] \n\t"
                "addu %[count2], %[count2], %[t2] \n\t"
                ".set pop \n\t"

                : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
                  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
                  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
                  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
                  [c1]"=&r"(c1), [c2]"=&r"(c2),
                  [c3]"=&r"(c3), [c4]"=&r"(c4),
                  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
                  [t4]"=&r"(t4)
                : [in_int]"r"(in_int)
                : "memory"
            );
            /* 17*q1 + q2 indexes the 17x17 escape pair table */
            curidx = 17 * qc1;
            curidx += qc2;

            curidx2 = 17 * qc3;
            curidx2 += qc4;

            v_codes = (p_codes[curidx] << count1) | sign1;
            v_bits  = p_bits[curidx] + count1;
            put_bits(pb, v_bits, v_codes);

            /* A vector entry of 64.0f marks an escaped coefficient: append
             * the escape sequence — a length prefix followed by the
             * len = av_log2(c) low bits of the magnitude. */
            if (p_vectors[curidx*2 ] == 64.0f) {
                int len = av_log2(c1);
                v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
                put_bits(pb, len * 2 - 3, v_codes);
            }
            if (p_vectors[curidx*2+1] == 64.0f) {
                int len = av_log2(c2);
                v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
                put_bits(pb, len*2-3, v_codes);
            }

            v_codes = (p_codes[curidx2] << count2) | sign2;
            v_bits  = p_bits[curidx2] + count2;
            put_bits(pb, v_bits, v_codes);

            if (p_vectors[curidx2*2 ] == 64.0f) {
                int len = av_log2(c3);
                v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
                put_bits(pb, len* 2 - 3, v_codes);
            }
            if (p_vectors[curidx2*2+1] == 64.0f) {
                int len = av_log2(c4);
                v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
                put_bits(pb, len * 2 - 3, v_codes);
            }

            if (out || energy) {
                float e1, e2, e3, e4;
                /* NOTE(review): vec1/vec2 are assigned but unused in this
                 * branch — dequantization uses c^(4/3) = c * cbrtf(c) directly. */
                vec1 = &p_vectors[curidx*2];
                vec2 = &p_vectors[curidx2*2];
                e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
                e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
                e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
                e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
                if (out) {
                    out[i+0] = e1;
                    out[i+1] = e2;
                    out[i+2] = e3;
                    out[i+3] = e4;
                }
                if (energy)
                    qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
            }
        }
    }
    if (energy)
        *energy = qenergy;
}
/**
 * Placeholder for codebook 12, which does not exist in AAC.
 * This dispatch-table slot must never be reached; abort if it is.
 */
static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in, float *out,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits, float *energy, const float ROUNDING) {
    av_assert0(0);
}
  894. static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
  895. PutBitContext *pb, const float *in, float *out,
  896. const float *scaled, int size, int scale_idx,
  897. int cb, const float lambda, const float uplim,
  898. int *bits, float *energy, const float ROUNDING) {
  899. int i;
  900. if (bits)
  901. *bits = 0;
  902. if (out) {
  903. for (i = 0; i < size; i += 4) {
  904. out[i ] = 0.0f;
  905. out[i+1] = 0.0f;
  906. out[i+2] = 0.0f;
  907. out[i+3] = 0.0f;
  908. }
  909. }
  910. if (energy)
  911. *energy = 0.0f;
  912. }
/*
 * Dispatch table indexed by codebook number (0..15): one quantize+encode
 * implementation per AAC codebook class. Indices 13..15 map to the zero
 * encoder; index 12 is unused in the AAC spec.
 */
static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in, float *out,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits, float *energy, const float ROUNDING) = {
    quantize_and_encode_band_cost_ZERO_mips,
    quantize_and_encode_band_cost_SQUAD_mips,
    quantize_and_encode_band_cost_SQUAD_mips,
    quantize_and_encode_band_cost_UQUAD_mips,
    quantize_and_encode_band_cost_UQUAD_mips,
    quantize_and_encode_band_cost_SPAIR_mips,
    quantize_and_encode_band_cost_SPAIR_mips,
    quantize_and_encode_band_cost_UPAIR7_mips,
    quantize_and_encode_band_cost_UPAIR7_mips,
    quantize_and_encode_band_cost_UPAIR12_mips,
    quantize_and_encode_band_cost_UPAIR12_mips,
    quantize_and_encode_band_cost_ESC_mips,
    quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
    quantize_and_encode_band_cost_ZERO_mips,
    quantize_and_encode_band_cost_ZERO_mips,
    quantize_and_encode_band_cost_ZERO_mips,
};
/* Dispatch on the codebook number through the function-pointer table above. */
#define quantize_and_encode_band_cost( \
                                s, pb, in, out, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits, energy, ROUNDING) \
    quantize_and_encode_band_cost_arr[cb]( \
                                s, pb, in, out, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits, energy, ROUNDING)
  941. static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
  942. const float *in, float *out, int size, int scale_idx,
  943. int cb, const float lambda, int rtz)
  944. {
  945. quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
  946. INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
  947. }
  948. /**
  949. * Functions developed from template function and optimized for getting the number of bits
  950. */
/** Zero codebook: a zeroed band consumes no spectral bits. */
static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits)
{
    return 0;
}
/** Placeholder for nonexistent codebook 12; must never be called. */
static float get_band_numbits_NONE_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits)
{
    av_assert0(0);
    return 0;
}
/**
 * Bit count for the signed quad codebooks (cb 1/2): groups of four
 * coefficients quantized to {-1, 0, +1} and looked up as one index.
 */
static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    /* Quantization scale for this scalefactor index. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx;
        /* Raw float bits of in[i..i+3], used below to read the sign bits. */
        int *in_int = (int *)&in[i];
        int t0, t1, t2, t3, t4, t5, t6, t7;

        /* Quantize magnitudes (scaled[] holds |in|^(3/4) per the template). */
        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * Branch-free clamp and sign application:
         *   slt  qcN, $zero, qcN  -> qcN = (qcN > 0), i.e. clamp to 0/1;
         *   srl  tN, tN, 31       -> sign bit of the input float;
         *   movn qcN, -qcN, tN    -> negate qcN when the input is negative.
         */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "slt %[qc1], $zero, %[qc1] \n\t"
            "slt %[qc2], $zero, %[qc2] \n\t"
            "slt %[qc3], $zero, %[qc3] \n\t"
            "slt %[qc4], $zero, %[qc4] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-3 packing of four signed digits; +40 recenters -40..40 to 0..80. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curidx += 40;

        curbits += p_bits[curidx];
    }
    return curbits;
}
/**
 * Bit count for the unsigned quad codebooks (cb 3/4): four magnitudes
 * clamped to [0, 2]; signs are accounted for via a per-index table.
 */
static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    /* Quantization scale for this scalefactor index. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int curbits = 0;
    int qc1, qc2, qc3, qc4;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx;
        int t0, t1, t2, t3, t4;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /* Branch-free min(qcN, 2): slt tests qcN > 2, movn replaces with 2. */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 2 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* Base-3 packing of four unsigned digits in [0, 2]. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;

        /* Codeword bits plus one sign bit per nonzero coefficient (table). */
        curbits += p_bits[curidx];
        curbits += uquad_sign_bits[curidx];
    }
    return curbits;
}
/**
 * Bit count for the signed pair codebooks (cb 5/6): coefficients quantized
 * to [-4, 4] and encoded two at a time (two pairs per loop iteration).
 */
static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    /* Quantization scale for this scalefactor index. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        /* Raw float bits of in[i..i+3], used to read the sign bits. */
        int *in_int = (int *)&in[i];
        int t0, t1, t2, t3, t4, t5, t6, t7;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * Branch-free min(qcN, 4), then apply the sign of the input float:
         * srl 31 extracts the sign bit, subu forms -qcN, movn selects it
         * when the input is negative.
         */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 4 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Pair index: 9 per row of signed digits; +40 recenters to >= 0. */
        curidx = 9 * qc1;
        curidx += qc2 + 40;

        curidx2 = 9 * qc3;
        curidx2 += qc4 + 40;

        curbits += p_bits[curidx] + p_bits[curidx2];
    }
    return curbits;
}
/**
 * Bit count for the unsigned pair codebooks with max value 7 (cb 7/8):
 * magnitudes clamped to [0, 7]; sign bits come from a per-index table.
 */
static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits)
{
    /* Quantization scale for this scalefactor index. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int t0, t1, t2, t3, t4;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /* Branch-free min(qcN, 7). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 7 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* Pair indices: 8 columns per row of values in [0, 7]. */
        curidx = 8 * qc1;
        curidx += qc2;

        curidx2 = 8 * qc3;
        curidx2 += qc4;

        /* Codeword bits plus one sign bit per nonzero value (table). */
        curbits += p_bits[curidx] +
                   upair7_sign_bits[curidx] +
                   p_bits[curidx2] +
                   upair7_sign_bits[curidx2];
    }
    return curbits;
}
/**
 * Bit count for the unsigned pair codebooks with max value 12 (cb 9/10):
 * magnitudes clamped to [0, 12]; sign bits come from a per-index table.
 */
static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
                                           PutBitContext *pb, const float *in,
                                           const float *scaled, int size, int scale_idx,
                                           int cb, const float lambda, const float uplim,
                                           int *bits)
{
    /* Quantization scale for this scalefactor index. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int t0, t1, t2, t3, t4;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /* Branch-free min(qcN, 12). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 12 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* Pair indices: 13 columns per row of values in [0, 12]. */
        curidx = 13 * qc1;
        curidx += qc2;

        curidx2 = 13 * qc3;
        curidx2 += qc4;

        /* Codeword bits plus one sign bit per nonzero value (table). */
        curbits += p_bits[curidx] +
                   p_bits[curidx2] +
                   upair12_sign_bits[curidx] +
                   upair12_sign_bits[curidx2];
    }
    return curbits;
}
/**
 * Bit count for the escape codebook (cb 11): magnitudes above 15 are coded
 * with the escape value 16 plus an explicit escape sequence whose length
 * depends on log2 of the (clamped) magnitude.
 */
static float get_band_numbits_ESC_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
                                       int *bits)
{
    /* Quantization scale for this scalefactor index. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int cond0, cond1, cond2, cond3;
        int c1, c2, c3, c4;
        int t4, t5;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * Per coefficient, branch-free:
         *  - shll_s.w/srl: saturating shift clamps the escape magnitude cN
         *    to 13 bits (min(qcN, 8191));
         *  - qcN > 15 (condN) selects the escape symbol 16 for the codeword;
         *  - clz/subu: cN := 31 - clz(cN) = floor(log2(cN)), then
         *    cN := 2*log2(cN) - 3 = escape-sequence length in bits;
         *  - the cond mask (0 or ~0) zeroes the length when no escape occurs.
         */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 15 \n\t"
            "ori %[t5], $zero, 16 \n\t"
            "shll_s.w %[c1], %[qc1], 18 \n\t"
            "shll_s.w %[c2], %[qc2], 18 \n\t"
            "shll_s.w %[c3], %[qc3], 18 \n\t"
            "shll_s.w %[c4], %[qc4], 18 \n\t"
            "srl %[c1], %[c1], 18 \n\t"
            "srl %[c2], %[c2], 18 \n\t"
            "srl %[c3], %[c3], 18 \n\t"
            "srl %[c4], %[c4], 18 \n\t"
            "slt %[cond0], %[t4], %[qc1] \n\t"
            "slt %[cond1], %[t4], %[qc2] \n\t"
            "slt %[cond2], %[t4], %[qc3] \n\t"
            "slt %[cond3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t5], %[cond0] \n\t"
            "movn %[qc2], %[t5], %[cond1] \n\t"
            "movn %[qc3], %[t5], %[cond2] \n\t"
            "movn %[qc4], %[t5], %[cond3] \n\t"
            "ori %[t5], $zero, 31 \n\t"
            "clz %[c1], %[c1] \n\t"
            "clz %[c2], %[c2] \n\t"
            "clz %[c3], %[c3] \n\t"
            "clz %[c4], %[c4] \n\t"
            "subu %[c1], %[t5], %[c1] \n\t"
            "subu %[c2], %[t5], %[c2] \n\t"
            "subu %[c3], %[t5], %[c3] \n\t"
            "subu %[c4], %[t5], %[c4] \n\t"
            "sll %[c1], %[c1], 1 \n\t"
            "sll %[c2], %[c2], 1 \n\t"
            "sll %[c3], %[c3], 1 \n\t"
            "sll %[c4], %[c4], 1 \n\t"
            "addiu %[c1], %[c1], -3 \n\t"
            "addiu %[c2], %[c2], -3 \n\t"
            "addiu %[c3], %[c3], -3 \n\t"
            "addiu %[c4], %[c4], -3 \n\t"
            "subu %[cond0], $zero, %[cond0] \n\t"
            "subu %[cond1], $zero, %[cond1] \n\t"
            "subu %[cond2], $zero, %[cond2] \n\t"
            "subu %[cond3], $zero, %[cond3] \n\t"
            "and %[c1], %[c1], %[cond0] \n\t"
            "and %[c2], %[c2], %[cond1] \n\t"
            "and %[c3], %[c3], %[cond2] \n\t"
            "and %[c4], %[c4], %[cond3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
              [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
              [c1]"=&r"(c1), [c2]"=&r"(c2),
              [c3]"=&r"(c3), [c4]"=&r"(c4),
              [t4]"=&r"(t4), [t5]"=&r"(t5)
        );

        /* Pair indices: 17 columns per row of values in [0, 16]. */
        curidx = 17 * qc1;
        curidx += qc2;

        curidx2 = 17 * qc3;
        curidx2 += qc4;

        /* Codeword + sign bits + escape-sequence lengths (0 if no escape). */
        curbits += p_bits[curidx];
        curbits += esc_sign_bits[curidx];
        curbits += p_bits[curidx2];
        curbits += esc_sign_bits[curidx2];

        curbits += c1;
        curbits += c2;
        curbits += c3;
        curbits += c4;
    }
    return curbits;
}
/*
 * Dispatch table indexed by codebook number (0..15): one bit-counting
 * implementation per AAC codebook class. Indices 13..15 map to the zero
 * counter; index 12 is unused in the AAC spec.
 */
static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
                                             PutBitContext *pb, const float *in,
                                             const float *scaled, int size, int scale_idx,
                                             int cb, const float lambda, const float uplim,
                                             int *bits) = {
    get_band_numbits_ZERO_mips,
    get_band_numbits_SQUAD_mips,
    get_band_numbits_SQUAD_mips,
    get_band_numbits_UQUAD_mips,
    get_band_numbits_UQUAD_mips,
    get_band_numbits_SPAIR_mips,
    get_band_numbits_SPAIR_mips,
    get_band_numbits_UPAIR7_mips,
    get_band_numbits_UPAIR7_mips,
    get_band_numbits_UPAIR12_mips,
    get_band_numbits_UPAIR12_mips,
    get_band_numbits_ESC_mips,
    get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
    get_band_numbits_ZERO_mips,
    get_band_numbits_ZERO_mips,
    get_band_numbits_ZERO_mips,
};
/* Dispatch on the codebook number through the function-pointer table above. */
#define get_band_numbits( \
                                s, pb, in, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits) \
    get_band_numbits_arr[cb]( \
                                s, pb, in, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits)
  1353. static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
  1354. const float *scaled, int size, int scale_idx,
  1355. int cb, const float lambda, const float uplim,
  1356. int *bits, float *energy, int rtz)
  1357. {
  1358. return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
  1359. }
  1360. /**
  1361. * Functions developed from template function and optimized for getting the band cost
  1362. */
  1363. #if HAVE_MIPSFPU
  1364. static float get_band_cost_ZERO_mips(struct AACEncContext *s,
  1365. PutBitContext *pb, const float *in,
  1366. const float *scaled, int size, int scale_idx,
  1367. int cb, const float lambda, const float uplim,
  1368. int *bits, float *energy)
  1369. {
  1370. int i;
  1371. float cost = 0;
  1372. for (i = 0; i < size; i += 4) {
  1373. cost += in[i ] * in[i ];
  1374. cost += in[i+1] * in[i+1];
  1375. cost += in[i+2] * in[i+2];
  1376. cost += in[i+3] * in[i+3];
  1377. }
  1378. if (bits)
  1379. *bits = 0;
  1380. if (energy)
  1381. *energy = 0.0f;
  1382. return cost * lambda;
  1383. }
/** Placeholder for nonexistent codebook 12; must never be called. */
static float get_band_cost_NONE_mips(struct AACEncContext *s,
                                     PutBitContext *pb, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits, float *energy)
{
    av_assert0(0);
    return 0;
}
/**
 * Rate-distortion cost for the signed quad codebooks (cb 1/2):
 * quantizes four coefficients to {-1, 0, +1}, accumulates the codeword
 * bits and the squared reconstruction error against the codebook vector.
 */
static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    /* Q34: quantization scale; IQ: inverse (dequantization) scale. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec;
        int curidx;
        /* Raw float bits of in[i..i+3], used to read the sign bits. */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4, t5, t6, t7;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * Branch-free clamp to 0/1 (slt against zero) and sign application
         * from the input float's sign bit (srl 31 + subu + movn).
         */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "slt %[qc1], $zero, %[qc1] \n\t"
            "slt %[qc2], $zero, %[qc2] \n\t"
            "slt %[qc3], $zero, %[qc3] \n\t"
            "slt %[qc4], $zero, %[qc4] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-3 packing of four signed digits; +40 recenters to 0..80. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curidx += 40;

        curbits += p_bits[curidx];
        vec = &p_codes[curidx*4];

        /* Energy of the selected 4-element codebook vector. */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec[2]*vec[2] + vec[3]*vec[3];

        /* diN = in[i+N] - vec[N]*IQ (nmsub.s computes fr - fs*ft). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 $f0, 0(%[in_pos]) \n\t"
            "lwc1 $f1, 0(%[vec]) \n\t"
            "lwc1 $f2, 4(%[in_pos]) \n\t"
            "lwc1 $f3, 4(%[vec]) \n\t"
            "lwc1 $f4, 8(%[in_pos]) \n\t"
            "lwc1 $f5, 8(%[vec]) \n\t"
            "lwc1 $f6, 12(%[in_pos]) \n\t"
            "lwc1 $f7, 12(%[vec]) \n\t"
            "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
            "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
            "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
            "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
            ".set pop \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "$f4", "$f5", "$f6", "$f7",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
              + di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
/**
 * Rate-distortion cost for the unsigned quad codebooks (cb 3/4):
 * magnitudes clamped to [0, 2]; the error is computed against |in| since
 * the codebook is unsigned (abs.s before the multiply-subtract).
 */
static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    /* Q34: quantization scale; IQ: inverse (dequantization) scale. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int curbits = 0;
    int qc1, qc2, qc3, qc4;
    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec;
        int curidx;
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /* Branch-free min(qcN, 2). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 2 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* Base-3 packing of four unsigned digits in [0, 2]. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;

        /* Codeword bits plus one sign bit per nonzero value (table). */
        curbits += p_bits[curidx];
        curbits += uquad_sign_bits[curidx];
        vec = &p_codes[curidx*4];

        /* Energy of the selected 4-element codebook vector. */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec[2]*vec[2] + vec[3]*vec[3];

        /* diN = |in[i+N]| - vec[N]*IQ (abs.s then nmsub.s = fr - fs*ft). */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 %[di0], 0(%[in_pos]) \n\t"
            "lwc1 %[di1], 4(%[in_pos]) \n\t"
            "lwc1 %[di2], 8(%[in_pos]) \n\t"
            "lwc1 %[di3], 12(%[in_pos]) \n\t"
            "abs.s %[di0], %[di0] \n\t"
            "abs.s %[di1], %[di1] \n\t"
            "abs.s %[di2], %[di2] \n\t"
            "abs.s %[di3], %[di3] \n\t"
            "lwc1 $f0, 0(%[vec]) \n\t"
            "lwc1 $f1, 4(%[vec]) \n\t"
            "lwc1 $f2, 8(%[vec]) \n\t"
            "lwc1 $f3, 12(%[vec]) \n\t"
            "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
            "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
            "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
            "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
            ".set pop \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
              + di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
/**
 * Rate-distortion cost for the signed pair codebooks (cb 5/6):
 * coefficients quantized to [-4, 4], two pairs per loop iteration; each
 * pair indexes a 2-element codebook vector.
 */
static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    /* Q34: quantization scale; IQ: inverse (dequantization) scale. */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        int curidx, curidx2;
        /* Raw float bits of in[i..i+3], used to read the sign bits. */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4, t5, t6, t7;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * Branch-free min(qcN, 4), then apply the input float's sign
         * (srl 31 extracts it; subu forms -qcN; movn selects it).
         */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 4 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"
            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Pair indices: 9 columns per row; +40 recenters signed digits. */
        curidx = 9 * qc1;
        curidx += qc2 + 40;

        curidx2 = 9 * qc3;
        curidx2 += qc4 + 40;

        curbits += p_bits[curidx];
        curbits += p_bits[curidx2];

        vec = &p_codes[curidx*2];
        vec2 = &p_codes[curidx2*2];

        /* Energy of the two selected 2-element codebook vectors. */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];

        /* diN = in[i+N] - code*IQ; first pair uses vec, second pair vec2. */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 $f0, 0(%[in_pos]) \n\t"
            "lwc1 $f1, 0(%[vec]) \n\t"
            "lwc1 $f2, 4(%[in_pos]) \n\t"
            "lwc1 $f3, 4(%[vec]) \n\t"
            "lwc1 $f4, 8(%[in_pos]) \n\t"
            "lwc1 $f5, 0(%[vec2]) \n\t"
            "lwc1 $f6, 12(%[in_pos]) \n\t"
            "lwc1 $f7, 4(%[vec2]) \n\t"
            "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
            "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
            "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
            "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
            ".set pop \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "$f4", "$f5", "$f6", "$f7",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
              + di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
/**
 * Rate-distortion cost of a band coded with the unsigned-pair codebooks
 * whose magnitudes are limited to 7 (sign bits coded separately).
 * Processes four spectral lines (two codebook pairs) per iteration.
 *
 * @param in      spectral coefficients (signed)
 * @param scaled  precomputed |in|^0.75 values used for quantization
 * @param bits    if non-NULL, receives the exact bit count
 * @param energy  if non-NULL, receives the energy of the quantized band
 * @return        cost = distortion * lambda + bits
 */
static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
                                       int *bits, float *energy)
{
    /* forward quantizer step (Q34) and its inverse (IQ) for this scalefactor */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        int curidx, curidx2, sign1, count1, sign2, count2;
        /* integer view of in[] so the asm can test IEEE-754 sign bits with lw;
         * NOTE(review): type-punning through (int *) relies on -fno-strict-aliasing
         * (FFmpeg builds with it) */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;
        /* quantize magnitudes: round(|coef|^0.75 * Q34) */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /*
         * Branchless clamp of qc1..qc4 to the codebook maximum 7 (slt + movn),
         * then pack per-pair sign masks from the loaded float sign bits and
         * count the nonzero values of each pair.  sign1/count1/sign2/count2
         * are produced but not consumed below — presumably kept in step with
         * the corresponding encode routine; confirm before removing.
         */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 7 \n\t"
            "ori %[sign1], $zero, 0 \n\t"
            "ori %[sign2], $zero, 0 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "slt %[t0], %[t0], $zero \n\t"
            "movn %[sign1], %[t0], %[qc1] \n\t"
            "slt %[t2], %[t2], $zero \n\t"
            "movn %[sign2], %[t2], %[qc3] \n\t"
            "slt %[t1], %[t1], $zero \n\t"
            "sll %[t0], %[sign1], 1 \n\t"
            "or %[t0], %[t0], %[t1] \n\t"
            "movn %[sign1], %[t0], %[qc2] \n\t"
            "slt %[t3], %[t3], $zero \n\t"
            "sll %[t0], %[sign2], 1 \n\t"
            "or %[t0], %[t0], %[t3] \n\t"
            "movn %[sign2], %[t0], %[qc4] \n\t"
            "slt %[count1], $zero, %[qc1] \n\t"
            "slt %[t1], $zero, %[qc2] \n\t"
            "slt %[count2], $zero, %[qc3] \n\t"
            "slt %[t2], $zero, %[qc4] \n\t"
            "addu %[count1], %[count1], %[t1] \n\t"
            "addu %[count2], %[count2], %[t2] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* row index into the 8-wide pair tables: 8*q1 + q2 */
        curidx = 8 * qc1;
        curidx += qc2;
        curidx2 = 8 * qc3;
        curidx2 += qc4;
        /* codeword bits plus one sign bit per nonzero magnitude */
        curbits += p_bits[curidx];
        curbits += upair7_sign_bits[curidx];
        vec = &p_codes[curidx*2];
        curbits += p_bits[curidx2];
        curbits += upair7_sign_bits[curidx2];
        vec2 = &p_codes[curidx2*2];
        /* energy of the (unscaled) quantized codewords */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                + vec2[0]*vec2[0] + vec2[1]*vec2[1];
        /* quantization error: di = |in| - codeword*IQ
         * (abs.s then nmsub.s: fd = fr - fs*ft) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 %[di0], 0(%[in_pos]) \n\t"
            "lwc1 %[di1], 4(%[in_pos]) \n\t"
            "lwc1 %[di2], 8(%[in_pos]) \n\t"
            "lwc1 %[di3], 12(%[in_pos]) \n\t"
            "abs.s %[di0], %[di0] \n\t"
            "abs.s %[di1], %[di1] \n\t"
            "abs.s %[di2], %[di2] \n\t"
            "abs.s %[di3], %[di3] \n\t"
            "lwc1 $f0, 0(%[vec]) \n\t"
            "lwc1 $f1, 4(%[vec]) \n\t"
            "lwc1 $f2, 0(%[vec2]) \n\t"
            "lwc1 $f3, 4(%[vec2]) \n\t"
            "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
            "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
            "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
            "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
            ".set pop \n\t"
            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );
        cost += di0 * di0 + di1 * di1
             + di2 * di2 + di3 * di3;
    }
    if (bits)
        *bits = curbits;
    if (energy)
        /* codewords above are unscaled, so apply the IQ^2 factor here */
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
/**
 * Rate-distortion cost of a band coded with the unsigned-pair codebooks
 * whose magnitudes are limited to 12 (sign bits coded separately).
 * Same structure as get_band_cost_UPAIR7_mips, with clamp value 12 and
 * 13-wide table rows.
 *
 * @param in      spectral coefficients (signed)
 * @param scaled  precomputed |in|^0.75 values used for quantization
 * @param bits    if non-NULL, receives the exact bit count
 * @param energy  if non-NULL, receives the energy of the quantized band
 * @return        cost = distortion * lambda + bits
 */
static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits, float *energy)
{
    /* forward quantizer step (Q34) and its inverse (IQ) for this scalefactor */
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;
    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        int curidx, curidx2;
        int sign1, count1, sign2, count2;
        /* integer view of in[] so the asm can test IEEE-754 sign bits with lw;
         * NOTE(review): type-punning through (int *) relies on -fno-strict-aliasing
         * (FFmpeg builds with it) */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;
        /* quantize magnitudes: round(|coef|^0.75 * Q34) */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
        /*
         * Branchless clamp of qc1..qc4 to the codebook maximum 12 (slt + movn),
         * then pack per-pair sign masks from the loaded float sign bits and
         * count the nonzero values of each pair.  sign1/count1/sign2/count2
         * are produced but not consumed below — presumably kept in step with
         * the corresponding encode routine; confirm before removing.
         */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "ori %[t4], $zero, 12 \n\t"
            "ori %[sign1], $zero, 0 \n\t"
            "ori %[sign2], $zero, 0 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "slt %[t0], %[t0], $zero \n\t"
            "movn %[sign1], %[t0], %[qc1] \n\t"
            "slt %[t2], %[t2], $zero \n\t"
            "movn %[sign2], %[t2], %[qc3] \n\t"
            "slt %[t1], %[t1], $zero \n\t"
            "sll %[t0], %[sign1], 1 \n\t"
            "or %[t0], %[t0], %[t1] \n\t"
            "movn %[sign1], %[t0], %[qc2] \n\t"
            "slt %[t3], %[t3], $zero \n\t"
            "sll %[t0], %[sign2], 1 \n\t"
            "or %[t0], %[t0], %[t3] \n\t"
            "movn %[sign2], %[t0], %[qc4] \n\t"
            "slt %[count1], $zero, %[qc1] \n\t"
            "slt %[t1], $zero, %[qc2] \n\t"
            "slt %[count2], $zero, %[qc3] \n\t"
            "slt %[t2], $zero, %[qc4] \n\t"
            "addu %[count1], %[count1], %[t1] \n\t"
            "addu %[count2], %[count2], %[t2] \n\t"
            ".set pop \n\t"
            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            : "memory"
        );
        /* row index into the 13-wide pair tables: 13*q1 + q2 */
        curidx = 13 * qc1;
        curidx += qc2;
        curidx2 = 13 * qc3;
        curidx2 += qc4;
        /* codeword bits plus one sign bit per nonzero magnitude */
        curbits += p_bits[curidx];
        curbits += p_bits[curidx2];
        curbits += upair12_sign_bits[curidx];
        curbits += upair12_sign_bits[curidx2];
        vec = &p_codes[curidx*2];
        vec2 = &p_codes[curidx2*2];
        /* energy of the (unscaled) quantized codewords */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                + vec2[0]*vec2[0] + vec2[1]*vec2[1];
        /* quantization error: di = |in| - codeword*IQ
         * (abs.s then nmsub.s: fd = fr - fs*ft) */
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            "lwc1 %[di0], 0(%[in_pos]) \n\t"
            "lwc1 %[di1], 4(%[in_pos]) \n\t"
            "lwc1 %[di2], 8(%[in_pos]) \n\t"
            "lwc1 %[di3], 12(%[in_pos]) \n\t"
            "abs.s %[di0], %[di0] \n\t"
            "abs.s %[di1], %[di1] \n\t"
            "abs.s %[di2], %[di2] \n\t"
            "abs.s %[di3], %[di3] \n\t"
            "lwc1 $f0, 0(%[vec]) \n\t"
            "lwc1 $f1, 4(%[vec]) \n\t"
            "lwc1 $f2, 0(%[vec2]) \n\t"
            "lwc1 $f3, 4(%[vec2]) \n\t"
            "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
            "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
            "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
            "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
            ".set pop \n\t"
            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );
        cost += di0 * di0 + di1 * di1
             + di2 * di2 + di3 * di3;
    }
    if (bits)
        *bits = curbits;
    if (energy)
        /* codewords above are unscaled, so apply the IQ^2 factor here */
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
  1932. static float get_band_cost_ESC_mips(struct AACEncContext *s,
  1933. PutBitContext *pb, const float *in,
  1934. const float *scaled, int size, int scale_idx,
  1935. int cb, const float lambda, const float uplim,
  1936. int *bits, float *energy)
  1937. {
  1938. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  1939. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  1940. const float CLIPPED_ESCAPE = 165140.0f * IQ;
  1941. int i;
  1942. float cost = 0;
  1943. float qenergy = 0.0f;
  1944. int qc1, qc2, qc3, qc4;
  1945. int curbits = 0;
  1946. uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
  1947. float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
  1948. for (i = 0; i < size; i += 4) {
  1949. const float *vec, *vec2;
  1950. int curidx, curidx2;
  1951. float t1, t2, t3, t4, V;
  1952. float di1, di2, di3, di4;
  1953. int cond0, cond1, cond2, cond3;
  1954. int c1, c2, c3, c4;
  1955. int t6, t7;
  1956. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  1957. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  1958. qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
  1959. qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
  1960. __asm__ volatile (
  1961. ".set push \n\t"
  1962. ".set noreorder \n\t"
  1963. "ori %[t6], $zero, 15 \n\t"
  1964. "ori %[t7], $zero, 16 \n\t"
  1965. "shll_s.w %[c1], %[qc1], 18 \n\t"
  1966. "shll_s.w %[c2], %[qc2], 18 \n\t"
  1967. "shll_s.w %[c3], %[qc3], 18 \n\t"
  1968. "shll_s.w %[c4], %[qc4], 18 \n\t"
  1969. "srl %[c1], %[c1], 18 \n\t"
  1970. "srl %[c2], %[c2], 18 \n\t"
  1971. "srl %[c3], %[c3], 18 \n\t"
  1972. "srl %[c4], %[c4], 18 \n\t"
  1973. "slt %[cond0], %[t6], %[qc1] \n\t"
  1974. "slt %[cond1], %[t6], %[qc2] \n\t"
  1975. "slt %[cond2], %[t6], %[qc3] \n\t"
  1976. "slt %[cond3], %[t6], %[qc4] \n\t"
  1977. "movn %[qc1], %[t7], %[cond0] \n\t"
  1978. "movn %[qc2], %[t7], %[cond1] \n\t"
  1979. "movn %[qc3], %[t7], %[cond2] \n\t"
  1980. "movn %[qc4], %[t7], %[cond3] \n\t"
  1981. ".set pop \n\t"
  1982. : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
  1983. [qc3]"+r"(qc3), [qc4]"+r"(qc4),
  1984. [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
  1985. [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
  1986. [c1]"=&r"(c1), [c2]"=&r"(c2),
  1987. [c3]"=&r"(c3), [c4]"=&r"(c4),
  1988. [t6]"=&r"(t6), [t7]"=&r"(t7)
  1989. );
  1990. curidx = 17 * qc1;
  1991. curidx += qc2;
  1992. curidx2 = 17 * qc3;
  1993. curidx2 += qc4;
  1994. curbits += p_bits[curidx];
  1995. curbits += esc_sign_bits[curidx];
  1996. vec = &p_codes[curidx*2];
  1997. curbits += p_bits[curidx2];
  1998. curbits += esc_sign_bits[curidx2];
  1999. vec2 = &p_codes[curidx2*2];
  2000. curbits += (av_log2(c1) * 2 - 3) & (-cond0);
  2001. curbits += (av_log2(c2) * 2 - 3) & (-cond1);
  2002. curbits += (av_log2(c3) * 2 - 3) & (-cond2);
  2003. curbits += (av_log2(c4) * 2 - 3) & (-cond3);
  2004. t1 = fabsf(in[i ]);
  2005. t2 = fabsf(in[i+1]);
  2006. t3 = fabsf(in[i+2]);
  2007. t4 = fabsf(in[i+3]);
  2008. if (cond0) {
  2009. if (t1 >= CLIPPED_ESCAPE) {
  2010. di1 = t1 - CLIPPED_ESCAPE;
  2011. qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  2012. } else {
  2013. di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
  2014. qenergy += V*V;
  2015. }
  2016. } else {
  2017. di1 = t1 - (V = vec[0] * IQ);
  2018. qenergy += V*V;
  2019. }
  2020. if (cond1) {
  2021. if (t2 >= CLIPPED_ESCAPE) {
  2022. di2 = t2 - CLIPPED_ESCAPE;
  2023. qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  2024. } else {
  2025. di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
  2026. qenergy += V*V;
  2027. }
  2028. } else {
  2029. di2 = t2 - (V = vec[1] * IQ);
  2030. qenergy += V*V;
  2031. }
  2032. if (cond2) {
  2033. if (t3 >= CLIPPED_ESCAPE) {
  2034. di3 = t3 - CLIPPED_ESCAPE;
  2035. qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  2036. } else {
  2037. di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
  2038. qenergy += V*V;
  2039. }
  2040. } else {
  2041. di3 = t3 - (V = vec2[0] * IQ);
  2042. qenergy += V*V;
  2043. }
  2044. if (cond3) {
  2045. if (t4 >= CLIPPED_ESCAPE) {
  2046. di4 = t4 - CLIPPED_ESCAPE;
  2047. qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  2048. } else {
  2049. di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
  2050. qenergy += V*V;
  2051. }
  2052. } else {
  2053. di4 = t4 - (V = vec2[1]*IQ);
  2054. qenergy += V*V;
  2055. }
  2056. cost += di1 * di1 + di2 * di2
  2057. + di3 * di3 + di4 * di4;
  2058. }
  2059. if (bits)
  2060. *bits = curbits;
  2061. return cost * lambda + curbits;
  2062. }
/**
 * Band-cost routine per codebook number (index 0..15).
 * Codebooks sharing the same table geometry share an implementation;
 * the trailing entries fall back to the zero-cost routine.
 */
static float (*const get_band_cost_arr[])(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits, float *energy) = {
    get_band_cost_ZERO_mips,
    get_band_cost_SQUAD_mips,
    get_band_cost_SQUAD_mips,
    get_band_cost_UQUAD_mips,
    get_band_cost_UQUAD_mips,
    get_band_cost_SPAIR_mips,
    get_band_cost_SPAIR_mips,
    get_band_cost_UPAIR7_mips,
    get_band_cost_UPAIR7_mips,
    get_band_cost_UPAIR12_mips,
    get_band_cost_UPAIR12_mips,
    get_band_cost_ESC_mips,
    get_band_cost_NONE_mips, /* cb 12 doesn't exist */
    get_band_cost_ZERO_mips,
    get_band_cost_ZERO_mips,
    get_band_cost_ZERO_mips,
};
/* Dispatch to the codebook-specific cost routine via the table above. */
#define get_band_cost( \
    s, pb, in, scaled, size, scale_idx, cb, \
    lambda, uplim, bits, energy) \
    get_band_cost_arr[cb]( \
        s, pb, in, scaled, size, scale_idx, cb, \
        lambda, uplim, bits, energy)
/**
 * Wrapper with the generic quantize_band_cost() signature.
 * pb is not needed by the cost-only paths (NULL is passed through), and
 * rtz is accepted for signature compatibility but ignored here.
 */
static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                const float *scaled, int size, int scale_idx,
                                int cb, const float lambda, const float uplim,
                                int *bits, float *energy, int rtz)
{
    return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
}
  2098. #include "libavcodec/aacenc_quantization_misc.h"
  2099. static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope) {
  2100. const float iswb_size = 1.0f / swb_size;
  2101. const float iswb_sizem1 = 1.0f / (swb_size - 1);
  2102. const float ethresh = thresh, iethresh = 1.0f / ethresh;
  2103. float form = 0.0f, weight = 0.0f;
  2104. int w2, i;
  2105. for (w2 = 0; w2 < group_len; w2++) {
  2106. float e = 0.0f, e2 = 0.0f, var = 0.0f, maxval = 0.0f;
  2107. float nzl = 0;
  2108. for (i = 0; i < swb_size; i+=4) {
  2109. float s1 = fabsf(scaled[w2*128+i ]);
  2110. float s2 = fabsf(scaled[w2*128+i+1]);
  2111. float s3 = fabsf(scaled[w2*128+i+2]);
  2112. float s4 = fabsf(scaled[w2*128+i+3]);
  2113. maxval = FFMAX(maxval, FFMAX(FFMAX(s1, s2), FFMAX(s3, s4)));
  2114. e += (s1+s2)+(s3+s4);
  2115. s1 *= s1;
  2116. s2 *= s2;
  2117. s3 *= s3;
  2118. s4 *= s4;
  2119. e2 += (s1+s2)+(s3+s4);
  2120. /* We really don't want a hard non-zero-line count, since
  2121. * even below-threshold lines do add up towards band spectral power.
  2122. * So, fall steeply towards zero, but smoothly
  2123. */
  2124. if (s1 >= ethresh) {
  2125. nzl += 1.0f;
  2126. } else {
  2127. nzl += powf(s1 * iethresh, nzslope);
  2128. }
  2129. if (s2 >= ethresh) {
  2130. nzl += 1.0f;
  2131. } else {
  2132. nzl += powf(s2 * iethresh, nzslope);
  2133. }
  2134. if (s3 >= ethresh) {
  2135. nzl += 1.0f;
  2136. } else {
  2137. nzl += powf(s3 * iethresh, nzslope);
  2138. }
  2139. if (s4 >= ethresh) {
  2140. nzl += 1.0f;
  2141. } else {
  2142. nzl += powf(s4 * iethresh, nzslope);
  2143. }
  2144. }
  2145. if (e2 > thresh) {
  2146. float frm;
  2147. e *= iswb_size;
  2148. /** compute variance */
  2149. for (i = 0; i < swb_size; i++) {
  2150. float d = fabsf(scaled[w2*128+i]) - e;
  2151. var += d*d;
  2152. }
  2153. var = sqrtf(var * iswb_sizem1);
  2154. e2 *= iswb_size;
  2155. frm = e / FFMIN(e+4*var,maxval);
  2156. form += e2 * sqrtf(frm) / FFMAX(0.5f,nzl);
  2157. weight += e2;
  2158. }
  2159. }
  2160. if (weight > 0) {
  2161. return form / weight;
  2162. } else {
  2163. return 1.0f;
  2164. }
  2165. }
  2166. #include "libavcodec/aaccoder_twoloop.h"
  2167. static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
  2168. {
  2169. int start = 0, i, w, w2, g;
  2170. float M[128], S[128];
  2171. float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
  2172. const float lambda = s->lambda;
  2173. SingleChannelElement *sce0 = &cpe->ch[0];
  2174. SingleChannelElement *sce1 = &cpe->ch[1];
  2175. if (!cpe->common_window)
  2176. return;
  2177. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  2178. start = 0;
  2179. for (g = 0; g < sce0->ics.num_swb; g++) {
  2180. if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
  2181. float dist1 = 0.0f, dist2 = 0.0f;
  2182. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  2183. FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
  2184. FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
  2185. float minthr = FFMIN(band0->threshold, band1->threshold);
  2186. float maxthr = FFMAX(band0->threshold, band1->threshold);
  2187. for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
  2188. M[i ] = (sce0->coeffs[start+w2*128+i ]
  2189. + sce1->coeffs[start+w2*128+i ]) * 0.5;
  2190. M[i+1] = (sce0->coeffs[start+w2*128+i+1]
  2191. + sce1->coeffs[start+w2*128+i+1]) * 0.5;
  2192. M[i+2] = (sce0->coeffs[start+w2*128+i+2]
  2193. + sce1->coeffs[start+w2*128+i+2]) * 0.5;
  2194. M[i+3] = (sce0->coeffs[start+w2*128+i+3]
  2195. + sce1->coeffs[start+w2*128+i+3]) * 0.5;
  2196. S[i ] = M[i ]
  2197. - sce1->coeffs[start+w2*128+i ];
  2198. S[i+1] = M[i+1]
  2199. - sce1->coeffs[start+w2*128+i+1];
  2200. S[i+2] = M[i+2]
  2201. - sce1->coeffs[start+w2*128+i+2];
  2202. S[i+3] = M[i+3]
  2203. - sce1->coeffs[start+w2*128+i+3];
  2204. }
  2205. abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
  2206. abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
  2207. abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  2208. abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  2209. dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
  2210. L34,
  2211. sce0->ics.swb_sizes[g],
  2212. sce0->sf_idx[(w+w2)*16+g],
  2213. sce0->band_type[(w+w2)*16+g],
  2214. lambda / band0->threshold, INFINITY, NULL, NULL, 0);
  2215. dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
  2216. R34,
  2217. sce1->ics.swb_sizes[g],
  2218. sce1->sf_idx[(w+w2)*16+g],
  2219. sce1->band_type[(w+w2)*16+g],
  2220. lambda / band1->threshold, INFINITY, NULL, NULL, 0);
  2221. dist2 += quantize_band_cost(s, M,
  2222. M34,
  2223. sce0->ics.swb_sizes[g],
  2224. sce0->sf_idx[(w+w2)*16+g],
  2225. sce0->band_type[(w+w2)*16+g],
  2226. lambda / maxthr, INFINITY, NULL, NULL, 0);
  2227. dist2 += quantize_band_cost(s, S,
  2228. S34,
  2229. sce1->ics.swb_sizes[g],
  2230. sce1->sf_idx[(w+w2)*16+g],
  2231. sce1->band_type[(w+w2)*16+g],
  2232. lambda / minthr, INFINITY, NULL, NULL, 0);
  2233. }
  2234. cpe->ms_mask[w*16+g] = dist2 < dist1;
  2235. }
  2236. start += sce0->ics.swb_sizes[g];
  2237. }
  2238. }
  2239. }
  2240. #endif /*HAVE_MIPSFPU */
  2241. #include "libavcodec/aaccoder_trellis.h"
  2242. #endif /* HAVE_INLINE_ASM */
/**
 * Install the MIPS-optimised AAC coder hooks.
 * NOTE(review): the whole body is compiled out by "#if 0", so this is
 * currently a no-op — presumably disabled deliberately (e.g. the asm paths
 * fell out of sync with the generic coder); confirm against project history
 * before re-enabling.
 */
void ff_aac_coder_init_mips(AACEncContext *c) {
#if 0 // HAVE_INLINE_ASM -- body intentionally disabled, kept for reference
    AACCoefficientsEncoder *e = c->coder;
    int option = c->options.aac_coder;
    if (option == 2) {
        e->quantize_and_encode_band = quantize_and_encode_band_mips;
        e->encode_window_bands_info = codebook_trellis_rate;
#if HAVE_MIPSFPU
        e->search_for_quantizers = search_for_quantizers_twoloop;
#endif /* HAVE_MIPSFPU */
    }
#if HAVE_MIPSFPU
    e->search_for_ms = search_for_ms_mips;
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM */
}