You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1060 lines
40KB

  1. /*
  2. * AAC coefficients encoder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC coefficients encoder
  24. */
  25. /***********************************
  26. * TODOs:
  27. * speedup quantizer selection
  28. * add sane pulse detection
  29. ***********************************/
  30. #include <float.h>
  31. #include "avcodec.h"
  32. #include "put_bits.h"
  33. #include "aac.h"
  34. #include "aacenc.h"
  35. #include "aactab.h"
  36. /** bits needed to code codebook run value for long windows */
  37. static const uint8_t run_value_bits_long[64] = {
  38. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  39. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
  40. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
  41. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
  42. };
  43. /** bits needed to code codebook run value for short windows */
  44. static const uint8_t run_value_bits_short[16] = {
  45. 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
  46. };
  47. static const uint8_t *run_value_bits[2] = {
  48. run_value_bits_long, run_value_bits_short
  49. };
  50. /**
  51. * Quantize one coefficient.
  52. * @return absolute value of the quantized coefficient
  53. * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
  54. */
  55. static av_always_inline int quant(float coef, const float Q)
  56. {
  57. float a = coef * Q;
  58. return sqrtf(a * sqrtf(a)) + 0.4054;
  59. }
  60. static void quantize_bands(int *out, const float *in, const float *scaled,
  61. int size, float Q34, int is_signed, int maxval)
  62. {
  63. int i;
  64. double qc;
  65. for (i = 0; i < size; i++) {
  66. qc = scaled[i] * Q34;
  67. out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
  68. if (is_signed && in[i] < 0.0f) {
  69. out[i] = -out[i];
  70. }
  71. }
  72. }
  73. static void abs_pow34_v(float *out, const float *in, const int size)
  74. {
  75. #ifndef USE_REALLY_FULL_SEARCH
  76. int i;
  77. for (i = 0; i < size; i++) {
  78. float a = fabsf(in[i]);
  79. out[i] = sqrtf(a * sqrtf(a));
  80. }
  81. #endif /* USE_REALLY_FULL_SEARCH */
  82. }
  83. static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
  84. static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
  85. /**
  86. * Calculate rate distortion cost for quantizing with given codebook
  87. *
  88. * @return quantization distortion
  89. */
  90. static float quantize_and_encode_band_cost(struct AACEncContext *s,
  91. PutBitContext *pb, const float *in,
  92. const float *scaled, int size, int scale_idx,
  93. int cb, const float lambda, const float uplim,
  94. int *bits)
  95. {
  96. const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  97. const float Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  98. const float CLIPPED_ESCAPE = 165140.0f*IQ;
  99. int i, j, k;
  100. float cost = 0;
  101. const int dim = cb < FIRST_PAIR_BT ? 4 : 2;
  102. int resbits = 0;
  103. const float Q34 = sqrtf(Q * sqrtf(Q));
  104. const int range = aac_cb_range[cb];
  105. const int maxval = aac_cb_maxval[cb];
  106. int off;
  107. if (!cb) {
  108. for (i = 0; i < size; i++)
  109. cost += in[i]*in[i];
  110. if (bits)
  111. *bits = 0;
  112. return cost * lambda;
  113. }
  114. if (!scaled) {
  115. abs_pow34_v(s->scoefs, in, size);
  116. scaled = s->scoefs;
  117. }
  118. quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
  119. if (IS_CODEBOOK_UNSIGNED(cb)) {
  120. off = 0;
  121. } else {
  122. off = maxval;
  123. }
  124. for (i = 0; i < size; i += dim) {
  125. const float *vec;
  126. int *quants = s->qcoefs + i;
  127. int curidx = 0;
  128. int curbits;
  129. float rd = 0.0f;
  130. for (j = 0; j < dim; j++) {
  131. curidx *= range;
  132. curidx += quants[j] + off;
  133. }
  134. curbits = ff_aac_spectral_bits[cb-1][curidx];
  135. vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
  136. if (IS_CODEBOOK_UNSIGNED(cb)) {
  137. for (k = 0; k < dim; k++) {
  138. float t = fabsf(in[i+k]);
  139. float di;
  140. if (vec[k] == 64.0f) { //FIXME: slow
  141. if (t >= CLIPPED_ESCAPE) {
  142. di = t - CLIPPED_ESCAPE;
  143. curbits += 21;
  144. } else {
  145. int c = av_clip(quant(t, Q), 0, 8191);
  146. di = t - c*cbrtf(c)*IQ;
  147. curbits += av_log2(c)*2 - 4 + 1;
  148. }
  149. } else {
  150. di = t - vec[k]*IQ;
  151. }
  152. if (vec[k] != 0.0f)
  153. curbits++;
  154. rd += di*di;
  155. }
  156. } else {
  157. for (k = 0; k < dim; k++) {
  158. float di = in[i+k] - vec[k]*IQ;
  159. rd += di*di;
  160. }
  161. }
  162. cost += rd * lambda + curbits;
  163. resbits += curbits;
  164. if (cost >= uplim)
  165. return uplim;
  166. if (pb) {
  167. put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
  168. if (IS_CODEBOOK_UNSIGNED(cb))
  169. for (j = 0; j < dim; j++)
  170. if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
  171. put_bits(pb, 1, in[i+j] < 0.0f);
  172. if (cb == ESC_BT) {
  173. for (j = 0; j < 2; j++) {
  174. if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
  175. int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
  176. int len = av_log2(coef);
  177. put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
  178. put_bits(pb, len, coef & ((1 << len) - 1));
  179. }
  180. }
  181. }
  182. }
  183. }
  184. if (bits)
  185. *bits = resbits;
  186. return cost;
  187. }
  188. static float quantize_band_cost(struct AACEncContext *s, const float *in,
  189. const float *scaled, int size, int scale_idx,
  190. int cb, const float lambda, const float uplim,
  191. int *bits)
  192. {
  193. return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
  194. cb, lambda, uplim, bits);
  195. }
  196. static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
  197. const float *in, int size, int scale_idx,
  198. int cb, const float lambda)
  199. {
  200. quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
  201. INFINITY, NULL);
  202. }
  203. static float find_max_val(int group_len, int swb_size, const float *scaled) {
  204. float maxval = 0.0f;
  205. int w2, i;
  206. for (w2 = 0; w2 < group_len; w2++) {
  207. for (i = 0; i < swb_size; i++) {
  208. maxval = FFMAX(maxval, scaled[w2*128+i]);
  209. }
  210. }
  211. return maxval;
  212. }
  213. static int find_min_book(float maxval, int sf) {
  214. float Q = ff_aac_pow2sf_tab[200 - sf + SCALE_ONE_POS - SCALE_DIV_512];
  215. float Q34 = sqrtf(Q * sqrtf(Q));
  216. int qmaxval, cb;
  217. qmaxval = maxval * Q34 + 0.4054f;
  218. if (qmaxval == 0) cb = 0;
  219. else if (qmaxval == 1) cb = 1;
  220. else if (qmaxval == 2) cb = 3;
  221. else if (qmaxval <= 4) cb = 5;
  222. else if (qmaxval <= 7) cb = 7;
  223. else if (qmaxval <= 12) cb = 9;
  224. else cb = 11;
  225. return cb;
  226. }
  227. /**
  228. * structure used in optimal codebook search
  229. */
  230. typedef struct BandCodingPath {
  231. int prev_idx; ///< pointer to the previous path point
  232. float cost; ///< path cost
  233. int run;
  234. } BandCodingPath;
  235. /**
  236. * Encode band info for single window group bands.
  237. */
  238. static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
  239. int win, int group_len, const float lambda)
  240. {
  241. BandCodingPath path[120][12];
  242. int w, swb, cb, start, start2, size;
  243. int i, j;
  244. const int max_sfb = sce->ics.max_sfb;
  245. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  246. const int run_esc = (1 << run_bits) - 1;
  247. int idx, ppos, count;
  248. int stackrun[120], stackcb[120], stack_len;
  249. float next_minrd = INFINITY;
  250. int next_mincb = 0;
  251. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  252. start = win*128;
  253. for (cb = 0; cb < 12; cb++) {
  254. path[0][cb].cost = 0.0f;
  255. path[0][cb].prev_idx = -1;
  256. path[0][cb].run = 0;
  257. }
  258. for (swb = 0; swb < max_sfb; swb++) {
  259. start2 = start;
  260. size = sce->ics.swb_sizes[swb];
  261. if (sce->zeroes[win*16 + swb]) {
  262. for (cb = 0; cb < 12; cb++) {
  263. path[swb+1][cb].prev_idx = cb;
  264. path[swb+1][cb].cost = path[swb][cb].cost;
  265. path[swb+1][cb].run = path[swb][cb].run + 1;
  266. }
  267. } else {
  268. float minrd = next_minrd;
  269. int mincb = next_mincb;
  270. next_minrd = INFINITY;
  271. next_mincb = 0;
  272. for (cb = 0; cb < 12; cb++) {
  273. float cost_stay_here, cost_get_here;
  274. float rd = 0.0f;
  275. for (w = 0; w < group_len; w++) {
  276. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
  277. rd += quantize_band_cost(s, sce->coeffs + start + w*128,
  278. s->scoefs + start + w*128, size,
  279. sce->sf_idx[(win+w)*16+swb], cb,
  280. lambda / band->threshold, INFINITY, NULL);
  281. }
  282. cost_stay_here = path[swb][cb].cost + rd;
  283. cost_get_here = minrd + rd + run_bits + 4;
  284. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  285. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  286. cost_stay_here += run_bits;
  287. if (cost_get_here < cost_stay_here) {
  288. path[swb+1][cb].prev_idx = mincb;
  289. path[swb+1][cb].cost = cost_get_here;
  290. path[swb+1][cb].run = 1;
  291. } else {
  292. path[swb+1][cb].prev_idx = cb;
  293. path[swb+1][cb].cost = cost_stay_here;
  294. path[swb+1][cb].run = path[swb][cb].run + 1;
  295. }
  296. if (path[swb+1][cb].cost < next_minrd) {
  297. next_minrd = path[swb+1][cb].cost;
  298. next_mincb = cb;
  299. }
  300. }
  301. }
  302. start += sce->ics.swb_sizes[swb];
  303. }
  304. //convert resulting path from backward-linked list
  305. stack_len = 0;
  306. idx = 0;
  307. for (cb = 1; cb < 12; cb++)
  308. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  309. idx = cb;
  310. ppos = max_sfb;
  311. while (ppos > 0) {
  312. cb = idx;
  313. stackrun[stack_len] = path[ppos][cb].run;
  314. stackcb [stack_len] = cb;
  315. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  316. ppos -= path[ppos][cb].run;
  317. stack_len++;
  318. }
  319. //perform actual band info encoding
  320. start = 0;
  321. for (i = stack_len - 1; i >= 0; i--) {
  322. put_bits(&s->pb, 4, stackcb[i]);
  323. count = stackrun[i];
  324. memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  325. //XXX: memset when band_type is also uint8_t
  326. for (j = 0; j < count; j++) {
  327. sce->band_type[win*16 + start] = stackcb[i];
  328. start++;
  329. }
  330. while (count >= run_esc) {
  331. put_bits(&s->pb, run_bits, run_esc);
  332. count -= run_esc;
  333. }
  334. put_bits(&s->pb, run_bits, count);
  335. }
  336. }
  337. static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
  338. int win, int group_len, const float lambda)
  339. {
  340. BandCodingPath path[120][12];
  341. int w, swb, cb, start, start2, size;
  342. int i, j;
  343. const int max_sfb = sce->ics.max_sfb;
  344. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  345. const int run_esc = (1 << run_bits) - 1;
  346. int idx, ppos, count;
  347. int stackrun[120], stackcb[120], stack_len;
  348. float next_minrd = INFINITY;
  349. int next_mincb = 0;
  350. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  351. start = win*128;
  352. for (cb = 0; cb < 12; cb++) {
  353. path[0][cb].cost = run_bits+4;
  354. path[0][cb].prev_idx = -1;
  355. path[0][cb].run = 0;
  356. }
  357. for (swb = 0; swb < max_sfb; swb++) {
  358. start2 = start;
  359. size = sce->ics.swb_sizes[swb];
  360. if (sce->zeroes[win*16 + swb]) {
  361. for (cb = 0; cb < 12; cb++) {
  362. path[swb+1][cb].prev_idx = cb;
  363. path[swb+1][cb].cost = path[swb][cb].cost;
  364. path[swb+1][cb].run = path[swb][cb].run + 1;
  365. }
  366. } else {
  367. float minrd = next_minrd;
  368. int mincb = next_mincb;
  369. int startcb = sce->band_type[win*16+swb];
  370. next_minrd = INFINITY;
  371. next_mincb = 0;
  372. for (cb = 0; cb < startcb; cb++) {
  373. path[swb+1][cb].cost = 61450;
  374. path[swb+1][cb].prev_idx = -1;
  375. path[swb+1][cb].run = 0;
  376. }
  377. for (cb = startcb; cb < 12; cb++) {
  378. float cost_stay_here, cost_get_here;
  379. float rd = 0.0f;
  380. for (w = 0; w < group_len; w++) {
  381. rd += quantize_band_cost(s, sce->coeffs + start + w*128,
  382. s->scoefs + start + w*128, size,
  383. sce->sf_idx[(win+w)*16+swb], cb,
  384. 0, INFINITY, NULL);
  385. }
  386. cost_stay_here = path[swb][cb].cost + rd;
  387. cost_get_here = minrd + rd + run_bits + 4;
  388. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  389. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  390. cost_stay_here += run_bits;
  391. if (cost_get_here < cost_stay_here) {
  392. path[swb+1][cb].prev_idx = mincb;
  393. path[swb+1][cb].cost = cost_get_here;
  394. path[swb+1][cb].run = 1;
  395. } else {
  396. path[swb+1][cb].prev_idx = cb;
  397. path[swb+1][cb].cost = cost_stay_here;
  398. path[swb+1][cb].run = path[swb][cb].run + 1;
  399. }
  400. if (path[swb+1][cb].cost < next_minrd) {
  401. next_minrd = path[swb+1][cb].cost;
  402. next_mincb = cb;
  403. }
  404. }
  405. }
  406. start += sce->ics.swb_sizes[swb];
  407. }
  408. //convert resulting path from backward-linked list
  409. stack_len = 0;
  410. idx = 0;
  411. for (cb = 1; cb < 12; cb++)
  412. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  413. idx = cb;
  414. ppos = max_sfb;
  415. while (ppos > 0) {
  416. if (idx < 0) abort();
  417. cb = idx;
  418. stackrun[stack_len] = path[ppos][cb].run;
  419. stackcb [stack_len] = cb;
  420. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  421. ppos -= path[ppos][cb].run;
  422. stack_len++;
  423. }
  424. //perform actual band info encoding
  425. start = 0;
  426. for (i = stack_len - 1; i >= 0; i--) {
  427. put_bits(&s->pb, 4, stackcb[i]);
  428. count = stackrun[i];
  429. memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  430. //XXX: memset when band_type is also uint8_t
  431. for (j = 0; j < count; j++) {
  432. sce->band_type[win*16 + start] = stackcb[i];
  433. start++;
  434. }
  435. while (count >= run_esc) {
  436. put_bits(&s->pb, run_bits, run_esc);
  437. count -= run_esc;
  438. }
  439. put_bits(&s->pb, run_bits, count);
  440. }
  441. }
  442. typedef struct TrellisPath {
  443. float cost;
  444. int prev;
  445. } TrellisPath;
  446. #define TRELLIS_STAGES 121
  447. #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
  448. static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
  449. SingleChannelElement *sce,
  450. const float lambda)
  451. {
  452. int q, w, w2, g, start = 0;
  453. int i, j;
  454. int idx;
  455. TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
  456. int bandaddr[TRELLIS_STAGES];
  457. int minq;
  458. float mincost;
  459. float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
  460. int q0, q1, qcnt = 0;
  461. for (i = 0; i < 1024; i++) {
  462. float t = fabsf(sce->coeffs[i]);
  463. if (t > 0.0f) {
  464. q0f = FFMIN(q0f, t);
  465. q1f = FFMAX(q1f, t);
  466. qnrgf += t*t;
  467. qcnt++;
  468. }
  469. }
  470. if (!qcnt) {
  471. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  472. memset(sce->zeroes, 1, sizeof(sce->zeroes));
  473. return;
  474. }
  475. //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
  476. q0 = av_clip_uint8(log2(q0f)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
  477. //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
  478. q1 = av_clip_uint8(log2(q1f)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
  479. //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
  480. if (q1 - q0 > 60) {
  481. int q0low = q0;
  482. int q1high = q1;
  483. //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
  484. int qnrg = av_clip_uint8(log2(sqrt(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
  485. q1 = qnrg + 30;
  486. q0 = qnrg - 30;
  487. //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
  488. if (q0 < q0low) {
  489. q1 += q0low - q0;
  490. q0 = q0low;
  491. } else if (q1 > q1high) {
  492. q0 -= q1 - q1high;
  493. q1 = q1high;
  494. }
  495. }
  496. //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
  497. for (i = 0; i < TRELLIS_STATES; i++) {
  498. paths[0][i].cost = 0.0f;
  499. paths[0][i].prev = -1;
  500. }
  501. for (j = 1; j < TRELLIS_STAGES; j++) {
  502. for (i = 0; i < TRELLIS_STATES; i++) {
  503. paths[j][i].cost = INFINITY;
  504. paths[j][i].prev = -2;
  505. }
  506. }
  507. idx = 1;
  508. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  509. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  510. start = w*128;
  511. for (g = 0; g < sce->ics.num_swb; g++) {
  512. const float *coefs = sce->coeffs + start;
  513. float qmin, qmax;
  514. int nz = 0;
  515. bandaddr[idx] = w * 16 + g;
  516. qmin = INT_MAX;
  517. qmax = 0.0f;
  518. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  519. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  520. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  521. sce->zeroes[(w+w2)*16+g] = 1;
  522. continue;
  523. }
  524. sce->zeroes[(w+w2)*16+g] = 0;
  525. nz = 1;
  526. for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
  527. float t = fabsf(coefs[w2*128+i]);
  528. if (t > 0.0f)
  529. qmin = FFMIN(qmin, t);
  530. qmax = FFMAX(qmax, t);
  531. }
  532. }
  533. if (nz) {
  534. int minscale, maxscale;
  535. float minrd = INFINITY;
  536. //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
  537. minscale = av_clip_uint8(log2(qmin)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
  538. //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
  539. maxscale = av_clip_uint8(log2(qmax)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
  540. minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
  541. maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
  542. float maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
  543. for (q = minscale; q < maxscale; q++) {
  544. float dist = 0;
  545. int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
  546. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  547. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  548. dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
  549. q + q0, cb, lambda / band->threshold, INFINITY, NULL);
  550. }
  551. minrd = FFMIN(minrd, dist);
  552. for (i = 0; i < q1 - q0; i++) {
  553. float cost;
  554. cost = paths[idx - 1][i].cost + dist
  555. + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
  556. if (cost < paths[idx][q].cost) {
  557. paths[idx][q].cost = cost;
  558. paths[idx][q].prev = i;
  559. }
  560. }
  561. }
  562. } else {
  563. for (q = 0; q < q1 - q0; q++) {
  564. paths[idx][q].cost = paths[idx - 1][q].cost + 1;
  565. paths[idx][q].prev = q;
  566. }
  567. }
  568. sce->zeroes[w*16+g] = !nz;
  569. start += sce->ics.swb_sizes[g];
  570. idx++;
  571. }
  572. }
  573. idx--;
  574. mincost = paths[idx][0].cost;
  575. minq = 0;
  576. for (i = 1; i < TRELLIS_STATES; i++) {
  577. if (paths[idx][i].cost < mincost) {
  578. mincost = paths[idx][i].cost;
  579. minq = i;
  580. }
  581. }
  582. while (idx) {
  583. sce->sf_idx[bandaddr[idx]] = minq + q0;
  584. minq = paths[idx][minq].prev;
  585. idx--;
  586. }
  587. //set the same quantizers inside window groups
  588. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  589. for (g = 0; g < sce->ics.num_swb; g++)
  590. for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
  591. sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
  592. }
  593. /**
  594. * two-loop quantizers search taken from ISO 13818-7 Appendix C
  595. */
  596. static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  597. AACEncContext *s,
  598. SingleChannelElement *sce,
  599. const float lambda)
  600. {
  601. int start = 0, i, w, w2, g;
  602. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
  603. float dists[128], uplims[128];
  604. int fflag, minscaler;
  605. int its = 0;
  606. int allz = 0;
  607. float minthr = INFINITY;
  608. //XXX: some heuristic to determine initial quantizers will reduce search time
  609. memset(dists, 0, sizeof(dists));
  610. //determine zero bands and upper limits
  611. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  612. for (g = 0; g < sce->ics.num_swb; g++) {
  613. int nz = 0;
  614. float uplim = 0.0f;
  615. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  616. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  617. uplim += band->threshold;
  618. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  619. sce->zeroes[(w+w2)*16+g] = 1;
  620. continue;
  621. }
  622. nz = 1;
  623. }
  624. uplims[w*16+g] = uplim *512;
  625. sce->zeroes[w*16+g] = !nz;
  626. if (nz)
  627. minthr = FFMIN(minthr, uplim);
  628. allz = FFMAX(allz, nz);
  629. }
  630. }
  631. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  632. for (g = 0; g < sce->ics.num_swb; g++) {
  633. if (sce->zeroes[w*16+g]) {
  634. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  635. continue;
  636. }
  637. sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2(uplims[w*16+g]/minthr)*4,59);
  638. }
  639. }
  640. if (!allz)
  641. return;
  642. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  643. //perform two-loop search
  644. //outer loop - improve quality
  645. do {
  646. int tbits, qstep;
  647. minscaler = sce->sf_idx[0];
  648. //inner loop - quantize spectrum to fit into given number of bits
  649. qstep = its ? 1 : 32;
  650. do {
  651. int prev = -1;
  652. tbits = 0;
  653. fflag = 0;
  654. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  655. start = w*128;
  656. for (g = 0; g < sce->ics.num_swb; g++) {
  657. const float *coefs = sce->coeffs + start;
  658. const float *scaled = s->scoefs + start;
  659. int bits = 0;
  660. int cb;
  661. float mindist = INFINITY;
  662. int minbits = 0;
  663. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  664. start += sce->ics.swb_sizes[g];
  665. continue;
  666. }
  667. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  668. {
  669. float dist = 0.0f;
  670. int bb = 0;
  671. cb = find_min_book(find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled), sce->sf_idx[w*16+g]);
  672. sce->band_type[w*16+g] = cb;
  673. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  674. int b;
  675. dist += quantize_band_cost(s, coefs + w2*128,
  676. scaled + w2*128,
  677. sce->ics.swb_sizes[g],
  678. sce->sf_idx[w*16+g],
  679. cb,
  680. lambda,
  681. INFINITY,
  682. &b);
  683. bb += b;
  684. }
  685. mindist = dist;
  686. minbits = bb;
  687. }
  688. dists[w*16+g] = (mindist - minbits) / lambda;
  689. bits = minbits;
  690. if (prev != -1) {
  691. bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  692. }
  693. tbits += bits;
  694. start += sce->ics.swb_sizes[g];
  695. prev = sce->sf_idx[w*16+g];
  696. }
  697. }
  698. if (tbits > destbits) {
  699. for (i = 0; i < 128; i++)
  700. if (sce->sf_idx[i] < 218 - qstep)
  701. sce->sf_idx[i] += qstep;
  702. } else {
  703. for (i = 0; i < 128; i++)
  704. if (sce->sf_idx[i] > 60 - qstep)
  705. sce->sf_idx[i] -= qstep;
  706. }
  707. qstep >>= 1;
  708. if (!qstep && tbits > destbits*1.02)
  709. qstep = 1;
  710. if (sce->sf_idx[0] >= 217)
  711. break;
  712. } while (qstep);
  713. fflag = 0;
  714. minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
  715. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  716. start = w*128;
  717. for (g = 0; g < sce->ics.num_swb; g++) {
  718. int prevsc = sce->sf_idx[w*16+g];
  719. if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60)
  720. sce->sf_idx[w*16+g]--;
  721. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
  722. sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
  723. if (sce->sf_idx[w*16+g] != prevsc)
  724. fflag = 1;
  725. }
  726. }
  727. its++;
  728. } while (fflag && its < 10);
  729. }
  730. static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
  731. SingleChannelElement *sce,
  732. const float lambda)
  733. {
  734. int start = 0, i, w, w2, g;
  735. float uplim[128], maxq[128];
  736. int minq, maxsf;
  737. float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
  738. int last = 0, lastband = 0, curband = 0;
  739. float avg_energy = 0.0;
  740. if (sce->ics.num_windows == 1) {
  741. start = 0;
  742. for (i = 0; i < 1024; i++) {
  743. if (i - start >= sce->ics.swb_sizes[curband]) {
  744. start += sce->ics.swb_sizes[curband];
  745. curband++;
  746. }
  747. if (sce->coeffs[i]) {
  748. avg_energy += sce->coeffs[i] * sce->coeffs[i];
  749. last = i;
  750. lastband = curband;
  751. }
  752. }
  753. } else {
  754. for (w = 0; w < 8; w++) {
  755. const float *coeffs = sce->coeffs + w*128;
  756. start = 0;
  757. for (i = 0; i < 128; i++) {
  758. if (i - start >= sce->ics.swb_sizes[curband]) {
  759. start += sce->ics.swb_sizes[curband];
  760. curband++;
  761. }
  762. if (coeffs[i]) {
  763. avg_energy += coeffs[i] * coeffs[i];
  764. last = FFMAX(last, i);
  765. lastband = FFMAX(lastband, curband);
  766. }
  767. }
  768. }
  769. }
  770. last++;
  771. avg_energy /= last;
  772. if (avg_energy == 0.0f) {
  773. for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
  774. sce->sf_idx[i] = SCALE_ONE_POS;
  775. return;
  776. }
  777. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  778. start = w*128;
  779. for (g = 0; g < sce->ics.num_swb; g++) {
  780. float *coefs = sce->coeffs + start;
  781. const int size = sce->ics.swb_sizes[g];
  782. int start2 = start, end2 = start + size, peakpos = start;
  783. float maxval = -1, thr = 0.0f, t;
  784. maxq[w*16+g] = 0.0f;
  785. if (g > lastband) {
  786. maxq[w*16+g] = 0.0f;
  787. start += size;
  788. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
  789. memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
  790. continue;
  791. }
  792. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  793. for (i = 0; i < size; i++) {
  794. float t = coefs[w2*128+i]*coefs[w2*128+i];
  795. maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
  796. thr += t;
  797. if (sce->ics.num_windows == 1 && maxval < t) {
  798. maxval = t;
  799. peakpos = start+i;
  800. }
  801. }
  802. }
  803. if (sce->ics.num_windows == 1) {
  804. start2 = FFMAX(peakpos - 2, start2);
  805. end2 = FFMIN(peakpos + 3, end2);
  806. } else {
  807. start2 -= start;
  808. end2 -= start;
  809. }
  810. start += size;
  811. thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
  812. t = 1.0 - (1.0 * start2 / last);
  813. uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
  814. }
  815. }
  816. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  817. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  818. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  819. start = w*128;
  820. for (g = 0; g < sce->ics.num_swb; g++) {
  821. const float *coefs = sce->coeffs + start;
  822. const float *scaled = s->scoefs + start;
  823. const int size = sce->ics.swb_sizes[g];
  824. int scf, prev_scf, step;
  825. int min_scf = -1, max_scf = 256;
  826. float curdiff;
  827. if (maxq[w*16+g] < 21.544) {
  828. sce->zeroes[w*16+g] = 1;
  829. start += size;
  830. continue;
  831. }
  832. sce->zeroes[w*16+g] = 0;
  833. scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2(1/maxq[w*16+g])*16/3, 60, 218);
  834. step = 16;
  835. for (;;) {
  836. float dist = 0.0f;
  837. int quant_max;
  838. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  839. int b;
  840. dist += quantize_band_cost(s, coefs + w2*128,
  841. scaled + w2*128,
  842. sce->ics.swb_sizes[g],
  843. scf,
  844. ESC_BT,
  845. lambda,
  846. INFINITY,
  847. &b);
  848. dist -= b;
  849. }
  850. dist *= 1.0f / 512.0f / lambda;
  851. quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[200 - scf + SCALE_ONE_POS - SCALE_DIV_512]);
  852. if (quant_max >= 8191) { // too much, return to the previous quantizer
  853. sce->sf_idx[w*16+g] = prev_scf;
  854. break;
  855. }
  856. prev_scf = scf;
  857. curdiff = fabsf(dist - uplim[w*16+g]);
  858. if (curdiff <= 1.0f)
  859. step = 0;
  860. else
  861. step = log2(curdiff);
  862. if (dist > uplim[w*16+g])
  863. step = -step;
  864. scf += step;
  865. scf = av_clip_uint8(scf);
  866. step = scf - prev_scf;
  867. if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
  868. sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
  869. break;
  870. }
  871. if (step > 0)
  872. min_scf = prev_scf;
  873. else
  874. max_scf = prev_scf;
  875. }
  876. start += size;
  877. }
  878. }
  879. minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
  880. for (i = 1; i < 128; i++) {
  881. if (!sce->sf_idx[i])
  882. sce->sf_idx[i] = sce->sf_idx[i-1];
  883. else
  884. minq = FFMIN(minq, sce->sf_idx[i]);
  885. }
  886. if (minq == INT_MAX)
  887. minq = 0;
  888. minq = FFMIN(minq, SCALE_MAX_POS);
  889. maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
  890. for (i = 126; i >= 0; i--) {
  891. if (!sce->sf_idx[i])
  892. sce->sf_idx[i] = sce->sf_idx[i+1];
  893. sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
  894. }
  895. }
  896. static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
  897. SingleChannelElement *sce,
  898. const float lambda)
  899. {
  900. int start = 0, i, w, w2, g;
  901. int minq = 255;
  902. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  903. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  904. start = w*128;
  905. for (g = 0; g < sce->ics.num_swb; g++) {
  906. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  907. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  908. if (band->energy <= band->threshold) {
  909. sce->sf_idx[(w+w2)*16+g] = 218;
  910. sce->zeroes[(w+w2)*16+g] = 1;
  911. } else {
  912. sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2(band->threshold), 80, 218);
  913. sce->zeroes[(w+w2)*16+g] = 0;
  914. }
  915. minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
  916. }
  917. }
  918. }
  919. for (i = 0; i < 128; i++) {
  920. sce->sf_idx[i] = 140;
  921. //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
  922. }
  923. //set the same quantizers inside window groups
  924. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  925. for (g = 0; g < sce->ics.num_swb; g++)
  926. for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
  927. sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
  928. }
  929. static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
  930. const float lambda)
  931. {
  932. int start = 0, i, w, w2, g;
  933. float M[128], S[128];
  934. float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
  935. SingleChannelElement *sce0 = &cpe->ch[0];
  936. SingleChannelElement *sce1 = &cpe->ch[1];
  937. if (!cpe->common_window)
  938. return;
  939. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  940. for (g = 0; g < sce0->ics.num_swb; g++) {
  941. if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
  942. float dist1 = 0.0f, dist2 = 0.0f;
  943. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  944. FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
  945. FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
  946. float minthr = FFMIN(band0->threshold, band1->threshold);
  947. float maxthr = FFMAX(band0->threshold, band1->threshold);
  948. for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  949. M[i] = (sce0->coeffs[start+w2*128+i]
  950. + sce1->coeffs[start+w2*128+i]) * 0.5;
  951. S[i] = sce0->coeffs[start+w2*128+i]
  952. - sce1->coeffs[start+w2*128+i];
  953. }
  954. abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  955. abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  956. abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  957. abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  958. dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
  959. L34,
  960. sce0->ics.swb_sizes[g],
  961. sce0->sf_idx[(w+w2)*16+g],
  962. sce0->band_type[(w+w2)*16+g],
  963. lambda / band0->threshold, INFINITY, NULL);
  964. dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
  965. R34,
  966. sce1->ics.swb_sizes[g],
  967. sce1->sf_idx[(w+w2)*16+g],
  968. sce1->band_type[(w+w2)*16+g],
  969. lambda / band1->threshold, INFINITY, NULL);
  970. dist2 += quantize_band_cost(s, M,
  971. M34,
  972. sce0->ics.swb_sizes[g],
  973. sce0->sf_idx[(w+w2)*16+g],
  974. sce0->band_type[(w+w2)*16+g],
  975. lambda / maxthr, INFINITY, NULL);
  976. dist2 += quantize_band_cost(s, S,
  977. S34,
  978. sce1->ics.swb_sizes[g],
  979. sce1->sf_idx[(w+w2)*16+g],
  980. sce1->band_type[(w+w2)*16+g],
  981. lambda / minthr, INFINITY, NULL);
  982. }
  983. cpe->ms_mask[w*16+g] = dist2 < dist1;
  984. }
  985. start += sce0->ics.swb_sizes[g];
  986. }
  987. }
  988. }
  989. AACCoefficientsEncoder ff_aac_coders[] = {
  990. {
  991. search_for_quantizers_faac,
  992. encode_window_bands_info,
  993. quantize_and_encode_band,
  994. search_for_ms,
  995. },
  996. {
  997. search_for_quantizers_anmr,
  998. encode_window_bands_info,
  999. quantize_and_encode_band,
  1000. search_for_ms,
  1001. },
  1002. {
  1003. search_for_quantizers_twoloop,
  1004. codebook_trellis_rate,
  1005. quantize_and_encode_band,
  1006. search_for_ms,
  1007. },
  1008. {
  1009. search_for_quantizers_fast,
  1010. encode_window_bands_info,
  1011. quantize_and_encode_band,
  1012. search_for_ms,
  1013. },
  1014. };