You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1055 lines
40KB

  1. /*
  2. * AAC coefficients encoder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC coefficients encoder
  24. */
  25. /***********************************
  26. * TODOs:
  27. * speedup quantizer selection
  28. * add sane pulse detection
  29. ***********************************/
  30. #include <float.h>
  31. #include "avcodec.h"
  32. #include "put_bits.h"
  33. #include "aac.h"
  34. #include "aacenc.h"
  35. #include "aactab.h"
  36. /** bits needed to code codebook run value for long windows */
  37. static const uint8_t run_value_bits_long[64] = {
  38. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  39. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
  40. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
  41. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
  42. };
  43. /** bits needed to code codebook run value for short windows */
  44. static const uint8_t run_value_bits_short[16] = {
  45. 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
  46. };
  47. static const uint8_t *run_value_bits[2] = {
  48. run_value_bits_long, run_value_bits_short
  49. };
  50. /**
  51. * Quantize one coefficient.
  52. * @return absolute value of the quantized coefficient
  53. * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
  54. */
  55. static av_always_inline int quant(float coef, const float Q)
  56. {
  57. float a = coef * Q;
  58. return sqrtf(a * sqrtf(a)) + 0.4054;
  59. }
  60. static void quantize_bands(int *out, const float *in, const float *scaled,
  61. int size, float Q34, int is_signed, int maxval)
  62. {
  63. int i;
  64. double qc;
  65. for (i = 0; i < size; i++) {
  66. qc = scaled[i] * Q34;
  67. out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
  68. if (is_signed && in[i] < 0.0f) {
  69. out[i] = -out[i];
  70. }
  71. }
  72. }
  73. static void abs_pow34_v(float *out, const float *in, const int size)
  74. {
  75. #ifndef USE_REALLY_FULL_SEARCH
  76. int i;
  77. for (i = 0; i < size; i++) {
  78. float a = fabsf(in[i]);
  79. out[i] = sqrtf(a * sqrtf(a));
  80. }
  81. #endif /* USE_REALLY_FULL_SEARCH */
  82. }
  83. static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
  84. static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
  85. /**
  86. * Calculate rate distortion cost for quantizing with given codebook
  87. *
  88. * @return quantization distortion
  89. */
  90. static float quantize_and_encode_band_cost(struct AACEncContext *s,
  91. PutBitContext *pb, const float *in,
  92. const float *scaled, int size, int scale_idx,
  93. int cb, const float lambda, const float uplim,
  94. int *bits)
  95. {
  96. const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  97. const float Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  98. const float CLIPPED_ESCAPE = 165140.0f*IQ;
  99. int i, j, k;
  100. float cost = 0;
  101. const int dim = cb < FIRST_PAIR_BT ? 4 : 2;
  102. int resbits = 0;
  103. const float Q34 = sqrtf(Q * sqrtf(Q));
  104. const int range = aac_cb_range[cb];
  105. const int maxval = aac_cb_maxval[cb];
  106. int off;
  107. if (!cb) {
  108. for (i = 0; i < size; i++)
  109. cost += in[i]*in[i];
  110. if (bits)
  111. *bits = 0;
  112. return cost * lambda;
  113. }
  114. if (!scaled) {
  115. abs_pow34_v(s->scoefs, in, size);
  116. scaled = s->scoefs;
  117. }
  118. quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
  119. if (IS_CODEBOOK_UNSIGNED(cb)) {
  120. off = 0;
  121. } else {
  122. off = maxval;
  123. }
  124. for (i = 0; i < size; i += dim) {
  125. const float *vec;
  126. int *quants = s->qcoefs + i;
  127. int curidx = 0;
  128. int curbits;
  129. float rd = 0.0f;
  130. for (j = 0; j < dim; j++) {
  131. curidx *= range;
  132. curidx += quants[j] + off;
  133. }
  134. curbits = ff_aac_spectral_bits[cb-1][curidx];
  135. vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
  136. if (IS_CODEBOOK_UNSIGNED(cb)) {
  137. for (k = 0; k < dim; k++) {
  138. float t = fabsf(in[i+k]);
  139. float di;
  140. if (vec[k] == 64.0f) { //FIXME: slow
  141. if (t >= CLIPPED_ESCAPE) {
  142. di = t - CLIPPED_ESCAPE;
  143. curbits += 21;
  144. } else {
  145. int c = av_clip(quant(t, Q), 0, 8191);
  146. di = t - c*cbrtf(c)*IQ;
  147. curbits += av_log2(c)*2 - 4 + 1;
  148. }
  149. } else {
  150. di = t - vec[k]*IQ;
  151. }
  152. if (vec[k] != 0.0f)
  153. curbits++;
  154. rd += di*di;
  155. }
  156. } else {
  157. for (k = 0; k < dim; k++) {
  158. float di = in[i+k] - vec[k]*IQ;
  159. rd += di*di;
  160. }
  161. }
  162. cost += rd * lambda + curbits;
  163. resbits += curbits;
  164. if (cost >= uplim)
  165. return uplim;
  166. if (pb) {
  167. put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
  168. if (IS_CODEBOOK_UNSIGNED(cb))
  169. for (j = 0; j < dim; j++)
  170. if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
  171. put_bits(pb, 1, in[i+j] < 0.0f);
  172. if (cb == ESC_BT) {
  173. for (j = 0; j < 2; j++) {
  174. if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
  175. int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
  176. int len = av_log2(coef);
  177. put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
  178. put_bits(pb, len, coef & ((1 << len) - 1));
  179. }
  180. }
  181. }
  182. }
  183. }
  184. if (bits)
  185. *bits = resbits;
  186. return cost;
  187. }
  188. static float quantize_band_cost(struct AACEncContext *s, const float *in,
  189. const float *scaled, int size, int scale_idx,
  190. int cb, const float lambda, const float uplim,
  191. int *bits)
  192. {
  193. return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
  194. cb, lambda, uplim, bits);
  195. }
  196. static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
  197. const float *in, int size, int scale_idx,
  198. int cb, const float lambda)
  199. {
  200. quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
  201. INFINITY, NULL);
  202. }
  203. static float find_max_val(int group_len, int swb_size, const float *scaled) {
  204. float maxval = 0.0f;
  205. int w2, i;
  206. for (w2 = 0; w2 < group_len; w2++) {
  207. for (i = 0; i < swb_size; i++) {
  208. maxval = FFMAX(maxval, scaled[w2*128+i]);
  209. }
  210. }
  211. return maxval;
  212. }
  213. static int find_min_book(float maxval, int sf) {
  214. float Q = ff_aac_pow2sf_tab[200 - sf + SCALE_ONE_POS - SCALE_DIV_512];
  215. float Q34 = sqrtf(Q * sqrtf(Q));
  216. int qmaxval, cb;
  217. qmaxval = maxval * Q34 + 0.4054f;
  218. if (qmaxval == 0) cb = 0;
  219. else if (qmaxval == 1) cb = 1;
  220. else if (qmaxval == 2) cb = 3;
  221. else if (qmaxval <= 4) cb = 5;
  222. else if (qmaxval <= 7) cb = 7;
  223. else if (qmaxval <= 12) cb = 9;
  224. else cb = 11;
  225. return cb;
  226. }
  227. /**
  228. * structure used in optimal codebook search
  229. */
  230. typedef struct BandCodingPath {
  231. int prev_idx; ///< pointer to the previous path point
  232. float cost; ///< path cost
  233. int run;
  234. } BandCodingPath;
/**
 * Encode band info for single window group bands.
 *
 * Runs a trellis over the bands of one window group to choose a codebook
 * per band, then writes the (codebook, run length) sections to s->pb and
 * stores the decision in sce->band_type and sce->zeroes.
 *
 * @param s         encoder context (scratch buffers, psy bands, bit writer)
 * @param sce       channel whose coefficients and scalefactors are used
 * @param win       index of the first window of the group
 * @param group_len number of windows in the group
 * @param lambda    rate-distortion weight, divided by each band's threshold
 */
static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
                                     int win, int group_len, const float lambda)
{
    /* path[n][cb]: best way to code the first n bands with band n-1 using cb */
    BandCodingPath path[120][12];
    int w, swb, cb, start, start2, size;
    int i, j;
    const int max_sfb  = sce->ics.max_sfb;
    /* run lengths take 5 bits with a single window, 3 bits otherwise */
    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
    /* all-ones run value: signals that another run field follows */
    const int run_esc  = (1 << run_bits) - 1;
    int idx, ppos, count;
    int stackrun[120], stackcb[120], stack_len;
    /* cheapest path cost (and its codebook) at the previously evaluated band */
    float next_minrd = INFINITY;
    int next_mincb = 0;

    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
    start = win*128;
    for (cb = 0; cb < 12; cb++) {
        path[0][cb].cost     = 0.0f;
        path[0][cb].prev_idx = -1;
        path[0][cb].run      = 0;
    }
    for (swb = 0; swb < max_sfb; swb++) {
        start2 = start;
        size = sce->ics.swb_sizes[swb];
        if (sce->zeroes[win*16 + swb]) {
            /* zeroed band: extend every run at no extra cost */
            for (cb = 0; cb < 12; cb++) {
                path[swb+1][cb].prev_idx = cb;
                path[swb+1][cb].cost     = path[swb][cb].cost;
                path[swb+1][cb].run      = path[swb][cb].run + 1;
            }
        } else {
            float minrd = next_minrd;
            int mincb = next_mincb;
            next_minrd = INFINITY;
            next_mincb = 0;
            for (cb = 0; cb < 12; cb++) {
                float cost_stay_here, cost_get_here;
                float rd = 0.0f;
                /* cost of coding this band with cb, summed over the group's windows */
                for (w = 0; w < group_len; w++) {
                    FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
                    rd += quantize_band_cost(s, sce->coeffs + start + w*128,
                                             s->scoefs + start + w*128, size,
                                             sce->sf_idx[(win+w)*16+swb], cb,
                                             lambda / band->threshold, INFINITY, NULL);
                }
                /* either continue the current run of cb ... */
                cost_stay_here = path[swb][cb].cost + rd;
                /* ... or start a new section: 4 bits codebook + one run field */
                cost_get_here  = minrd              + rd + run_bits + 4;
                /* a longer run costs one more run field when it crosses an escape boundary */
                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
                    cost_stay_here += run_bits;
                if (cost_get_here < cost_stay_here) {
                    path[swb+1][cb].prev_idx = mincb;
                    path[swb+1][cb].cost     = cost_get_here;
                    path[swb+1][cb].run      = 1;
                } else {
                    path[swb+1][cb].prev_idx = cb;
                    path[swb+1][cb].cost     = cost_stay_here;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                }
                if (path[swb+1][cb].cost < next_minrd) {
                    next_minrd = path[swb+1][cb].cost;
                    next_mincb = cb;
                }
            }
        }
        start += sce->ics.swb_sizes[swb];
    }

    //convert resulting path from backward-linked list
    stack_len = 0;
    idx       = 0;
    /* start from the cheapest codebook at the last band */
    for (cb = 1; cb < 12; cb++)
        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
            idx = cb;
    ppos = max_sfb;
    while (ppos > 0) {
        cb = idx;
        stackrun[stack_len] = path[ppos][cb].run;
        stackcb [stack_len] = cb;
        /* the predecessor codebook is stored at the first band of this run */
        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
        ppos -= path[ppos][cb].run;
        stack_len++;
    }
    //perform actual band info encoding
    start = 0;
    /* sections were collected last-to-first, so emit them in reverse */
    for (i = stack_len - 1; i >= 0; i--) {
        put_bits(&s->pb, 4, stackcb[i]);
        count = stackrun[i];
        /* bands coded with codebook 0 are flagged as zeroed */
        memset(sce->zeroes + win*16 + start, !stackcb[i], count);
        //XXX: memset when band_type is also uint8_t
        for (j = 0; j < count; j++) {
            sce->band_type[win*16 + start] =  stackcb[i];
            start++;
        }
        /* run length: escape values while count >= run_esc, then the remainder */
        while (count >= run_esc) {
            put_bits(&s->pb, run_bits, run_esc);
            count -= run_esc;
        }
        put_bits(&s->pb, run_bits, count);
    }
}
/**
 * Choose codebooks for one window group by bit count only and encode
 * the resulting band info.
 *
 * Same trellis as encode_window_bands_info(), except that band costs are
 * evaluated with a lambda of 0 and that codebooks below the one already
 * stored in sce->band_type for a band are given a fixed large cost.
 * The lambda parameter is not used by this function.
 *
 * @param s         encoder context (scratch buffers, bit writer)
 * @param sce       channel whose coefficients, scalefactors and band types are used
 * @param win       index of the first window of the group
 * @param group_len number of windows in the group
 * @param lambda    unused
 */
static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
                                  int win, int group_len, const float lambda)
{
    /* path[n][cb]: best way to code the first n bands with band n-1 using cb */
    BandCodingPath path[120][12];
    int w, swb, cb, start, start2, size;
    int i, j;
    const int max_sfb  = sce->ics.max_sfb;
    /* run lengths take 5 bits with a single window, 3 bits otherwise */
    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
    /* all-ones run value: signals that another run field follows */
    const int run_esc  = (1 << run_bits) - 1;
    int idx, ppos, count;
    int stackrun[120], stackcb[120], stack_len;
    float next_minrd = INFINITY;
    int next_mincb = 0;

    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
    start = win*128;
    /* every path starts by paying for its first section header */
    for (cb = 0; cb < 12; cb++) {
        path[0][cb].cost     = run_bits+4;
        path[0][cb].prev_idx = -1;
        path[0][cb].run      = 0;
    }
    for (swb = 0; swb < max_sfb; swb++) {
        start2 = start;
        size = sce->ics.swb_sizes[swb];
        if (sce->zeroes[win*16 + swb]) {
            /* zeroed band: extend every run at no extra cost */
            for (cb = 0; cb < 12; cb++) {
                path[swb+1][cb].prev_idx = cb;
                path[swb+1][cb].cost     = path[swb][cb].cost;
                path[swb+1][cb].run      = path[swb][cb].run + 1;
            }
        } else {
            float minrd = next_minrd;
            int mincb = next_mincb;
            /* lowest codebook considered for this band */
            int startcb = sce->band_type[win*16+swb];
            next_minrd = INFINITY;
            next_mincb = 0;
            /* lower codebooks get a fixed large cost and no predecessor */
            for (cb = 0; cb < startcb; cb++) {
                path[swb+1][cb].cost = 61450;
                path[swb+1][cb].prev_idx = -1;
                path[swb+1][cb].run = 0;
            }
            for (cb = startcb; cb < 12; cb++) {
                float cost_stay_here, cost_get_here;
                float rd = 0.0f;
                /* lambda is 0 here, so only the bit count contributes */
                for (w = 0; w < group_len; w++) {
                    rd += quantize_band_cost(s, sce->coeffs + start + w*128,
                                             s->scoefs + start + w*128, size,
                                             sce->sf_idx[(win+w)*16+swb], cb,
                                             0, INFINITY, NULL);
                }
                /* either continue the current run of cb ... */
                cost_stay_here = path[swb][cb].cost + rd;
                /* ... or start a new section: 4 bits codebook + one run field */
                cost_get_here  = minrd              + rd + run_bits + 4;
                /* a longer run costs one more run field when it crosses an escape boundary */
                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
                    cost_stay_here += run_bits;
                if (cost_get_here < cost_stay_here) {
                    path[swb+1][cb].prev_idx = mincb;
                    path[swb+1][cb].cost     = cost_get_here;
                    path[swb+1][cb].run      = 1;
                } else {
                    path[swb+1][cb].prev_idx = cb;
                    path[swb+1][cb].cost     = cost_stay_here;
                    path[swb+1][cb].run      = path[swb][cb].run + 1;
                }
                if (path[swb+1][cb].cost < next_minrd) {
                    next_minrd = path[swb+1][cb].cost;
                    next_mincb = cb;
                }
            }
        }
        start += sce->ics.swb_sizes[swb];
    }

    //convert resulting path from backward-linked list
    stack_len = 0;
    idx       = 0;
    /* start from the cheapest codebook at the last band */
    for (cb = 1; cb < 12; cb++)
        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
            idx = cb;
    ppos = max_sfb;
    while (ppos > 0) {
        /* a negative index means the walk reached an entry without a predecessor */
        if (idx < 0) abort();
        cb = idx;
        stackrun[stack_len] = path[ppos][cb].run;
        stackcb [stack_len] = cb;
        /* the predecessor codebook is stored at the first band of this run */
        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
        ppos -= path[ppos][cb].run;
        stack_len++;
    }
    //perform actual band info encoding
    start = 0;
    /* sections were collected last-to-first, so emit them in reverse */
    for (i = stack_len - 1; i >= 0; i--) {
        put_bits(&s->pb, 4, stackcb[i]);
        count = stackrun[i];
        /* bands coded with codebook 0 are flagged as zeroed */
        memset(sce->zeroes + win*16 + start, !stackcb[i], count);
        //XXX: memset when band_type is also uint8_t
        for (j = 0; j < count; j++) {
            sce->band_type[win*16 + start] =  stackcb[i];
            start++;
        }
        /* run length: escape values while count >= run_esc, then the remainder */
        while (count >= run_esc) {
            put_bits(&s->pb, run_bits, run_esc);
            count -= run_esc;
        }
        put_bits(&s->pb, run_bits, count);
    }
}
  442. typedef struct TrellisPath {
  443. float cost;
  444. int prev;
  445. } TrellisPath;
  446. #define TRELLIS_STAGES 121
  447. #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
/**
 * Scalefactor search using a trellis over bands.
 *
 * For every band a range of candidate scalefactors is evaluated with
 * quantize_band_cost(); the trellis adds the cost of coding the difference
 * to the previous band's scalefactor and the cheapest path is selected.
 * Writes sce->sf_idx and sce->zeroes. avctx is not used here.
 *
 * @param avctx  codec context (unused)
 * @param s      encoder context (scratch buffers, psy bands)
 * @param sce    channel to search quantizers for
 * @param lambda rate-distortion weight, divided by each band's threshold
 */
static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                                       SingleChannelElement *sce,
                                       const float lambda)
{
    int q, w, w2, g, start = 0;
    int i, j;
    int idx;
    /* paths[stage][state]: stage = band counter, state = scalefactor minus q0 */
    TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
    /* sf_idx position belonging to each trellis stage */
    int bandaddr[TRELLIS_STAGES];
    int minq;
    float mincost;
    /* smallest / largest nonzero magnitude and total energy of the frame */
    float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
    int q0, q1, qcnt = 0;

    for (i = 0; i < 1024; i++) {
        float t = fabsf(sce->coeffs[i]);
        if (t > 0.0f) {
            q0f = FFMIN(q0f, t);
            q1f = FFMAX(q1f, t);
            qnrgf += t*t;
            qcnt++;
        }
    }

    /* all coefficients are zero: nothing to search */
    if (!qcnt) {
        memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
        memset(sce->zeroes, 1, sizeof(sce->zeroes));
        return;
    }

    //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
    q0 = av_clip_uint8(log2(q0f)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
    //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
    q1 = av_clip_uint8(log2(q1f)*4 +  6 + SCALE_ONE_POS - SCALE_DIV_512);
    //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
    /* limit the searched range to 60 steps, centred on the RMS-derived index */
    if (q1 - q0 > 60) {
        int q0low  = q0;
        int q1high = q1;
        //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
        int qnrg = av_clip_uint8(log2(sqrt(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
        q1 = qnrg + 30;
        q0 = qnrg - 30;
        //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
        /* shift the window back inside the original [q0low, q1high] range */
        if (q0 < q0low) {
            q1 += q0low - q0;
            q0  = q0low;
        } else if (q1 > q1high) {
            q0 -= q1 - q1high;
            q1  = q1high;
        }
    }
    //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);

    /* stage 0 is free to start from; every other node is unreached until updated */
    for (i = 0; i < TRELLIS_STATES; i++) {
        paths[0][i].cost    = 0.0f;
        paths[0][i].prev    = -1;
    }
    for (j = 1; j < TRELLIS_STAGES; j++) {
        for (i = 0; i < TRELLIS_STATES; i++) {
            paths[j][i].cost    = INFINITY;
            paths[j][i].prev    = -2;
        }
    }
    idx = 1;
    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = w*128;
        for (g = 0; g < sce->ics.num_swb; g++) {
            const float *coefs = sce->coeffs + start;
            float qmin, qmax;
            int nz = 0;

            bandaddr[idx] = w * 16 + g;
            qmin = INT_MAX;
            qmax = 0.0f;
            /* zero windows whose energy does not exceed the threshold and
             * collect the magnitude range of the remaining ones */
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                sce->zeroes[(w+w2)*16+g] = 0;
                nz = 1;
                for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
                    float t = fabsf(coefs[w2*128+i]);
                    if (t > 0.0f)
                        qmin = FFMIN(qmin, t);
                    qmax = FFMAX(qmax, t);
                }
            }
            if (nz) {
                int minscale, maxscale;
                float minrd = INFINITY;
                float maxval;
                //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
                minscale = av_clip_uint8(log2(qmin)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
                //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
                maxscale = av_clip_uint8(log2(qmax)*4 +  6 + SCALE_ONE_POS - SCALE_DIV_512);
                /* convert to trellis state indices relative to q0 */
                minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
                maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
                maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
                for (q = minscale; q < maxscale; q++) {
                    float dist = 0;
                    int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                        FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                        dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL);
                    }
                    minrd = FFMIN(minrd, dist);

                    /* relax: band cost plus bits for the scalefactor difference */
                    for (i = 0; i < q1 - q0; i++) {
                        float cost;
                        cost = paths[idx - 1][i].cost + dist
                               + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
                        if (cost < paths[idx][q].cost) {
                            paths[idx][q].cost    = cost;
                            paths[idx][q].prev    = i;
                        }
                    }
                }
            } else {
                /* zeroed band: keep the same state, at a cost of 1 */
                for (q = 0; q < q1 - q0; q++) {
                    paths[idx][q].cost = paths[idx - 1][q].cost + 1;
                    paths[idx][q].prev = q;
                }
            }
            sce->zeroes[w*16+g] = !nz;
            start += sce->ics.swb_sizes[g];
            idx++;
        }
    }
    /* pick the cheapest final state and walk the path backwards */
    idx--;
    mincost = paths[idx][0].cost;
    minq    = 0;
    for (i = 1; i < TRELLIS_STATES; i++) {
        if (paths[idx][i].cost < mincost) {
            mincost = paths[idx][i].cost;
            minq = i;
        }
    }
    while (idx) {
        sce->sf_idx[bandaddr[idx]] = minq + q0;
        minq = paths[idx][minq].prev;
        idx--;
    }
    //set the same quantizers inside window groups
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
        for (g = 0;  g < sce->ics.num_swb; g++)
            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
}
/**
 * two-loop quantizers search taken from ISO 13818-7 Appendix C
 *
 * The inner loop moves all scalefactors up or down until the counted bits
 * approach the per-channel bit budget; the outer loop lowers the scalefactor
 * of bands whose distortion exceeds their limit and repeats, for at most
 * 10 iterations. Writes sce->sf_idx, sce->zeroes and sce->band_type.
 * The lambda parameter is not used by this function.
 *
 * @param avctx  codec context; bit_rate, sample_rate and channels set the budget
 * @param s      encoder context (scratch buffers, psy bands)
 * @param sce    channel to search quantizers for
 * @param lambda unused
 */
static void search_for_quantizers_twoloop(AVCodecContext *avctx,
                                          AACEncContext *s,
                                          SingleChannelElement *sce,
                                          const float lambda)
{
    int start = 0, i, w, w2, g;
    /* bit budget for one frame of 1024 samples of one channel */
    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
    float dists[128], uplims[128];
    int fflag, minscaler;
    int its  = 0;
    /* becomes nonzero as soon as any band is not zeroed */
    int allz = 0;
    float minthr = INFINITY;

    //XXX: some heuristic to determine initial quantizers will reduce search time
    memset(dists, 0, sizeof(dists));
    //determine zero bands and upper limits
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
            int nz = 0;
            float uplim = 0.0f;
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                uplim += band->threshold;
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                nz = 1;
            }
            uplims[w*16+g] = uplim *512;
            sce->zeroes[w*16+g] = !nz;
            if (nz)
                minthr = FFMIN(minthr, uplim);
            allz = FFMAX(allz, nz);
        }
    }
    /* initial scalefactors: derived from each band's limit relative to the smallest threshold */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
            if (sce->zeroes[w*16+g]) {
                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
                continue;
            }
            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2(uplims[w*16+g]/minthr)*4,59);
        }
    }

    /* every band is zeroed: nothing left to search */
    if (!allz)
        return;
    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
    //perform two-loop search
    //outer loop - improve quality
    do {
        int tbits, qstep;
        minscaler = sce->sf_idx[0];
        //inner loop - quantize spectrum to fit into given number of bits
        /* coarse steps on the first outer iteration, single steps afterwards */
        qstep = its ? 1 : 32;
        do {
            int prev = -1;
            tbits = 0;
            fflag = 0;
            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
                start = w*128;
                for (g = 0;  g < sce->ics.num_swb; g++) {
                    const float *coefs = sce->coeffs + start;
                    const float *scaled = s->scoefs + start;
                    int bits = 0;
                    int cb;
                    float dist = 0.0f;

                    /* zeroed bands and bands at the scalefactor ceiling are not counted */
                    if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
                        start += sce->ics.swb_sizes[g];
                        continue;
                    }
                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
                    cb = find_min_book(find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled), sce->sf_idx[w*16+g]);
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                        int b;
                        dist += quantize_band_cost(s, coefs + w2*128,
                                                   scaled + w2*128,
                                                   sce->ics.swb_sizes[g],
                                                   sce->sf_idx[w*16+g],
                                                   cb,
                                                   1.0f,
                                                   INFINITY,
                                                   &b);
                        bits += b;
                    }
                    /* the returned cost includes the bits; subtract them to keep the distortion */
                    dists[w*16+g] = dist - bits;
                    if (prev != -1) {
                        bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
                    }
                    tbits += bits;
                    start += sce->ics.swb_sizes[g];
                    prev = sce->sf_idx[w*16+g];
                }
            }
            /* over budget: raise scalefactors; otherwise lower them */
            if (tbits > destbits) {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] < 218 - qstep)
                        sce->sf_idx[i] += qstep;
            } else {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] > 60 - qstep)
                        sce->sf_idx[i] -= qstep;
            }
            qstep >>= 1;
            /* keep stepping while more than 2% over budget */
            if (!qstep && tbits > destbits*1.02)
                qstep = 1;
            if (sce->sf_idx[0] >= 217)
                break;
        } while (qstep);

        fflag = 0;
        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
            start = w*128;
            for (g = 0; g < sce->ics.num_swb; g++) {
                int prevsc = sce->sf_idx[w*16+g];
                const float *scaled = s->scoefs + start;
                /* too much distortion in this band: use a finer quantizer */
                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60)
                    sce->sf_idx[w*16+g]--;
                /* keep all scalefactors within SCALE_MAX_DIFF of the smallest one */
                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
                /* any change requests another outer iteration */
                if (sce->sf_idx[w*16+g] != prevsc)
                    fflag = 1;
                sce->band_type[w*16+g] = find_min_book(find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled), sce->sf_idx[w*16+g]);
                start += sce->ics.swb_sizes[g];
            }
        }
        its++;
    } while (fflag && its < 10);
}
  725. static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
  726. SingleChannelElement *sce,
  727. const float lambda)
  728. {
  729. int start = 0, i, w, w2, g;
  730. float uplim[128], maxq[128];
  731. int minq, maxsf;
  732. float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
  733. int last = 0, lastband = 0, curband = 0;
  734. float avg_energy = 0.0;
  735. if (sce->ics.num_windows == 1) {
  736. start = 0;
  737. for (i = 0; i < 1024; i++) {
  738. if (i - start >= sce->ics.swb_sizes[curband]) {
  739. start += sce->ics.swb_sizes[curband];
  740. curband++;
  741. }
  742. if (sce->coeffs[i]) {
  743. avg_energy += sce->coeffs[i] * sce->coeffs[i];
  744. last = i;
  745. lastband = curband;
  746. }
  747. }
  748. } else {
  749. for (w = 0; w < 8; w++) {
  750. const float *coeffs = sce->coeffs + w*128;
  751. start = 0;
  752. for (i = 0; i < 128; i++) {
  753. if (i - start >= sce->ics.swb_sizes[curband]) {
  754. start += sce->ics.swb_sizes[curband];
  755. curband++;
  756. }
  757. if (coeffs[i]) {
  758. avg_energy += coeffs[i] * coeffs[i];
  759. last = FFMAX(last, i);
  760. lastband = FFMAX(lastband, curband);
  761. }
  762. }
  763. }
  764. }
  765. last++;
  766. avg_energy /= last;
  767. if (avg_energy == 0.0f) {
  768. for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
  769. sce->sf_idx[i] = SCALE_ONE_POS;
  770. return;
  771. }
  772. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  773. start = w*128;
  774. for (g = 0; g < sce->ics.num_swb; g++) {
  775. float *coefs = sce->coeffs + start;
  776. const int size = sce->ics.swb_sizes[g];
  777. int start2 = start, end2 = start + size, peakpos = start;
  778. float maxval = -1, thr = 0.0f, t;
  779. maxq[w*16+g] = 0.0f;
  780. if (g > lastband) {
  781. maxq[w*16+g] = 0.0f;
  782. start += size;
  783. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
  784. memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
  785. continue;
  786. }
  787. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  788. for (i = 0; i < size; i++) {
  789. float t = coefs[w2*128+i]*coefs[w2*128+i];
  790. maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
  791. thr += t;
  792. if (sce->ics.num_windows == 1 && maxval < t) {
  793. maxval = t;
  794. peakpos = start+i;
  795. }
  796. }
  797. }
  798. if (sce->ics.num_windows == 1) {
  799. start2 = FFMAX(peakpos - 2, start2);
  800. end2 = FFMIN(peakpos + 3, end2);
  801. } else {
  802. start2 -= start;
  803. end2 -= start;
  804. }
  805. start += size;
  806. thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
  807. t = 1.0 - (1.0 * start2 / last);
  808. uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
  809. }
  810. }
  811. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  812. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  813. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  814. start = w*128;
  815. for (g = 0; g < sce->ics.num_swb; g++) {
  816. const float *coefs = sce->coeffs + start;
  817. const float *scaled = s->scoefs + start;
  818. const int size = sce->ics.swb_sizes[g];
  819. int scf, prev_scf, step;
  820. int min_scf = -1, max_scf = 256;
  821. float curdiff;
  822. if (maxq[w*16+g] < 21.544) {
  823. sce->zeroes[w*16+g] = 1;
  824. start += size;
  825. continue;
  826. }
  827. sce->zeroes[w*16+g] = 0;
  828. scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2(1/maxq[w*16+g])*16/3, 60, 218);
  829. step = 16;
  830. for (;;) {
  831. float dist = 0.0f;
  832. int quant_max;
  833. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  834. int b;
  835. dist += quantize_band_cost(s, coefs + w2*128,
  836. scaled + w2*128,
  837. sce->ics.swb_sizes[g],
  838. scf,
  839. ESC_BT,
  840. lambda,
  841. INFINITY,
  842. &b);
  843. dist -= b;
  844. }
  845. dist *= 1.0f / 512.0f / lambda;
  846. quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[200 - scf + SCALE_ONE_POS - SCALE_DIV_512]);
  847. if (quant_max >= 8191) { // too much, return to the previous quantizer
  848. sce->sf_idx[w*16+g] = prev_scf;
  849. break;
  850. }
  851. prev_scf = scf;
  852. curdiff = fabsf(dist - uplim[w*16+g]);
  853. if (curdiff <= 1.0f)
  854. step = 0;
  855. else
  856. step = log2(curdiff);
  857. if (dist > uplim[w*16+g])
  858. step = -step;
  859. scf += step;
  860. scf = av_clip_uint8(scf);
  861. step = scf - prev_scf;
  862. if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
  863. sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
  864. break;
  865. }
  866. if (step > 0)
  867. min_scf = prev_scf;
  868. else
  869. max_scf = prev_scf;
  870. }
  871. start += size;
  872. }
  873. }
  874. minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
  875. for (i = 1; i < 128; i++) {
  876. if (!sce->sf_idx[i])
  877. sce->sf_idx[i] = sce->sf_idx[i-1];
  878. else
  879. minq = FFMIN(minq, sce->sf_idx[i]);
  880. }
  881. if (minq == INT_MAX)
  882. minq = 0;
  883. minq = FFMIN(minq, SCALE_MAX_POS);
  884. maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
  885. for (i = 126; i >= 0; i--) {
  886. if (!sce->sf_idx[i])
  887. sce->sf_idx[i] = sce->sf_idx[i+1];
  888. sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
  889. }
  890. }
  891. static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
  892. SingleChannelElement *sce,
  893. const float lambda)
  894. {
  895. int start = 0, i, w, w2, g;
  896. int minq = 255;
  897. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  898. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  899. start = w*128;
  900. for (g = 0; g < sce->ics.num_swb; g++) {
  901. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  902. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  903. if (band->energy <= band->threshold) {
  904. sce->sf_idx[(w+w2)*16+g] = 218;
  905. sce->zeroes[(w+w2)*16+g] = 1;
  906. } else {
  907. sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2(band->threshold), 80, 218);
  908. sce->zeroes[(w+w2)*16+g] = 0;
  909. }
  910. minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
  911. }
  912. }
  913. }
  914. for (i = 0; i < 128; i++) {
  915. sce->sf_idx[i] = 140;
  916. //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
  917. }
  918. //set the same quantizers inside window groups
  919. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  920. for (g = 0; g < sce->ics.num_swb; g++)
  921. for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
  922. sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
  923. }
  924. static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
  925. const float lambda)
  926. {
  927. int start = 0, i, w, w2, g;
  928. float M[128], S[128];
  929. float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
  930. SingleChannelElement *sce0 = &cpe->ch[0];
  931. SingleChannelElement *sce1 = &cpe->ch[1];
  932. if (!cpe->common_window)
  933. return;
  934. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  935. for (g = 0; g < sce0->ics.num_swb; g++) {
  936. if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
  937. float dist1 = 0.0f, dist2 = 0.0f;
  938. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  939. FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
  940. FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
  941. float minthr = FFMIN(band0->threshold, band1->threshold);
  942. float maxthr = FFMAX(band0->threshold, band1->threshold);
  943. for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  944. M[i] = (sce0->coeffs[start+w2*128+i]
  945. + sce1->coeffs[start+w2*128+i]) * 0.5;
  946. S[i] = sce0->coeffs[start+w2*128+i]
  947. - sce1->coeffs[start+w2*128+i];
  948. }
  949. abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  950. abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  951. abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  952. abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  953. dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
  954. L34,
  955. sce0->ics.swb_sizes[g],
  956. sce0->sf_idx[(w+w2)*16+g],
  957. sce0->band_type[(w+w2)*16+g],
  958. lambda / band0->threshold, INFINITY, NULL);
  959. dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
  960. R34,
  961. sce1->ics.swb_sizes[g],
  962. sce1->sf_idx[(w+w2)*16+g],
  963. sce1->band_type[(w+w2)*16+g],
  964. lambda / band1->threshold, INFINITY, NULL);
  965. dist2 += quantize_band_cost(s, M,
  966. M34,
  967. sce0->ics.swb_sizes[g],
  968. sce0->sf_idx[(w+w2)*16+g],
  969. sce0->band_type[(w+w2)*16+g],
  970. lambda / maxthr, INFINITY, NULL);
  971. dist2 += quantize_band_cost(s, S,
  972. S34,
  973. sce1->ics.swb_sizes[g],
  974. sce1->sf_idx[(w+w2)*16+g],
  975. sce1->band_type[(w+w2)*16+g],
  976. lambda / minthr, INFINITY, NULL);
  977. }
  978. cpe->ms_mask[w*16+g] = dist2 < dist1;
  979. }
  980. start += sce0->ics.swb_sizes[g];
  981. }
  982. }
  983. }
/**
 * Table of available coefficient coders. Each entry groups a quantizer
 * search, a band info encoder, a band quantize-and-encode routine and an
 * M/S decision routine; the entries differ in the quantizer search and,
 * for the two-loop entry, in the band info encoder.
 */
AACCoefficientsEncoder ff_aac_coders[] = {
    /* [0] faac-like quantizer search */
    {
        search_for_quantizers_faac,
        encode_window_bands_info,
        quantize_and_encode_band,
        search_for_ms,
    },
    /* [1] trellis (anmr) quantizer search */
    {
        search_for_quantizers_anmr,
        encode_window_bands_info,
        quantize_and_encode_band,
        search_for_ms,
    },
    /* [2] two-loop quantizer search with the rate-only codebook trellis */
    {
        search_for_quantizers_twoloop,
        codebook_trellis_rate,
        quantize_and_encode_band,
        search_for_ms,
    },
    /* [3] fast quantizer selection */
    {
        search_for_quantizers_fast,
        encode_window_bands_info,
        quantize_and_encode_band,
        search_for_ms,
    },
};