You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1053 lines
40KB

  1. /*
  2. * AAC coefficients encoder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC coefficients encoder
  24. */
  25. /***********************************
  26. * TODOs:
  27. * speedup quantizer selection
  28. * add sane pulse detection
  29. ***********************************/
  30. #include <float.h>
  31. #include "avcodec.h"
  32. #include "put_bits.h"
  33. #include "aac.h"
  34. #include "aacenc.h"
  35. #include "aactab.h"
  36. /** bits needed to code codebook run value for long windows */
  37. static const uint8_t run_value_bits_long[64] = {
  38. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  39. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
  40. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
  41. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
  42. };
  43. /** bits needed to code codebook run value for short windows */
  44. static const uint8_t run_value_bits_short[16] = {
  45. 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
  46. };
  47. static const uint8_t *run_value_bits[2] = {
  48. run_value_bits_long, run_value_bits_short
  49. };
  50. /**
  51. * Quantize one coefficient.
  52. * @return absolute value of the quantized coefficient
  53. * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
  54. */
  55. static av_always_inline int quant(float coef, const float Q)
  56. {
  57. float a = coef * Q;
  58. return sqrtf(a * sqrtf(a)) + 0.4054;
  59. }
  60. static void quantize_bands(int *out, const float *in, const float *scaled,
  61. int size, float Q34, int is_signed, int maxval)
  62. {
  63. int i;
  64. double qc;
  65. for (i = 0; i < size; i++) {
  66. qc = scaled[i] * Q34;
  67. out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
  68. if (is_signed && in[i] < 0.0f) {
  69. out[i] = -out[i];
  70. }
  71. }
  72. }
  73. static void abs_pow34_v(float *out, const float *in, const int size)
  74. {
  75. #ifndef USE_REALLY_FULL_SEARCH
  76. int i;
  77. for (i = 0; i < size; i++) {
  78. float a = fabsf(in[i]);
  79. out[i] = sqrtf(a * sqrtf(a));
  80. }
  81. #endif /* USE_REALLY_FULL_SEARCH */
  82. }
  83. static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
  84. static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
  85. /**
  86. * Calculate rate distortion cost for quantizing with given codebook
  87. *
  88. * @return quantization distortion
  89. */
  90. static float quantize_and_encode_band_cost(struct AACEncContext *s,
  91. PutBitContext *pb, const float *in,
  92. const float *scaled, int size, int scale_idx,
  93. int cb, const float lambda, const float uplim,
  94. int *bits)
  95. {
  96. const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  97. const float Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  98. const float CLIPPED_ESCAPE = 165140.0f*IQ;
  99. int i, j, k;
  100. float cost = 0;
  101. const int dim = cb < FIRST_PAIR_BT ? 4 : 2;
  102. int resbits = 0;
  103. const float Q34 = sqrtf(Q * sqrtf(Q));
  104. const int range = aac_cb_range[cb];
  105. const int maxval = aac_cb_maxval[cb];
  106. int off;
  107. if (!cb) {
  108. for (i = 0; i < size; i++)
  109. cost += in[i]*in[i];
  110. if (bits)
  111. *bits = 0;
  112. return cost * lambda;
  113. }
  114. if (!scaled) {
  115. abs_pow34_v(s->scoefs, in, size);
  116. scaled = s->scoefs;
  117. }
  118. quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
  119. if (IS_CODEBOOK_UNSIGNED(cb)) {
  120. off = 0;
  121. } else {
  122. off = maxval;
  123. }
  124. for (i = 0; i < size; i += dim) {
  125. const float *vec;
  126. int *quants = s->qcoefs + i;
  127. int curidx = 0;
  128. int curbits;
  129. float rd = 0.0f;
  130. for (j = 0; j < dim; j++) {
  131. curidx *= range;
  132. curidx += quants[j] + off;
  133. }
  134. curbits = ff_aac_spectral_bits[cb-1][curidx];
  135. vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
  136. if (IS_CODEBOOK_UNSIGNED(cb)) {
  137. for (k = 0; k < dim; k++) {
  138. float t = fabsf(in[i+k]);
  139. float di;
  140. if (vec[k] == 64.0f) { //FIXME: slow
  141. if (t >= CLIPPED_ESCAPE) {
  142. di = t - CLIPPED_ESCAPE;
  143. curbits += 21;
  144. } else {
  145. int c = av_clip(quant(t, Q), 0, 8191);
  146. di = t - c*cbrtf(c)*IQ;
  147. curbits += av_log2(c)*2 - 4 + 1;
  148. }
  149. } else {
  150. di = t - vec[k]*IQ;
  151. }
  152. if (vec[k] != 0.0f)
  153. curbits++;
  154. rd += di*di;
  155. }
  156. } else {
  157. for (k = 0; k < dim; k++) {
  158. float di = in[i+k] - vec[k]*IQ;
  159. rd += di*di;
  160. }
  161. }
  162. cost += rd * lambda + curbits;
  163. resbits += curbits;
  164. if (cost >= uplim)
  165. return uplim;
  166. if (pb) {
  167. put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
  168. if (IS_CODEBOOK_UNSIGNED(cb))
  169. for (j = 0; j < dim; j++)
  170. if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
  171. put_bits(pb, 1, in[i+j] < 0.0f);
  172. if (cb == ESC_BT) {
  173. for (j = 0; j < 2; j++) {
  174. if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
  175. int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
  176. int len = av_log2(coef);
  177. put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
  178. put_bits(pb, len, coef & ((1 << len) - 1));
  179. }
  180. }
  181. }
  182. }
  183. }
  184. if (bits)
  185. *bits = resbits;
  186. return cost;
  187. }
  188. static float quantize_band_cost(struct AACEncContext *s, const float *in,
  189. const float *scaled, int size, int scale_idx,
  190. int cb, const float lambda, const float uplim,
  191. int *bits)
  192. {
  193. return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
  194. cb, lambda, uplim, bits);
  195. }
  196. static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
  197. const float *in, int size, int scale_idx,
  198. int cb, const float lambda)
  199. {
  200. quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
  201. INFINITY, NULL);
  202. }
  203. static float find_max_val(int group_len, int swb_size, const float *scaled) {
  204. float maxval = 0.0f;
  205. int w2, i;
  206. for (w2 = 0; w2 < group_len; w2++) {
  207. for (i = 0; i < swb_size; i++) {
  208. maxval = FFMAX(maxval, scaled[w2*128+i]);
  209. }
  210. }
  211. return maxval;
  212. }
  213. static int find_min_book(float maxval, int sf) {
  214. float Q = ff_aac_pow2sf_tab[200 - sf + SCALE_ONE_POS - SCALE_DIV_512];
  215. float Q34 = sqrtf(Q * sqrtf(Q));
  216. int qmaxval, cb;
  217. qmaxval = maxval * Q34 + 0.4054f;
  218. if (qmaxval == 0) cb = 0;
  219. else if (qmaxval == 1) cb = 1;
  220. else if (qmaxval == 2) cb = 3;
  221. else if (qmaxval <= 4) cb = 5;
  222. else if (qmaxval <= 7) cb = 7;
  223. else if (qmaxval <= 12) cb = 9;
  224. else cb = 11;
  225. return cb;
  226. }
  227. /**
  228. * structure used in optimal codebook search
  229. */
  230. typedef struct BandCodingPath {
  231. int prev_idx; ///< pointer to the previous path point
  232. float cost; ///< path cost
  233. int run;
  234. } BandCodingPath;
  235. /**
  236. * Encode band info for single window group bands.
  237. */
  238. static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
  239. int win, int group_len, const float lambda)
  240. {
  241. BandCodingPath path[120][12];
  242. int w, swb, cb, start, start2, size;
  243. int i, j;
  244. const int max_sfb = sce->ics.max_sfb;
  245. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  246. const int run_esc = (1 << run_bits) - 1;
  247. int idx, ppos, count;
  248. int stackrun[120], stackcb[120], stack_len;
  249. float next_minrd = INFINITY;
  250. int next_mincb = 0;
  251. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  252. start = win*128;
  253. for (cb = 0; cb < 12; cb++) {
  254. path[0][cb].cost = 0.0f;
  255. path[0][cb].prev_idx = -1;
  256. path[0][cb].run = 0;
  257. }
  258. for (swb = 0; swb < max_sfb; swb++) {
  259. start2 = start;
  260. size = sce->ics.swb_sizes[swb];
  261. if (sce->zeroes[win*16 + swb]) {
  262. for (cb = 0; cb < 12; cb++) {
  263. path[swb+1][cb].prev_idx = cb;
  264. path[swb+1][cb].cost = path[swb][cb].cost;
  265. path[swb+1][cb].run = path[swb][cb].run + 1;
  266. }
  267. } else {
  268. float minrd = next_minrd;
  269. int mincb = next_mincb;
  270. next_minrd = INFINITY;
  271. next_mincb = 0;
  272. for (cb = 0; cb < 12; cb++) {
  273. float cost_stay_here, cost_get_here;
  274. float rd = 0.0f;
  275. for (w = 0; w < group_len; w++) {
  276. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
  277. rd += quantize_band_cost(s, sce->coeffs + start + w*128,
  278. s->scoefs + start + w*128, size,
  279. sce->sf_idx[(win+w)*16+swb], cb,
  280. lambda / band->threshold, INFINITY, NULL);
  281. }
  282. cost_stay_here = path[swb][cb].cost + rd;
  283. cost_get_here = minrd + rd + run_bits + 4;
  284. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  285. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  286. cost_stay_here += run_bits;
  287. if (cost_get_here < cost_stay_here) {
  288. path[swb+1][cb].prev_idx = mincb;
  289. path[swb+1][cb].cost = cost_get_here;
  290. path[swb+1][cb].run = 1;
  291. } else {
  292. path[swb+1][cb].prev_idx = cb;
  293. path[swb+1][cb].cost = cost_stay_here;
  294. path[swb+1][cb].run = path[swb][cb].run + 1;
  295. }
  296. if (path[swb+1][cb].cost < next_minrd) {
  297. next_minrd = path[swb+1][cb].cost;
  298. next_mincb = cb;
  299. }
  300. }
  301. }
  302. start += sce->ics.swb_sizes[swb];
  303. }
  304. //convert resulting path from backward-linked list
  305. stack_len = 0;
  306. idx = 0;
  307. for (cb = 1; cb < 12; cb++)
  308. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  309. idx = cb;
  310. ppos = max_sfb;
  311. while (ppos > 0) {
  312. cb = idx;
  313. stackrun[stack_len] = path[ppos][cb].run;
  314. stackcb [stack_len] = cb;
  315. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  316. ppos -= path[ppos][cb].run;
  317. stack_len++;
  318. }
  319. //perform actual band info encoding
  320. start = 0;
  321. for (i = stack_len - 1; i >= 0; i--) {
  322. put_bits(&s->pb, 4, stackcb[i]);
  323. count = stackrun[i];
  324. memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  325. //XXX: memset when band_type is also uint8_t
  326. for (j = 0; j < count; j++) {
  327. sce->band_type[win*16 + start] = stackcb[i];
  328. start++;
  329. }
  330. while (count >= run_esc) {
  331. put_bits(&s->pb, run_bits, run_esc);
  332. count -= run_esc;
  333. }
  334. put_bits(&s->pb, run_bits, count);
  335. }
  336. }
  337. static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
  338. int win, int group_len, const float lambda)
  339. {
  340. BandCodingPath path[120][12];
  341. int w, swb, cb, start, start2, size;
  342. int i, j;
  343. const int max_sfb = sce->ics.max_sfb;
  344. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  345. const int run_esc = (1 << run_bits) - 1;
  346. int idx, ppos, count;
  347. int stackrun[120], stackcb[120], stack_len;
  348. float next_minrd = INFINITY;
  349. int next_mincb = 0;
  350. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  351. start = win*128;
  352. for (cb = 0; cb < 12; cb++) {
  353. path[0][cb].cost = run_bits+4;
  354. path[0][cb].prev_idx = -1;
  355. path[0][cb].run = 0;
  356. }
  357. for (swb = 0; swb < max_sfb; swb++) {
  358. start2 = start;
  359. size = sce->ics.swb_sizes[swb];
  360. if (sce->zeroes[win*16 + swb]) {
  361. for (cb = 0; cb < 12; cb++) {
  362. path[swb+1][cb].prev_idx = cb;
  363. path[swb+1][cb].cost = path[swb][cb].cost;
  364. path[swb+1][cb].run = path[swb][cb].run + 1;
  365. }
  366. } else {
  367. float minrd = next_minrd;
  368. int mincb = next_mincb;
  369. int startcb = sce->band_type[win*16+swb];
  370. next_minrd = INFINITY;
  371. next_mincb = 0;
  372. for (cb = 0; cb < startcb; cb++) {
  373. path[swb+1][cb].cost = 61450;
  374. path[swb+1][cb].prev_idx = -1;
  375. path[swb+1][cb].run = 0;
  376. }
  377. for (cb = startcb; cb < 12; cb++) {
  378. float cost_stay_here, cost_get_here;
  379. float rd = 0.0f;
  380. for (w = 0; w < group_len; w++) {
  381. rd += quantize_band_cost(s, sce->coeffs + start + w*128,
  382. s->scoefs + start + w*128, size,
  383. sce->sf_idx[(win+w)*16+swb], cb,
  384. 0, INFINITY, NULL);
  385. }
  386. cost_stay_here = path[swb][cb].cost + rd;
  387. cost_get_here = minrd + rd + run_bits + 4;
  388. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  389. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  390. cost_stay_here += run_bits;
  391. if (cost_get_here < cost_stay_here) {
  392. path[swb+1][cb].prev_idx = mincb;
  393. path[swb+1][cb].cost = cost_get_here;
  394. path[swb+1][cb].run = 1;
  395. } else {
  396. path[swb+1][cb].prev_idx = cb;
  397. path[swb+1][cb].cost = cost_stay_here;
  398. path[swb+1][cb].run = path[swb][cb].run + 1;
  399. }
  400. if (path[swb+1][cb].cost < next_minrd) {
  401. next_minrd = path[swb+1][cb].cost;
  402. next_mincb = cb;
  403. }
  404. }
  405. }
  406. start += sce->ics.swb_sizes[swb];
  407. }
  408. //convert resulting path from backward-linked list
  409. stack_len = 0;
  410. idx = 0;
  411. for (cb = 1; cb < 12; cb++)
  412. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  413. idx = cb;
  414. ppos = max_sfb;
  415. while (ppos > 0) {
  416. if (idx < 0) abort();
  417. cb = idx;
  418. stackrun[stack_len] = path[ppos][cb].run;
  419. stackcb [stack_len] = cb;
  420. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  421. ppos -= path[ppos][cb].run;
  422. stack_len++;
  423. }
  424. //perform actual band info encoding
  425. start = 0;
  426. for (i = stack_len - 1; i >= 0; i--) {
  427. put_bits(&s->pb, 4, stackcb[i]);
  428. count = stackrun[i];
  429. memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  430. //XXX: memset when band_type is also uint8_t
  431. for (j = 0; j < count; j++) {
  432. sce->band_type[win*16 + start] = stackcb[i];
  433. start++;
  434. }
  435. while (count >= run_esc) {
  436. put_bits(&s->pb, run_bits, run_esc);
  437. count -= run_esc;
  438. }
  439. put_bits(&s->pb, run_bits, count);
  440. }
  441. }
  442. typedef struct TrellisPath {
  443. float cost;
  444. int prev;
  445. } TrellisPath;
  446. #define TRELLIS_STAGES 121
  447. #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
  448. static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
  449. SingleChannelElement *sce,
  450. const float lambda)
  451. {
  452. int q, w, w2, g, start = 0;
  453. int i, j;
  454. int idx;
  455. TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
  456. int bandaddr[TRELLIS_STAGES];
  457. int minq;
  458. float mincost;
  459. float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
  460. int q0, q1, qcnt = 0;
  461. for (i = 0; i < 1024; i++) {
  462. float t = fabsf(sce->coeffs[i]);
  463. if (t > 0.0f) {
  464. q0f = FFMIN(q0f, t);
  465. q1f = FFMAX(q1f, t);
  466. qnrgf += t*t;
  467. qcnt++;
  468. }
  469. }
  470. if (!qcnt) {
  471. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  472. memset(sce->zeroes, 1, sizeof(sce->zeroes));
  473. return;
  474. }
  475. //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
  476. q0 = av_clip_uint8(log2(q0f)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
  477. //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
  478. q1 = av_clip_uint8(log2(q1f)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
  479. //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
  480. if (q1 - q0 > 60) {
  481. int q0low = q0;
  482. int q1high = q1;
  483. //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
  484. int qnrg = av_clip_uint8(log2(sqrt(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
  485. q1 = qnrg + 30;
  486. q0 = qnrg - 30;
  487. //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
  488. if (q0 < q0low) {
  489. q1 += q0low - q0;
  490. q0 = q0low;
  491. } else if (q1 > q1high) {
  492. q0 -= q1 - q1high;
  493. q1 = q1high;
  494. }
  495. }
  496. //av_log(NULL, AV_LOG_ERROR, "q0 %d, q1 %d\n", q0, q1);
  497. for (i = 0; i < TRELLIS_STATES; i++) {
  498. paths[0][i].cost = 0.0f;
  499. paths[0][i].prev = -1;
  500. }
  501. for (j = 1; j < TRELLIS_STAGES; j++) {
  502. for (i = 0; i < TRELLIS_STATES; i++) {
  503. paths[j][i].cost = INFINITY;
  504. paths[j][i].prev = -2;
  505. }
  506. }
  507. idx = 1;
  508. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  509. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  510. start = w*128;
  511. for (g = 0; g < sce->ics.num_swb; g++) {
  512. const float *coefs = sce->coeffs + start;
  513. float qmin, qmax;
  514. int nz = 0;
  515. bandaddr[idx] = w * 16 + g;
  516. qmin = INT_MAX;
  517. qmax = 0.0f;
  518. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  519. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  520. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  521. sce->zeroes[(w+w2)*16+g] = 1;
  522. continue;
  523. }
  524. sce->zeroes[(w+w2)*16+g] = 0;
  525. nz = 1;
  526. for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
  527. float t = fabsf(coefs[w2*128+i]);
  528. if (t > 0.0f)
  529. qmin = FFMIN(qmin, t);
  530. qmax = FFMAX(qmax, t);
  531. }
  532. }
  533. if (nz) {
  534. int minscale, maxscale;
  535. float minrd = INFINITY;
  536. float maxval;
  537. //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
  538. minscale = av_clip_uint8(log2(qmin)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
  539. //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
  540. maxscale = av_clip_uint8(log2(qmax)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
  541. minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
  542. maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
  543. maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
  544. for (q = minscale; q < maxscale; q++) {
  545. float dist = 0;
  546. int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
  547. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  548. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  549. dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
  550. q + q0, cb, lambda / band->threshold, INFINITY, NULL);
  551. }
  552. minrd = FFMIN(minrd, dist);
  553. for (i = 0; i < q1 - q0; i++) {
  554. float cost;
  555. cost = paths[idx - 1][i].cost + dist
  556. + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
  557. if (cost < paths[idx][q].cost) {
  558. paths[idx][q].cost = cost;
  559. paths[idx][q].prev = i;
  560. }
  561. }
  562. }
  563. } else {
  564. for (q = 0; q < q1 - q0; q++) {
  565. paths[idx][q].cost = paths[idx - 1][q].cost + 1;
  566. paths[idx][q].prev = q;
  567. }
  568. }
  569. sce->zeroes[w*16+g] = !nz;
  570. start += sce->ics.swb_sizes[g];
  571. idx++;
  572. }
  573. }
  574. idx--;
  575. mincost = paths[idx][0].cost;
  576. minq = 0;
  577. for (i = 1; i < TRELLIS_STATES; i++) {
  578. if (paths[idx][i].cost < mincost) {
  579. mincost = paths[idx][i].cost;
  580. minq = i;
  581. }
  582. }
  583. while (idx) {
  584. sce->sf_idx[bandaddr[idx]] = minq + q0;
  585. minq = paths[idx][minq].prev;
  586. idx--;
  587. }
  588. //set the same quantizers inside window groups
  589. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  590. for (g = 0; g < sce->ics.num_swb; g++)
  591. for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
  592. sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
  593. }
  594. /**
  595. * two-loop quantizers search taken from ISO 13818-7 Appendix C
  596. */
  597. static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  598. AACEncContext *s,
  599. SingleChannelElement *sce,
  600. const float lambda)
  601. {
  602. int start = 0, i, w, w2, g;
  603. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
  604. float dists[128], uplims[128];
  605. int fflag, minscaler;
  606. int its = 0;
  607. int allz = 0;
  608. float minthr = INFINITY;
  609. //XXX: some heuristic to determine initial quantizers will reduce search time
  610. memset(dists, 0, sizeof(dists));
  611. //determine zero bands and upper limits
  612. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  613. for (g = 0; g < sce->ics.num_swb; g++) {
  614. int nz = 0;
  615. float uplim = 0.0f;
  616. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  617. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  618. uplim += band->threshold;
  619. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  620. sce->zeroes[(w+w2)*16+g] = 1;
  621. continue;
  622. }
  623. nz = 1;
  624. }
  625. uplims[w*16+g] = uplim *512;
  626. sce->zeroes[w*16+g] = !nz;
  627. if (nz)
  628. minthr = FFMIN(minthr, uplim);
  629. allz = FFMAX(allz, nz);
  630. }
  631. }
  632. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  633. for (g = 0; g < sce->ics.num_swb; g++) {
  634. if (sce->zeroes[w*16+g]) {
  635. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  636. continue;
  637. }
  638. sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2(uplims[w*16+g]/minthr)*4,59);
  639. }
  640. }
  641. if (!allz)
  642. return;
  643. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  644. //perform two-loop search
  645. //outer loop - improve quality
  646. do {
  647. int tbits, qstep;
  648. minscaler = sce->sf_idx[0];
  649. //inner loop - quantize spectrum to fit into given number of bits
  650. qstep = its ? 1 : 32;
  651. do {
  652. int prev = -1;
  653. tbits = 0;
  654. fflag = 0;
  655. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  656. start = w*128;
  657. for (g = 0; g < sce->ics.num_swb; g++) {
  658. const float *coefs = sce->coeffs + start;
  659. const float *scaled = s->scoefs + start;
  660. int bits = 0;
  661. int cb;
  662. float dist = 0.0f;
  663. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  664. start += sce->ics.swb_sizes[g];
  665. continue;
  666. }
  667. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  668. cb = find_min_book(find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled), sce->sf_idx[w*16+g]);
  669. sce->band_type[w*16+g] = cb;
  670. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  671. int b;
  672. dist += quantize_band_cost(s, coefs + w2*128,
  673. scaled + w2*128,
  674. sce->ics.swb_sizes[g],
  675. sce->sf_idx[w*16+g],
  676. cb,
  677. 1.0f,
  678. INFINITY,
  679. &b);
  680. bits += b;
  681. }
  682. dists[w*16+g] = dist - bits;
  683. if (prev != -1) {
  684. bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  685. }
  686. tbits += bits;
  687. start += sce->ics.swb_sizes[g];
  688. prev = sce->sf_idx[w*16+g];
  689. }
  690. }
  691. if (tbits > destbits) {
  692. for (i = 0; i < 128; i++)
  693. if (sce->sf_idx[i] < 218 - qstep)
  694. sce->sf_idx[i] += qstep;
  695. } else {
  696. for (i = 0; i < 128; i++)
  697. if (sce->sf_idx[i] > 60 - qstep)
  698. sce->sf_idx[i] -= qstep;
  699. }
  700. qstep >>= 1;
  701. if (!qstep && tbits > destbits*1.02)
  702. qstep = 1;
  703. if (sce->sf_idx[0] >= 217)
  704. break;
  705. } while (qstep);
  706. fflag = 0;
  707. minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
  708. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  709. start = w*128;
  710. for (g = 0; g < sce->ics.num_swb; g++) {
  711. int prevsc = sce->sf_idx[w*16+g];
  712. if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60)
  713. sce->sf_idx[w*16+g]--;
  714. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
  715. sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
  716. if (sce->sf_idx[w*16+g] != prevsc)
  717. fflag = 1;
  718. }
  719. }
  720. its++;
  721. } while (fflag && its < 10);
  722. }
  723. static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
  724. SingleChannelElement *sce,
  725. const float lambda)
  726. {
  727. int start = 0, i, w, w2, g;
  728. float uplim[128], maxq[128];
  729. int minq, maxsf;
  730. float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
  731. int last = 0, lastband = 0, curband = 0;
  732. float avg_energy = 0.0;
  733. if (sce->ics.num_windows == 1) {
  734. start = 0;
  735. for (i = 0; i < 1024; i++) {
  736. if (i - start >= sce->ics.swb_sizes[curband]) {
  737. start += sce->ics.swb_sizes[curband];
  738. curband++;
  739. }
  740. if (sce->coeffs[i]) {
  741. avg_energy += sce->coeffs[i] * sce->coeffs[i];
  742. last = i;
  743. lastband = curband;
  744. }
  745. }
  746. } else {
  747. for (w = 0; w < 8; w++) {
  748. const float *coeffs = sce->coeffs + w*128;
  749. start = 0;
  750. for (i = 0; i < 128; i++) {
  751. if (i - start >= sce->ics.swb_sizes[curband]) {
  752. start += sce->ics.swb_sizes[curband];
  753. curband++;
  754. }
  755. if (coeffs[i]) {
  756. avg_energy += coeffs[i] * coeffs[i];
  757. last = FFMAX(last, i);
  758. lastband = FFMAX(lastband, curband);
  759. }
  760. }
  761. }
  762. }
  763. last++;
  764. avg_energy /= last;
  765. if (avg_energy == 0.0f) {
  766. for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
  767. sce->sf_idx[i] = SCALE_ONE_POS;
  768. return;
  769. }
  770. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  771. start = w*128;
  772. for (g = 0; g < sce->ics.num_swb; g++) {
  773. float *coefs = sce->coeffs + start;
  774. const int size = sce->ics.swb_sizes[g];
  775. int start2 = start, end2 = start + size, peakpos = start;
  776. float maxval = -1, thr = 0.0f, t;
  777. maxq[w*16+g] = 0.0f;
  778. if (g > lastband) {
  779. maxq[w*16+g] = 0.0f;
  780. start += size;
  781. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
  782. memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
  783. continue;
  784. }
  785. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  786. for (i = 0; i < size; i++) {
  787. float t = coefs[w2*128+i]*coefs[w2*128+i];
  788. maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
  789. thr += t;
  790. if (sce->ics.num_windows == 1 && maxval < t) {
  791. maxval = t;
  792. peakpos = start+i;
  793. }
  794. }
  795. }
  796. if (sce->ics.num_windows == 1) {
  797. start2 = FFMAX(peakpos - 2, start2);
  798. end2 = FFMIN(peakpos + 3, end2);
  799. } else {
  800. start2 -= start;
  801. end2 -= start;
  802. }
  803. start += size;
  804. thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
  805. t = 1.0 - (1.0 * start2 / last);
  806. uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
  807. }
  808. }
  809. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  810. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  811. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  812. start = w*128;
  813. for (g = 0; g < sce->ics.num_swb; g++) {
  814. const float *coefs = sce->coeffs + start;
  815. const float *scaled = s->scoefs + start;
  816. const int size = sce->ics.swb_sizes[g];
  817. int scf, prev_scf, step;
  818. int min_scf = -1, max_scf = 256;
  819. float curdiff;
  820. if (maxq[w*16+g] < 21.544) {
  821. sce->zeroes[w*16+g] = 1;
  822. start += size;
  823. continue;
  824. }
  825. sce->zeroes[w*16+g] = 0;
  826. scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2(1/maxq[w*16+g])*16/3, 60, 218);
  827. step = 16;
  828. for (;;) {
  829. float dist = 0.0f;
  830. int quant_max;
  831. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  832. int b;
  833. dist += quantize_band_cost(s, coefs + w2*128,
  834. scaled + w2*128,
  835. sce->ics.swb_sizes[g],
  836. scf,
  837. ESC_BT,
  838. lambda,
  839. INFINITY,
  840. &b);
  841. dist -= b;
  842. }
  843. dist *= 1.0f / 512.0f / lambda;
  844. quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[200 - scf + SCALE_ONE_POS - SCALE_DIV_512]);
  845. if (quant_max >= 8191) { // too much, return to the previous quantizer
  846. sce->sf_idx[w*16+g] = prev_scf;
  847. break;
  848. }
  849. prev_scf = scf;
  850. curdiff = fabsf(dist - uplim[w*16+g]);
  851. if (curdiff <= 1.0f)
  852. step = 0;
  853. else
  854. step = log2(curdiff);
  855. if (dist > uplim[w*16+g])
  856. step = -step;
  857. scf += step;
  858. scf = av_clip_uint8(scf);
  859. step = scf - prev_scf;
  860. if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
  861. sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
  862. break;
  863. }
  864. if (step > 0)
  865. min_scf = prev_scf;
  866. else
  867. max_scf = prev_scf;
  868. }
  869. start += size;
  870. }
  871. }
  872. minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
  873. for (i = 1; i < 128; i++) {
  874. if (!sce->sf_idx[i])
  875. sce->sf_idx[i] = sce->sf_idx[i-1];
  876. else
  877. minq = FFMIN(minq, sce->sf_idx[i]);
  878. }
  879. if (minq == INT_MAX)
  880. minq = 0;
  881. minq = FFMIN(minq, SCALE_MAX_POS);
  882. maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
  883. for (i = 126; i >= 0; i--) {
  884. if (!sce->sf_idx[i])
  885. sce->sf_idx[i] = sce->sf_idx[i+1];
  886. sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
  887. }
  888. }
  889. static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
  890. SingleChannelElement *sce,
  891. const float lambda)
  892. {
  893. int start = 0, i, w, w2, g;
  894. int minq = 255;
  895. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  896. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  897. start = w*128;
  898. for (g = 0; g < sce->ics.num_swb; g++) {
  899. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  900. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  901. if (band->energy <= band->threshold) {
  902. sce->sf_idx[(w+w2)*16+g] = 218;
  903. sce->zeroes[(w+w2)*16+g] = 1;
  904. } else {
  905. sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2(band->threshold), 80, 218);
  906. sce->zeroes[(w+w2)*16+g] = 0;
  907. }
  908. minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
  909. }
  910. }
  911. }
  912. for (i = 0; i < 128; i++) {
  913. sce->sf_idx[i] = 140;
  914. //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
  915. }
  916. //set the same quantizers inside window groups
  917. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  918. for (g = 0; g < sce->ics.num_swb; g++)
  919. for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
  920. sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
  921. }
  922. static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
  923. const float lambda)
  924. {
  925. int start = 0, i, w, w2, g;
  926. float M[128], S[128];
  927. float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
  928. SingleChannelElement *sce0 = &cpe->ch[0];
  929. SingleChannelElement *sce1 = &cpe->ch[1];
  930. if (!cpe->common_window)
  931. return;
  932. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  933. for (g = 0; g < sce0->ics.num_swb; g++) {
  934. if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
  935. float dist1 = 0.0f, dist2 = 0.0f;
  936. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  937. FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
  938. FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
  939. float minthr = FFMIN(band0->threshold, band1->threshold);
  940. float maxthr = FFMAX(band0->threshold, band1->threshold);
  941. for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  942. M[i] = (sce0->coeffs[start+w2*128+i]
  943. + sce1->coeffs[start+w2*128+i]) * 0.5;
  944. S[i] = sce0->coeffs[start+w2*128+i]
  945. - sce1->coeffs[start+w2*128+i];
  946. }
  947. abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  948. abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  949. abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  950. abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  951. dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
  952. L34,
  953. sce0->ics.swb_sizes[g],
  954. sce0->sf_idx[(w+w2)*16+g],
  955. sce0->band_type[(w+w2)*16+g],
  956. lambda / band0->threshold, INFINITY, NULL);
  957. dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
  958. R34,
  959. sce1->ics.swb_sizes[g],
  960. sce1->sf_idx[(w+w2)*16+g],
  961. sce1->band_type[(w+w2)*16+g],
  962. lambda / band1->threshold, INFINITY, NULL);
  963. dist2 += quantize_band_cost(s, M,
  964. M34,
  965. sce0->ics.swb_sizes[g],
  966. sce0->sf_idx[(w+w2)*16+g],
  967. sce0->band_type[(w+w2)*16+g],
  968. lambda / maxthr, INFINITY, NULL);
  969. dist2 += quantize_band_cost(s, S,
  970. S34,
  971. sce1->ics.swb_sizes[g],
  972. sce1->sf_idx[(w+w2)*16+g],
  973. sce1->band_type[(w+w2)*16+g],
  974. lambda / minthr, INFINITY, NULL);
  975. }
  976. cpe->ms_mask[w*16+g] = dist2 < dist1;
  977. }
  978. start += sce0->ics.swb_sizes[g];
  979. }
  980. }
  981. }
  982. AACCoefficientsEncoder ff_aac_coders[] = {
  983. {
  984. search_for_quantizers_faac,
  985. encode_window_bands_info,
  986. quantize_and_encode_band,
  987. search_for_ms,
  988. },
  989. {
  990. search_for_quantizers_anmr,
  991. encode_window_bands_info,
  992. quantize_and_encode_band,
  993. search_for_ms,
  994. },
  995. {
  996. search_for_quantizers_twoloop,
  997. codebook_trellis_rate,
  998. quantize_and_encode_band,
  999. search_for_ms,
  1000. },
  1001. {
  1002. search_for_quantizers_fast,
  1003. encode_window_bands_info,
  1004. quantize_and_encode_band,
  1005. search_for_ms,
  1006. },
  1007. };