You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1043 lines
40KB

  1. /*
  2. * AAC coefficients encoder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC coefficients encoder
  24. */
  25. /***********************************
  26. * TODOs:
  27. * speedup quantizer selection
  28. * add sane pulse detection
  29. ***********************************/
  30. #include "avcodec.h"
  31. #include "put_bits.h"
  32. #include "aac.h"
  33. #include "aacenc.h"
  34. #include "aactab.h"
  35. /** bits needed to code codebook run value for long windows */
  36. static const uint8_t run_value_bits_long[64] = {
  37. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  38. 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
  39. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
  40. 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
  41. };
  42. /** bits needed to code codebook run value for short windows */
  43. static const uint8_t run_value_bits_short[16] = {
  44. 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
  45. };
  46. static const uint8_t *run_value_bits[2] = {
  47. run_value_bits_long, run_value_bits_short
  48. };
  49. /**
  50. * Quantize one coefficient.
  51. * @return absolute value of the quantized coefficient
  52. * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
  53. */
  54. static av_always_inline int quant(float coef, const float Q)
  55. {
  56. float a = coef * Q;
  57. return sqrtf(a * sqrtf(a)) + 0.4054;
  58. }
  59. static void quantize_bands(int *out, const float *in, const float *scaled,
  60. int size, float Q34, int is_signed, int maxval)
  61. {
  62. int i;
  63. double qc;
  64. for (i = 0; i < size; i++) {
  65. qc = scaled[i] * Q34;
  66. out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
  67. if (is_signed && in[i] < 0.0f) {
  68. out[i] = -out[i];
  69. }
  70. }
  71. }
  72. static void abs_pow34_v(float *out, const float *in, const int size)
  73. {
  74. #ifndef USE_REALLY_FULL_SEARCH
  75. int i;
  76. for (i = 0; i < size; i++) {
  77. float a = fabsf(in[i]);
  78. out[i] = sqrtf(a * sqrtf(a));
  79. }
  80. #endif /* USE_REALLY_FULL_SEARCH */
  81. }
  82. static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
  83. static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
  84. /**
  85. * Calculate rate distortion cost for quantizing with given codebook
  86. *
  87. * @return quantization distortion
  88. */
  89. static float quantize_and_encode_band_cost(struct AACEncContext *s,
  90. PutBitContext *pb, const float *in,
  91. const float *scaled, int size, int scale_idx,
  92. int cb, const float lambda, const float uplim,
  93. int *bits)
  94. {
  95. const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  96. const float Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  97. const float CLIPPED_ESCAPE = 165140.0f*IQ;
  98. int i, j, k;
  99. float cost = 0;
  100. const int dim = cb < FIRST_PAIR_BT ? 4 : 2;
  101. int resbits = 0;
  102. const float Q34 = sqrtf(Q * sqrtf(Q));
  103. const int range = aac_cb_range[cb];
  104. const int maxval = aac_cb_maxval[cb];
  105. int off;
  106. if (!cb) {
  107. for (i = 0; i < size; i++)
  108. cost += in[i]*in[i];
  109. if (bits)
  110. *bits = 0;
  111. return cost * lambda;
  112. }
  113. if (!scaled) {
  114. abs_pow34_v(s->scoefs, in, size);
  115. scaled = s->scoefs;
  116. }
  117. quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
  118. if (IS_CODEBOOK_UNSIGNED(cb)) {
  119. off = 0;
  120. } else {
  121. off = maxval;
  122. }
  123. for (i = 0; i < size; i += dim) {
  124. const float *vec;
  125. int *quants = s->qcoefs + i;
  126. int curidx = 0;
  127. int curbits;
  128. float rd = 0.0f;
  129. for (j = 0; j < dim; j++) {
  130. curidx *= range;
  131. curidx += quants[j] + off;
  132. }
  133. curbits = ff_aac_spectral_bits[cb-1][curidx];
  134. vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
  135. if (IS_CODEBOOK_UNSIGNED(cb)) {
  136. for (k = 0; k < dim; k++) {
  137. float t = fabsf(in[i+k]);
  138. float di;
  139. if (vec[k] == 64.0f) { //FIXME: slow
  140. if (t >= CLIPPED_ESCAPE) {
  141. di = t - CLIPPED_ESCAPE;
  142. curbits += 21;
  143. } else {
  144. int c = av_clip(quant(t, Q), 0, 8191);
  145. di = t - c*cbrtf(c)*IQ;
  146. curbits += av_log2(c)*2 - 4 + 1;
  147. }
  148. } else {
  149. di = t - vec[k]*IQ;
  150. }
  151. if (vec[k] != 0.0f)
  152. curbits++;
  153. rd += di*di;
  154. }
  155. } else {
  156. for (k = 0; k < dim; k++) {
  157. float di = in[i+k] - vec[k]*IQ;
  158. rd += di*di;
  159. }
  160. }
  161. cost += rd * lambda + curbits;
  162. resbits += curbits;
  163. if (cost >= uplim)
  164. return uplim;
  165. if (pb) {
  166. put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
  167. if (IS_CODEBOOK_UNSIGNED(cb))
  168. for (j = 0; j < dim; j++)
  169. if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
  170. put_bits(pb, 1, in[i+j] < 0.0f);
  171. if (cb == ESC_BT) {
  172. for (j = 0; j < 2; j++) {
  173. if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
  174. int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
  175. int len = av_log2(coef);
  176. put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
  177. put_bits(pb, len, coef & ((1 << len) - 1));
  178. }
  179. }
  180. }
  181. }
  182. }
  183. if (bits)
  184. *bits = resbits;
  185. return cost;
  186. }
  187. static float quantize_band_cost(struct AACEncContext *s, const float *in,
  188. const float *scaled, int size, int scale_idx,
  189. int cb, const float lambda, const float uplim,
  190. int *bits)
  191. {
  192. return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
  193. cb, lambda, uplim, bits);
  194. }
  195. static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
  196. const float *in, int size, int scale_idx,
  197. int cb, const float lambda)
  198. {
  199. quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
  200. INFINITY, NULL);
  201. }
/**
 * structure used in optimal codebook search
 *
 * One entry exists per (band position, codebook) pair; following prev_idx
 * backwards from the cheapest final entry yields the chosen codebook runs.
 */
typedef struct BandCodingPath {
    int prev_idx; ///< pointer to the previous path point
    float cost;   ///< path cost
    int run;      ///< number of consecutive bands on this path that use the same codebook
} BandCodingPath;
  210. /**
  211. * Encode band info for single window group bands.
  212. */
  213. static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
  214. int win, int group_len, const float lambda)
  215. {
  216. BandCodingPath path[120][12];
  217. int w, swb, cb, start, start2, size;
  218. int i, j;
  219. const int max_sfb = sce->ics.max_sfb;
  220. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  221. const int run_esc = (1 << run_bits) - 1;
  222. int idx, ppos, count;
  223. int stackrun[120], stackcb[120], stack_len;
  224. float next_minrd = INFINITY;
  225. int next_mincb = 0;
  226. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  227. start = win*128;
  228. for (cb = 0; cb < 12; cb++) {
  229. path[0][cb].cost = 0.0f;
  230. path[0][cb].prev_idx = -1;
  231. path[0][cb].run = 0;
  232. }
  233. for (swb = 0; swb < max_sfb; swb++) {
  234. start2 = start;
  235. size = sce->ics.swb_sizes[swb];
  236. if (sce->zeroes[win*16 + swb]) {
  237. for (cb = 0; cb < 12; cb++) {
  238. path[swb+1][cb].prev_idx = cb;
  239. path[swb+1][cb].cost = path[swb][cb].cost;
  240. path[swb+1][cb].run = path[swb][cb].run + 1;
  241. }
  242. } else {
  243. float minrd = next_minrd;
  244. int mincb = next_mincb;
  245. next_minrd = INFINITY;
  246. next_mincb = 0;
  247. for (cb = 0; cb < 12; cb++) {
  248. float cost_stay_here, cost_get_here;
  249. float rd = 0.0f;
  250. for (w = 0; w < group_len; w++) {
  251. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
  252. rd += quantize_band_cost(s, sce->coeffs + start + w*128,
  253. s->scoefs + start + w*128, size,
  254. sce->sf_idx[(win+w)*16+swb], cb,
  255. lambda / band->threshold, INFINITY, NULL);
  256. }
  257. cost_stay_here = path[swb][cb].cost + rd;
  258. cost_get_here = minrd + rd + run_bits + 4;
  259. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  260. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  261. cost_stay_here += run_bits;
  262. if (cost_get_here < cost_stay_here) {
  263. path[swb+1][cb].prev_idx = mincb;
  264. path[swb+1][cb].cost = cost_get_here;
  265. path[swb+1][cb].run = 1;
  266. } else {
  267. path[swb+1][cb].prev_idx = cb;
  268. path[swb+1][cb].cost = cost_stay_here;
  269. path[swb+1][cb].run = path[swb][cb].run + 1;
  270. }
  271. if (path[swb+1][cb].cost < next_minrd) {
  272. next_minrd = path[swb+1][cb].cost;
  273. next_mincb = cb;
  274. }
  275. }
  276. }
  277. start += sce->ics.swb_sizes[swb];
  278. }
  279. //convert resulting path from backward-linked list
  280. stack_len = 0;
  281. idx = 0;
  282. for (cb = 1; cb < 12; cb++)
  283. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  284. idx = cb;
  285. ppos = max_sfb;
  286. while (ppos > 0) {
  287. cb = idx;
  288. stackrun[stack_len] = path[ppos][cb].run;
  289. stackcb [stack_len] = cb;
  290. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  291. ppos -= path[ppos][cb].run;
  292. stack_len++;
  293. }
  294. //perform actual band info encoding
  295. start = 0;
  296. for (i = stack_len - 1; i >= 0; i--) {
  297. put_bits(&s->pb, 4, stackcb[i]);
  298. count = stackrun[i];
  299. memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  300. //XXX: memset when band_type is also uint8_t
  301. for (j = 0; j < count; j++) {
  302. sce->band_type[win*16 + start] = stackcb[i];
  303. start++;
  304. }
  305. while (count >= run_esc) {
  306. put_bits(&s->pb, run_bits, run_esc);
  307. count -= run_esc;
  308. }
  309. put_bits(&s->pb, run_bits, count);
  310. }
  311. }
  312. static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
  313. int win, int group_len, const float lambda)
  314. {
  315. BandCodingPath path[120][12];
  316. int w, swb, cb, start, start2, size;
  317. int i, j;
  318. const int max_sfb = sce->ics.max_sfb;
  319. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  320. const int run_esc = (1 << run_bits) - 1;
  321. int idx, ppos, count;
  322. int stackrun[120], stackcb[120], stack_len;
  323. float next_minrd = INFINITY;
  324. int next_mincb = 0;
  325. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  326. start = win*128;
  327. for (cb = 0; cb < 12; cb++) {
  328. path[0][cb].cost = run_bits+4;
  329. path[0][cb].prev_idx = -1;
  330. path[0][cb].run = 0;
  331. }
  332. for (swb = 0; swb < max_sfb; swb++) {
  333. start2 = start;
  334. size = sce->ics.swb_sizes[swb];
  335. if (sce->zeroes[win*16 + swb]) {
  336. for (cb = 0; cb < 12; cb++) {
  337. path[swb+1][cb].prev_idx = cb;
  338. path[swb+1][cb].cost = path[swb][cb].cost;
  339. path[swb+1][cb].run = path[swb][cb].run + 1;
  340. }
  341. } else {
  342. float minrd = next_minrd;
  343. int mincb = next_mincb;
  344. int startcb = sce->band_type[win*16+swb];
  345. next_minrd = INFINITY;
  346. next_mincb = 0;
  347. for (cb = 0; cb < startcb; cb++) {
  348. path[swb+1][cb].cost = 61450;
  349. path[swb+1][cb].prev_idx = -1;
  350. path[swb+1][cb].run = 0;
  351. }
  352. for (cb = startcb; cb < 12; cb++) {
  353. float cost_stay_here, cost_get_here;
  354. float rd = 0.0f;
  355. for (w = 0; w < group_len; w++) {
  356. rd += quantize_band_cost(s, sce->coeffs + start + w*128,
  357. s->scoefs + start + w*128, size,
  358. sce->sf_idx[(win+w)*16+swb], cb,
  359. 0, INFINITY, NULL);
  360. }
  361. cost_stay_here = path[swb][cb].cost + rd;
  362. cost_get_here = minrd + rd + run_bits + 4;
  363. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  364. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  365. cost_stay_here += run_bits;
  366. if (cost_get_here < cost_stay_here) {
  367. path[swb+1][cb].prev_idx = mincb;
  368. path[swb+1][cb].cost = cost_get_here;
  369. path[swb+1][cb].run = 1;
  370. } else {
  371. path[swb+1][cb].prev_idx = cb;
  372. path[swb+1][cb].cost = cost_stay_here;
  373. path[swb+1][cb].run = path[swb][cb].run + 1;
  374. }
  375. if (path[swb+1][cb].cost < next_minrd) {
  376. next_minrd = path[swb+1][cb].cost;
  377. next_mincb = cb;
  378. }
  379. }
  380. }
  381. start += sce->ics.swb_sizes[swb];
  382. }
  383. //convert resulting path from backward-linked list
  384. stack_len = 0;
  385. idx = 0;
  386. for (cb = 1; cb < 12; cb++)
  387. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  388. idx = cb;
  389. ppos = max_sfb;
  390. while (ppos > 0) {
  391. if (idx < 0) abort();
  392. cb = idx;
  393. stackrun[stack_len] = path[ppos][cb].run;
  394. stackcb [stack_len] = cb;
  395. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  396. ppos -= path[ppos][cb].run;
  397. stack_len++;
  398. }
  399. //perform actual band info encoding
  400. start = 0;
  401. for (i = stack_len - 1; i >= 0; i--) {
  402. put_bits(&s->pb, 4, stackcb[i]);
  403. count = stackrun[i];
  404. memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  405. //XXX: memset when band_type is also uint8_t
  406. for (j = 0; j < count; j++) {
  407. sce->band_type[win*16 + start] = stackcb[i];
  408. start++;
  409. }
  410. while (count >= run_esc) {
  411. put_bits(&s->pb, run_bits, run_esc);
  412. count -= run_esc;
  413. }
  414. put_bits(&s->pb, run_bits, count);
  415. }
  416. }
/**
 * One node of the scalefactor search trellis: the best known way to reach
 * a given scalefactor value at a given band.
 */
typedef struct TrellisPath {
    float cost;  ///< accumulated cost of the path ending in this node
    int prev;    ///< state (scalefactor value) of the previous node on the path
    int min_val; ///< smallest scalefactor value used along the path
    int max_val; ///< largest scalefactor value used along the path
} TrellisPath;

/** number of trellis stages: stage 0 is the start, one more stage per band */
#define TRELLIS_STAGES 121
/** number of trellis states per stage: one per scalefactor value */
#define TRELLIS_STATES 256
  425. static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
  426. SingleChannelElement *sce,
  427. const float lambda)
  428. {
  429. int q, w, w2, g, start = 0;
  430. int i, j;
  431. int idx;
  432. TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
  433. int bandaddr[TRELLIS_STAGES];
  434. int minq;
  435. float mincost;
  436. for (i = 0; i < TRELLIS_STATES; i++) {
  437. paths[0][i].cost = 0.0f;
  438. paths[0][i].prev = -1;
  439. paths[0][i].min_val = i;
  440. paths[0][i].max_val = i;
  441. }
  442. for (j = 1; j < TRELLIS_STAGES; j++) {
  443. for (i = 0; i < TRELLIS_STATES; i++) {
  444. paths[j][i].cost = INFINITY;
  445. paths[j][i].prev = -2;
  446. paths[j][i].min_val = INT_MAX;
  447. paths[j][i].max_val = 0;
  448. }
  449. }
  450. idx = 1;
  451. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  452. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  453. start = w*128;
  454. for (g = 0; g < sce->ics.num_swb; g++) {
  455. const float *coefs = sce->coeffs + start;
  456. float qmin, qmax;
  457. int nz = 0;
  458. bandaddr[idx] = w * 16 + g;
  459. qmin = INT_MAX;
  460. qmax = 0.0f;
  461. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  462. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  463. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  464. sce->zeroes[(w+w2)*16+g] = 1;
  465. continue;
  466. }
  467. sce->zeroes[(w+w2)*16+g] = 0;
  468. nz = 1;
  469. for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
  470. float t = fabsf(coefs[w2*128+i]);
  471. if (t > 0.0f)
  472. qmin = FFMIN(qmin, t);
  473. qmax = FFMAX(qmax, t);
  474. }
  475. }
  476. if (nz) {
  477. int minscale, maxscale;
  478. float minrd = INFINITY;
  479. //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
  480. minscale = av_clip_uint8(log2(qmin)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
  481. //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
  482. maxscale = av_clip_uint8(log2(qmax)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
  483. for (q = minscale; q < maxscale; q++) {
  484. float dists[12], dist;
  485. memset(dists, 0, sizeof(dists));
  486. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  487. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  488. int cb;
  489. for (cb = 0; cb <= ESC_BT; cb++)
  490. dists[cb] += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
  491. q, cb, lambda / band->threshold, INFINITY, NULL);
  492. }
  493. dist = dists[0];
  494. for (i = 1; i <= ESC_BT; i++)
  495. dist = FFMIN(dist, dists[i]);
  496. minrd = FFMIN(minrd, dist);
  497. for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
  498. float cost;
  499. int minv, maxv;
  500. if (isinf(paths[idx - 1][i].cost))
  501. continue;
  502. cost = paths[idx - 1][i].cost + dist
  503. + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
  504. minv = FFMIN(paths[idx - 1][i].min_val, q);
  505. maxv = FFMAX(paths[idx - 1][i].max_val, q);
  506. if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
  507. paths[idx][q].cost = cost;
  508. paths[idx][q].prev = i;
  509. paths[idx][q].min_val = minv;
  510. paths[idx][q].max_val = maxv;
  511. }
  512. }
  513. }
  514. } else {
  515. for (q = 0; q < TRELLIS_STATES; q++) {
  516. if (!isinf(paths[idx - 1][q].cost)) {
  517. paths[idx][q].cost = paths[idx - 1][q].cost + 1;
  518. paths[idx][q].prev = q;
  519. paths[idx][q].min_val = FFMIN(paths[idx - 1][q].min_val, q);
  520. paths[idx][q].max_val = FFMAX(paths[idx - 1][q].max_val, q);
  521. continue;
  522. }
  523. for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
  524. float cost;
  525. int minv, maxv;
  526. if (isinf(paths[idx - 1][i].cost))
  527. continue;
  528. cost = paths[idx - 1][i].cost + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
  529. minv = FFMIN(paths[idx - 1][i].min_val, q);
  530. maxv = FFMAX(paths[idx - 1][i].max_val, q);
  531. if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
  532. paths[idx][q].cost = cost;
  533. paths[idx][q].prev = i;
  534. paths[idx][q].min_val = minv;
  535. paths[idx][q].max_val = maxv;
  536. }
  537. }
  538. }
  539. }
  540. sce->zeroes[w*16+g] = !nz;
  541. start += sce->ics.swb_sizes[g];
  542. idx++;
  543. }
  544. }
  545. idx--;
  546. mincost = paths[idx][0].cost;
  547. minq = 0;
  548. for (i = 1; i < TRELLIS_STATES; i++) {
  549. if (paths[idx][i].cost < mincost) {
  550. mincost = paths[idx][i].cost;
  551. minq = i;
  552. }
  553. }
  554. while (idx) {
  555. sce->sf_idx[bandaddr[idx]] = minq;
  556. minq = paths[idx][minq].prev;
  557. idx--;
  558. }
  559. //set the same quantizers inside window groups
  560. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  561. for (g = 0; g < sce->ics.num_swb; g++)
  562. for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
  563. sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
  564. }
/**
 * two-loop quantizers search taken from ISO 13818-7 Appendix C
 *
 * The inner loop moves all scalefactors up or down until the counted bits
 * are close to the bit budget; the outer loop lowers the scalefactor of
 * bands whose distortion exceeds their limit and repeats, at most 10 times.
 * Results are written to sce->sf_idx, sce->zeroes and sce->band_type.
 *
 * @param avctx  codec context; bit_rate, sample_rate and channels give the budget
 * @param s      encoder context (s->scoefs is overwritten as scratch)
 * @param sce    channel element to search quantizers for
 * @param lambda distortion weight passed to the band cost evaluation
 */
static void search_for_quantizers_twoloop(AVCodecContext *avctx,
                                          AACEncContext *s,
                                          SingleChannelElement *sce,
                                          const float lambda)
{
    int start = 0, i, w, w2, g;
    /* bit budget per channel for 1024 samples */
    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
    float dists[128], uplims[128];
    int fflag, minscaler;
    int its = 0;
    /* despite its name this becomes nonzero once any band is NOT zeroed */
    int allz = 0;
    float minthr = INFINITY;

    //XXX: some heuristic to determine initial quantizers will reduce search time
    memset(dists, 0, sizeof(dists));
    //determine zero bands and upper limits
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
            int nz = 0;
            float uplim = 0.0f;
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                uplim += band->threshold;
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                nz = 1;
            }
            uplims[w*16+g] = uplim *512;
            sce->zeroes[w*16+g] = !nz;
            if (nz)
                minthr = FFMIN(minthr, uplim);
            allz = FFMAX(allz, nz);
        }
    }
    /* initial scalefactors: offset from SCALE_ONE_POS by the band limit
     * relative to the smallest threshold, capped at 59 */
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0;  g < sce->ics.num_swb; g++) {
            if (sce->zeroes[w*16+g]) {
                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
                continue;
            }
            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2(uplims[w*16+g]/minthr)*4,59);
        }
    }

    /* nothing to search when every band is zeroed */
    if (!allz)
        return;
    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
    //perform two-loop search
    //outer loop - improve quality
    do {
        int tbits, qstep;
        minscaler = sce->sf_idx[0];
        //inner loop - quantize spectrum to fit into given number of bits
        /* coarse steps on the first outer iteration, unit steps afterwards */
        qstep = its ? 1 : 32;
        do {
            int prev = -1;
            tbits = 0;
            fflag = 0;
            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
                start = w*128;
                for (g = 0;  g < sce->ics.num_swb; g++) {
                    const float *coefs = sce->coeffs + start;
                    const float *scaled = s->scoefs + start;
                    int bits = 0;
                    int cb;
                    float mindist = INFINITY;
                    int minbits = 0;

                    if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
                        start += sce->ics.swb_sizes[g];
                        continue;
                    }
                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
                    {
                        float dist = 0.0f;
                        int bb = 0;
                        float maxval = 0.0f;
                        float Q = ff_aac_pow2sf_tab[200 - sce->sf_idx[w*16+g] + SCALE_ONE_POS - SCALE_DIV_512];
                        float Q34 = sqrtf(Q * sqrtf(Q));
                        int qmaxval;
                        for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                            for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
                                maxval = FFMAX(maxval, scaled[w2*128+i]);
                            }
                        }
                        /* pick the codebook from the largest quantized value */
                        qmaxval = maxval * Q34 + 0.4054;
                        if      (qmaxval ==  0) cb = 0;
                        else if (qmaxval ==  1) cb = 1;
                        else if (qmaxval ==  2) cb = 3;
                        else if (qmaxval <=  4) cb = 5;
                        else if (qmaxval <=  7) cb = 7;
                        else if (qmaxval <= 12) cb = 9;
                        else                    cb = 11;
                        sce->band_type[w*16+g] = cb;
                        for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                            int b;
                            dist += quantize_band_cost(s, coefs + w2*128,
                                                       scaled + w2*128,
                                                       sce->ics.swb_sizes[g],
                                                       sce->sf_idx[w*16+g],
                                                       cb,
                                                       lambda,
                                                       INFINITY,
                                                       &b);
                            bb += b;
                        }
                        mindist = dist;
                        minbits = bb;
                    }
                    /* the band cost is distortion*lambda + bits; recover the
                     * plain distortion */
                    dists[w*16+g] = (mindist - minbits) / lambda;
                    bits = minbits;
                    /* add the cost of coding the scalefactor difference */
                    if (prev != -1) {
                        bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
                    }
                    tbits += bits;
                    start += sce->ics.swb_sizes[g];
                    prev = sce->sf_idx[w*16+g];
                }
            }
            /* too many bits: raise all scalefactors, otherwise lower them */
            if (tbits > destbits) {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] < 218 - qstep)
                        sce->sf_idx[i] += qstep;
            } else {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] > 60 - qstep)
                        sce->sf_idx[i] -= qstep;
            }
            qstep >>= 1;
            /* keep stepping by one while more than 2% over the budget */
            if (!qstep && tbits > destbits*1.02)
                qstep = 1;
            if (sce->sf_idx[0] >= 217)
                break;
        } while (qstep);

        fflag = 0;
        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
            start = w*128;
            for (g = 0; g < sce->ics.num_swb; g++) {
                int prevsc = sce->sf_idx[w*16+g];
                /* distortion above the limit: use a finer quantizer */
                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60)
                    sce->sf_idx[w*16+g]--;
                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
                /* any change requests another outer iteration */
                if (sce->sf_idx[w*16+g] != prevsc)
                    fflag = 1;
            }
        }
        its++;
    } while (fflag && its < 10);
}
  718. static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
  719. SingleChannelElement *sce,
  720. const float lambda)
  721. {
  722. int start = 0, i, w, w2, g;
  723. float uplim[128], maxq[128];
  724. int minq, maxsf;
  725. float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
  726. int last = 0, lastband = 0, curband = 0;
  727. float avg_energy = 0.0;
  728. if (sce->ics.num_windows == 1) {
  729. start = 0;
  730. for (i = 0; i < 1024; i++) {
  731. if (i - start >= sce->ics.swb_sizes[curband]) {
  732. start += sce->ics.swb_sizes[curband];
  733. curband++;
  734. }
  735. if (sce->coeffs[i]) {
  736. avg_energy += sce->coeffs[i] * sce->coeffs[i];
  737. last = i;
  738. lastband = curband;
  739. }
  740. }
  741. } else {
  742. for (w = 0; w < 8; w++) {
  743. const float *coeffs = sce->coeffs + w*128;
  744. start = 0;
  745. for (i = 0; i < 128; i++) {
  746. if (i - start >= sce->ics.swb_sizes[curband]) {
  747. start += sce->ics.swb_sizes[curband];
  748. curband++;
  749. }
  750. if (coeffs[i]) {
  751. avg_energy += coeffs[i] * coeffs[i];
  752. last = FFMAX(last, i);
  753. lastband = FFMAX(lastband, curband);
  754. }
  755. }
  756. }
  757. }
  758. last++;
  759. avg_energy /= last;
  760. if (avg_energy == 0.0f) {
  761. for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
  762. sce->sf_idx[i] = SCALE_ONE_POS;
  763. return;
  764. }
  765. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  766. start = w*128;
  767. for (g = 0; g < sce->ics.num_swb; g++) {
  768. float *coefs = sce->coeffs + start;
  769. const int size = sce->ics.swb_sizes[g];
  770. int start2 = start, end2 = start + size, peakpos = start;
  771. float maxval = -1, thr = 0.0f, t;
  772. maxq[w*16+g] = 0.0f;
  773. if (g > lastband) {
  774. maxq[w*16+g] = 0.0f;
  775. start += size;
  776. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
  777. memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
  778. continue;
  779. }
  780. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  781. for (i = 0; i < size; i++) {
  782. float t = coefs[w2*128+i]*coefs[w2*128+i];
  783. maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
  784. thr += t;
  785. if (sce->ics.num_windows == 1 && maxval < t) {
  786. maxval = t;
  787. peakpos = start+i;
  788. }
  789. }
  790. }
  791. if (sce->ics.num_windows == 1) {
  792. start2 = FFMAX(peakpos - 2, start2);
  793. end2 = FFMIN(peakpos + 3, end2);
  794. } else {
  795. start2 -= start;
  796. end2 -= start;
  797. }
  798. start += size;
  799. thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
  800. t = 1.0 - (1.0 * start2 / last);
  801. uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
  802. }
  803. }
  804. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  805. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  806. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  807. start = w*128;
  808. for (g = 0; g < sce->ics.num_swb; g++) {
  809. const float *coefs = sce->coeffs + start;
  810. const float *scaled = s->scoefs + start;
  811. const int size = sce->ics.swb_sizes[g];
  812. int scf, prev_scf, step;
  813. int min_scf = -1, max_scf = 256;
  814. float curdiff;
  815. if (maxq[w*16+g] < 21.544) {
  816. sce->zeroes[w*16+g] = 1;
  817. start += size;
  818. continue;
  819. }
  820. sce->zeroes[w*16+g] = 0;
  821. scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2(1/maxq[w*16+g])*16/3, 60, 218);
  822. step = 16;
  823. for (;;) {
  824. float dist = 0.0f;
  825. int quant_max;
  826. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  827. int b;
  828. dist += quantize_band_cost(s, coefs + w2*128,
  829. scaled + w2*128,
  830. sce->ics.swb_sizes[g],
  831. scf,
  832. ESC_BT,
  833. lambda,
  834. INFINITY,
  835. &b);
  836. dist -= b;
  837. }
  838. dist *= 1.0f / 512.0f / lambda;
  839. quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[200 - scf + SCALE_ONE_POS - SCALE_DIV_512]);
  840. if (quant_max >= 8191) { // too much, return to the previous quantizer
  841. sce->sf_idx[w*16+g] = prev_scf;
  842. break;
  843. }
  844. prev_scf = scf;
  845. curdiff = fabsf(dist - uplim[w*16+g]);
  846. if (curdiff <= 1.0f)
  847. step = 0;
  848. else
  849. step = log2(curdiff);
  850. if (dist > uplim[w*16+g])
  851. step = -step;
  852. scf += step;
  853. scf = av_clip_uint8(scf);
  854. step = scf - prev_scf;
  855. if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
  856. sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
  857. break;
  858. }
  859. if (step > 0)
  860. min_scf = prev_scf;
  861. else
  862. max_scf = prev_scf;
  863. }
  864. start += size;
  865. }
  866. }
  867. minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
  868. for (i = 1; i < 128; i++) {
  869. if (!sce->sf_idx[i])
  870. sce->sf_idx[i] = sce->sf_idx[i-1];
  871. else
  872. minq = FFMIN(minq, sce->sf_idx[i]);
  873. }
  874. if (minq == INT_MAX)
  875. minq = 0;
  876. minq = FFMIN(minq, SCALE_MAX_POS);
  877. maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
  878. for (i = 126; i >= 0; i--) {
  879. if (!sce->sf_idx[i])
  880. sce->sf_idx[i] = sce->sf_idx[i+1];
  881. sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
  882. }
  883. }
  884. static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
  885. SingleChannelElement *sce,
  886. const float lambda)
  887. {
  888. int start = 0, i, w, w2, g;
  889. int minq = 255;
  890. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  891. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  892. start = w*128;
  893. for (g = 0; g < sce->ics.num_swb; g++) {
  894. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  895. FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
  896. if (band->energy <= band->threshold) {
  897. sce->sf_idx[(w+w2)*16+g] = 218;
  898. sce->zeroes[(w+w2)*16+g] = 1;
  899. } else {
  900. sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2(band->threshold), 80, 218);
  901. sce->zeroes[(w+w2)*16+g] = 0;
  902. }
  903. minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
  904. }
  905. }
  906. }
  907. for (i = 0; i < 128; i++) {
  908. sce->sf_idx[i] = 140;
  909. //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
  910. }
  911. //set the same quantizers inside window groups
  912. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  913. for (g = 0; g < sce->ics.num_swb; g++)
  914. for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
  915. sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
  916. }
  917. static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
  918. const float lambda)
  919. {
  920. int start = 0, i, w, w2, g;
  921. float M[128], S[128];
  922. float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
  923. SingleChannelElement *sce0 = &cpe->ch[0];
  924. SingleChannelElement *sce1 = &cpe->ch[1];
  925. if (!cpe->common_window)
  926. return;
  927. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  928. for (g = 0; g < sce0->ics.num_swb; g++) {
  929. if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
  930. float dist1 = 0.0f, dist2 = 0.0f;
  931. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  932. FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
  933. FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
  934. float minthr = FFMIN(band0->threshold, band1->threshold);
  935. float maxthr = FFMAX(band0->threshold, band1->threshold);
  936. for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  937. M[i] = (sce0->coeffs[start+w2*128+i]
  938. + sce1->coeffs[start+w2*128+i]) * 0.5;
  939. S[i] = sce0->coeffs[start+w2*128+i]
  940. - sce1->coeffs[start+w2*128+i];
  941. }
  942. abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  943. abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  944. abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  945. abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  946. dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
  947. L34,
  948. sce0->ics.swb_sizes[g],
  949. sce0->sf_idx[(w+w2)*16+g],
  950. sce0->band_type[(w+w2)*16+g],
  951. lambda / band0->threshold, INFINITY, NULL);
  952. dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
  953. R34,
  954. sce1->ics.swb_sizes[g],
  955. sce1->sf_idx[(w+w2)*16+g],
  956. sce1->band_type[(w+w2)*16+g],
  957. lambda / band1->threshold, INFINITY, NULL);
  958. dist2 += quantize_band_cost(s, M,
  959. M34,
  960. sce0->ics.swb_sizes[g],
  961. sce0->sf_idx[(w+w2)*16+g],
  962. sce0->band_type[(w+w2)*16+g],
  963. lambda / maxthr, INFINITY, NULL);
  964. dist2 += quantize_band_cost(s, S,
  965. S34,
  966. sce1->ics.swb_sizes[g],
  967. sce1->sf_idx[(w+w2)*16+g],
  968. sce1->band_type[(w+w2)*16+g],
  969. lambda / minthr, INFINITY, NULL);
  970. }
  971. cpe->ms_mask[w*16+g] = dist2 < dist1;
  972. }
  973. start += sce0->ics.swb_sizes[g];
  974. }
  975. }
  976. }
/**
 * Available coefficient coders.
 *
 * Each entry lists, in order: the quantizer search function, the band info
 * encoder, the band encoder and the mid/side decision function.
 * The entries differ in the quantizer search (faac, anmr, twoloop, fast);
 * only the twoloop entry uses codebook_trellis_rate for the band info.
 */
AACCoefficientsEncoder ff_aac_coders[] = {
    {
        search_for_quantizers_faac,
        encode_window_bands_info,
        quantize_and_encode_band,
        search_for_ms,
    },
    {
        search_for_quantizers_anmr,
        encode_window_bands_info,
        quantize_and_encode_band,
        search_for_ms,
    },
    {
        search_for_quantizers_twoloop,
        codebook_trellis_rate,
        quantize_and_encode_band,
        search_for_ms,
    },
    {
        search_for_quantizers_fast,
        encode_window_bands_info,
        quantize_and_encode_band,
        search_for_ms,
    },
};