You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

695 lines
31KB

  1. /*
  2. * AAC encoder twoloop coder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC encoder twoloop coder
  24. * @author Konstantin Shishkov, Claudio Freire
  25. */
  26. /**
  27. * This file contains a template for the twoloop coder function.
  28. * The including file must provide, as already included declarations,
  29. * the following functions from aacenc_quantization/util.h. They're not included
  30. * explicitly here to make it possible to provide alternative implementations:
  31. * - quantize_band_cost
  32. * - abs_pow34_v
  33. * - find_max_val
  34. * - find_min_book
  35. * - find_form_factor
  36. */
  37. #ifndef AVCODEC_AACCODER_TWOLOOP_H
  38. #define AVCODEC_AACCODER_TWOLOOP_H
  39. #include <float.h>
  40. #include "libavutil/mathematics.h"
  41. #include "mathops.h"
  42. #include "avcodec.h"
  43. #include "put_bits.h"
  44. #include "aac.h"
  45. #include "aacenc.h"
  46. #include "aactab.h"
  47. #include "aacenctab.h"
  48. #include "aac_tablegen_decl.h"
  49. /** Frequency in Hz for lower limit of noise substitution **/
  50. #define NOISE_LOW_LIMIT 4000
  51. #define sclip(x) av_clip(x,60,218)
  52. /* Reflects the cost to change codebooks */
  53. static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
  54. {
  55. return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
  56. }
  57. /**
  58. * two-loop quantizers search taken from ISO 13818-7 Appendix C
  59. */
  60. static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  61. AACEncContext *s,
  62. SingleChannelElement *sce,
  63. const float lambda)
  64. {
  65. int start = 0, i, w, w2, g, recomprd;
  66. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  67. / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  68. * (lambda / 120.f);
  69. int refbits = destbits;
  70. int toomanybits, toofewbits;
  71. char nzs[128];
  72. int maxsf[128];
  73. float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
  74. float maxvals[128], spread_thr_r[128];
  75. float min_spread_thr_r, max_spread_thr_r;
  76. /**
  77. * rdlambda controls the maximum tolerated distortion. Twoloop
  78. * will keep iterating until it fails to lower it or it reaches
  79. * ulimit * rdlambda. Keeping it low increases quality on difficult
  80. * signals, but lower it too much, and bits will be taken from weak
  81. * signals, creating "holes". A balance is necesary.
  82. * rdmax and rdmin specify the relative deviation from rdlambda
  83. * allowed for tonality compensation
  84. */
  85. float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
  86. const float nzslope = 1.5f;
  87. float rdmin = 0.03125f;
  88. float rdmax = 1.0f;
  89. /**
  90. * sfoffs controls an offset of optmium allocation that will be
  91. * applied based on lambda. Keep it real and modest, the loop
  92. * will take care of the rest, this just accelerates convergence
  93. */
  94. float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
  95. int fflag, minscaler, maxscaler, nminscaler, minrdsf;
  96. int its = 0;
  97. int maxits = 30;
  98. int allz = 0;
  99. int tbits;
  100. int cutoff = 1024;
  101. int pns_start_pos;
  102. /**
  103. * zeroscale controls a multiplier of the threshold, if band energy
  104. * is below this, a zero is forced. Keep it lower than 1, unless
  105. * low lambda is used, because energy < threshold doesn't mean there's
  106. * no audible signal outright, it's just energy. Also make it rise
  107. * slower than rdlambda, as rdscale has due compensation with
  108. * noisy band depriorization below, whereas zeroing logic is rather dumb
  109. */
  110. float zeroscale;
  111. if (lambda > 120.f) {
  112. zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
  113. } else {
  114. zeroscale = 1.f;
  115. }
  116. if (s->psy.bitres.alloc >= 0) {
  117. /**
  118. * Psy granted us extra bits to use, from the reservoire
  119. * adjust for lambda except what psy already did
  120. */
  121. destbits = s->psy.bitres.alloc
  122. * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
  123. }
  124. if (avctx->flags & CODEC_FLAG_QSCALE) {
  125. /**
  126. * Constant Q-scale doesn't compensate MS coding on its own
  127. * No need to be overly precise, this only controls RD
  128. * adjustment CB limits when going overboard
  129. */
  130. if (s->options.stereo_mode && s->cur_type == TYPE_CPE)
  131. destbits *= 2;
  132. /**
  133. * When using a constant Q-scale, don't adjust bits, just use RD
  134. * Don't let it go overboard, though... 8x psy target is enough
  135. */
  136. toomanybits = 5800;
  137. toofewbits = destbits / 16;
  138. /** Don't offset scalers, just RD */
  139. sfoffs = sce->ics.num_windows - 1;
  140. rdlambda = sqrtf(rdlambda);
  141. /** search further */
  142. maxits *= 2;
  143. } else {
  144. /** When using ABR, be strict */
  145. toomanybits = destbits + destbits/16;
  146. toofewbits = destbits - destbits/4;
  147. sfoffs = 0;
  148. rdlambda = sqrtf(rdlambda);
  149. }
  150. /** and zero out above cutoff frequency */
  151. {
  152. int wlen = 1024 / sce->ics.num_windows;
  153. int bandwidth;
  154. /**
  155. * Scale, psy gives us constant quality, this LP only scales
  156. * bitrate by lambda, so we save bits on subjectively unimportant HF
  157. * rather than increase quantization noise. Adjust nominal bitrate
  158. * to effective bitrate according to encoding parameters,
  159. * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
  160. */
  161. float rate_bandwidth_multiplier = 1.5f;
  162. int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
  163. ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
  164. : (avctx->bit_rate / avctx->channels);
  165. /** Compensate for extensions that increase efficiency */
  166. if (s->options.pns || s->options.intensity_stereo)
  167. frame_bit_rate *= 1.15f;
  168. if (avctx->cutoff > 0) {
  169. bandwidth = avctx->cutoff;
  170. } else {
  171. bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
  172. }
  173. cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
  174. pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
  175. }
  176. /**
  177. * for values above this the decoder might end up in an endless loop
  178. * due to always having more bits than what can be encoded.
  179. */
  180. destbits = FFMIN(destbits, 5800);
  181. toomanybits = FFMIN(toomanybits, 5800);
  182. toofewbits = FFMIN(toofewbits, 5800);
  183. /**
  184. * XXX: some heuristic to determine initial quantizers will reduce search time
  185. * determine zero bands and upper distortion limits
  186. */
  187. min_spread_thr_r = -1;
  188. max_spread_thr_r = -1;
  189. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  190. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  191. int nz = 0;
  192. float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
  193. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  194. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  195. if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
  196. sce->zeroes[(w+w2)*16+g] = 1;
  197. continue;
  198. }
  199. nz = 1;
  200. }
  201. if (!nz) {
  202. uplim = 0.0f;
  203. } else {
  204. nz = 0;
  205. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  206. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  207. if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
  208. continue;
  209. uplim += band->threshold;
  210. energy += band->energy;
  211. spread += band->spread;
  212. nz++;
  213. }
  214. }
  215. uplims[w*16+g] = uplim;
  216. energies[w*16+g] = energy;
  217. nzs[w*16+g] = nz;
  218. sce->zeroes[w*16+g] = !nz;
  219. allz |= nz;
  220. if (nz) {
  221. spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
  222. if (min_spread_thr_r < 0) {
  223. min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
  224. } else {
  225. min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
  226. max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
  227. }
  228. }
  229. }
  230. }
  231. /** Compute initial scalers */
  232. minscaler = 65535;
  233. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  234. for (g = 0; g < sce->ics.num_swb; g++) {
  235. if (sce->zeroes[w*16+g]) {
  236. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  237. continue;
  238. }
  239. /**
  240. * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
  241. * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
  242. * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
  243. * more robust.
  244. */
  245. sce->sf_idx[w*16+g] = av_clip(
  246. SCALE_ONE_POS
  247. + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
  248. + sfoffs,
  249. 60, SCALE_MAX_POS);
  250. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  251. }
  252. }
  253. /** Clip */
  254. minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  255. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  256. for (g = 0; g < sce->ics.num_swb; g++)
  257. if (!sce->zeroes[w*16+g])
  258. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
  259. if (!allz)
  260. return;
  261. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  262. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  263. start = w*128;
  264. for (g = 0; g < sce->ics.num_swb; g++) {
  265. const float *scaled = s->scoefs + start;
  266. maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  267. start += sce->ics.swb_sizes[g];
  268. }
  269. }
  270. /**
  271. * Scale uplims to match rate distortion to quality
  272. * bu applying noisy band depriorization and tonal band priorization.
  273. * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
  274. * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
  275. * rate distortion requirements.
  276. */
  277. memcpy(euplims, uplims, sizeof(euplims));
  278. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  279. /** psy already priorizes transients to some extent */
  280. float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
  281. start = w*128;
  282. for (g = 0; g < sce->ics.num_swb; g++) {
  283. if (nzs[g] > 0) {
  284. float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
  285. float energy2uplim = find_form_factor(
  286. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  287. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  288. sce->coeffs + start,
  289. nzslope * cleanup_factor);
  290. energy2uplim *= de_psy_factor;
  291. if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
  292. /** In ABR, we need to priorize less and let rate control do its thing */
  293. energy2uplim = sqrtf(energy2uplim);
  294. }
  295. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  296. uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
  297. * sce->ics.group_len[w];
  298. energy2uplim = find_form_factor(
  299. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  300. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  301. sce->coeffs + start,
  302. 2.0f);
  303. energy2uplim *= de_psy_factor;
  304. if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
  305. /** In ABR, we need to priorize less and let rate control do its thing */
  306. energy2uplim = sqrtf(energy2uplim);
  307. }
  308. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  309. euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
  310. 0.5f, 1.0f);
  311. }
  312. start += sce->ics.swb_sizes[g];
  313. }
  314. }
  315. for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
  316. maxsf[i] = SCALE_MAX_POS;
  317. //perform two-loop search
  318. //outer loop - improve quality
  319. do {
  320. //inner loop - quantize spectrum to fit into given number of bits
  321. int overdist;
  322. int qstep = its ? 1 : 32;
  323. do {
  324. int prev = -1;
  325. int changed = 0;
  326. recomprd = 0;
  327. tbits = 0;
  328. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  329. start = w*128;
  330. for (g = 0; g < sce->ics.num_swb; g++) {
  331. const float *coefs = &sce->coeffs[start];
  332. const float *scaled = &s->scoefs[start];
  333. int bits = 0;
  334. int cb;
  335. float dist = 0.0f;
  336. float qenergy = 0.0f;
  337. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  338. start += sce->ics.swb_sizes[g];
  339. if (sce->can_pns[w*16+g]) {
  340. /** PNS isn't free */
  341. tbits += ff_pns_bits(sce, w, g);
  342. }
  343. continue;
  344. }
  345. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  346. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  347. int b;
  348. float sqenergy;
  349. dist += quantize_band_cost(s, coefs + w2*128,
  350. scaled + w2*128,
  351. sce->ics.swb_sizes[g],
  352. sce->sf_idx[w*16+g],
  353. cb,
  354. 1.0f,
  355. INFINITY,
  356. &b, &sqenergy,
  357. 0);
  358. bits += b;
  359. qenergy += sqenergy;
  360. }
  361. dists[w*16+g] = dist - bits;
  362. qenergies[w*16+g] = qenergy;
  363. if (prev != -1) {
  364. int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
  365. av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
  366. bits += ff_aac_scalefactor_bits[sfdiff];
  367. }
  368. tbits += bits;
  369. start += sce->ics.swb_sizes[g];
  370. prev = sce->sf_idx[w*16+g];
  371. }
  372. }
  373. if (tbits > toomanybits) {
  374. recomprd = 1;
  375. for (i = 0; i < 128; i++) {
  376. if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
  377. int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
  378. int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
  379. if (new_sf != sce->sf_idx[i]) {
  380. sce->sf_idx[i] = new_sf;
  381. changed = 1;
  382. }
  383. }
  384. }
  385. } else if (tbits < toofewbits) {
  386. recomprd = 1;
  387. for (i = 0; i < 128; i++) {
  388. if (sce->sf_idx[i] > SCALE_ONE_POS) {
  389. int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
  390. if (new_sf != sce->sf_idx[i]) {
  391. sce->sf_idx[i] = new_sf;
  392. changed = 1;
  393. }
  394. }
  395. }
  396. }
  397. qstep >>= 1;
  398. if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
  399. qstep = 1;
  400. } while (qstep);
  401. overdist = 1;
  402. for (i = 0; i < 2 && (overdist || recomprd); ++i) {
  403. if (recomprd) {
  404. /** Must recompute distortion */
  405. int prev = -1;
  406. tbits = 0;
  407. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  408. start = w*128;
  409. for (g = 0; g < sce->ics.num_swb; g++) {
  410. const float *coefs = sce->coeffs + start;
  411. const float *scaled = s->scoefs + start;
  412. int bits = 0;
  413. int cb;
  414. float dist = 0.0f;
  415. float qenergy = 0.0f;
  416. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  417. start += sce->ics.swb_sizes[g];
  418. if (sce->can_pns[w*16+g]) {
  419. /** PNS isn't free */
  420. tbits += ff_pns_bits(sce, w, g);
  421. }
  422. continue;
  423. }
  424. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  425. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  426. int b;
  427. float sqenergy;
  428. dist += quantize_band_cost(s, coefs + w2*128,
  429. scaled + w2*128,
  430. sce->ics.swb_sizes[g],
  431. sce->sf_idx[w*16+g],
  432. cb,
  433. 1.0f,
  434. INFINITY,
  435. &b, &sqenergy,
  436. 0);
  437. bits += b;
  438. qenergy += sqenergy;
  439. }
  440. dists[w*16+g] = dist - bits;
  441. qenergies[w*16+g] = qenergy;
  442. if (prev != -1) {
  443. int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
  444. av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
  445. bits += ff_aac_scalefactor_bits[sfdiff];
  446. }
  447. tbits += bits;
  448. start += sce->ics.swb_sizes[g];
  449. prev = sce->sf_idx[w*16+g];
  450. }
  451. }
  452. }
  453. if (!i && s->options.pns && its > maxits/2) {
  454. float maxoverdist = 0.0f;
  455. overdist = recomprd = 0;
  456. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  457. float ovrfactor = 2.f+(maxits-its)*16.f/maxits;
  458. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  459. if (!sce->zeroes[w*16+g] && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
  460. float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
  461. maxoverdist = FFMAX(maxoverdist, ovrdist);
  462. overdist++;
  463. }
  464. }
  465. }
  466. if (overdist) {
  467. /* We have overdistorted bands, trade for zeroes (that can be noise)
  468. * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
  469. */
  470. float minspread = max_spread_thr_r;
  471. float maxspread = min_spread_thr_r;
  472. float zspread;
  473. int zeroable = 0;
  474. int zeroed = 0;
  475. int maxzeroed;
  476. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  477. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  478. if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
  479. minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
  480. maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
  481. zeroable++;
  482. }
  483. }
  484. }
  485. zspread = (maxspread-minspread) * 0.0125f + minspread;
  486. zspread = FFMIN(maxoverdist, zspread);
  487. maxzeroed = zeroable * its / (2 * maxits);
  488. for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
  489. if (sce->ics.swb_offset[g] < pns_start_pos)
  490. continue;
  491. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  492. if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread) {
  493. sce->zeroes[w*16+g] = 1;
  494. sce->band_type[w*16+g] = 0;
  495. zeroed++;
  496. }
  497. }
  498. }
  499. if (zeroed)
  500. recomprd = 1;
  501. } else {
  502. overdist = 0;
  503. }
  504. }
  505. }
  506. minscaler = SCALE_MAX_POS;
  507. maxscaler = 0;
  508. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  509. for (g = 0; g < sce->ics.num_swb; g++) {
  510. if (!sce->zeroes[w*16+g]) {
  511. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  512. maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
  513. }
  514. }
  515. }
  516. fflag = 0;
  517. minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  518. minrdsf = FFMAX3(60, minscaler - 1, maxscaler - SCALE_MAX_DIFF - 1);
  519. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  520. /** Start with big steps, end up fine-tunning */
  521. int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
  522. int edepth = depth+2;
  523. float uplmax = its / (maxits*0.25f) + 1.0f;
  524. uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
  525. start = w * 128;
  526. for (g = 0; g < sce->ics.num_swb; g++) {
  527. int prevsc = sce->sf_idx[w*16+g];
  528. int minrdsfboost = (sce->ics.num_windows > 1) ? av_clip(g-4, -2, 0) : av_clip(g-16, -4, 0);
  529. if (!sce->zeroes[w*16+g]) {
  530. const float *coefs = sce->coeffs + start;
  531. const float *scaled = s->scoefs + start;
  532. int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  533. if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > minrdsf) {
  534. /* Try to make sure there is some energy in every nonzero band
  535. * NOTE: This algorithm must be forcibly imbalanced, pushing harder
  536. * on holes or more distorted bands at first, otherwise there's
  537. * no net gain (since the next iteration will offset all bands
  538. * on the opposite direction to compensate for extra bits)
  539. */
  540. for (i = 0; i < edepth; ++i) {
  541. int cb, bits;
  542. float dist, qenergy;
  543. int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
  544. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  545. dist = qenergy = 0.f;
  546. bits = 0;
  547. if (!cb) {
  548. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
  549. } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
  550. break;
  551. }
  552. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  553. int b;
  554. float sqenergy;
  555. dist += quantize_band_cost(s, coefs + w2*128,
  556. scaled + w2*128,
  557. sce->ics.swb_sizes[g],
  558. sce->sf_idx[w*16+g]-1,
  559. cb,
  560. 1.0f,
  561. INFINITY,
  562. &b, &sqenergy,
  563. 0);
  564. bits += b;
  565. qenergy += sqenergy;
  566. }
  567. sce->sf_idx[w*16+g]--;
  568. dists[w*16+g] = dist - bits;
  569. qenergies[w*16+g] = qenergy;
  570. if (mb && (sce->sf_idx[w*16+g] < (minrdsf+minrdsfboost) || (
  571. (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
  572. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  573. ) )) {
  574. break;
  575. }
  576. }
  577. } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < maxscaler
  578. && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
  579. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  580. ) {
  581. /** Um... over target. Save bits for more important stuff. */
  582. for (i = 0; i < depth; ++i) {
  583. int cb, bits;
  584. float dist, qenergy;
  585. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
  586. if (cb > 0) {
  587. dist = qenergy = 0.f;
  588. bits = 0;
  589. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  590. int b;
  591. float sqenergy;
  592. dist += quantize_band_cost(s, coefs + w2*128,
  593. scaled + w2*128,
  594. sce->ics.swb_sizes[g],
  595. sce->sf_idx[w*16+g]+1,
  596. cb,
  597. 1.0f,
  598. INFINITY,
  599. &b, &sqenergy,
  600. 0);
  601. bits += b;
  602. qenergy += sqenergy;
  603. }
  604. dist -= bits;
  605. if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
  606. sce->sf_idx[w*16+g]++;
  607. dists[w*16+g] = dist;
  608. qenergies[w*16+g] = qenergy;
  609. } else {
  610. break;
  611. }
  612. } else {
  613. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
  614. break;
  615. }
  616. }
  617. }
  618. }
  619. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minrdsf, minscaler + SCALE_MAX_DIFF);
  620. sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], SCALE_MAX_POS - SCALE_DIV_512);
  621. if (sce->sf_idx[w*16+g] != prevsc)
  622. fflag = 1;
  623. nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
  624. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  625. start += sce->ics.swb_sizes[g];
  626. }
  627. }
  628. if (nminscaler < minscaler) {
  629. /** Drecreased some scalers below minscaler. Must re-clamp. */
  630. minscaler = nminscaler;
  631. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  632. for (g = 0; g < sce->ics.num_swb; g++) {
  633. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
  634. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  635. }
  636. }
  637. }
  638. its++;
  639. } while (fflag && its < maxits);
  640. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  641. /** Make sure proper codebooks are set */
  642. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  643. if (!sce->zeroes[w*16+g]) {
  644. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  645. if (sce->band_type[w*16+g] <= 0) {
  646. sce->zeroes[w*16+g] = 1;
  647. sce->band_type[w*16+g] = 0;
  648. }
  649. } else {
  650. sce->band_type[w*16+g] = 0;
  651. }
  652. }
  653. }
  654. }
  655. #endif /* AVCODEC_AACCODER_TWOLOOP_H */