You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

707 lines
31KB

  1. /*
  2. * AAC encoder twoloop coder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC encoder twoloop coder
  24. * @author Konstantin Shishkov, Claudio Freire
  25. */
  26. /**
  27. * This file contains a template for the twoloop coder function.
  28. * The following functions from aacenc_quantization/util.h must already be
  29. * declared by the including file. They are not included explicitly here
  30. * so that alternative implementations can be provided:
  31. * - quantize_band_cost
  32. * - abs_pow34_v
  33. * - find_max_val
  34. * - find_min_book
  35. * - find_form_factor
  36. */
  37. #ifndef AVCODEC_AACCODER_TWOLOOP_H
  38. #define AVCODEC_AACCODER_TWOLOOP_H
  39. #include <float.h>
  40. #include "libavutil/mathematics.h"
  41. #include "mathops.h"
  42. #include "avcodec.h"
  43. #include "put_bits.h"
  44. #include "aac.h"
  45. #include "aacenc.h"
  46. #include "aactab.h"
  47. #include "aacenctab.h"
  48. #include "aac_tablegen_decl.h"
  49. /** Frequency in Hz for lower limit of noise substitution **/
  50. #define NOISE_LOW_LIMIT 4000
  51. #define sclip(x) av_clip(x,60,218)
  52. /* Reflects the cost to change codebooks */
  53. static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
  54. {
  55. return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
  56. }
  57. /**
  58. * two-loop quantizers search taken from ISO 13818-7 Appendix C
  59. */
  60. static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  61. AACEncContext *s,
  62. SingleChannelElement *sce,
  63. const float lambda)
  64. {
  65. int start = 0, i, w, w2, g, recomprd;
  66. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  67. / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  68. * (lambda / 120.f);
  69. int refbits = destbits;
  70. int toomanybits, toofewbits;
  71. char nzs[128];
  72. int maxsf[128];
  73. float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
  74. float maxvals[128], spread_thr_r[128];
  75. float min_spread_thr_r, max_spread_thr_r;
  76. /**
  77. * rdlambda controls the maximum tolerated distortion. Twoloop
  78. * will keep iterating until it fails to lower it or it reaches
  79. * ulimit * rdlambda. Keeping it low increases quality on difficult
  80. * signals, but lower it too much, and bits will be taken from weak
  81. * signals, creating "holes". A balance is necesary.
  82. * rdmax and rdmin specify the relative deviation from rdlambda
  83. * allowed for tonality compensation
  84. */
  85. float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
  86. const float nzslope = 1.5f;
  87. float rdmin = 0.03125f;
  88. float rdmax = 1.0f;
  89. /**
  90. * sfoffs controls an offset of optmium allocation that will be
  91. * applied based on lambda. Keep it real and modest, the loop
  92. * will take care of the rest, this just accelerates convergence
  93. */
  94. float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
  95. int fflag, minscaler, maxscaler, nminscaler, minrdsf;
  96. int its = 0;
  97. int maxits = 30;
  98. int allz = 0;
  99. int tbits;
  100. int cutoff = 1024;
  101. int pns_start_pos;
  102. int prev;
  103. /**
  104. * zeroscale controls a multiplier of the threshold, if band energy
  105. * is below this, a zero is forced. Keep it lower than 1, unless
  106. * low lambda is used, because energy < threshold doesn't mean there's
  107. * no audible signal outright, it's just energy. Also make it rise
  108. * slower than rdlambda, as rdscale has due compensation with
  109. * noisy band depriorization below, whereas zeroing logic is rather dumb
  110. */
  111. float zeroscale;
  112. if (lambda > 120.f) {
  113. zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
  114. } else {
  115. zeroscale = 1.f;
  116. }
  117. if (s->psy.bitres.alloc >= 0) {
  118. /**
  119. * Psy granted us extra bits to use, from the reservoire
  120. * adjust for lambda except what psy already did
  121. */
  122. destbits = s->psy.bitres.alloc
  123. * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
  124. }
  125. if (avctx->flags & CODEC_FLAG_QSCALE) {
  126. /**
  127. * Constant Q-scale doesn't compensate MS coding on its own
  128. * No need to be overly precise, this only controls RD
  129. * adjustment CB limits when going overboard
  130. */
  131. if (s->options.mid_side && s->cur_type == TYPE_CPE)
  132. destbits *= 2;
  133. /**
  134. * When using a constant Q-scale, don't adjust bits, just use RD
  135. * Don't let it go overboard, though... 8x psy target is enough
  136. */
  137. toomanybits = 5800;
  138. toofewbits = destbits / 16;
  139. /** Don't offset scalers, just RD */
  140. sfoffs = sce->ics.num_windows - 1;
  141. rdlambda = sqrtf(rdlambda);
  142. /** search further */
  143. maxits *= 2;
  144. } else {
  145. /** When using ABR, be strict */
  146. toomanybits = destbits + destbits/16;
  147. toofewbits = destbits - destbits/4;
  148. sfoffs = 0;
  149. rdlambda = sqrtf(rdlambda);
  150. }
  151. /** and zero out above cutoff frequency */
  152. {
  153. int wlen = 1024 / sce->ics.num_windows;
  154. int bandwidth;
  155. /**
  156. * Scale, psy gives us constant quality, this LP only scales
  157. * bitrate by lambda, so we save bits on subjectively unimportant HF
  158. * rather than increase quantization noise. Adjust nominal bitrate
  159. * to effective bitrate according to encoding parameters,
  160. * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
  161. */
  162. float rate_bandwidth_multiplier = 1.5f;
  163. int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
  164. ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
  165. : (avctx->bit_rate / avctx->channels);
  166. /** Compensate for extensions that increase efficiency */
  167. if (s->options.pns || s->options.intensity_stereo)
  168. frame_bit_rate *= 1.15f;
  169. if (avctx->cutoff > 0) {
  170. bandwidth = avctx->cutoff;
  171. } else {
  172. bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
  173. }
  174. cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
  175. pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
  176. }
  177. /**
  178. * for values above this the decoder might end up in an endless loop
  179. * due to always having more bits than what can be encoded.
  180. */
  181. destbits = FFMIN(destbits, 5800);
  182. toomanybits = FFMIN(toomanybits, 5800);
  183. toofewbits = FFMIN(toofewbits, 5800);
  184. /**
  185. * XXX: some heuristic to determine initial quantizers will reduce search time
  186. * determine zero bands and upper distortion limits
  187. */
  188. min_spread_thr_r = -1;
  189. max_spread_thr_r = -1;
  190. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  191. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  192. int nz = 0;
  193. float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
  194. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  195. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  196. if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
  197. sce->zeroes[(w+w2)*16+g] = 1;
  198. continue;
  199. }
  200. nz = 1;
  201. }
  202. if (!nz) {
  203. uplim = 0.0f;
  204. } else {
  205. nz = 0;
  206. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  207. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  208. if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
  209. continue;
  210. uplim += band->threshold;
  211. energy += band->energy;
  212. spread += band->spread;
  213. nz++;
  214. }
  215. }
  216. uplims[w*16+g] = uplim;
  217. energies[w*16+g] = energy;
  218. nzs[w*16+g] = nz;
  219. sce->zeroes[w*16+g] = !nz;
  220. allz |= nz;
  221. if (nz) {
  222. spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
  223. if (min_spread_thr_r < 0) {
  224. min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
  225. } else {
  226. min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
  227. max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
  228. }
  229. }
  230. }
  231. }
  232. /** Compute initial scalers */
  233. minscaler = 65535;
  234. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  235. for (g = 0; g < sce->ics.num_swb; g++) {
  236. if (sce->zeroes[w*16+g]) {
  237. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  238. continue;
  239. }
  240. /**
  241. * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
  242. * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
  243. * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
  244. * more robust.
  245. */
  246. sce->sf_idx[w*16+g] = av_clip(
  247. SCALE_ONE_POS
  248. + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
  249. + sfoffs,
  250. 60, SCALE_MAX_POS);
  251. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  252. }
  253. }
  254. /** Clip */
  255. minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  256. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  257. for (g = 0; g < sce->ics.num_swb; g++)
  258. if (!sce->zeroes[w*16+g])
  259. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
  260. if (!allz)
  261. return;
  262. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  263. ff_quantize_band_cost_cache_init(s);
  264. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  265. start = w*128;
  266. for (g = 0; g < sce->ics.num_swb; g++) {
  267. const float *scaled = s->scoefs + start;
  268. maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  269. start += sce->ics.swb_sizes[g];
  270. }
  271. }
  272. /**
  273. * Scale uplims to match rate distortion to quality
  274. * bu applying noisy band depriorization and tonal band priorization.
  275. * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
  276. * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
  277. * rate distortion requirements.
  278. */
  279. memcpy(euplims, uplims, sizeof(euplims));
  280. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  281. /** psy already priorizes transients to some extent */
  282. float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
  283. start = w*128;
  284. for (g = 0; g < sce->ics.num_swb; g++) {
  285. if (nzs[g] > 0) {
  286. float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
  287. float energy2uplim = find_form_factor(
  288. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  289. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  290. sce->coeffs + start,
  291. nzslope * cleanup_factor);
  292. energy2uplim *= de_psy_factor;
  293. if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
  294. /** In ABR, we need to priorize less and let rate control do its thing */
  295. energy2uplim = sqrtf(energy2uplim);
  296. }
  297. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  298. uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
  299. * sce->ics.group_len[w];
  300. energy2uplim = find_form_factor(
  301. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  302. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  303. sce->coeffs + start,
  304. 2.0f);
  305. energy2uplim *= de_psy_factor;
  306. if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
  307. /** In ABR, we need to priorize less and let rate control do its thing */
  308. energy2uplim = sqrtf(energy2uplim);
  309. }
  310. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  311. euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
  312. 0.5f, 1.0f);
  313. }
  314. start += sce->ics.swb_sizes[g];
  315. }
  316. }
  317. for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
  318. maxsf[i] = SCALE_MAX_POS;
  319. //perform two-loop search
  320. //outer loop - improve quality
  321. do {
  322. //inner loop - quantize spectrum to fit into given number of bits
  323. int overdist;
  324. int qstep = its ? 1 : 32;
  325. do {
  326. int changed = 0;
  327. prev = -1;
  328. recomprd = 0;
  329. tbits = 0;
  330. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  331. start = w*128;
  332. for (g = 0; g < sce->ics.num_swb; g++) {
  333. const float *coefs = &sce->coeffs[start];
  334. const float *scaled = &s->scoefs[start];
  335. int bits = 0;
  336. int cb;
  337. float dist = 0.0f;
  338. float qenergy = 0.0f;
  339. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  340. start += sce->ics.swb_sizes[g];
  341. if (sce->can_pns[w*16+g]) {
  342. /** PNS isn't free */
  343. tbits += ff_pns_bits(sce, w, g);
  344. }
  345. continue;
  346. }
  347. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  348. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  349. int b;
  350. float sqenergy;
  351. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  352. scaled + w2*128,
  353. sce->ics.swb_sizes[g],
  354. sce->sf_idx[w*16+g],
  355. cb,
  356. 1.0f,
  357. INFINITY,
  358. &b, &sqenergy,
  359. 0);
  360. bits += b;
  361. qenergy += sqenergy;
  362. }
  363. dists[w*16+g] = dist - bits;
  364. qenergies[w*16+g] = qenergy;
  365. if (prev != -1) {
  366. int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
  367. bits += ff_aac_scalefactor_bits[sfdiff];
  368. }
  369. tbits += bits;
  370. start += sce->ics.swb_sizes[g];
  371. prev = sce->sf_idx[w*16+g];
  372. }
  373. }
  374. if (tbits > toomanybits) {
  375. recomprd = 1;
  376. for (i = 0; i < 128; i++) {
  377. if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
  378. int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
  379. int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
  380. if (new_sf != sce->sf_idx[i]) {
  381. sce->sf_idx[i] = new_sf;
  382. changed = 1;
  383. }
  384. }
  385. }
  386. } else if (tbits < toofewbits) {
  387. recomprd = 1;
  388. for (i = 0; i < 128; i++) {
  389. if (sce->sf_idx[i] > SCALE_ONE_POS) {
  390. int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
  391. if (new_sf != sce->sf_idx[i]) {
  392. sce->sf_idx[i] = new_sf;
  393. changed = 1;
  394. }
  395. }
  396. }
  397. }
  398. qstep >>= 1;
  399. if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
  400. qstep = 1;
  401. } while (qstep);
  402. overdist = 1;
  403. for (i = 0; i < 2 && (overdist || recomprd); ++i) {
  404. if (recomprd) {
  405. /** Must recompute distortion */
  406. prev = -1;
  407. tbits = 0;
  408. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  409. start = w*128;
  410. for (g = 0; g < sce->ics.num_swb; g++) {
  411. const float *coefs = sce->coeffs + start;
  412. const float *scaled = s->scoefs + start;
  413. int bits = 0;
  414. int cb;
  415. float dist = 0.0f;
  416. float qenergy = 0.0f;
  417. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  418. start += sce->ics.swb_sizes[g];
  419. if (sce->can_pns[w*16+g]) {
  420. /** PNS isn't free */
  421. tbits += ff_pns_bits(sce, w, g);
  422. }
  423. continue;
  424. }
  425. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  426. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  427. int b;
  428. float sqenergy;
  429. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  430. scaled + w2*128,
  431. sce->ics.swb_sizes[g],
  432. sce->sf_idx[w*16+g],
  433. cb,
  434. 1.0f,
  435. INFINITY,
  436. &b, &sqenergy,
  437. 0);
  438. bits += b;
  439. qenergy += sqenergy;
  440. }
  441. dists[w*16+g] = dist - bits;
  442. qenergies[w*16+g] = qenergy;
  443. if (prev != -1) {
  444. int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
  445. bits += ff_aac_scalefactor_bits[sfdiff];
  446. }
  447. tbits += bits;
  448. start += sce->ics.swb_sizes[g];
  449. prev = sce->sf_idx[w*16+g];
  450. }
  451. }
  452. }
  453. if (!i && s->options.pns && its > maxits/2) {
  454. float maxoverdist = 0.0f;
  455. overdist = recomprd = 0;
  456. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  457. float ovrfactor = 2.f+(maxits-its)*16.f/maxits;
  458. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  459. if (!sce->zeroes[w*16+g] && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
  460. float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
  461. maxoverdist = FFMAX(maxoverdist, ovrdist);
  462. overdist++;
  463. }
  464. }
  465. }
  466. if (overdist) {
  467. /* We have overdistorted bands, trade for zeroes (that can be noise)
  468. * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
  469. */
  470. float minspread = max_spread_thr_r;
  471. float maxspread = min_spread_thr_r;
  472. float zspread;
  473. int zeroable = 0;
  474. int zeroed = 0;
  475. int maxzeroed;
  476. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  477. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  478. if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
  479. minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
  480. maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
  481. zeroable++;
  482. }
  483. }
  484. }
  485. zspread = (maxspread-minspread) * 0.0125f + minspread;
  486. zspread = FFMIN(maxoverdist, zspread);
  487. maxzeroed = zeroable * its / (2 * maxits);
  488. for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
  489. if (sce->ics.swb_offset[g] < pns_start_pos)
  490. continue;
  491. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  492. if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread) {
  493. sce->zeroes[w*16+g] = 1;
  494. sce->band_type[w*16+g] = 0;
  495. zeroed++;
  496. }
  497. }
  498. }
  499. if (zeroed)
  500. recomprd = 1;
  501. } else {
  502. overdist = 0;
  503. }
  504. }
  505. }
  506. minscaler = SCALE_MAX_POS;
  507. maxscaler = 0;
  508. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  509. for (g = 0; g < sce->ics.num_swb; g++) {
  510. if (!sce->zeroes[w*16+g]) {
  511. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  512. maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
  513. }
  514. }
  515. }
  516. fflag = 0;
  517. minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  518. minrdsf = FFMAX3(60, minscaler - 1, maxscaler - SCALE_MAX_DIFF - 1);
  519. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  520. /** Start with big steps, end up fine-tunning */
  521. int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
  522. int edepth = depth+2;
  523. float uplmax = its / (maxits*0.25f) + 1.0f;
  524. uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
  525. start = w * 128;
  526. for (g = 0; g < sce->ics.num_swb; g++) {
  527. int prevsc = sce->sf_idx[w*16+g];
  528. int minrdsfboost = (sce->ics.num_windows > 1) ? av_clip(g-4, -2, 0) : av_clip(g-16, -4, 0);
  529. if (!sce->zeroes[w*16+g]) {
  530. const float *coefs = sce->coeffs + start;
  531. const float *scaled = s->scoefs + start;
  532. int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  533. if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > minrdsf) {
  534. /* Try to make sure there is some energy in every nonzero band
  535. * NOTE: This algorithm must be forcibly imbalanced, pushing harder
  536. * on holes or more distorted bands at first, otherwise there's
  537. * no net gain (since the next iteration will offset all bands
  538. * on the opposite direction to compensate for extra bits)
  539. */
  540. for (i = 0; i < edepth; ++i) {
  541. int cb, bits;
  542. float dist, qenergy;
  543. int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
  544. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  545. dist = qenergy = 0.f;
  546. bits = 0;
  547. if (!cb) {
  548. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
  549. } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
  550. break;
  551. }
  552. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  553. int b;
  554. float sqenergy;
  555. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  556. scaled + w2*128,
  557. sce->ics.swb_sizes[g],
  558. sce->sf_idx[w*16+g]-1,
  559. cb,
  560. 1.0f,
  561. INFINITY,
  562. &b, &sqenergy,
  563. 0);
  564. bits += b;
  565. qenergy += sqenergy;
  566. }
  567. sce->sf_idx[w*16+g]--;
  568. dists[w*16+g] = dist - bits;
  569. qenergies[w*16+g] = qenergy;
  570. if (mb && (sce->sf_idx[w*16+g] < (minrdsf+minrdsfboost) || (
  571. (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
  572. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  573. ) )) {
  574. break;
  575. }
  576. }
  577. } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < maxscaler
  578. && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
  579. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  580. ) {
  581. /** Um... over target. Save bits for more important stuff. */
  582. for (i = 0; i < depth; ++i) {
  583. int cb, bits;
  584. float dist, qenergy;
  585. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
  586. if (cb > 0) {
  587. dist = qenergy = 0.f;
  588. bits = 0;
  589. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  590. int b;
  591. float sqenergy;
  592. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  593. scaled + w2*128,
  594. sce->ics.swb_sizes[g],
  595. sce->sf_idx[w*16+g]+1,
  596. cb,
  597. 1.0f,
  598. INFINITY,
  599. &b, &sqenergy,
  600. 0);
  601. bits += b;
  602. qenergy += sqenergy;
  603. }
  604. dist -= bits;
  605. if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
  606. sce->sf_idx[w*16+g]++;
  607. dists[w*16+g] = dist;
  608. qenergies[w*16+g] = qenergy;
  609. } else {
  610. break;
  611. }
  612. } else {
  613. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
  614. break;
  615. }
  616. }
  617. }
  618. }
  619. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minrdsf, minscaler + SCALE_MAX_DIFF);
  620. sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], SCALE_MAX_POS - SCALE_DIV_512);
  621. if (sce->sf_idx[w*16+g] != prevsc)
  622. fflag = 1;
  623. nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
  624. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  625. start += sce->ics.swb_sizes[g];
  626. }
  627. }
  628. if (nminscaler < minscaler || sce->ics.num_windows > 1) {
  629. /** SF difference limit violation risk. Must re-clamp. */
  630. minscaler = nminscaler;
  631. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  632. for (g = 0; g < sce->ics.num_swb; g++) {
  633. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
  634. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  635. }
  636. }
  637. }
  638. its++;
  639. } while (fflag && its < maxits);
  640. prev = -1;
  641. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  642. /** Make sure proper codebooks are set */
  643. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  644. if (!sce->zeroes[w*16+g]) {
  645. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  646. if (sce->band_type[w*16+g] <= 0) {
  647. sce->zeroes[w*16+g] = 1;
  648. sce->band_type[w*16+g] = 0;
  649. }
  650. } else {
  651. sce->band_type[w*16+g] = 0;
  652. }
  653. /** Check that there's no SF delta range violations */
  654. if (!sce->zeroes[w*16+g]) {
  655. if (prev != -1) {
  656. av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
  657. av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
  658. } else if (sce->zeroes[0]) {
  659. /** Set global gain to something useful */
  660. sce->sf_idx[0] = sce->sf_idx[w*16+g];
  661. }
  662. prev = sce->sf_idx[w*16+g];
  663. }
  664. }
  665. }
  666. }
  667. #endif /* AVCODEC_AACCODER_TWOLOOP_H */