You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

756 lines
34KB

  1. /*
  2. * AAC encoder twoloop coder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC encoder twoloop coder
  24. * @author Konstantin Shishkov, Claudio Freire
  25. */
  26. /**
  27. * This file contains a template for the twoloop coder function.
  28. * The including file must already have declared the following functions
  29. * from aacenc_quantization/util.h before this header is included. They are not
  30. * included explicitly here so that alternative implementations can be provided:
  31. * - quantize_band_cost
  32. * - abs_pow34_v
  33. * - find_max_val
  34. * - find_min_book
  35. * - find_form_factor
  36. */
  37. #ifndef AVCODEC_AACCODER_TWOLOOP_H
  38. #define AVCODEC_AACCODER_TWOLOOP_H
  39. #include <float.h>
  40. #include "libavutil/mathematics.h"
  41. #include "mathops.h"
  42. #include "avcodec.h"
  43. #include "put_bits.h"
  44. #include "aac.h"
  45. #include "aacenc.h"
  46. #include "aactab.h"
  47. #include "aacenctab.h"
  48. /** Frequency in Hz for lower limit of noise substitution **/
  49. #define NOISE_LOW_LIMIT 4000
  50. #define sclip(x) av_clip(x,60,218)
  51. /* Reflects the cost to change codebooks */
  52. static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
  53. {
  54. return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
  55. }
  56. /**
  57. * two-loop quantizers search taken from ISO 13818-7 Appendix C
  58. */
  59. static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  60. AACEncContext *s,
  61. SingleChannelElement *sce,
  62. const float lambda)
  63. {
  64. int start = 0, i, w, w2, g, recomprd;
  65. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  66. / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  67. * (lambda / 120.f);
  68. int refbits = destbits;
  69. int toomanybits, toofewbits;
  70. char nzs[128];
  71. uint8_t nextband[128];
  72. int maxsf[128];
  73. float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
  74. float maxvals[128], spread_thr_r[128];
  75. float min_spread_thr_r, max_spread_thr_r;
  76. /**
  77. * rdlambda controls the maximum tolerated distortion. Twoloop
  78. * will keep iterating until it fails to lower it or it reaches
  79. * ulimit * rdlambda. Keeping it low increases quality on difficult
  80. * signals, but lower it too much, and bits will be taken from weak
  81. * signals, creating "holes". A balance is necesary.
  82. * rdmax and rdmin specify the relative deviation from rdlambda
  83. * allowed for tonality compensation
  84. */
  85. float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
  86. const float nzslope = 1.5f;
  87. float rdmin = 0.03125f;
  88. float rdmax = 1.0f;
  89. /**
  90. * sfoffs controls an offset of optmium allocation that will be
  91. * applied based on lambda. Keep it real and modest, the loop
  92. * will take care of the rest, this just accelerates convergence
  93. */
  94. float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
  95. int fflag, minscaler, maxscaler, nminscaler;
  96. int its = 0;
  97. int maxits = 30;
  98. int allz = 0;
  99. int tbits;
  100. int cutoff = 1024;
  101. int pns_start_pos;
  102. int prev;
  103. /**
  104. * zeroscale controls a multiplier of the threshold, if band energy
  105. * is below this, a zero is forced. Keep it lower than 1, unless
  106. * low lambda is used, because energy < threshold doesn't mean there's
  107. * no audible signal outright, it's just energy. Also make it rise
  108. * slower than rdlambda, as rdscale has due compensation with
  109. * noisy band depriorization below, whereas zeroing logic is rather dumb
  110. */
  111. float zeroscale;
  112. if (lambda > 120.f) {
  113. zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
  114. } else {
  115. zeroscale = 1.f;
  116. }
  117. if (s->psy.bitres.alloc >= 0) {
  118. /**
  119. * Psy granted us extra bits to use, from the reservoire
  120. * adjust for lambda except what psy already did
  121. */
  122. destbits = s->psy.bitres.alloc
  123. * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
  124. }
  125. if (avctx->flags & CODEC_FLAG_QSCALE) {
  126. /**
  127. * Constant Q-scale doesn't compensate MS coding on its own
  128. * No need to be overly precise, this only controls RD
  129. * adjustment CB limits when going overboard
  130. */
  131. if (s->options.mid_side && s->cur_type == TYPE_CPE)
  132. destbits *= 2;
  133. /**
  134. * When using a constant Q-scale, don't adjust bits, just use RD
  135. * Don't let it go overboard, though... 8x psy target is enough
  136. */
  137. toomanybits = 5800;
  138. toofewbits = destbits / 16;
  139. /** Don't offset scalers, just RD */
  140. sfoffs = sce->ics.num_windows - 1;
  141. rdlambda = sqrtf(rdlambda);
  142. /** search further */
  143. maxits *= 2;
  144. } else {
  145. /* When using ABR, be strict, but a reasonable leeway is
  146. * critical to allow RC to smoothly track desired bitrate
  147. * without sudden quality drops that cause audible artifacts.
  148. * Symmetry is also desirable, to avoid systematic bias.
  149. */
  150. toomanybits = destbits + destbits/8;
  151. toofewbits = destbits - destbits/8;
  152. sfoffs = 0;
  153. rdlambda = sqrtf(rdlambda);
  154. }
  155. /** and zero out above cutoff frequency */
  156. {
  157. int wlen = 1024 / sce->ics.num_windows;
  158. int bandwidth;
  159. /**
  160. * Scale, psy gives us constant quality, this LP only scales
  161. * bitrate by lambda, so we save bits on subjectively unimportant HF
  162. * rather than increase quantization noise. Adjust nominal bitrate
  163. * to effective bitrate according to encoding parameters,
  164. * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
  165. */
  166. float rate_bandwidth_multiplier = 1.5f;
  167. int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
  168. ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
  169. : (avctx->bit_rate / avctx->channels);
  170. /** Compensate for extensions that increase efficiency */
  171. if (s->options.pns || s->options.intensity_stereo)
  172. frame_bit_rate *= 1.15f;
  173. if (avctx->cutoff > 0) {
  174. bandwidth = avctx->cutoff;
  175. } else {
  176. bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
  177. s->psy.cutoff = bandwidth;
  178. }
  179. cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
  180. pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
  181. }
  182. /**
  183. * for values above this the decoder might end up in an endless loop
  184. * due to always having more bits than what can be encoded.
  185. */
  186. destbits = FFMIN(destbits, 5800);
  187. toomanybits = FFMIN(toomanybits, 5800);
  188. toofewbits = FFMIN(toofewbits, 5800);
  189. /**
  190. * XXX: some heuristic to determine initial quantizers will reduce search time
  191. * determine zero bands and upper distortion limits
  192. */
  193. min_spread_thr_r = -1;
  194. max_spread_thr_r = -1;
  195. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  196. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  197. int nz = 0;
  198. float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
  199. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  200. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  201. if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
  202. sce->zeroes[(w+w2)*16+g] = 1;
  203. continue;
  204. }
  205. nz = 1;
  206. }
  207. if (!nz) {
  208. uplim = 0.0f;
  209. } else {
  210. nz = 0;
  211. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  212. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  213. if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
  214. continue;
  215. uplim += band->threshold;
  216. energy += band->energy;
  217. spread += band->spread;
  218. nz++;
  219. }
  220. }
  221. uplims[w*16+g] = uplim;
  222. energies[w*16+g] = energy;
  223. nzs[w*16+g] = nz;
  224. sce->zeroes[w*16+g] = !nz;
  225. allz |= nz;
  226. if (nz && sce->can_pns[w*16+g]) {
  227. spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
  228. if (min_spread_thr_r < 0) {
  229. min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
  230. } else {
  231. min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
  232. max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
  233. }
  234. }
  235. }
  236. }
  237. /** Compute initial scalers */
  238. minscaler = 65535;
  239. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  240. for (g = 0; g < sce->ics.num_swb; g++) {
  241. if (sce->zeroes[w*16+g]) {
  242. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  243. continue;
  244. }
  245. /**
  246. * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
  247. * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
  248. * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
  249. * more robust.
  250. */
  251. sce->sf_idx[w*16+g] = av_clip(
  252. SCALE_ONE_POS
  253. + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
  254. + sfoffs,
  255. 60, SCALE_MAX_POS);
  256. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  257. }
  258. }
  259. /** Clip */
  260. minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  261. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  262. for (g = 0; g < sce->ics.num_swb; g++)
  263. if (!sce->zeroes[w*16+g])
  264. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
  265. if (!allz)
  266. return;
  267. abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  268. ff_quantize_band_cost_cache_init(s);
  269. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  270. start = w*128;
  271. for (g = 0; g < sce->ics.num_swb; g++) {
  272. const float *scaled = s->scoefs + start;
  273. maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  274. start += sce->ics.swb_sizes[g];
  275. }
  276. }
  277. /**
  278. * Scale uplims to match rate distortion to quality
  279. * bu applying noisy band depriorization and tonal band priorization.
  280. * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
  281. * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
  282. * rate distortion requirements.
  283. */
  284. memcpy(euplims, uplims, sizeof(euplims));
  285. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  286. /** psy already priorizes transients to some extent */
  287. float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
  288. start = w*128;
  289. for (g = 0; g < sce->ics.num_swb; g++) {
  290. if (nzs[g] > 0) {
  291. float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
  292. float energy2uplim = find_form_factor(
  293. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  294. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  295. sce->coeffs + start,
  296. nzslope * cleanup_factor);
  297. energy2uplim *= de_psy_factor;
  298. if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
  299. /** In ABR, we need to priorize less and let rate control do its thing */
  300. energy2uplim = sqrtf(energy2uplim);
  301. }
  302. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  303. uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
  304. * sce->ics.group_len[w];
  305. energy2uplim = find_form_factor(
  306. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  307. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  308. sce->coeffs + start,
  309. 2.0f);
  310. energy2uplim *= de_psy_factor;
  311. if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
  312. /** In ABR, we need to priorize less and let rate control do its thing */
  313. energy2uplim = sqrtf(energy2uplim);
  314. }
  315. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  316. euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
  317. 0.5f, 1.0f);
  318. }
  319. start += sce->ics.swb_sizes[g];
  320. }
  321. }
  322. for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
  323. maxsf[i] = SCALE_MAX_POS;
  324. //perform two-loop search
  325. //outer loop - improve quality
  326. do {
  327. //inner loop - quantize spectrum to fit into given number of bits
  328. int overdist;
  329. int qstep = its ? 1 : 32;
  330. do {
  331. int changed = 0;
  332. prev = -1;
  333. recomprd = 0;
  334. tbits = 0;
  335. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  336. start = w*128;
  337. for (g = 0; g < sce->ics.num_swb; g++) {
  338. const float *coefs = &sce->coeffs[start];
  339. const float *scaled = &s->scoefs[start];
  340. int bits = 0;
  341. int cb;
  342. float dist = 0.0f;
  343. float qenergy = 0.0f;
  344. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  345. start += sce->ics.swb_sizes[g];
  346. if (sce->can_pns[w*16+g]) {
  347. /** PNS isn't free */
  348. tbits += ff_pns_bits(sce, w, g);
  349. }
  350. continue;
  351. }
  352. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  353. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  354. int b;
  355. float sqenergy;
  356. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  357. scaled + w2*128,
  358. sce->ics.swb_sizes[g],
  359. sce->sf_idx[w*16+g],
  360. cb,
  361. 1.0f,
  362. INFINITY,
  363. &b, &sqenergy,
  364. 0);
  365. bits += b;
  366. qenergy += sqenergy;
  367. }
  368. dists[w*16+g] = dist - bits;
  369. qenergies[w*16+g] = qenergy;
  370. if (prev != -1) {
  371. int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
  372. bits += ff_aac_scalefactor_bits[sfdiff];
  373. }
  374. tbits += bits;
  375. start += sce->ics.swb_sizes[g];
  376. prev = sce->sf_idx[w*16+g];
  377. }
  378. }
  379. if (tbits > toomanybits) {
  380. recomprd = 1;
  381. for (i = 0; i < 128; i++) {
  382. if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
  383. int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
  384. int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
  385. if (new_sf != sce->sf_idx[i]) {
  386. sce->sf_idx[i] = new_sf;
  387. changed = 1;
  388. }
  389. }
  390. }
  391. } else if (tbits < toofewbits) {
  392. recomprd = 1;
  393. for (i = 0; i < 128; i++) {
  394. if (sce->sf_idx[i] > SCALE_ONE_POS) {
  395. int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
  396. if (new_sf != sce->sf_idx[i]) {
  397. sce->sf_idx[i] = new_sf;
  398. changed = 1;
  399. }
  400. }
  401. }
  402. }
  403. qstep >>= 1;
  404. if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
  405. qstep = 1;
  406. } while (qstep);
  407. overdist = 1;
  408. fflag = tbits < toofewbits;
  409. for (i = 0; i < 2 && (overdist || recomprd); ++i) {
  410. if (recomprd) {
  411. /** Must recompute distortion */
  412. prev = -1;
  413. tbits = 0;
  414. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  415. start = w*128;
  416. for (g = 0; g < sce->ics.num_swb; g++) {
  417. const float *coefs = sce->coeffs + start;
  418. const float *scaled = s->scoefs + start;
  419. int bits = 0;
  420. int cb;
  421. float dist = 0.0f;
  422. float qenergy = 0.0f;
  423. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  424. start += sce->ics.swb_sizes[g];
  425. if (sce->can_pns[w*16+g]) {
  426. /** PNS isn't free */
  427. tbits += ff_pns_bits(sce, w, g);
  428. }
  429. continue;
  430. }
  431. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  432. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  433. int b;
  434. float sqenergy;
  435. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  436. scaled + w2*128,
  437. sce->ics.swb_sizes[g],
  438. sce->sf_idx[w*16+g],
  439. cb,
  440. 1.0f,
  441. INFINITY,
  442. &b, &sqenergy,
  443. 0);
  444. bits += b;
  445. qenergy += sqenergy;
  446. }
  447. dists[w*16+g] = dist - bits;
  448. qenergies[w*16+g] = qenergy;
  449. if (prev != -1) {
  450. int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
  451. bits += ff_aac_scalefactor_bits[sfdiff];
  452. }
  453. tbits += bits;
  454. start += sce->ics.swb_sizes[g];
  455. prev = sce->sf_idx[w*16+g];
  456. }
  457. }
  458. }
  459. if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
  460. float maxoverdist = 0.0f;
  461. float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
  462. overdist = recomprd = 0;
  463. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  464. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  465. if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
  466. float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
  467. maxoverdist = FFMAX(maxoverdist, ovrdist);
  468. overdist++;
  469. }
  470. }
  471. }
  472. if (overdist) {
  473. /* We have overdistorted bands, trade for zeroes (that can be noise)
  474. * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
  475. */
  476. float minspread = max_spread_thr_r;
  477. float maxspread = min_spread_thr_r;
  478. float zspread;
  479. int zeroable = 0;
  480. int zeroed = 0;
  481. int maxzeroed, zloop;
  482. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  483. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  484. if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
  485. minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
  486. maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
  487. zeroable++;
  488. }
  489. }
  490. }
  491. zspread = (maxspread-minspread) * 0.0125f + minspread;
  492. /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
  493. * and forced the hand of the later search_for_pns step.
  494. * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
  495. * and leave further PNSing to search_for_pns if worthwhile.
  496. */
  497. zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
  498. ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
  499. maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
  500. for (zloop = 0; zloop < 2; zloop++) {
  501. /* Two passes: first distorted stuff - two birds in one shot and all that,
  502. * then anything viable. Viable means not zero, but either CB=zero-able
  503. * (too high SF), not SF <= 1 (that means we'd be operating at very high
  504. * quality, we don't want PNS when doing VHQ), PNS allowed, and within
  505. * the lowest ranking percentile.
  506. */
  507. float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
  508. int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
  509. int mcb;
  510. for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
  511. if (sce->ics.swb_offset[g] < pns_start_pos)
  512. continue;
  513. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  514. if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
  515. && sce->sf_idx[w*16+g] > loopminsf
  516. && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
  517. || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
  518. sce->zeroes[w*16+g] = 1;
  519. sce->band_type[w*16+g] = 0;
  520. zeroed++;
  521. }
  522. }
  523. }
  524. }
  525. if (zeroed)
  526. recomprd = fflag = 1;
  527. } else {
  528. overdist = 0;
  529. }
  530. }
  531. }
  532. minscaler = SCALE_MAX_POS;
  533. maxscaler = 0;
  534. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  535. for (g = 0; g < sce->ics.num_swb; g++) {
  536. if (!sce->zeroes[w*16+g]) {
  537. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  538. maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
  539. }
  540. }
  541. }
  542. minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  543. prev = -1;
  544. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  545. /** Start with big steps, end up fine-tunning */
  546. int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
  547. int edepth = depth+2;
  548. float uplmax = its / (maxits*0.25f) + 1.0f;
  549. uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
  550. start = w * 128;
  551. for (g = 0; g < sce->ics.num_swb; g++) {
  552. int prevsc = sce->sf_idx[w*16+g];
  553. if (prev < 0 && !sce->zeroes[w*16+g])
  554. prev = sce->sf_idx[0];
  555. if (!sce->zeroes[w*16+g]) {
  556. const float *coefs = sce->coeffs + start;
  557. const float *scaled = s->scoefs + start;
  558. int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  559. int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
  560. int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
  561. if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > mindeltasf) {
  562. /* Try to make sure there is some energy in every nonzero band
  563. * NOTE: This algorithm must be forcibly imbalanced, pushing harder
  564. * on holes or more distorted bands at first, otherwise there's
  565. * no net gain (since the next iteration will offset all bands
  566. * on the opposite direction to compensate for extra bits)
  567. */
  568. for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
  569. int cb, bits;
  570. float dist, qenergy;
  571. int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
  572. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  573. dist = qenergy = 0.f;
  574. bits = 0;
  575. if (!cb) {
  576. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
  577. } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
  578. break;
  579. }
  580. /* !g is the DC band, it's important, since quantization error here
  581. * applies to less than a cycle, it creates horrible intermodulation
  582. * distortion if it doesn't stick to what psy requests
  583. */
  584. if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
  585. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
  586. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  587. int b;
  588. float sqenergy;
  589. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  590. scaled + w2*128,
  591. sce->ics.swb_sizes[g],
  592. sce->sf_idx[w*16+g]-1,
  593. cb,
  594. 1.0f,
  595. INFINITY,
  596. &b, &sqenergy,
  597. 0);
  598. bits += b;
  599. qenergy += sqenergy;
  600. }
  601. sce->sf_idx[w*16+g]--;
  602. dists[w*16+g] = dist - bits;
  603. qenergies[w*16+g] = qenergy;
  604. if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
  605. (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
  606. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  607. ) )) {
  608. break;
  609. }
  610. }
  611. } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
  612. && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
  613. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  614. ) {
  615. /** Um... over target. Save bits for more important stuff. */
  616. for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
  617. int cb, bits;
  618. float dist, qenergy;
  619. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
  620. if (cb > 0) {
  621. dist = qenergy = 0.f;
  622. bits = 0;
  623. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  624. int b;
  625. float sqenergy;
  626. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  627. scaled + w2*128,
  628. sce->ics.swb_sizes[g],
  629. sce->sf_idx[w*16+g]+1,
  630. cb,
  631. 1.0f,
  632. INFINITY,
  633. &b, &sqenergy,
  634. 0);
  635. bits += b;
  636. qenergy += sqenergy;
  637. }
  638. dist -= bits;
  639. if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
  640. sce->sf_idx[w*16+g]++;
  641. dists[w*16+g] = dist;
  642. qenergies[w*16+g] = qenergy;
  643. } else {
  644. break;
  645. }
  646. } else {
  647. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
  648. break;
  649. }
  650. }
  651. }
  652. prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
  653. if (sce->sf_idx[w*16+g] != prevsc)
  654. fflag = 1;
  655. nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
  656. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  657. }
  658. start += sce->ics.swb_sizes[g];
  659. }
  660. }
  661. /** SF difference limit violation risk. Must re-clamp. */
  662. prev = -1;
  663. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  664. for (g = 0; g < sce->ics.num_swb; g++) {
  665. if (!sce->zeroes[w*16+g]) {
  666. int prevsf = sce->sf_idx[w*16+g];
  667. if (prev < 0)
  668. prev = prevsf;
  669. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
  670. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  671. prev = sce->sf_idx[w*16+g];
  672. if (!fflag && prevsf != sce->sf_idx[w*16+g])
  673. fflag = 1;
  674. }
  675. }
  676. }
  677. its++;
  678. } while (fflag && its < maxits);
  679. /** Scout out next nonzero bands */
  680. ff_init_nextband_map(sce, nextband);
  681. prev = -1;
  682. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  683. /** Make sure proper codebooks are set */
  684. for (g = 0; g < sce->ics.num_swb; g++) {
  685. if (!sce->zeroes[w*16+g]) {
  686. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  687. if (sce->band_type[w*16+g] <= 0) {
  688. if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
  689. /** Cannot zero out, make sure it's not attempted */
  690. sce->band_type[w*16+g] = 1;
  691. } else {
  692. sce->zeroes[w*16+g] = 1;
  693. sce->band_type[w*16+g] = 0;
  694. }
  695. }
  696. } else {
  697. sce->band_type[w*16+g] = 0;
  698. }
  699. /** Check that there's no SF delta range violations */
  700. if (!sce->zeroes[w*16+g]) {
  701. if (prev != -1) {
  702. av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
  703. av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
  704. } else if (sce->zeroes[0]) {
  705. /** Set global gain to something useful */
  706. sce->sf_idx[0] = sce->sf_idx[w*16+g];
  707. }
  708. prev = sce->sf_idx[w*16+g];
  709. }
  710. }
  711. }
  712. }
  713. #endif /* AVCODEC_AACCODER_TWOLOOP_H */