You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1027 lines
34KB

  1. /*
  2. * Copyright (c) 2012 Andrew D'Addesio
  3. * Copyright (c) 2013-2014 Mozilla Corporation
  4. * Copyright (c) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. /**
  23. * @file
  24. * Opus CELT decoder
  25. */
  26. #include "opus_celt.h"
  27. #include "opustab.h"
  28. #include "opus_pvq.h"
  29. /* Use the 2D z-transform to apply prediction in both the time domain (alpha)
  30. * and the frequency domain (beta) */
  31. static void celt_decode_coarse_energy(CeltFrame *f, OpusRangeCoder *rc)
  32. {
  33. int i, j;
  34. float prev[2] = { 0 };
  35. float alpha = ff_celt_alpha_coef[f->size];
  36. float beta = ff_celt_beta_coef[f->size];
  37. const uint8_t *model = ff_celt_coarse_energy_dist[f->size][0];
  38. /* intra frame */
  39. if (opus_rc_tell(rc) + 3 <= f->framebits && ff_opus_rc_dec_log(rc, 3)) {
  40. alpha = 0.0f;
  41. beta = 1.0f - (4915.0f/32768.0f);
  42. model = ff_celt_coarse_energy_dist[f->size][1];
  43. }
  44. for (i = 0; i < CELT_MAX_BANDS; i++) {
  45. for (j = 0; j < f->channels; j++) {
  46. CeltBlock *block = &f->block[j];
  47. float value;
  48. int available;
  49. if (i < f->start_band || i >= f->end_band) {
  50. block->energy[i] = 0.0;
  51. continue;
  52. }
  53. available = f->framebits - opus_rc_tell(rc);
  54. if (available >= 15) {
  55. /* decode using a Laplace distribution */
  56. int k = FFMIN(i, 20) << 1;
  57. value = ff_opus_rc_dec_laplace(rc, model[k] << 7, model[k+1] << 6);
  58. } else if (available >= 2) {
  59. int x = ff_opus_rc_dec_cdf(rc, ff_celt_model_energy_small);
  60. value = (x>>1) ^ -(x&1);
  61. } else if (available >= 1) {
  62. value = -(float)ff_opus_rc_dec_log(rc, 1);
  63. } else value = -1;
  64. block->energy[i] = FFMAX(-9.0f, block->energy[i]) * alpha + prev[j] + value;
  65. prev[j] += beta * value;
  66. }
  67. }
  68. }
  69. static void celt_decode_fine_energy(CeltFrame *f, OpusRangeCoder *rc)
  70. {
  71. int i;
  72. for (i = f->start_band; i < f->end_band; i++) {
  73. int j;
  74. if (!f->fine_bits[i])
  75. continue;
  76. for (j = 0; j < f->channels; j++) {
  77. CeltBlock *block = &f->block[j];
  78. int q2;
  79. float offset;
  80. q2 = ff_opus_rc_get_raw(rc, f->fine_bits[i]);
  81. offset = (q2 + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f - 0.5f;
  82. block->energy[i] += offset;
  83. }
  84. }
  85. }
  86. static void celt_decode_final_energy(CeltFrame *f, OpusRangeCoder *rc)
  87. {
  88. int priority, i, j;
  89. int bits_left = f->framebits - opus_rc_tell(rc);
  90. for (priority = 0; priority < 2; priority++) {
  91. for (i = f->start_band; i < f->end_band && bits_left >= f->channels; i++) {
  92. if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
  93. continue;
  94. for (j = 0; j < f->channels; j++) {
  95. int q2;
  96. float offset;
  97. q2 = ff_opus_rc_get_raw(rc, 1);
  98. offset = (q2 - 0.5f) * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
  99. f->block[j].energy[i] += offset;
  100. bits_left--;
  101. }
  102. }
  103. }
  104. }
  105. static void celt_decode_tf_changes(CeltFrame *f, OpusRangeCoder *rc)
  106. {
  107. int i, diff = 0, tf_select = 0, tf_changed = 0, tf_select_bit;
  108. int consumed, bits = f->transient ? 2 : 4;
  109. consumed = opus_rc_tell(rc);
  110. tf_select_bit = (f->size != 0 && consumed+bits+1 <= f->framebits);
  111. for (i = f->start_band; i < f->end_band; i++) {
  112. if (consumed+bits+tf_select_bit <= f->framebits) {
  113. diff ^= ff_opus_rc_dec_log(rc, bits);
  114. consumed = opus_rc_tell(rc);
  115. tf_changed |= diff;
  116. }
  117. f->tf_change[i] = diff;
  118. bits = f->transient ? 4 : 5;
  119. }
  120. if (tf_select_bit && ff_celt_tf_select[f->size][f->transient][0][tf_changed] !=
  121. ff_celt_tf_select[f->size][f->transient][1][tf_changed])
  122. tf_select = ff_opus_rc_dec_log(rc, 1);
  123. for (i = f->start_band; i < f->end_band; i++) {
  124. f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
  125. }
  126. }
/*
 * Decode the bit allocation of the current frame.
 *
 * Reads the spread flag, the per-band boosts, the allocation trim, the band
 * skip markers and the stereo flags from the range coder, and derives from
 * them how the frame's bits are split between the bands.
 *
 * Fields written: f->spread, f->anticollapse_needed, f->coded_bands,
 * f->intensity_stereo, f->dual_stereo, f->pulses[], f->fine_bits[],
 * f->fine_priority[] (for bands in [start_band, end_band)) and f->remaining.
 *
 * All bit counts below named totalbits/total/bandbits/boost/cap are in units
 * of 1/8 bit (see the "<< 3" conversions).
 *
 * NOTE(review): the divisions by
 * (ff_celt_freq_bands[...] - ff_celt_freq_bands[f->start_band]) and the
 * termination of the band skipping loop rely on end_band > start_band;
 * with an empty band range the divisor is zero.
 */
static void celt_decode_allocation(CeltFrame *f, OpusRangeCoder *rc)
{
    // approx. maximum bit allocation for each band before boost/trim
    int cap[CELT_MAX_BANDS];
    int boost[CELT_MAX_BANDS];          // extra bits granted per band by dynalloc
    int threshold[CELT_MAX_BANDS];      // minimum allocation for a band to be coded
    int bits1[CELT_MAX_BANDS];          // allocation at the lower bisection vector
    int bits2[CELT_MAX_BANDS];          // difference to the upper bisection vector
    int trim_offset[CELT_MAX_BANDS];    // per-band bias derived from alloctrim

    int skip_start_band = f->start_band; // band skipping never goes below this band
    int dynalloc = 6;                    // log-probability of the first boost flag
    int alloctrim = 5;                   // default trim when none is coded
    int extrabits = 0;                   // bits carried over from band to band

    int skip_bit = 0;
    int intensity_stereo_bit = 0;
    int dual_stereo_bit = 0;

    int remaining, bandbits;
    int low, high, total, done;
    int totalbits;
    int consumed;
    int i, j;

    consumed = opus_rc_tell(rc);

    /* obtain spread flag */
    f->spread = CELT_SPREAD_NORMAL;
    if (consumed + 4 <= f->framebits)
        f->spread = ff_opus_rc_dec_cdf(rc, ff_celt_model_spread);

    /* generate static allocation caps */
    for (i = 0; i < CELT_MAX_BANDS; i++) {
        cap[i] = (ff_celt_static_caps[f->size][f->channels - 1][i] + 64)
                 * ff_celt_freq_range[i] << (f->channels - 1) << f->size >> 2;
    }

    /* obtain band boost */
    totalbits = f->framebits << 3; // convert to 1/8 bits
    consumed = opus_rc_tell_frac(rc);
    for (i = f->start_band; i < f->end_band; i++) {
        int quanta, band_dynalloc;

        boost[i] = 0;

        /* size of one boost step for this band */
        quanta = ff_celt_freq_range[i] << (f->channels - 1) << f->size;
        quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
        band_dynalloc = dynalloc;
        /* keep reading boost flags while they fit and the cap is not reached */
        while (consumed + (band_dynalloc<<3) < totalbits && boost[i] < cap[i]) {
            int add = ff_opus_rc_dec_log(rc, band_dynalloc);
            consumed = opus_rc_tell_frac(rc);
            if (!add)
                break;

            boost[i]     += quanta;
            totalbits    -= quanta;
            /* further boosts of the same band are coded with 1 bit */
            band_dynalloc = 1;
        }
        /* dynalloc is more likely to occur if it's already been used for earlier bands */
        if (boost[i])
            dynalloc = FFMAX(2, dynalloc - 1);
    }

    /* obtain allocation trim */
    if (consumed + (6 << 3) <= totalbits)
        alloctrim = ff_opus_rc_dec_cdf(rc, ff_celt_model_alloc_trim);

    /* anti-collapse bit reservation */
    totalbits = (f->framebits << 3) - opus_rc_tell_frac(rc) - 1;
    f->anticollapse_needed = 0;
    if (f->blocks > 1 && f->size >= 2 &&
        totalbits >= ((f->size + 2) << 3))
        f->anticollapse_needed = 1 << 3;
    totalbits -= f->anticollapse_needed;

    /* band skip bit reservation */
    if (totalbits >= 1 << 3)
        skip_bit = 1 << 3;
    totalbits -= skip_bit;

    /* intensity/dual stereo bit reservation */
    if (f->channels == 2) {
        intensity_stereo_bit = ff_celt_log2_frac[f->end_band - f->start_band];
        if (intensity_stereo_bit <= totalbits) {
            totalbits -= intensity_stereo_bit;
            if (totalbits >= 1 << 3) {
                dual_stereo_bit = 1 << 3;
                totalbits -= 1 << 3;
            }
        } else
            intensity_stereo_bit = 0;
    }

    /* per-band skip threshold and trim bias */
    for (i = f->start_band; i < f->end_band; i++) {
        int trim     = alloctrim - 5 - f->size;
        int band     = ff_celt_freq_range[i] * (f->end_band - i - 1);
        int duration = f->size + 3;
        int scale    = duration + f->channels - 1;

        /* PVQ minimum allocation threshold, below this value the band is
         * skipped */
        threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
                             f->channels << 3);

        trim_offset[i] = trim * (band << scale) >> 6;

        /* bands that are a single bin wide get one bit per channel less */
        if (ff_celt_freq_range[i] << f->size == 1)
            trim_offset[i] -= f->channels << 3;
    }

    /* bisection: find the highest static allocation vector that still fits
     * into totalbits */
    low  = 1;
    high = CELT_VECTORS - 1;
    while (low <= high) {
        int center = (low + high) >> 1;
        done = total = 0;

        /* walk from the top band down; once one band reaches its threshold
         * ("done") all lower bands are counted in full */
        for (i = f->end_band - 1; i >= f->start_band; i--) {
            bandbits = ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]
                       << (f->channels - 1) << f->size >> 2;

            if (bandbits)
                bandbits = FFMAX(0, bandbits + trim_offset[i]);
            bandbits += boost[i];

            if (bandbits >= threshold[i] || done) {
                done = 1;
                total += FFMIN(bandbits, cap[i]);
            } else if (bandbits >= f->channels << 3)
                total += f->channels << 3;
        }

        if (total > totalbits)
            high = center - 1;
        else
            low = center + 1;
    }
    /* low is now the last vector that fits, high the first that does not */
    high = low--;

    for (i = f->start_band; i < f->end_band; i++) {
        bits1[i] = ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]
                   << (f->channels - 1) << f->size >> 2;
        /* past the last vector the cap itself is the upper bound */
        bits2[i] = high >= CELT_VECTORS ? cap[i] :
                   ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]
                   << (f->channels - 1) << f->size >> 2;

        if (bits1[i])
            bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
        if (bits2[i])
            bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
        if (low)
            bits1[i] += boost[i];
        bits2[i] += boost[i];

        /* the highest boosted band marks where band skipping has to stop */
        if (boost[i])
            skip_start_band = i;
        /* from here on bits2 holds the distance between the two vectors */
        bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
    }

    /* bisection: interpolate between the two vectors in
     * 1 / (1 << CELT_ALLOC_STEPS) steps */
    low  = 0;
    high = 1 << CELT_ALLOC_STEPS;
    for (i = 0; i < CELT_ALLOC_STEPS; i++) {
        int center = (low + high) >> 1;
        done = total = 0;

        for (j = f->end_band - 1; j >= f->start_band; j--) {
            bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);

            if (bandbits >= threshold[j] || done) {
                done = 1;
                total += FFMIN(bandbits, cap[j]);
            } else if (bandbits >= f->channels << 3)
                total += f->channels << 3;
        }
        if (total > totalbits)
            high = center;
        else
            low = center;
    }

    /* apply the interpolation factor found above and store the result */
    done = total = 0;
    for (i = f->end_band - 1; i >= f->start_band; i--) {
        bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);

        if (bandbits >= threshold[i] || done)
            done = 1;
        else
            bandbits = (bandbits >= f->channels << 3) ?
                       f->channels << 3 : 0;

        bandbits = FFMIN(bandbits, cap[i]);
        f->pulses[i] = bandbits;
        total += bandbits;
    }

    /* band skipping */
    for (f->coded_bands = f->end_band; ; f->coded_bands--) {
        int allocation;
        j = f->coded_bands - 1;

        if (j == skip_start_band) {
            /* all remaining bands are not skipped */
            totalbits += skip_bit;
            break;
        }

        /* determine the number of bits available for coding "do not skip" markers */
        remaining   = totalbits - total;
        bandbits    = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
        remaining  -= bandbits  * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
        allocation  = f->pulses[j] + bandbits * ff_celt_freq_range[j]
                      + FFMAX(0, remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[f->start_band]));

        /* a "do not skip" marker is only coded if the allocation is
           above the chosen threshold */
        if (allocation >= FFMAX(threshold[j], (f->channels + 1) <<3 )) {
            if (ff_opus_rc_dec_log(rc, 1))
                break;

            /* account for the marker bit that was just read */
            total      += 1 << 3;
            allocation -= 1 << 3;
        }

        /* the band is skipped, so reclaim its bits */
        total -= f->pulses[j];
        if (intensity_stereo_bit) {
            /* fewer candidate bands make the intensity index cheaper */
            total -= intensity_stereo_bit;
            intensity_stereo_bit = ff_celt_log2_frac[j - f->start_band];
            total += intensity_stereo_bit;
        }

        /* a skipped band keeps at most one bit per channel */
        total += f->pulses[j] = (allocation >= f->channels << 3) ?
                                f->channels << 3 : 0;
    }

    /* obtain stereo flags */
    f->intensity_stereo = 0;
    f->dual_stereo      = 0;
    if (intensity_stereo_bit)
        f->intensity_stereo = f->start_band +
                              ff_opus_rc_dec_uint(rc, f->coded_bands + 1 - f->start_band);
    if (f->intensity_stereo <= f->start_band)
        totalbits += dual_stereo_bit; /* no intensity stereo means no dual stereo */
    else if (dual_stereo_bit)
        f->dual_stereo = ff_opus_rc_dec_log(rc, 1);

    /* supply the remaining bits in this frame to lower bands */
    remaining = totalbits - total;
    bandbits  = remaining / (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
    remaining -= bandbits * (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
    for (i = f->start_band; i < f->coded_bands; i++) {
        /* the division remainder is handed out starting from the lowest band */
        int bits = FFMIN(remaining, ff_celt_freq_range[i]);

        f->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
        remaining    -= bits;
    }

    /* split each coded band's allocation into fine energy bits and PVQ bits */
    for (i = f->start_band; i < f->coded_bands; i++) {
        int N = ff_celt_freq_range[i] << f->size;
        int prev_extra = extrabits;
        f->pulses[i] += extrabits;

        if (N > 1) {
            int dof;        // degrees of freedom
            int temp;       // dof * channels * log(dof)
            int offset;     // fine energy quantization offset, i.e.
                            // extra bits assigned over the standard
                            // totalbits/dof
            int fine_bits, max_bits;

            /* anything above the cap is carried over to the next band */
            extrabits = FFMAX(0, f->pulses[i] - cap[i]);
            f->pulses[i] -= extrabits;

            /* intensity stereo makes use of an extra degree of freedom */
            dof = N * f->channels
                  + (f->channels == 2 && N > 2 && !f->dual_stereo && i < f->intensity_stereo);
            temp = dof * (ff_celt_log_freq_range[i] + (f->size<<3));
            offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
            if (N == 2) /* dof=2 is the only case that doesn't fit the model */
                offset += dof<<1;

            /* grant an additional bias for the first and second pulses */
            if (f->pulses[i] + offset < 2 * (dof << 3))
                offset += temp >> 2;
            else if (f->pulses[i] + offset < 3 * (dof << 3))
                offset += temp >> 3;

            fine_bits = (f->pulses[i] + offset + (dof << 2)) / (dof << 3);
            max_bits  = FFMIN((f->pulses[i]>>3) >> (f->channels - 1),
                              CELT_MAX_FINE_BITS);

            max_bits  = FFMAX(max_bits, 0);

            f->fine_bits[i] = av_clip(fine_bits, 0, max_bits);

            /* if fine_bits was rounded down or capped,
               give priority for the final fine energy pass */
            f->fine_priority[i] = (f->fine_bits[i] * (dof<<3) >= f->pulses[i] + offset);

            /* the remaining bits are assigned to PVQ */
            f->pulses[i] -= f->fine_bits[i] << (f->channels - 1) << 3;
        } else {
            /* all bits go to fine energy except for the sign bit */
            extrabits = FFMAX(0, f->pulses[i] - (f->channels << 3));
            f->pulses[i] -= extrabits;
            f->fine_bits[i] = 0;
            f->fine_priority[i] = 1;
        }

        /* hand back a limited number of extra fine energy bits to this band */
        if (extrabits > 0) {
            int fineextra = FFMIN(extrabits >> (f->channels + 2),
                                  CELT_MAX_FINE_BITS - f->fine_bits[i]);
            f->fine_bits[i] += fineextra;

            /* convert the number of fine bits back to 1/8 bits for all channels */
            fineextra <<= f->channels + 2;
            f->fine_priority[i] = (fineextra >= extrabits - prev_extra);
            extrabits -= fineextra;
        }
    }
    /* whatever could not be placed is left for the band decoder */
    f->remaining = extrabits;

    /* skipped bands dedicate all of their bits for fine energy */
    for (; i < f->end_band; i++) {
        f->fine_bits[i]     = f->pulses[i] >> (f->channels - 1) >> 3;
        f->pulses[i]        = 0;
        f->fine_priority[i] = f->fine_bits[i] < 1;
    }
}
  403. static void celt_denormalize(CeltFrame *f, CeltBlock *block, float *data)
  404. {
  405. int i, j;
  406. for (i = f->start_band; i < f->end_band; i++) {
  407. float *dst = data + (ff_celt_freq_bands[i] << f->size);
  408. float norm = exp2f(block->energy[i] + ff_celt_mean_energy[i]);
  409. for (j = 0; j < ff_celt_freq_range[i] << f->size; j++)
  410. dst[j] *= norm;
  411. }
  412. }
  413. static void celt_postfilter_apply_transition(CeltBlock *block, float *data)
  414. {
  415. const int T0 = block->pf_period_old;
  416. const int T1 = block->pf_period;
  417. float g00, g01, g02;
  418. float g10, g11, g12;
  419. float x0, x1, x2, x3, x4;
  420. int i;
  421. if (block->pf_gains[0] == 0.0 &&
  422. block->pf_gains_old[0] == 0.0)
  423. return;
  424. g00 = block->pf_gains_old[0];
  425. g01 = block->pf_gains_old[1];
  426. g02 = block->pf_gains_old[2];
  427. g10 = block->pf_gains[0];
  428. g11 = block->pf_gains[1];
  429. g12 = block->pf_gains[2];
  430. x1 = data[-T1 + 1];
  431. x2 = data[-T1];
  432. x3 = data[-T1 - 1];
  433. x4 = data[-T1 - 2];
  434. for (i = 0; i < CELT_OVERLAP; i++) {
  435. float w = ff_celt_window2[i];
  436. x0 = data[i - T1 + 2];
  437. data[i] += (1.0 - w) * g00 * data[i - T0] +
  438. (1.0 - w) * g01 * (data[i - T0 - 1] + data[i - T0 + 1]) +
  439. (1.0 - w) * g02 * (data[i - T0 - 2] + data[i - T0 + 2]) +
  440. w * g10 * x2 +
  441. w * g11 * (x1 + x3) +
  442. w * g12 * (x0 + x4);
  443. x4 = x3;
  444. x3 = x2;
  445. x2 = x1;
  446. x1 = x0;
  447. }
  448. }
  449. static void celt_postfilter_apply(CeltBlock *block, float *data, int len)
  450. {
  451. const int T = block->pf_period;
  452. float g0, g1, g2;
  453. float x0, x1, x2, x3, x4;
  454. int i;
  455. if (block->pf_gains[0] == 0.0 || len <= 0)
  456. return;
  457. g0 = block->pf_gains[0];
  458. g1 = block->pf_gains[1];
  459. g2 = block->pf_gains[2];
  460. x4 = data[-T - 2];
  461. x3 = data[-T - 1];
  462. x2 = data[-T];
  463. x1 = data[-T + 1];
  464. for (i = 0; i < len; i++) {
  465. x0 = data[i - T + 2];
  466. data[i] += g0 * x2 +
  467. g1 * (x1 + x3) +
  468. g2 * (x0 + x4);
  469. x4 = x3;
  470. x3 = x2;
  471. x2 = x1;
  472. x1 = x0;
  473. }
  474. }
  475. static void celt_postfilter(CeltFrame *f, CeltBlock *block)
  476. {
  477. int len = f->blocksize * f->blocks;
  478. celt_postfilter_apply_transition(block, block->buf + 1024);
  479. block->pf_period_old = block->pf_period;
  480. memcpy(block->pf_gains_old, block->pf_gains, sizeof(block->pf_gains));
  481. block->pf_period = block->pf_period_new;
  482. memcpy(block->pf_gains, block->pf_gains_new, sizeof(block->pf_gains));
  483. if (len > CELT_OVERLAP) {
  484. celt_postfilter_apply_transition(block, block->buf + 1024 + CELT_OVERLAP);
  485. celt_postfilter_apply(block, block->buf + 1024 + 2 * CELT_OVERLAP,
  486. len - 2 * CELT_OVERLAP);
  487. block->pf_period_old = block->pf_period;
  488. memcpy(block->pf_gains_old, block->pf_gains, sizeof(block->pf_gains));
  489. }
  490. memmove(block->buf, block->buf + len, (1024 + CELT_OVERLAP / 2) * sizeof(float));
  491. }
  492. static int parse_postfilter(CeltFrame *f, OpusRangeCoder *rc, int consumed)
  493. {
  494. int i;
  495. memset(f->block[0].pf_gains_new, 0, sizeof(f->block[0].pf_gains_new));
  496. memset(f->block[1].pf_gains_new, 0, sizeof(f->block[1].pf_gains_new));
  497. if (f->start_band == 0 && consumed + 16 <= f->framebits) {
  498. int has_postfilter = ff_opus_rc_dec_log(rc, 1);
  499. if (has_postfilter) {
  500. float gain;
  501. int tapset, octave, period;
  502. octave = ff_opus_rc_dec_uint(rc, 6);
  503. period = (16 << octave) + ff_opus_rc_get_raw(rc, 4 + octave) - 1;
  504. gain = 0.09375f * (ff_opus_rc_get_raw(rc, 3) + 1);
  505. tapset = (opus_rc_tell(rc) + 2 <= f->framebits) ?
  506. ff_opus_rc_dec_cdf(rc, ff_celt_model_tapset) : 0;
  507. for (i = 0; i < 2; i++) {
  508. CeltBlock *block = &f->block[i];
  509. block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
  510. block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
  511. block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
  512. block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
  513. }
  514. }
  515. consumed = opus_rc_tell(rc);
  516. }
  517. return consumed;
  518. }
  519. static void process_anticollapse(CeltFrame *f, CeltBlock *block, float *X)
  520. {
  521. int i, j, k;
  522. for (i = f->start_band; i < f->end_band; i++) {
  523. int renormalize = 0;
  524. float *xptr;
  525. float prev[2];
  526. float Ediff, r;
  527. float thresh, sqrt_1;
  528. int depth;
  529. /* depth in 1/8 bits */
  530. depth = (1 + f->pulses[i]) / (ff_celt_freq_range[i] << f->size);
  531. thresh = exp2f(-1.0 - 0.125f * depth);
  532. sqrt_1 = 1.0f / sqrtf(ff_celt_freq_range[i] << f->size);
  533. xptr = X + (ff_celt_freq_bands[i] << f->size);
  534. prev[0] = block->prev_energy[0][i];
  535. prev[1] = block->prev_energy[1][i];
  536. if (f->channels == 1) {
  537. CeltBlock *block1 = &f->block[1];
  538. prev[0] = FFMAX(prev[0], block1->prev_energy[0][i]);
  539. prev[1] = FFMAX(prev[1], block1->prev_energy[1][i]);
  540. }
  541. Ediff = block->energy[i] - FFMIN(prev[0], prev[1]);
  542. Ediff = FFMAX(0, Ediff);
  543. /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
  544. short blocks don't have the same energy as long */
  545. r = exp2f(1 - Ediff);
  546. if (f->size == 3)
  547. r *= M_SQRT2;
  548. r = FFMIN(thresh, r) * sqrt_1;
  549. for (k = 0; k < 1 << f->size; k++) {
  550. /* Detect collapse */
  551. if (!(block->collapse_masks[i] & 1 << k)) {
  552. /* Fill with noise */
  553. for (j = 0; j < ff_celt_freq_range[i]; j++)
  554. xptr[(j << f->size) + k] = (celt_rng(f) & 0x8000) ? r : -r;
  555. renormalize = 1;
  556. }
  557. }
  558. /* We just added some energy, so we need to renormalize */
  559. if (renormalize)
  560. celt_renormalize_vector(xptr, ff_celt_freq_range[i] << f->size, 1.0f);
  561. }
  562. }
/*
 * Decode the normalized coefficients of every band in
 * [start_band, end_band) through f->pvq->decode_band().
 *
 * The coefficients are written to f->block[0].coeffs (and f->block[1].coeffs
 * for stereo), both of which are cleared first.  Per-band collapse masks are
 * stored in block->collapse_masks[].  f->remaining and f->remaining2 are
 * updated as the bit balance carried from band to band; f->dual_stereo is
 * cleared once the intensity stereo band is reached.
 */
static void celt_decode_bands(CeltFrame *f, OpusRangeCoder *rc)
{
    float lowband_scratch[8 * 22];
    /* normalized spectrum kept as folding source: the first half is used
     * with X, the second half (norm2) with Y in dual stereo */
    float norm[2 * 8 * 100];

    /* bit budget in 1/8 bits, without the reserved anti-collapse bit */
    int totalbits = (f->framebits << 3) - f->anticollapse_needed;

    int update_lowband = 1;
    int lowband_offset = 0;

    int i, j;

    memset(f->block[0].coeffs, 0, sizeof(f->block[0].coeffs));
    /* NOTE(review): the size is taken from block[0]; presumably both blocks
     * have the same coeffs size since they are elements of one array */
    memset(f->block[1].coeffs, 0, sizeof(f->block[0].coeffs));

    for (i = f->start_band; i < f->end_band; i++) {
        /* by default every short block is assumed to carry energy */
        uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
        int band_offset = ff_celt_freq_bands[i] << f->size;
        int band_size   = ff_celt_freq_range[i] << f->size;
        float *X = f->block[0].coeffs + band_offset;
        float *Y = (f->channels == 2) ? f->block[1].coeffs + band_offset : NULL;

        int consumed = opus_rc_tell_frac(rc);
        float *norm2 = norm + 8 * 100;
        int effective_lowband = -1;     /* -1: no folding source available */
        int b = 0;                      /* bits granted to this band, in 1/8 bits */

        /* Compute how many bits we want to allocate to this band */
        if (i != f->start_band)
            f->remaining -= consumed;
        f->remaining2 = totalbits - consumed - 1;
        if (i <= f->coded_bands - 1) {
            /* spread the balance over at most the next three coded bands */
            int curr_balance = f->remaining / FFMIN(3, f->coded_bands-i);
            b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[i] + curr_balance), 14);
        }

        /* move the folding source up only when a full band width of already
         * decoded spectrum lies below this band */
        if (ff_celt_freq_bands[i] - ff_celt_freq_range[i] >= ff_celt_freq_bands[f->start_band] &&
            (update_lowband || lowband_offset == 0))
            lowband_offset = i;

        /* Get a conservative estimate of the collapse_mask's for the bands we're
           going to be folding from. */
        if (lowband_offset != 0 && (f->spread != CELT_SPREAD_AGGRESSIVE ||
                                    f->blocks > 1 || f->tf_change[i] < 0)) {
            int foldstart, foldend;

            /* This ensures we never repeat spectral content within one band */
            effective_lowband = FFMAX(ff_celt_freq_bands[f->start_band],
                                      ff_celt_freq_bands[lowband_offset] - ff_celt_freq_range[i]);
            /* find the range of bands [foldstart, foldend) covering the
             * folding source */
            foldstart = lowband_offset;
            while (ff_celt_freq_bands[--foldstart] > effective_lowband);
            foldend = lowband_offset - 1;
            while (ff_celt_freq_bands[++foldend] < effective_lowband + ff_celt_freq_range[i]);

            cm[0] = cm[1] = 0;
            for (j = foldstart; j < foldend; j++) {
                cm[0] |= f->block[0].collapse_masks[j];
                cm[1] |= f->block[f->channels - 1].collapse_masks[j];
            }
        }

        if (f->dual_stereo && i == f->intensity_stereo) {
            /* Switch off dual stereo to do intensity */
            f->dual_stereo = 0;
            /* merge the two folding sources into one by averaging */
            for (j = ff_celt_freq_bands[f->start_band] << f->size; j < band_offset; j++)
                norm[j] = (norm[j] + norm2[j]) / 2;
        }

        if (f->dual_stereo) {
            /* both channels are decoded independently with half the bits each */
            cm[0] = f->pvq->decode_band(f->pvq, f, rc, i, X, NULL, band_size, b / 2, f->blocks,
                                        effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
                                        norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]);

            cm[1] = f->pvq->decode_band(f->pvq, f, rc, i, Y, NULL, band_size, b/2, f->blocks,
                                        effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL, f->size,
                                        norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]);
        } else {
            /* mono or joint stereo: a single call handles X and Y together */
            cm[0] = f->pvq->decode_band(f->pvq, f, rc, i, X, Y, band_size, b, f->blocks,
                                        effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
                                        norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]|cm[1]);
            cm[1] = cm[0];
        }

        f->block[0].collapse_masks[i]               = (uint8_t)cm[0];
        f->block[f->channels - 1].collapse_masks[i] = (uint8_t)cm[1];
        f->remaining += f->pulses[i] + consumed;

        /* Update the folding position only as long as we have 1 bit/sample depth */
        update_lowband = (b > band_size << 3);
    }
}
  638. int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc,
  639. float **output, int channels, int frame_size,
  640. int start_band, int end_band)
  641. {
  642. int i, j, downmix = 0;
  643. int consumed; // bits of entropy consumed thus far for this frame
  644. MDCT15Context *imdct;
  645. if (channels != 1 && channels != 2) {
  646. av_log(f->avctx, AV_LOG_ERROR, "Invalid number of coded channels: %d\n",
  647. channels);
  648. return AVERROR_INVALIDDATA;
  649. }
  650. if (start_band < 0 || start_band > end_band || end_band > CELT_MAX_BANDS) {
  651. av_log(f->avctx, AV_LOG_ERROR, "Invalid start/end band: %d %d\n",
  652. start_band, end_band);
  653. return AVERROR_INVALIDDATA;
  654. }
  655. f->silence = 0;
  656. f->transient = 0;
  657. f->anticollapse = 0;
  658. f->flushed = 0;
  659. f->channels = channels;
  660. f->start_band = start_band;
  661. f->end_band = end_band;
  662. f->framebits = rc->rb.bytes * 8;
  663. f->size = av_log2(frame_size / CELT_SHORT_BLOCKSIZE);
  664. if (f->size > CELT_MAX_LOG_BLOCKS ||
  665. frame_size != CELT_SHORT_BLOCKSIZE * (1 << f->size)) {
  666. av_log(f->avctx, AV_LOG_ERROR, "Invalid CELT frame size: %d\n",
  667. frame_size);
  668. return AVERROR_INVALIDDATA;
  669. }
  670. if (!f->output_channels)
  671. f->output_channels = channels;
  672. memset(f->block[0].collapse_masks, 0, sizeof(f->block[0].collapse_masks));
  673. memset(f->block[1].collapse_masks, 0, sizeof(f->block[1].collapse_masks));
  674. consumed = opus_rc_tell(rc);
  675. /* obtain silence flag */
  676. if (consumed >= f->framebits)
  677. f->silence = 1;
  678. else if (consumed == 1)
  679. f->silence = ff_opus_rc_dec_log(rc, 15);
  680. if (f->silence) {
  681. consumed = f->framebits;
  682. rc->total_bits += f->framebits - opus_rc_tell(rc);
  683. }
  684. /* obtain post-filter options */
  685. consumed = parse_postfilter(f, rc, consumed);
  686. /* obtain transient flag */
  687. if (f->size != 0 && consumed+3 <= f->framebits)
  688. f->transient = ff_opus_rc_dec_log(rc, 3);
  689. f->blocks = f->transient ? 1 << f->size : 1;
  690. f->blocksize = frame_size / f->blocks;
  691. imdct = f->imdct[f->transient ? 0 : f->size];
  692. if (channels == 1) {
  693. for (i = 0; i < CELT_MAX_BANDS; i++)
  694. f->block[0].energy[i] = FFMAX(f->block[0].energy[i], f->block[1].energy[i]);
  695. }
  696. celt_decode_coarse_energy(f, rc);
  697. celt_decode_tf_changes (f, rc);
  698. celt_decode_allocation (f, rc);
  699. celt_decode_fine_energy (f, rc);
  700. celt_decode_bands (f, rc);
  701. if (f->anticollapse_needed)
  702. f->anticollapse = ff_opus_rc_get_raw(rc, 1);
  703. celt_decode_final_energy(f, rc);
  704. /* apply anti-collapse processing and denormalization to
  705. * each coded channel */
  706. for (i = 0; i < f->channels; i++) {
  707. CeltBlock *block = &f->block[i];
  708. if (f->anticollapse)
  709. process_anticollapse(f, block, f->block[i].coeffs);
  710. celt_denormalize(f, block, f->block[i].coeffs);
  711. }
  712. /* stereo -> mono downmix */
  713. if (f->output_channels < f->channels) {
  714. f->dsp->vector_fmac_scalar(f->block[0].coeffs, f->block[1].coeffs, 1.0, FFALIGN(frame_size, 16));
  715. downmix = 1;
  716. } else if (f->output_channels > f->channels)
  717. memcpy(f->block[1].coeffs, f->block[0].coeffs, frame_size * sizeof(float));
  718. if (f->silence) {
  719. for (i = 0; i < 2; i++) {
  720. CeltBlock *block = &f->block[i];
  721. for (j = 0; j < FF_ARRAY_ELEMS(block->energy); j++)
  722. block->energy[j] = CELT_ENERGY_SILENCE;
  723. }
  724. memset(f->block[0].coeffs, 0, sizeof(f->block[0].coeffs));
  725. memset(f->block[1].coeffs, 0, sizeof(f->block[1].coeffs));
  726. }
  727. /* transform and output for each output channel */
  728. for (i = 0; i < f->output_channels; i++) {
  729. CeltBlock *block = &f->block[i];
  730. float m = block->emph_coeff;
  731. /* iMDCT and overlap-add */
  732. for (j = 0; j < f->blocks; j++) {
  733. float *dst = block->buf + 1024 + j * f->blocksize;
  734. imdct->imdct_half(imdct, dst + CELT_OVERLAP / 2, f->block[i].coeffs + j,
  735. f->blocks);
  736. f->dsp->vector_fmul_window(dst, dst, dst + CELT_OVERLAP / 2,
  737. ff_celt_window, CELT_OVERLAP / 2);
  738. }
  739. if (downmix)
  740. f->dsp->vector_fmul_scalar(&block->buf[1024], &block->buf[1024], 0.5f, frame_size);
  741. /* postfilter */
  742. celt_postfilter(f, block);
  743. /* deemphasis and output scaling */
  744. for (j = 0; j < frame_size; j++) {
  745. const float tmp = block->buf[1024 - frame_size + j] + m;
  746. m = tmp * CELT_EMPH_COEFF;
  747. output[i][j] = tmp;
  748. }
  749. block->emph_coeff = m;
  750. }
  751. if (channels == 1)
  752. memcpy(f->block[1].energy, f->block[0].energy, sizeof(f->block[0].energy));
  753. for (i = 0; i < 2; i++ ) {
  754. CeltBlock *block = &f->block[i];
  755. if (!f->transient) {
  756. memcpy(block->prev_energy[1], block->prev_energy[0], sizeof(block->prev_energy[0]));
  757. memcpy(block->prev_energy[0], block->energy, sizeof(block->prev_energy[0]));
  758. } else {
  759. for (j = 0; j < CELT_MAX_BANDS; j++)
  760. block->prev_energy[0][j] = FFMIN(block->prev_energy[0][j], block->energy[j]);
  761. }
  762. for (j = 0; j < f->start_band; j++) {
  763. block->prev_energy[0][j] = CELT_ENERGY_SILENCE;
  764. block->energy[j] = 0.0;
  765. }
  766. for (j = f->end_band; j < CELT_MAX_BANDS; j++) {
  767. block->prev_energy[0][j] = CELT_ENERGY_SILENCE;
  768. block->energy[j] = 0.0;
  769. }
  770. }
  771. f->seed = rc->range;
  772. return 0;
  773. }
  774. void ff_celt_flush(CeltFrame *f)
  775. {
  776. int i, j;
  777. if (f->flushed)
  778. return;
  779. for (i = 0; i < 2; i++) {
  780. CeltBlock *block = &f->block[i];
  781. for (j = 0; j < CELT_MAX_BANDS; j++)
  782. block->prev_energy[0][j] = block->prev_energy[1][j] = CELT_ENERGY_SILENCE;
  783. memset(block->energy, 0, sizeof(block->energy));
  784. memset(block->buf, 0, sizeof(block->buf));
  785. memset(block->pf_gains, 0, sizeof(block->pf_gains));
  786. memset(block->pf_gains_old, 0, sizeof(block->pf_gains_old));
  787. memset(block->pf_gains_new, 0, sizeof(block->pf_gains_new));
  788. block->emph_coeff = 0.0;
  789. }
  790. f->seed = 0;
  791. f->flushed = 1;
  792. }
  793. void ff_celt_free(CeltFrame **f)
  794. {
  795. CeltFrame *frm = *f;
  796. int i;
  797. if (!frm)
  798. return;
  799. for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++)
  800. ff_mdct15_uninit(&frm->imdct[i]);
  801. ff_celt_pvq_uninit(&frm->pvq);
  802. av_freep(&frm->dsp);
  803. av_freep(f);
  804. }
  805. int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels)
  806. {
  807. CeltFrame *frm;
  808. int i, ret;
  809. if (output_channels != 1 && output_channels != 2) {
  810. av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: %d\n",
  811. output_channels);
  812. return AVERROR(EINVAL);
  813. }
  814. frm = av_mallocz(sizeof(*frm));
  815. if (!frm)
  816. return AVERROR(ENOMEM);
  817. frm->avctx = avctx;
  818. frm->output_channels = output_channels;
  819. for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++)
  820. if ((ret = ff_mdct15_init(&frm->imdct[i], 1, i + 3, -1.0f/32768)) < 0)
  821. goto fail;
  822. if ((ret = ff_celt_pvq_init(&frm->pvq)) < 0)
  823. goto fail;
  824. frm->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
  825. if (!frm->dsp) {
  826. ret = AVERROR(ENOMEM);
  827. goto fail;
  828. }
  829. ff_celt_flush(frm);
  830. *f = frm;
  831. return 0;
  832. fail:
  833. ff_celt_free(&frm);
  834. return ret;
  835. }