You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

581 lines
18KB

  1. /*
  2. * Copyright (c) 2012 Andrew D'Addesio
  3. * Copyright (c) 2013-2014 Mozilla Corporation
  4. * Copyright (c) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. /**
  23. * @file
  24. * Opus CELT decoder
  25. */
  26. #include "opus_celt.h"
  27. #include "opustab.h"
  28. #include "opus_pvq.h"
  29. /* Use the 2D z-transform to apply prediction in both the time domain (alpha)
  30. * and the frequency domain (beta) */
  31. static void celt_decode_coarse_energy(CeltFrame *f, OpusRangeCoder *rc)
  32. {
  33. int i, j;
  34. float prev[2] = { 0 };
  35. float alpha = ff_celt_alpha_coef[f->size];
  36. float beta = ff_celt_beta_coef[f->size];
  37. const uint8_t *model = ff_celt_coarse_energy_dist[f->size][0];
  38. /* intra frame */
  39. if (opus_rc_tell(rc) + 3 <= f->framebits && ff_opus_rc_dec_log(rc, 3)) {
  40. alpha = 0.0f;
  41. beta = 1.0f - (4915.0f/32768.0f);
  42. model = ff_celt_coarse_energy_dist[f->size][1];
  43. }
  44. for (i = 0; i < CELT_MAX_BANDS; i++) {
  45. for (j = 0; j < f->channels; j++) {
  46. CeltBlock *block = &f->block[j];
  47. float value;
  48. int available;
  49. if (i < f->start_band || i >= f->end_band) {
  50. block->energy[i] = 0.0;
  51. continue;
  52. }
  53. available = f->framebits - opus_rc_tell(rc);
  54. if (available >= 15) {
  55. /* decode using a Laplace distribution */
  56. int k = FFMIN(i, 20) << 1;
  57. value = ff_opus_rc_dec_laplace(rc, model[k] << 7, model[k+1] << 6);
  58. } else if (available >= 2) {
  59. int x = ff_opus_rc_dec_cdf(rc, ff_celt_model_energy_small);
  60. value = (x>>1) ^ -(x&1);
  61. } else if (available >= 1) {
  62. value = -(float)ff_opus_rc_dec_log(rc, 1);
  63. } else value = -1;
  64. block->energy[i] = FFMAX(-9.0f, block->energy[i]) * alpha + prev[j] + value;
  65. prev[j] += beta * value;
  66. }
  67. }
  68. }
  69. static void celt_decode_fine_energy(CeltFrame *f, OpusRangeCoder *rc)
  70. {
  71. int i;
  72. for (i = f->start_band; i < f->end_band; i++) {
  73. int j;
  74. if (!f->fine_bits[i])
  75. continue;
  76. for (j = 0; j < f->channels; j++) {
  77. CeltBlock *block = &f->block[j];
  78. int q2;
  79. float offset;
  80. q2 = ff_opus_rc_get_raw(rc, f->fine_bits[i]);
  81. offset = (q2 + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f - 0.5f;
  82. block->energy[i] += offset;
  83. }
  84. }
  85. }
  86. static void celt_decode_final_energy(CeltFrame *f, OpusRangeCoder *rc)
  87. {
  88. int priority, i, j;
  89. int bits_left = f->framebits - opus_rc_tell(rc);
  90. for (priority = 0; priority < 2; priority++) {
  91. for (i = f->start_band; i < f->end_band && bits_left >= f->channels; i++) {
  92. if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
  93. continue;
  94. for (j = 0; j < f->channels; j++) {
  95. int q2;
  96. float offset;
  97. q2 = ff_opus_rc_get_raw(rc, 1);
  98. offset = (q2 - 0.5f) * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
  99. f->block[j].energy[i] += offset;
  100. bits_left--;
  101. }
  102. }
  103. }
  104. }
  105. static void celt_decode_tf_changes(CeltFrame *f, OpusRangeCoder *rc)
  106. {
  107. int i, diff = 0, tf_select = 0, tf_changed = 0, tf_select_bit;
  108. int consumed, bits = f->transient ? 2 : 4;
  109. consumed = opus_rc_tell(rc);
  110. tf_select_bit = (f->size != 0 && consumed+bits+1 <= f->framebits);
  111. for (i = f->start_band; i < f->end_band; i++) {
  112. if (consumed+bits+tf_select_bit <= f->framebits) {
  113. diff ^= ff_opus_rc_dec_log(rc, bits);
  114. consumed = opus_rc_tell(rc);
  115. tf_changed |= diff;
  116. }
  117. f->tf_change[i] = diff;
  118. bits = f->transient ? 4 : 5;
  119. }
  120. if (tf_select_bit && ff_celt_tf_select[f->size][f->transient][0][tf_changed] !=
  121. ff_celt_tf_select[f->size][f->transient][1][tf_changed])
  122. tf_select = ff_opus_rc_dec_log(rc, 1);
  123. for (i = f->start_band; i < f->end_band; i++) {
  124. f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
  125. }
  126. }
  127. static void celt_denormalize(CeltFrame *f, CeltBlock *block, float *data)
  128. {
  129. int i, j;
  130. for (i = f->start_band; i < f->end_band; i++) {
  131. float *dst = data + (ff_celt_freq_bands[i] << f->size);
  132. float log_norm = block->energy[i] + ff_celt_mean_energy[i];
  133. float norm = exp2f(FFMIN(log_norm, 32.0f));
  134. for (j = 0; j < ff_celt_freq_range[i] << f->size; j++)
  135. dst[j] *= norm;
  136. }
  137. }
  138. static void celt_postfilter_apply_transition(CeltBlock *block, float *data)
  139. {
  140. const int T0 = block->pf_period_old;
  141. const int T1 = block->pf_period;
  142. float g00, g01, g02;
  143. float g10, g11, g12;
  144. float x0, x1, x2, x3, x4;
  145. int i;
  146. if (block->pf_gains[0] == 0.0 &&
  147. block->pf_gains_old[0] == 0.0)
  148. return;
  149. g00 = block->pf_gains_old[0];
  150. g01 = block->pf_gains_old[1];
  151. g02 = block->pf_gains_old[2];
  152. g10 = block->pf_gains[0];
  153. g11 = block->pf_gains[1];
  154. g12 = block->pf_gains[2];
  155. x1 = data[-T1 + 1];
  156. x2 = data[-T1];
  157. x3 = data[-T1 - 1];
  158. x4 = data[-T1 - 2];
  159. for (i = 0; i < CELT_OVERLAP; i++) {
  160. float w = ff_celt_window2[i];
  161. x0 = data[i - T1 + 2];
  162. data[i] += (1.0 - w) * g00 * data[i - T0] +
  163. (1.0 - w) * g01 * (data[i - T0 - 1] + data[i - T0 + 1]) +
  164. (1.0 - w) * g02 * (data[i - T0 - 2] + data[i - T0 + 2]) +
  165. w * g10 * x2 +
  166. w * g11 * (x1 + x3) +
  167. w * g12 * (x0 + x4);
  168. x4 = x3;
  169. x3 = x2;
  170. x2 = x1;
  171. x1 = x0;
  172. }
  173. }
  174. static void celt_postfilter(CeltFrame *f, CeltBlock *block)
  175. {
  176. int len = f->blocksize * f->blocks;
  177. const int filter_len = len - 2 * CELT_OVERLAP;
  178. celt_postfilter_apply_transition(block, block->buf + 1024);
  179. block->pf_period_old = block->pf_period;
  180. memcpy(block->pf_gains_old, block->pf_gains, sizeof(block->pf_gains));
  181. block->pf_period = block->pf_period_new;
  182. memcpy(block->pf_gains, block->pf_gains_new, sizeof(block->pf_gains));
  183. if (len > CELT_OVERLAP) {
  184. celt_postfilter_apply_transition(block, block->buf + 1024 + CELT_OVERLAP);
  185. if (block->pf_gains[0] > FLT_EPSILON && filter_len > 0)
  186. f->opusdsp.postfilter(block->buf + 1024 + 2 * CELT_OVERLAP,
  187. block->pf_period, block->pf_gains,
  188. filter_len);
  189. block->pf_period_old = block->pf_period;
  190. memcpy(block->pf_gains_old, block->pf_gains, sizeof(block->pf_gains));
  191. }
  192. memmove(block->buf, block->buf + len, (1024 + CELT_OVERLAP / 2) * sizeof(float));
  193. }
  194. static int parse_postfilter(CeltFrame *f, OpusRangeCoder *rc, int consumed)
  195. {
  196. int i;
  197. memset(f->block[0].pf_gains_new, 0, sizeof(f->block[0].pf_gains_new));
  198. memset(f->block[1].pf_gains_new, 0, sizeof(f->block[1].pf_gains_new));
  199. if (f->start_band == 0 && consumed + 16 <= f->framebits) {
  200. int has_postfilter = ff_opus_rc_dec_log(rc, 1);
  201. if (has_postfilter) {
  202. float gain;
  203. int tapset, octave, period;
  204. octave = ff_opus_rc_dec_uint(rc, 6);
  205. period = (16 << octave) + ff_opus_rc_get_raw(rc, 4 + octave) - 1;
  206. gain = 0.09375f * (ff_opus_rc_get_raw(rc, 3) + 1);
  207. tapset = (opus_rc_tell(rc) + 2 <= f->framebits) ?
  208. ff_opus_rc_dec_cdf(rc, ff_celt_model_tapset) : 0;
  209. for (i = 0; i < 2; i++) {
  210. CeltBlock *block = &f->block[i];
  211. block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
  212. block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
  213. block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
  214. block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
  215. }
  216. }
  217. consumed = opus_rc_tell(rc);
  218. }
  219. return consumed;
  220. }
  221. static void process_anticollapse(CeltFrame *f, CeltBlock *block, float *X)
  222. {
  223. int i, j, k;
  224. for (i = f->start_band; i < f->end_band; i++) {
  225. int renormalize = 0;
  226. float *xptr;
  227. float prev[2];
  228. float Ediff, r;
  229. float thresh, sqrt_1;
  230. int depth;
  231. /* depth in 1/8 bits */
  232. depth = (1 + f->pulses[i]) / (ff_celt_freq_range[i] << f->size);
  233. thresh = exp2f(-1.0 - 0.125f * depth);
  234. sqrt_1 = 1.0f / sqrtf(ff_celt_freq_range[i] << f->size);
  235. xptr = X + (ff_celt_freq_bands[i] << f->size);
  236. prev[0] = block->prev_energy[0][i];
  237. prev[1] = block->prev_energy[1][i];
  238. if (f->channels == 1) {
  239. CeltBlock *block1 = &f->block[1];
  240. prev[0] = FFMAX(prev[0], block1->prev_energy[0][i]);
  241. prev[1] = FFMAX(prev[1], block1->prev_energy[1][i]);
  242. }
  243. Ediff = block->energy[i] - FFMIN(prev[0], prev[1]);
  244. Ediff = FFMAX(0, Ediff);
  245. /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
  246. short blocks don't have the same energy as long */
  247. r = exp2f(1 - Ediff);
  248. if (f->size == 3)
  249. r *= M_SQRT2;
  250. r = FFMIN(thresh, r) * sqrt_1;
  251. for (k = 0; k < 1 << f->size; k++) {
  252. /* Detect collapse */
  253. if (!(block->collapse_masks[i] & 1 << k)) {
  254. /* Fill with noise */
  255. for (j = 0; j < ff_celt_freq_range[i]; j++)
  256. xptr[(j << f->size) + k] = (celt_rng(f) & 0x8000) ? r : -r;
  257. renormalize = 1;
  258. }
  259. }
  260. /* We just added some energy, so we need to renormalize */
  261. if (renormalize)
  262. celt_renormalize_vector(xptr, ff_celt_freq_range[i] << f->size, 1.0f);
  263. }
  264. }
  265. int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc,
  266. float **output, int channels, int frame_size,
  267. int start_band, int end_band)
  268. {
  269. int i, j, downmix = 0;
  270. int consumed; // bits of entropy consumed thus far for this frame
  271. MDCT15Context *imdct;
  272. if (channels != 1 && channels != 2) {
  273. av_log(f->avctx, AV_LOG_ERROR, "Invalid number of coded channels: %d\n",
  274. channels);
  275. return AVERROR_INVALIDDATA;
  276. }
  277. if (start_band < 0 || start_band > end_band || end_band > CELT_MAX_BANDS) {
  278. av_log(f->avctx, AV_LOG_ERROR, "Invalid start/end band: %d %d\n",
  279. start_band, end_band);
  280. return AVERROR_INVALIDDATA;
  281. }
  282. f->silence = 0;
  283. f->transient = 0;
  284. f->anticollapse = 0;
  285. f->flushed = 0;
  286. f->channels = channels;
  287. f->start_band = start_band;
  288. f->end_band = end_band;
  289. f->framebits = rc->rb.bytes * 8;
  290. f->size = av_log2(frame_size / CELT_SHORT_BLOCKSIZE);
  291. if (f->size > CELT_MAX_LOG_BLOCKS ||
  292. frame_size != CELT_SHORT_BLOCKSIZE * (1 << f->size)) {
  293. av_log(f->avctx, AV_LOG_ERROR, "Invalid CELT frame size: %d\n",
  294. frame_size);
  295. return AVERROR_INVALIDDATA;
  296. }
  297. if (!f->output_channels)
  298. f->output_channels = channels;
  299. for (i = 0; i < f->channels; i++) {
  300. memset(f->block[i].coeffs, 0, sizeof(f->block[i].coeffs));
  301. memset(f->block[i].collapse_masks, 0, sizeof(f->block[i].collapse_masks));
  302. }
  303. consumed = opus_rc_tell(rc);
  304. /* obtain silence flag */
  305. if (consumed >= f->framebits)
  306. f->silence = 1;
  307. else if (consumed == 1)
  308. f->silence = ff_opus_rc_dec_log(rc, 15);
  309. if (f->silence) {
  310. consumed = f->framebits;
  311. rc->total_bits += f->framebits - opus_rc_tell(rc);
  312. }
  313. /* obtain post-filter options */
  314. consumed = parse_postfilter(f, rc, consumed);
  315. /* obtain transient flag */
  316. if (f->size != 0 && consumed+3 <= f->framebits)
  317. f->transient = ff_opus_rc_dec_log(rc, 3);
  318. f->blocks = f->transient ? 1 << f->size : 1;
  319. f->blocksize = frame_size / f->blocks;
  320. imdct = f->imdct[f->transient ? 0 : f->size];
  321. if (channels == 1) {
  322. for (i = 0; i < CELT_MAX_BANDS; i++)
  323. f->block[0].energy[i] = FFMAX(f->block[0].energy[i], f->block[1].energy[i]);
  324. }
  325. celt_decode_coarse_energy(f, rc);
  326. celt_decode_tf_changes (f, rc);
  327. ff_celt_bitalloc (f, rc, 0);
  328. celt_decode_fine_energy (f, rc);
  329. ff_celt_quant_bands (f, rc);
  330. if (f->anticollapse_needed)
  331. f->anticollapse = ff_opus_rc_get_raw(rc, 1);
  332. celt_decode_final_energy(f, rc);
  333. /* apply anti-collapse processing and denormalization to
  334. * each coded channel */
  335. for (i = 0; i < f->channels; i++) {
  336. CeltBlock *block = &f->block[i];
  337. if (f->anticollapse)
  338. process_anticollapse(f, block, f->block[i].coeffs);
  339. celt_denormalize(f, block, f->block[i].coeffs);
  340. }
  341. /* stereo -> mono downmix */
  342. if (f->output_channels < f->channels) {
  343. f->dsp->vector_fmac_scalar(f->block[0].coeffs, f->block[1].coeffs, 1.0, FFALIGN(frame_size, 16));
  344. downmix = 1;
  345. } else if (f->output_channels > f->channels)
  346. memcpy(f->block[1].coeffs, f->block[0].coeffs, frame_size * sizeof(float));
  347. if (f->silence) {
  348. for (i = 0; i < 2; i++) {
  349. CeltBlock *block = &f->block[i];
  350. for (j = 0; j < FF_ARRAY_ELEMS(block->energy); j++)
  351. block->energy[j] = CELT_ENERGY_SILENCE;
  352. }
  353. memset(f->block[0].coeffs, 0, sizeof(f->block[0].coeffs));
  354. memset(f->block[1].coeffs, 0, sizeof(f->block[1].coeffs));
  355. }
  356. /* transform and output for each output channel */
  357. for (i = 0; i < f->output_channels; i++) {
  358. CeltBlock *block = &f->block[i];
  359. /* iMDCT and overlap-add */
  360. for (j = 0; j < f->blocks; j++) {
  361. float *dst = block->buf + 1024 + j * f->blocksize;
  362. imdct->imdct_half(imdct, dst + CELT_OVERLAP / 2, f->block[i].coeffs + j,
  363. f->blocks);
  364. f->dsp->vector_fmul_window(dst, dst, dst + CELT_OVERLAP / 2,
  365. ff_celt_window, CELT_OVERLAP / 2);
  366. }
  367. if (downmix)
  368. f->dsp->vector_fmul_scalar(&block->buf[1024], &block->buf[1024], 0.5f, frame_size);
  369. /* postfilter */
  370. celt_postfilter(f, block);
  371. /* deemphasis */
  372. block->emph_coeff = f->opusdsp.deemphasis(output[i],
  373. &block->buf[1024 - frame_size],
  374. block->emph_coeff, frame_size);
  375. }
  376. if (channels == 1)
  377. memcpy(f->block[1].energy, f->block[0].energy, sizeof(f->block[0].energy));
  378. for (i = 0; i < 2; i++ ) {
  379. CeltBlock *block = &f->block[i];
  380. if (!f->transient) {
  381. memcpy(block->prev_energy[1], block->prev_energy[0], sizeof(block->prev_energy[0]));
  382. memcpy(block->prev_energy[0], block->energy, sizeof(block->prev_energy[0]));
  383. } else {
  384. for (j = 0; j < CELT_MAX_BANDS; j++)
  385. block->prev_energy[0][j] = FFMIN(block->prev_energy[0][j], block->energy[j]);
  386. }
  387. for (j = 0; j < f->start_band; j++) {
  388. block->prev_energy[0][j] = CELT_ENERGY_SILENCE;
  389. block->energy[j] = 0.0;
  390. }
  391. for (j = f->end_band; j < CELT_MAX_BANDS; j++) {
  392. block->prev_energy[0][j] = CELT_ENERGY_SILENCE;
  393. block->energy[j] = 0.0;
  394. }
  395. }
  396. f->seed = rc->range;
  397. return 0;
  398. }
  399. void ff_celt_flush(CeltFrame *f)
  400. {
  401. int i, j;
  402. if (f->flushed)
  403. return;
  404. for (i = 0; i < 2; i++) {
  405. CeltBlock *block = &f->block[i];
  406. for (j = 0; j < CELT_MAX_BANDS; j++)
  407. block->prev_energy[0][j] = block->prev_energy[1][j] = CELT_ENERGY_SILENCE;
  408. memset(block->energy, 0, sizeof(block->energy));
  409. memset(block->buf, 0, sizeof(block->buf));
  410. memset(block->pf_gains, 0, sizeof(block->pf_gains));
  411. memset(block->pf_gains_old, 0, sizeof(block->pf_gains_old));
  412. memset(block->pf_gains_new, 0, sizeof(block->pf_gains_new));
  413. /* libopus uses CELT_EMPH_COEFF on init, but 0 is better since there's
  414. * a lesser discontinuity when seeking.
  415. * The deemphasis functions differ from libopus in that they require
  416. * an initial state divided by the coefficient. */
  417. block->emph_coeff = 0.0f / CELT_EMPH_COEFF;
  418. }
  419. f->seed = 0;
  420. f->flushed = 1;
  421. }
  422. void ff_celt_free(CeltFrame **f)
  423. {
  424. CeltFrame *frm = *f;
  425. int i;
  426. if (!frm)
  427. return;
  428. for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++)
  429. ff_mdct15_uninit(&frm->imdct[i]);
  430. ff_celt_pvq_uninit(&frm->pvq);
  431. av_freep(&frm->dsp);
  432. av_freep(f);
  433. }
  434. int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels,
  435. int apply_phase_inv)
  436. {
  437. CeltFrame *frm;
  438. int i, ret;
  439. if (output_channels != 1 && output_channels != 2) {
  440. av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: %d\n",
  441. output_channels);
  442. return AVERROR(EINVAL);
  443. }
  444. frm = av_mallocz(sizeof(*frm));
  445. if (!frm)
  446. return AVERROR(ENOMEM);
  447. frm->avctx = avctx;
  448. frm->output_channels = output_channels;
  449. frm->apply_phase_inv = apply_phase_inv;
  450. for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++)
  451. if ((ret = ff_mdct15_init(&frm->imdct[i], 1, i + 3, -1.0f/32768)) < 0)
  452. goto fail;
  453. if ((ret = ff_celt_pvq_init(&frm->pvq, 0)) < 0)
  454. goto fail;
  455. frm->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
  456. if (!frm->dsp) {
  457. ret = AVERROR(ENOMEM);
  458. goto fail;
  459. }
  460. ff_opus_dsp_init(&frm->opusdsp);
  461. ff_celt_flush(frm);
  462. *f = frm;
  463. return 0;
  464. fail:
  465. ff_celt_free(&frm);
  466. return ret;
  467. }