You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1076 lines
38KB

  1. /*
  2. * Opus encoder
  3. * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "opusenc.h"
  22. #include "opus_pvq.h"
  23. #include "opusenc_psy.h"
  24. #include "opustab.h"
  25. #include "libavutil/float_dsp.h"
  26. #include "libavutil/opt.h"
  27. #include "internal.h"
  28. #include "bytestream.h"
  29. #include "audio_frame_queue.h"
  /* Private state of the Opus encoder, reached through avctx->priv_data by the
   * init/encode/end functions of this file. */
  30. typedef struct OpusEncContext {
  31. AVClass *av_class;
  32. OpusEncOptions options;
  33. OpusPsyContext psyctx; /* handed to the ff_opus_psy_*() calls */
  34. AVCodecContext *avctx; /* owning codec context, stored by opus_encode_init() */
  35. AudioFrameQueue afq; /* sample accounting via ff_af_queue_*(), yields pts/duration */
  36. AVFloatDSPContext *dsp; /* vector_fmul helpers used when windowing */
  37. MDCT15Context *mdct[CELT_BLOCK_NB]; /* transforms, indexed by frame size in celt_frame_mdct() */
  38. CeltPVQ *pvq;
  39. struct FFBufQueue bufqueue; /* cloned input AVFrames waiting to be encoded */
  40. uint8_t enc_id[64];
  41. int enc_id_bits;
  42. OpusPacketInfo packet; /* passed to ff_opus_psy_process(); describes the packet being built */
  43. int channels; /* copy of avctx->channels */
  44. CeltFrame *frame; /* per-frame state, indexed by frame position in the packet */
  45. OpusRangeCoder *rc; /* one range coder per frame, same indexing as frame[] */
  46. /* Actual energy the decoder will have */
  47. float last_quantized_energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];
  48. DECLARE_ALIGNED(32, float, scratch)[2048]; /* window/temp workspace of celt_frame_mdct() */
  49. } OpusEncContext;
  50. static void opus_write_extradata(AVCodecContext *avctx)
  51. {
  52. uint8_t *bs = avctx->extradata;
  53. bytestream_put_buffer(&bs, "OpusHead", 8);
  54. bytestream_put_byte (&bs, 0x1);
  55. bytestream_put_byte (&bs, avctx->channels);
  56. bytestream_put_le16 (&bs, avctx->initial_padding);
  57. bytestream_put_le32 (&bs, avctx->sample_rate);
  58. bytestream_put_le16 (&bs, 0x0);
  59. bytestream_put_byte (&bs, 0x0); /* Default layout */
  60. }
  61. static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed)
  62. {
  63. int i, tmp = 0x0, extended_toc = 0;
  64. static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = {
  65. /* Silk Hybrid Celt Layer */
  66. /* NB MB WB SWB FB NB MB WB SWB FB NB MB WB SWB FB Bandwidth */
  67. { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 17, 0, 21, 25, 29 } }, /* 2.5 ms */
  68. { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 18, 0, 22, 26, 30 } }, /* 5 ms */
  69. { { 1, 5, 9, 0, 0 }, { 0, 0, 0, 13, 15 }, { 19, 0, 23, 27, 31 } }, /* 10 ms */
  70. { { 2, 6, 10, 0, 0 }, { 0, 0, 0, 14, 16 }, { 20, 0, 24, 28, 32 } }, /* 20 ms */
  71. { { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */
  72. { { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */
  73. };
  74. int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth];
  75. *fsize_needed = 0;
  76. if (!cfg)
  77. return 1;
  78. if (s->packet.frames == 2) { /* 2 packets */
  79. if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */
  80. tmp = 0x1;
  81. } else { /* different size */
  82. tmp = 0x2;
  83. *fsize_needed = 1; /* put frame sizes in the packet */
  84. }
  85. } else if (s->packet.frames > 2) {
  86. tmp = 0x3;
  87. extended_toc = 1;
  88. }
  89. tmp |= (s->channels > 1) << 2; /* Stereo or mono */
  90. tmp |= (cfg - 1) << 3; /* codec configuration */
  91. *toc++ = tmp;
  92. if (extended_toc) {
  93. for (i = 0; i < (s->packet.frames - 1); i++)
  94. *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
  95. tmp = (*fsize_needed) << 7; /* vbr flag */
  96. tmp |= (0) << 6; /* padding flag */
  97. tmp |= s->packet.frames;
  98. *toc++ = tmp;
  99. }
  100. *size = 1 + extended_toc;
  101. return 0;
  102. }
  103. static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
  104. {
  105. int sf, ch;
  106. AVFrame *cur = NULL;
  107. const int subframesize = s->avctx->frame_size;
  108. int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
  109. cur = ff_bufqueue_get(&s->bufqueue);
  110. for (ch = 0; ch < f->channels; ch++) {
  111. CeltBlock *b = &f->block[ch];
  112. const void *input = cur->extended_data[ch];
  113. size_t bps = av_get_bytes_per_sample(cur->format);
  114. memcpy(b->overlap, input, bps*cur->nb_samples);
  115. }
  116. av_frame_free(&cur);
  117. for (sf = 0; sf < subframes; sf++) {
  118. if (sf != (subframes - 1))
  119. cur = ff_bufqueue_get(&s->bufqueue);
  120. else
  121. cur = ff_bufqueue_peek(&s->bufqueue, 0);
  122. for (ch = 0; ch < f->channels; ch++) {
  123. CeltBlock *b = &f->block[ch];
  124. const void *input = cur->extended_data[ch];
  125. const size_t bps = av_get_bytes_per_sample(cur->format);
  126. const size_t left = (subframesize - cur->nb_samples)*bps;
  127. const size_t len = FFMIN(subframesize, cur->nb_samples)*bps;
  128. memcpy(&b->samples[sf*subframesize], input, len);
  129. memset(&b->samples[cur->nb_samples], 0, left);
  130. }
  131. /* Last frame isn't popped off and freed yet - we need it for overlap */
  132. if (sf != (subframes - 1))
  133. av_frame_free(&cur);
  134. }
  135. }
  136. /* Apply the pre emphasis filter */
  137. static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
  138. {
  139. int i, sf, ch;
  140. const int subframesize = s->avctx->frame_size;
  141. const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
  142. /* Filter overlap */
  143. for (ch = 0; ch < f->channels; ch++) {
  144. CeltBlock *b = &f->block[ch];
  145. float m = b->emph_coeff;
  146. for (i = 0; i < CELT_OVERLAP; i++) {
  147. float sample = b->overlap[i];
  148. b->overlap[i] = sample - m;
  149. m = sample * CELT_EMPH_COEFF;
  150. }
  151. b->emph_coeff = m;
  152. }
  153. /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */
  154. for (sf = 0; sf < subframes; sf++) {
  155. for (ch = 0; ch < f->channels; ch++) {
  156. CeltBlock *b = &f->block[ch];
  157. float m = b->emph_coeff;
  158. for (i = 0; i < subframesize; i++) {
  159. float sample = b->samples[sf*subframesize + i];
  160. b->samples[sf*subframesize + i] = sample - m;
  161. m = sample * CELT_EMPH_COEFF;
  162. }
  163. if (sf != (subframes - 1))
  164. b->emph_coeff = m;
  165. }
  166. }
  167. }
  168. /* Create the window and do the mdct */
  169. static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
  170. {
  171. int i, j, t, ch;
  172. float *win = s->scratch, *temp = s->scratch + 1920;
  173. if (f->transient) {
  174. for (ch = 0; ch < f->channels; ch++) {
  175. CeltBlock *b = &f->block[ch];
  176. float *src1 = b->overlap;
  177. for (t = 0; t < f->blocks; t++) {
  178. float *src2 = &b->samples[CELT_OVERLAP*t];
  179. s->dsp->vector_fmul(win, src1, ff_celt_window, 128);
  180. s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
  181. ff_celt_window - 8, 128);
  182. src1 = src2;
  183. s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
  184. }
  185. }
  186. } else {
  187. int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
  188. int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
  189. memset(win, 0, wlen*sizeof(float));
  190. for (ch = 0; ch < f->channels; ch++) {
  191. CeltBlock *b = &f->block[ch];
  192. /* Overlap */
  193. s->dsp->vector_fmul(temp, b->overlap, ff_celt_window, 128);
  194. memcpy(win + lap_dst, temp, CELT_OVERLAP*sizeof(float));
  195. /* Samples, flat top window */
  196. memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float));
  197. /* Samples, windowed */
  198. s->dsp->vector_fmul_reverse(temp, b->samples + rwin,
  199. ff_celt_window - 8, 128);
  200. memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float));
  201. s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
  202. }
  203. }
  204. for (ch = 0; ch < f->channels; ch++) {
  205. CeltBlock *block = &f->block[ch];
  206. for (i = 0; i < CELT_MAX_BANDS; i++) {
  207. float ener = 0.0f;
  208. int band_offset = ff_celt_freq_bands[i] << f->size;
  209. int band_size = ff_celt_freq_range[i] << f->size;
  210. float *coeffs = &block->coeffs[band_offset];
  211. for (j = 0; j < band_size; j++)
  212. ener += coeffs[j]*coeffs[j];
  213. block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON;
  214. ener = 1.0f/block->lin_energy[i];
  215. for (j = 0; j < band_size; j++)
  216. coeffs[j] *= ener;
  217. block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i];
  218. /* CELT_ENERGY_SILENCE is what the decoder uses and its not -infinity */
  219. block->energy[i] = FFMAX(block->energy[i], CELT_ENERGY_SILENCE);
  220. }
  221. }
  222. }
  223. static void celt_enc_tf(CeltFrame *f, OpusRangeCoder *rc)
  224. {
  225. int i, tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed;
  226. int bits = f->transient ? 2 : 4;
  227. tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits));
  228. for (i = f->start_band; i < f->end_band; i++) {
  229. if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) {
  230. const int tbit = (diff ^ 1) == f->tf_change[i];
  231. ff_opus_rc_enc_log(rc, tbit, bits);
  232. diff ^= tbit;
  233. tf_changed |= diff;
  234. }
  235. bits = f->transient ? 4 : 5;
  236. }
  237. if (tf_select_needed && ff_celt_tf_select[f->size][f->transient][0][tf_changed] !=
  238. ff_celt_tf_select[f->size][f->transient][1][tf_changed]) {
  239. ff_opus_rc_enc_log(rc, f->tf_select, 1);
  240. tf_select = f->tf_select;
  241. }
  242. for (i = f->start_band; i < f->end_band; i++)
  243. f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
  244. }
  /*
   * Encoder-side bit allocation for one CELT frame.
   *
   * Writes to rc, in order: the spread decision, per-band boosts, the
   * allocation trim, the band skip markers and the intensity/dual stereo
   * flags. Each is only written while it still fits in f->framebits.
   *
   * Fills f->caps[], f->pulses[], f->fine_bits[], f->fine_priority[],
   * f->coded_bands, f->anticollapse_needed and f->remaining. It may also
   * overwrite f->spread and clamp f->intensity_stereo.
   *
   * Budgets named *_8ths and the values in boost/threshold/bits1/bits2 are
   * in eighths of a bit (f->framebits << 3).
   */
  245. void ff_celt_enc_bitalloc(CeltFrame *f, OpusRangeCoder *rc)
  246. {
  247. int i, j, low, high, total, done, bandbits, remaining, tbits_8ths;
  248. int skip_startband = f->start_band;
  249. int skip_bit = 0;
  250. int intensitystereo_bit = 0;
  251. int dualstereo_bit = 0;
  252. int dynalloc = 6;
  253. int extrabits = 0;
  254. int *cap = f->caps;
  255. int boost[CELT_MAX_BANDS];
  256. int trim_offset[CELT_MAX_BANDS];
  257. int threshold[CELT_MAX_BANDS];
  258. int bits1[CELT_MAX_BANDS];
  259. int bits2[CELT_MAX_BANDS];
  260. /* Tell the spread to the decoder */
  261. if (opus_rc_tell(rc) + 4 <= f->framebits)
  262. ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread);
  263. else
  264. f->spread = CELT_SPREAD_NORMAL;
  265. /* Generate static allocation caps */
  266. for (i = 0; i < CELT_MAX_BANDS; i++) {
  267. cap[i] = (ff_celt_static_caps[f->size][f->channels - 1][i] + 64)
  268. * ff_celt_freq_range[i] << (f->channels - 1) << f->size >> 2;
  269. }
  270. /* Band boosts */
  271. tbits_8ths = f->framebits << 3;
  272. for (i = f->start_band; i < f->end_band; i++) {
  273. int quanta, b_dynalloc, boost_amount = f->alloc_boost[i];
  274. boost[i] = 0;
  275. quanta = ff_celt_freq_range[i] << (f->channels - 1) << f->size;
  276. quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
  277. b_dynalloc = dynalloc;
  /* One flag per boost step; the first flag of a band costs b_dynalloc
   * bits, every further one a single bit. A zero flag ends the band. */
  278. while (opus_rc_tell_frac(rc) + (b_dynalloc << 3) < tbits_8ths && boost[i] < cap[i]) {
  279. int is_boost = boost_amount--;
  280. ff_opus_rc_enc_log(rc, is_boost, b_dynalloc);
  281. if (!is_boost)
  282. break;
  283. boost[i] += quanta;
  284. tbits_8ths -= quanta;
  285. b_dynalloc = 1;
  286. }
  /* A boosted band lowers the first-flag cost for the following bands,
   * down to a minimum of 2 */
  287. if (boost[i])
  288. dynalloc = FFMAX(2, dynalloc - 1);
  289. }
  290. /* Put allocation trim */
  291. if (opus_rc_tell_frac(rc) + (6 << 3) <= tbits_8ths)
  292. ff_opus_rc_enc_cdf(rc, f->alloc_trim, ff_celt_model_alloc_trim);
  293. /* Anti-collapse bit reservation */
  294. tbits_8ths = (f->framebits << 3) - opus_rc_tell_frac(rc) - 1;
  295. f->anticollapse_needed = 0;
  296. if (f->transient && f->size >= 2 && tbits_8ths >= ((f->size + 2) << 3))
  297. f->anticollapse_needed = 1 << 3;
  298. tbits_8ths -= f->anticollapse_needed;
  299. /* Band skip bit reservation */
  300. if (tbits_8ths >= 1 << 3)
  301. skip_bit = 1 << 3;
  302. tbits_8ths -= skip_bit;
  303. /* Intensity/dual stereo bit reservation */
  304. if (f->channels == 2) {
  305. intensitystereo_bit = ff_celt_log2_frac[f->end_band - f->start_band];
  306. if (intensitystereo_bit <= tbits_8ths) {
  307. tbits_8ths -= intensitystereo_bit;
  308. if (tbits_8ths >= 1 << 3) {
  309. dualstereo_bit = 1 << 3;
  310. tbits_8ths -= 1 << 3;
  311. }
  312. } else {
  313. intensitystereo_bit = 0;
  314. }
  315. }
  316. /* Trim offsets */
  317. for (i = f->start_band; i < f->end_band; i++) {
  318. int trim = f->alloc_trim - 5 - f->size;
  319. int band = ff_celt_freq_range[i] * (f->end_band - i - 1);
  320. int duration = f->size + 3;
  321. int scale = duration + f->channels - 1;
  322. /* PVQ minimum allocation threshold, below this value the band is
  323. * skipped */
  324. threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
  325. f->channels << 3);
  326. trim_offset[i] = trim * (band << scale) >> 6;
  327. if (ff_celt_freq_range[i] << f->size == 1)
  328. trim_offset[i] -= f->channels << 3;
  329. }
  330. /* Bisection: find the last static allocation vector that fits the budget */
  331. low = 1;
  332. high = CELT_VECTORS - 1;
  333. while (low <= high) {
  334. int center = (low + high) >> 1;
  335. done = total = 0;
  336. for (i = f->end_band - 1; i >= f->start_band; i--) {
  337. bandbits = ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]
  338. << (f->channels - 1) << f->size >> 2;
  339. if (bandbits)
  340. bandbits = FFMAX(0, bandbits + trim_offset[i]);
  341. bandbits += boost[i];
  342. if (bandbits >= threshold[i] || done) {
  343. done = 1;
  344. total += FFMIN(bandbits, cap[i]);
  345. } else if (bandbits >= f->channels << 3)
  346. total += f->channels << 3;
  347. }
  348. if (total > tbits_8ths)
  349. high = center - 1;
  350. else
  351. low = center + 1;
  352. }
  /* low becomes the last vector that fit, high the next one up */
  353. high = low--;
  354. /* Per-band allocation at the two bracketing vectors; bits2 ends up as the difference */
  355. for (i = f->start_band; i < f->end_band; i++) {
  356. bits1[i] = ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]
  357. << (f->channels - 1) << f->size >> 2;
  358. bits2[i] = high >= CELT_VECTORS ? cap[i] :
  359. ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]
  360. << (f->channels - 1) << f->size >> 2;
  361. if (bits1[i])
  362. bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
  363. if (bits2[i])
  364. bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
  365. if (low)
  366. bits1[i] += boost[i];
  367. bits2[i] += boost[i];
  /* Bands at or below the highest boosted band are never skipped */
  368. if (boost[i])
  369. skip_startband = i;
  370. bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
  371. }
  372. /* Bisection: find the interpolation point between the two vectors */
  373. low = 0;
  374. high = 1 << CELT_ALLOC_STEPS;
  375. for (i = 0; i < CELT_ALLOC_STEPS; i++) {
  376. int center = (low + high) >> 1;
  377. done = total = 0;
  378. for (j = f->end_band - 1; j >= f->start_band; j--) {
  379. bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);
  380. if (bandbits >= threshold[j] || done) {
  381. done = 1;
  382. total += FFMIN(bandbits, cap[j]);
  383. } else if (bandbits >= f->channels << 3)
  384. total += f->channels << 3;
  385. }
  386. if (total > tbits_8ths)
  387. high = center;
  388. else
  389. low = center;
  390. }
  391. /* Apply the chosen interpolation point to get the initial per-band allocation */
  392. done = total = 0;
  393. for (i = f->end_band - 1; i >= f->start_band; i--) {
  394. bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);
  395. if (bandbits >= threshold[i] || done)
  396. done = 1;
  397. else
  398. bandbits = (bandbits >= f->channels << 3) ?
  399. f->channels << 3 : 0;
  400. bandbits = FFMIN(bandbits, cap[i]);
  401. f->pulses[i] = bandbits;
  402. total += bandbits;
  403. }
  404. /* Band skipping */
  405. for (f->coded_bands = f->end_band; ; f->coded_bands--) {
  406. int allocation;
  407. j = f->coded_bands - 1;
  408. if (j == skip_startband) {
  409. /* all remaining bands are not skipped */
  410. tbits_8ths += skip_bit;
  411. break;
  412. }
  413. /* determine the number of bits available for coding "do not skip" markers */
  414. remaining = tbits_8ths - total;
  415. bandbits = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
  416. remaining -= bandbits * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
  417. allocation = f->pulses[j] + bandbits * ff_celt_freq_range[j]
  418. + FFMAX(0, remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[f->start_band]));
  419. /* a "do not skip" marker is only coded if the allocation is
  420. above the chosen threshold */
  421. if (allocation >= FFMAX(threshold[j], (f->channels + 1) << 3)) {
  422. const int do_not_skip = f->coded_bands <= f->skip_band_floor;
  423. ff_opus_rc_enc_log(rc, do_not_skip, 1);
  424. if (do_not_skip)
  425. break;
  426. total += 1 << 3;
  427. allocation -= 1 << 3;
  428. }
  429. /* the band is skipped, so reclaim its bits */
  430. total -= f->pulses[j];
  431. if (intensitystereo_bit) {
  432. total -= intensitystereo_bit;
  433. intensitystereo_bit = ff_celt_log2_frac[j - f->start_band];
  434. total += intensitystereo_bit;
  435. }
  436. total += f->pulses[j] = (allocation >= f->channels << 3) ? f->channels << 3 : 0;
  437. }
  438. /* Encode stereo flags */
  439. if (intensitystereo_bit) {
  440. f->intensity_stereo = FFMIN(f->intensity_stereo, f->coded_bands);
  441. ff_opus_rc_enc_uint(rc, f->intensity_stereo, f->coded_bands + 1 - f->start_band);
  442. }
  443. if (f->intensity_stereo <= f->start_band)
  444. tbits_8ths += dualstereo_bit; /* no intensity stereo means no dual stereo */
  445. else if (dualstereo_bit)
  446. ff_opus_rc_enc_log(rc, f->dual_stereo, 1);
  447. /* Supply the remaining bits in this frame to lower bands */
  448. remaining = tbits_8ths - total;
  449. bandbits = remaining / (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
  450. remaining -= bandbits * (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
  451. for (i = f->start_band; i < f->coded_bands; i++) {
  452. int bits = FFMIN(remaining, ff_celt_freq_range[i]);
  453. f->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
  454. remaining -= bits;
  455. }
  456. /* Finally determine the allocation */
  457. for (i = f->start_band; i < f->coded_bands; i++) {
  458. int N = ff_celt_freq_range[i] << f->size;
  459. int prev_extra = extrabits;
  /* bits left over from the previous band are carried into this one */
  460. f->pulses[i] += extrabits;
  461. if (N > 1) {
  462. int dof; // degrees of freedom
  463. int temp; // dof * channels * log(dof)
  464. int offset; // fine energy quantization offset, i.e.
  465. // extra bits assigned over the standard
  466. // totalbits/dof
  467. int fine_bits, max_bits;
  468. extrabits = FFMAX(0, f->pulses[i] - cap[i]);
  469. f->pulses[i] -= extrabits;
  470. /* intensity stereo makes use of an extra degree of freedom */
  471. dof = N * f->channels + (f->channels == 2 && N > 2 && !f->dual_stereo && i < f->intensity_stereo);
  472. temp = dof * (ff_celt_log_freq_range[i] + (f->size << 3));
  473. offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
  474. if (N == 2) /* dof=2 is the only case that doesn't fit the model */
  475. offset += dof << 1;
  476. /* grant an additional bias for the first and second pulses */
  477. if (f->pulses[i] + offset < 2 * (dof << 3))
  478. offset += temp >> 2;
  479. else if (f->pulses[i] + offset < 3 * (dof << 3))
  480. offset += temp >> 3;
  481. fine_bits = (f->pulses[i] + offset + (dof << 2)) / (dof << 3);
  482. max_bits = FFMIN((f->pulses[i] >> 3) >> (f->channels - 1), CELT_MAX_FINE_BITS);
  483. max_bits = FFMAX(max_bits, 0);
  484. f->fine_bits[i] = av_clip(fine_bits, 0, max_bits);
  485. /* if fine_bits was rounded down or capped,
  486. give priority for the final fine energy pass */
  487. f->fine_priority[i] = (f->fine_bits[i] * (dof << 3) >= f->pulses[i] + offset);
  488. /* the remaining bits are assigned to PVQ */
  489. f->pulses[i] -= f->fine_bits[i] << (f->channels - 1) << 3;
  490. } else {
  491. /* all bits go to fine energy except for the sign bit */
  492. extrabits = FFMAX(0, f->pulses[i] - (f->channels << 3));
  493. f->pulses[i] -= extrabits;
  494. f->fine_bits[i] = 0;
  495. f->fine_priority[i] = 1;
  496. }
  497. /* hand back a limited number of extra fine energy bits to this band */
  498. if (extrabits > 0) {
  499. int fineextra = FFMIN(extrabits >> (f->channels + 2),
  500. CELT_MAX_FINE_BITS - f->fine_bits[i]);
  501. f->fine_bits[i] += fineextra;
  502. fineextra <<= f->channels + 2;
  503. f->fine_priority[i] = (fineextra >= extrabits - prev_extra);
  504. extrabits -= fineextra;
  505. }
  506. }
  507. f->remaining = extrabits;
  508. /* skipped bands dedicate all of their bits for fine energy */
  509. for (; i < f->end_band; i++) {
  510. f->fine_bits[i] = f->pulses[i] >> (f->channels - 1) >> 3;
  511. f->pulses[i] = 0;
  512. f->fine_priority[i] = f->fine_bits[i] < 1;
  513. }
  514. }
  515. static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f)
  516. {
  517. float gain = f->pf_gain;
  518. int i, txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
  519. ff_opus_rc_enc_log(rc, f->pfilter, 1);
  520. if (!f->pfilter)
  521. return;
  522. /* Octave */
  523. txval = FFMIN(octave, 6);
  524. ff_opus_rc_enc_uint(rc, txval, 6);
  525. octave = txval;
  526. /* Period */
  527. txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1);
  528. ff_opus_rc_put_raw(rc, period, 4 + octave);
  529. period = txval + (16 << octave) - 1;
  530. /* Gain */
  531. txval = FFMIN(((int)(gain / 0.09375f)) - 1, 7);
  532. ff_opus_rc_put_raw(rc, txval, 3);
  533. gain = 0.09375f * (txval + 1);
  534. /* Tapset */
  535. if ((opus_rc_tell(rc) + 2) <= f->framebits)
  536. ff_opus_rc_enc_cdf(rc, tapset, ff_celt_model_tapset);
  537. else
  538. tapset = 0;
  539. /* Finally create the coeffs */
  540. for (i = 0; i < 2; i++) {
  541. CeltBlock *block = &f->block[i];
  542. block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
  543. block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
  544. block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
  545. block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
  546. }
  547. }
  548. static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
  549. float last_energy[][CELT_MAX_BANDS], int intra)
  550. {
  551. int i, ch;
  552. float alpha, beta, prev[2] = { 0, 0 };
  553. const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][intra];
  554. /* Inter is really just differential coding */
  555. if (opus_rc_tell(rc) + 3 <= f->framebits)
  556. ff_opus_rc_enc_log(rc, intra, 3);
  557. else
  558. intra = 0;
  559. if (intra) {
  560. alpha = 0.0f;
  561. beta = 1.0f - (4915.0f/32768.0f);
  562. } else {
  563. alpha = ff_celt_alpha_coef[f->size];
  564. beta = ff_celt_beta_coef[f->size];
  565. }
  566. for (i = f->start_band; i < f->end_band; i++) {
  567. for (ch = 0; ch < f->channels; ch++) {
  568. CeltBlock *block = &f->block[ch];
  569. const int left = f->framebits - opus_rc_tell(rc);
  570. const float last = FFMAX(-9.0f, last_energy[ch][i]);
  571. float diff = block->energy[i] - prev[ch] - last*alpha;
  572. int q_en = lrintf(diff);
  573. if (left >= 15) {
  574. ff_opus_rc_enc_laplace(rc, &q_en, pmod[i << 1] << 7, pmod[(i << 1) + 1] << 6);
  575. } else if (left >= 2) {
  576. q_en = av_clip(q_en, -1, 1);
  577. ff_opus_rc_enc_cdf(rc, 2*q_en + 3*(q_en < 0), ff_celt_model_energy_small);
  578. } else if (left >= 1) {
  579. q_en = av_clip(q_en, -1, 0);
  580. ff_opus_rc_enc_log(rc, (q_en & 1), 1);
  581. } else q_en = -1;
  582. block->error_energy[i] = q_en - diff;
  583. prev[ch] += beta * q_en;
  584. }
  585. }
  586. }
  587. static void celt_quant_coarse(CeltFrame *f, OpusRangeCoder *rc,
  588. float last_energy[][CELT_MAX_BANDS])
  589. {
  590. uint32_t inter, intra;
  591. OPUS_RC_CHECKPOINT_SPAWN(rc);
  592. exp_quant_coarse(rc, f, last_energy, 1);
  593. intra = OPUS_RC_CHECKPOINT_BITS(rc);
  594. OPUS_RC_CHECKPOINT_ROLLBACK(rc);
  595. exp_quant_coarse(rc, f, last_energy, 0);
  596. inter = OPUS_RC_CHECKPOINT_BITS(rc);
  597. if (inter > intra) { /* Unlikely */
  598. OPUS_RC_CHECKPOINT_ROLLBACK(rc);
  599. exp_quant_coarse(rc, f, last_energy, 1);
  600. }
  601. }
  602. static void celt_quant_fine(CeltFrame *f, OpusRangeCoder *rc)
  603. {
  604. int i, ch;
  605. for (i = f->start_band; i < f->end_band; i++) {
  606. if (!f->fine_bits[i])
  607. continue;
  608. for (ch = 0; ch < f->channels; ch++) {
  609. CeltBlock *block = &f->block[ch];
  610. int quant, lim = (1 << f->fine_bits[i]);
  611. float offset, diff = 0.5f - block->error_energy[i];
  612. quant = av_clip(floor(diff*lim), 0, lim - 1);
  613. ff_opus_rc_put_raw(rc, quant, f->fine_bits[i]);
  614. offset = 0.5f - ((quant + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f);
  615. block->error_energy[i] -= offset;
  616. }
  617. }
  618. }
  619. static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
  620. {
  621. int i, ch, priority;
  622. for (priority = 0; priority < 2; priority++) {
  623. for (i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) {
  624. if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
  625. continue;
  626. for (ch = 0; ch < f->channels; ch++) {
  627. CeltBlock *block = &f->block[ch];
  628. const float err = block->error_energy[i];
  629. const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
  630. const int sign = FFABS(err + offset) < FFABS(err - offset);
  631. ff_opus_rc_put_raw(rc, sign, 1);
  632. block->error_energy[i] -= offset*(1 - 2*sign);
  633. }
  634. }
  635. }
  636. }
  637. static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc,
  638. CeltFrame *f, int index)
  639. {
  640. int i, ch;
  641. ff_opus_rc_enc_init(rc);
  642. ff_opus_psy_celt_frame_init(&s->psyctx, f, index);
  643. celt_frame_setup_input(s, f);
  644. if (f->silence) {
  645. if (f->framebits >= 16)
  646. ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit singalling) */
  647. for (ch = 0; ch < s->channels; ch++)
  648. memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
  649. return;
  650. }
  651. /* Filters */
  652. celt_apply_preemph_filter(s, f);
  653. if (f->pfilter) {
  654. ff_opus_rc_enc_log(rc, 0, 15);
  655. celt_enc_quant_pfilter(rc, f);
  656. }
  657. /* Transform */
  658. celt_frame_mdct(s, f);
  659. /* Need to handle transient/non-transient switches at any point during analysis */
  660. while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index))
  661. celt_frame_mdct(s, f);
  662. ff_opus_rc_enc_init(rc);
  663. /* Silence */
  664. ff_opus_rc_enc_log(rc, 0, 15);
  665. /* Pitch filter */
  666. if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
  667. celt_enc_quant_pfilter(rc, f);
  668. /* Transient flag */
  669. if (f->size && opus_rc_tell(rc) + 3 <= f->framebits)
  670. ff_opus_rc_enc_log(rc, f->transient, 3);
  671. /* Main encoding */
  672. celt_quant_coarse (f, rc, s->last_quantized_energy);
  673. celt_enc_tf (f, rc);
  674. ff_celt_enc_bitalloc(f, rc);
  675. celt_quant_fine (f, rc);
  676. ff_celt_quant_bands (f, rc);
  677. /* Anticollapse bit */
  678. if (f->anticollapse_needed)
  679. ff_opus_rc_put_raw(rc, f->anticollapse, 1);
  680. /* Final per-band energy adjustments from leftover bits */
  681. celt_quant_final(s, rc, f);
  682. for (ch = 0; ch < f->channels; ch++) {
  683. CeltBlock *block = &f->block[ch];
  684. for (i = 0; i < CELT_MAX_BANDS; i++)
  685. s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i];
  686. }
  687. }
  /*
   * Write an Opus frame length in the 1 or 2 byte length coding.
   *
   * Lengths 0..251 take a single byte. Larger lengths take two bytes, with
   * the first byte in 252..255 and length == second*4 + first.
   *
   * dst - destination; only the number of bytes returned is written
   * v   - frame length in bytes
   *
   * Returns the number of bytes written (1 or 2).
   */
  static inline int write_opuslacing(uint8_t *dst, int v)
  {
      if (v < 252) {
          dst[0] = v;
          return 1;
      }

      /* 252 is a multiple of 4, so the first byte carries v modulo 4 */
      dst[0] = 252 + (v & 3);
      dst[1] = (v - dst[0]) >> 2;
      return 2;
  }
  694. static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
  695. {
  696. int i, offset, fsize_needed;
  697. /* Write toc */
  698. opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
  699. /* Frame sizes if needed */
  700. if (fsize_needed) {
  701. for (i = 0; i < s->packet.frames - 1; i++) {
  702. offset += write_opuslacing(avpkt->data + offset,
  703. s->frame[i].framebits >> 3);
  704. }
  705. }
  706. /* Packets */
  707. for (i = 0; i < s->packet.frames; i++) {
  708. ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset,
  709. s->frame[i].framebits >> 3);
  710. offset += s->frame[i].framebits >> 3;
  711. }
  712. avpkt->size = offset;
  713. }
  714. /* Used as overlap for the first frame and padding for the last encoded packet */
  715. static AVFrame *spawn_empty_frame(OpusEncContext *s)
  716. {
  717. int i;
  718. AVFrame *f = av_frame_alloc();
  719. if (!f)
  720. return NULL;
  721. f->format = s->avctx->sample_fmt;
  722. f->nb_samples = s->avctx->frame_size;
  723. f->channel_layout = s->avctx->channel_layout;
  724. if (av_frame_get_buffer(f, 4)) {
  725. av_frame_free(&f);
  726. return NULL;
  727. }
  728. for (i = 0; i < s->channels; i++) {
  729. size_t bps = av_get_bytes_per_sample(f->format);
  730. memset(f->extended_data[i], 0, bps*f->nb_samples);
  731. }
  732. return f;
  733. }
  734. static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  735. const AVFrame *frame, int *got_packet_ptr)
  736. {
  737. OpusEncContext *s = avctx->priv_data;
  738. int i, ret, frame_size, alloc_size = 0;
  739. if (frame) { /* Add new frame to queue */
  740. if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
  741. return ret;
  742. ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
  743. } else {
  744. ff_opus_psy_signal_eof(&s->psyctx);
  745. if (!s->afq.remaining_samples)
  746. return 0; /* We've been flushed and there's nothing left to encode */
  747. }
  748. /* Run the psychoacoustic system */
  749. if (ff_opus_psy_process(&s->psyctx, &s->packet))
  750. return 0;
  751. frame_size = OPUS_BLOCK_SIZE(s->packet.framesize);
  752. if (!frame) {
  753. /* This can go negative, that's not a problem, we only pad if positive */
  754. int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
  755. /* Pad with empty 2.5 ms frames to whatever framesize was decided,
  756. * this should only happen at the very last flush frame. The frames
  757. * allocated here will be freed (because they have no other references)
  758. * after they get used by celt_frame_setup_input() */
  759. for (i = 0; i < pad_empty; i++) {
  760. AVFrame *empty = spawn_empty_frame(s);
  761. if (!empty)
  762. return AVERROR(ENOMEM);
  763. ff_bufqueue_add(avctx, &s->bufqueue, empty);
  764. }
  765. }
  766. for (i = 0; i < s->packet.frames; i++) {
  767. celt_encode_frame(s, &s->rc[i], &s->frame[i], i);
  768. alloc_size += s->frame[i].framebits >> 3;
  769. }
  770. /* Worst case toc + the frame lengths if needed */
  771. alloc_size += 2 + s->packet.frames*2;
  772. if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0)
  773. return ret;
  774. /* Assemble packet */
  775. opus_packet_assembler(s, avpkt);
  776. /* Update the psychoacoustic system */
  777. ff_opus_psy_postencode_update(&s->psyctx, s->frame, s->rc);
  778. /* Remove samples from queue and skip if needed */
  779. ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration);
  780. if (s->packet.frames*frame_size > avpkt->duration) {
  781. uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
  782. if (!side)
  783. return AVERROR(ENOMEM);
  784. AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120);
  785. }
  786. *got_packet_ptr = 1;
  787. return 0;
  788. }
  789. static av_cold int opus_encode_end(AVCodecContext *avctx)
  790. {
  791. int i;
  792. OpusEncContext *s = avctx->priv_data;
  793. for (i = 0; i < CELT_BLOCK_NB; i++)
  794. ff_mdct15_uninit(&s->mdct[i]);
  795. ff_celt_pvq_uninit(&s->pvq);
  796. av_freep(&s->dsp);
  797. av_freep(&s->frame);
  798. av_freep(&s->rc);
  799. ff_af_queue_close(&s->afq);
  800. ff_opus_psy_end(&s->psyctx);
  801. ff_bufqueue_discard_all(&s->bufqueue);
  802. av_freep(&avctx->extradata);
  803. return 0;
  804. }
  805. static av_cold int opus_encode_init(AVCodecContext *avctx)
  806. {
  807. int i, ch, ret, max_frames;
  808. OpusEncContext *s = avctx->priv_data;
  809. s->avctx = avctx;
  810. s->channels = avctx->channels;
  811. /* Opus allows us to change the framesize on each packet (and each packet may
  812. * have multiple frames in it) but we can't change the codec's frame size on
  813. * runtime, so fix it to the lowest possible number of samples and use a queue
  814. * to accumulate AVFrames until we have enough to encode whatever the encoder
  815. * decides is the best */
  816. avctx->frame_size = 120;
  817. /* Initial padding will change if SILK is ever supported */
  818. avctx->initial_padding = 120;
  819. if (!avctx->bit_rate) {
  820. int coupled = ff_opus_default_coupled_streams[s->channels - 1];
  821. avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000);
  822. } else if (avctx->bit_rate < 6000 || avctx->bit_rate > 255000 * s->channels) {
  823. int64_t clipped_rate = av_clip(avctx->bit_rate, 6000, 255000 * s->channels);
  824. av_log(avctx, AV_LOG_ERROR, "Unsupported bitrate %"PRId64" kbps, clipping to %"PRId64" kbps\n",
  825. avctx->bit_rate/1000, clipped_rate/1000);
  826. avctx->bit_rate = clipped_rate;
  827. }
  828. /* Extradata */
  829. avctx->extradata_size = 19;
  830. avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
  831. if (!avctx->extradata)
  832. return AVERROR(ENOMEM);
  833. opus_write_extradata(avctx);
  834. ff_af_queue_init(avctx, &s->afq);
  835. if ((ret = ff_celt_pvq_init(&s->pvq, 1)) < 0)
  836. return ret;
  837. if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
  838. return AVERROR(ENOMEM);
  839. /* I have no idea why a base scaling factor of 68 works, could be the twiddles */
  840. for (i = 0; i < CELT_BLOCK_NB; i++)
  841. if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
  842. return AVERROR(ENOMEM);
  843. /* Zero out previous energy (matters for inter first frame) */
  844. for (ch = 0; ch < s->channels; ch++)
  845. memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
  846. /* Allocate an empty frame to use as overlap for the first frame of audio */
  847. ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s));
  848. if (!ff_bufqueue_peek(&s->bufqueue, 0))
  849. return AVERROR(ENOMEM);
  850. if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options)))
  851. return ret;
  852. /* Frame structs and range coder buffers */
  853. max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f);
  854. s->frame = av_malloc(max_frames*sizeof(CeltFrame));
  855. if (!s->frame)
  856. return AVERROR(ENOMEM);
  857. s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder));
  858. if (!s->rc)
  859. return AVERROR(ENOMEM);
  860. for (i = 0; i < max_frames; i++) {
  861. s->frame[i].dsp = s->dsp;
  862. s->frame[i].avctx = s->avctx;
  863. s->frame[i].seed = 0;
  864. s->frame[i].pvq = s->pvq;
  865. s->frame[i].apply_phase_inv = 1;
  866. s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
  867. }
  868. return 0;
  869. }
  870. #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
  871. static const AVOption opusenc_options[] = {
  872. { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, "max_delay_ms" },
  873. { NULL },
  874. };
  875. static const AVClass opusenc_class = {
  876. .class_name = "Opus encoder",
  877. .item_name = av_default_item_name,
  878. .option = opusenc_options,
  879. .version = LIBAVUTIL_VERSION_INT,
  880. };
  881. static const AVCodecDefault opusenc_defaults[] = {
  882. { "b", "0" },
  883. { "compression_level", "10" },
  884. { NULL },
  885. };
  886. AVCodec ff_opus_encoder = {
  887. .name = "opus",
  888. .long_name = NULL_IF_CONFIG_SMALL("Opus"),
  889. .type = AVMEDIA_TYPE_AUDIO,
  890. .id = AV_CODEC_ID_OPUS,
  891. .defaults = opusenc_defaults,
  892. .priv_class = &opusenc_class,
  893. .priv_data_size = sizeof(OpusEncContext),
  894. .init = opus_encode_init,
  895. .encode2 = opus_encode_frame,
  896. .close = opus_encode_end,
  897. .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
  898. .capabilities = AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
  899. .supported_samplerates = (const int []){ 48000, 0 },
  900. .channel_layouts = (const uint64_t []){ AV_CH_LAYOUT_MONO,
  901. AV_CH_LAYOUT_STEREO, 0 },
  902. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
  903. AV_SAMPLE_FMT_NONE },
  904. };