You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1146 lines
41KB

  1. /*
  2. * Opus encoder
  3. * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "opus_celt.h"
  22. #include "opus_pvq.h"
  23. #include "opustab.h"
  24. #include "libavutil/float_dsp.h"
  25. #include "libavutil/opt.h"
  26. #include "internal.h"
  27. #include "bytestream.h"
  28. #include "audio_frame_queue.h"
  29. /* Determines the maximum delay the psychoacoustic system will use for lookahead */
  30. #define FF_BUFQUEUE_SIZE 145
  31. #include "libavfilter/bufferqueue.h"
  /* Upper bound on the lookahead: every queued frame but one, at 2.5 apiece
   * (presumably milliseconds per 2.5 ms frame - confirm against the users) */
  #define OPUS_MAX_LOOKAHEAD ((FF_BUFQUEUE_SIZE - 1) * 2.5f)

  #define OPUS_MAX_CHANNELS 2

  /* 120 ms / 2.5 ms = 48 frames (extremely improbable, but the encoder will cope) */
  #define OPUS_MAX_FRAMES_PER_PACKET 48

  /* Block size index -> samples per block: 0 -> 120, 1 -> 240, 2 -> 480, 3 -> 960 */
  #define OPUS_BLOCK_SIZE(x) (30 * (1 << ((x) + 2)))

  /* Sample count -> block size index, the inverse of OPUS_BLOCK_SIZE() */
  #define OPUS_SAMPLES_TO_BLOCK_SIZE(x) (ff_log2((x) / 30) - 2)
  /* Encoder options; max_delay_ms is converted to a sample count by
   * ff_opus_psy_process() to pick the frame size and the lookahead needed */
  typedef struct OpusEncOptions {
      float max_delay_ms;
  } OpusEncOptions;
  41. typedef struct OpusEncContext {
  42. AVClass *av_class;
  43. OpusEncOptions options;
  44. AVCodecContext *avctx;
  45. AudioFrameQueue afq;
  46. AVFloatDSPContext *dsp;
  47. MDCT15Context *mdct[CELT_BLOCK_NB];
  48. CeltPVQ *pvq;
  49. struct FFBufQueue bufqueue;
  50. enum OpusMode mode;
  51. enum OpusBandwidth bandwidth;
  52. int pkt_framesize;
  53. int pkt_frames;
  54. int channels;
  55. CeltFrame *frame;
  56. OpusRangeCoder *rc;
  57. /* Actual energy the decoder will have */
  58. float last_quantized_energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];
  59. DECLARE_ALIGNED(32, float, scratch)[2048];
  60. } OpusEncContext;
  61. static void opus_write_extradata(AVCodecContext *avctx)
  62. {
  63. uint8_t *bs = avctx->extradata;
  64. bytestream_put_buffer(&bs, "OpusHead", 8);
  65. bytestream_put_byte (&bs, 0x1);
  66. bytestream_put_byte (&bs, avctx->channels);
  67. bytestream_put_le16 (&bs, avctx->initial_padding);
  68. bytestream_put_le32 (&bs, avctx->sample_rate);
  69. bytestream_put_le16 (&bs, 0x0);
  70. bytestream_put_byte (&bs, 0x0); /* Default layout */
  71. }
  72. static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed)
  73. {
  74. int i, tmp = 0x0, extended_toc = 0;
  75. static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = {
  76. /* Silk Hybrid Celt Layer */
  77. /* NB MB WB SWB FB NB MB WB SWB FB NB MB WB SWB FB Bandwidth */
  78. { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 17, 0, 21, 25, 29 } }, /* 2.5 ms */
  79. { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 18, 0, 22, 26, 30 } }, /* 5 ms */
  80. { { 1, 5, 9, 0, 0 }, { 0, 0, 0, 13, 15 }, { 19, 0, 23, 27, 31 } }, /* 10 ms */
  81. { { 2, 6, 10, 0, 0 }, { 0, 0, 0, 14, 16 }, { 20, 0, 24, 28, 32 } }, /* 20 ms */
  82. { { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */
  83. { { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */
  84. };
  85. int cfg = toc_cfg[s->pkt_framesize][s->mode][s->bandwidth];
  86. *fsize_needed = 0;
  87. if (!cfg)
  88. return 1;
  89. if (s->pkt_frames == 2) { /* 2 packets */
  90. if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */
  91. tmp = 0x1;
  92. } else { /* different size */
  93. tmp = 0x2;
  94. *fsize_needed = 1; /* put frame sizes in the packet */
  95. }
  96. } else if (s->pkt_frames > 2) {
  97. tmp = 0x3;
  98. extended_toc = 1;
  99. }
  100. tmp |= (s->channels > 1) << 2; /* Stereo or mono */
  101. tmp |= (cfg - 1) << 3; /* codec configuration */
  102. *toc++ = tmp;
  103. if (extended_toc) {
  104. for (i = 0; i < (s->pkt_frames - 1); i++)
  105. *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
  106. tmp = (*fsize_needed) << 7; /* vbr flag */
  107. tmp |= s->pkt_frames; /* frame number - can be 0 as well */
  108. *toc++ = tmp;
  109. }
  110. *size = 1 + extended_toc;
  111. return 0;
  112. }
  113. static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
  114. {
  115. int sf, ch;
  116. AVFrame *cur = NULL;
  117. const int subframesize = s->avctx->frame_size;
  118. int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize;
  119. cur = ff_bufqueue_get(&s->bufqueue);
  120. for (ch = 0; ch < f->channels; ch++) {
  121. CeltBlock *b = &f->block[ch];
  122. const void *input = cur->extended_data[ch];
  123. size_t bps = av_get_bytes_per_sample(cur->format);
  124. memcpy(b->overlap, input, bps*cur->nb_samples);
  125. }
  126. av_frame_free(&cur);
  127. for (sf = 0; sf < subframes; sf++) {
  128. if (sf != (subframes - 1))
  129. cur = ff_bufqueue_get(&s->bufqueue);
  130. else
  131. cur = ff_bufqueue_peek(&s->bufqueue, 0);
  132. for (ch = 0; ch < f->channels; ch++) {
  133. CeltBlock *b = &f->block[ch];
  134. const void *input = cur->extended_data[ch];
  135. const size_t bps = av_get_bytes_per_sample(cur->format);
  136. const size_t left = (subframesize - cur->nb_samples)*bps;
  137. const size_t len = FFMIN(subframesize, cur->nb_samples)*bps;
  138. memcpy(&b->samples[sf*subframesize], input, len);
  139. memset(&b->samples[cur->nb_samples], 0, left);
  140. }
  141. /* Last frame isn't popped off and freed yet - we need it for overlap */
  142. if (sf != (subframes - 1))
  143. av_frame_free(&cur);
  144. }
  145. }
  146. /* Apply the pre emphasis filter */
  147. static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
  148. {
  149. int i, sf, ch;
  150. const int subframesize = s->avctx->frame_size;
  151. const int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize;
  152. /* Filter overlap */
  153. for (ch = 0; ch < f->channels; ch++) {
  154. CeltBlock *b = &f->block[ch];
  155. float m = b->emph_coeff;
  156. for (i = 0; i < CELT_OVERLAP; i++) {
  157. float sample = b->overlap[i];
  158. b->overlap[i] = sample - m;
  159. m = sample * CELT_EMPH_COEFF;
  160. }
  161. b->emph_coeff = m;
  162. }
  163. /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */
  164. for (sf = 0; sf < subframes; sf++) {
  165. for (ch = 0; ch < f->channels; ch++) {
  166. CeltBlock *b = &f->block[ch];
  167. float m = b->emph_coeff;
  168. for (i = 0; i < subframesize; i++) {
  169. float sample = b->samples[sf*subframesize + i];
  170. b->samples[sf*subframesize + i] = sample - m;
  171. m = sample * CELT_EMPH_COEFF;
  172. }
  173. if (sf != (subframes - 1))
  174. b->emph_coeff = m;
  175. }
  176. }
  177. }
  178. /* Create the window and do the mdct */
  179. static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
  180. {
  181. int i, t, ch;
  182. float *win = s->scratch;
  183. /* I think I can use s->dsp->vector_fmul_window for transients at least */
  184. if (f->transient) {
  185. for (ch = 0; ch < f->channels; ch++) {
  186. CeltBlock *b = &f->block[ch];
  187. float *src1 = b->overlap;
  188. for (t = 0; t < f->blocks; t++) {
  189. float *src2 = &b->samples[CELT_OVERLAP*t];
  190. for (i = 0; i < CELT_OVERLAP; i++) {
  191. win[ i] = src1[i]*ff_celt_window[i];
  192. win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP - i - 1];
  193. }
  194. src1 = src2;
  195. s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
  196. }
  197. }
  198. } else {
  199. int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
  200. int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
  201. for (ch = 0; ch < f->channels; ch++) {
  202. CeltBlock *b = &f->block[ch];
  203. memset(win, 0, wlen*sizeof(float));
  204. memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float));
  205. /* Alignment fucks me over */
  206. //s->dsp->vector_fmul(&dst[lap_dst], b->overlap, ff_celt_window, CELT_OVERLAP);
  207. //s->dsp->vector_fmul_reverse(&dst[lap_dst + blk_len - CELT_OVERLAP], b->samples, ff_celt_window, CELT_OVERLAP);
  208. for (i = 0; i < CELT_OVERLAP; i++) {
  209. win[lap_dst + i] = b->overlap[i] *ff_celt_window[i];
  210. win[lap_dst + blk_len + i] = b->samples[rwin + i]*ff_celt_window[CELT_OVERLAP - i - 1];
  211. }
  212. s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
  213. }
  214. }
  215. }
  216. /* Fills the bands and normalizes them */
  217. static void celt_frame_map_norm_bands(OpusEncContext *s, CeltFrame *f)
  218. {
  219. int i, j, ch;
  220. for (ch = 0; ch < f->channels; ch++) {
  221. CeltBlock *block = &f->block[ch];
  222. for (i = 0; i < CELT_MAX_BANDS; i++) {
  223. float ener = 0.0f;
  224. int band_offset = ff_celt_freq_bands[i] << f->size;
  225. int band_size = ff_celt_freq_range[i] << f->size;
  226. float *coeffs = &block->coeffs[band_offset];
  227. for (j = 0; j < band_size; j++)
  228. ener += coeffs[j]*coeffs[j];
  229. block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON;
  230. ener = 1.0f/block->lin_energy[i];
  231. for (j = 0; j < band_size; j++)
  232. coeffs[j] *= ener;
  233. block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i];
  234. /* CELT_ENERGY_SILENCE is what the decoder uses and its not -infinity */
  235. block->energy[i] = FFMAX(block->energy[i], CELT_ENERGY_SILENCE);
  236. }
  237. }
  238. }
  239. static void celt_enc_tf(OpusRangeCoder *rc, CeltFrame *f)
  240. {
  241. int i, tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed;
  242. int bits = f->transient ? 2 : 4;
  243. tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits));
  244. for (i = f->start_band; i < f->end_band; i++) {
  245. if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) {
  246. const int tbit = (diff ^ 1) == f->tf_change[i];
  247. ff_opus_rc_enc_log(rc, tbit, bits);
  248. diff ^= tbit;
  249. tf_changed |= diff;
  250. }
  251. bits = f->transient ? 4 : 5;
  252. }
  253. if (tf_select_needed && ff_celt_tf_select[f->size][f->transient][0][tf_changed] !=
  254. ff_celt_tf_select[f->size][f->transient][1][tf_changed]) {
  255. ff_opus_rc_enc_log(rc, f->tf_select, 1);
  256. tf_select = f->tf_select;
  257. }
  258. for (i = f->start_band; i < f->end_band; i++)
  259. f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
  260. }
  261. static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f)
  262. {
  263. int i, j, low, high, total, done, bandbits, remaining, tbits_8ths;
  264. int skip_startband = f->start_band;
  265. int skip_bit = 0;
  266. int intensitystereo_bit = 0;
  267. int dualstereo_bit = 0;
  268. int dynalloc = 6;
  269. int extrabits = 0;
  270. int *cap = f->caps;
  271. int boost[CELT_MAX_BANDS];
  272. int trim_offset[CELT_MAX_BANDS];
  273. int threshold[CELT_MAX_BANDS];
  274. int bits1[CELT_MAX_BANDS];
  275. int bits2[CELT_MAX_BANDS];
  276. /* Tell the spread to the decoder */
  277. if (opus_rc_tell(rc) + 4 <= f->framebits)
  278. ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread);
  279. /* Generate static allocation caps */
  280. for (i = 0; i < CELT_MAX_BANDS; i++) {
  281. cap[i] = (ff_celt_static_caps[f->size][f->channels - 1][i] + 64)
  282. * ff_celt_freq_range[i] << (f->channels - 1) << f->size >> 2;
  283. }
  284. /* Band boosts */
  285. tbits_8ths = f->framebits << 3;
  286. for (i = f->start_band; i < f->end_band; i++) {
  287. int quanta, b_dynalloc, boost_amount = f->alloc_boost[i];
  288. boost[i] = 0;
  289. quanta = ff_celt_freq_range[i] << (f->channels - 1) << f->size;
  290. quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
  291. b_dynalloc = dynalloc;
  292. while (opus_rc_tell_frac(rc) + (b_dynalloc << 3) < tbits_8ths && boost[i] < cap[i]) {
  293. int is_boost = boost_amount--;
  294. ff_opus_rc_enc_log(rc, is_boost, b_dynalloc);
  295. if (!is_boost)
  296. break;
  297. boost[i] += quanta;
  298. tbits_8ths -= quanta;
  299. b_dynalloc = 1;
  300. }
  301. if (boost[i])
  302. dynalloc = FFMAX(2, dynalloc - 1);
  303. }
  304. /* Put allocation trim */
  305. if (opus_rc_tell_frac(rc) + (6 << 3) <= tbits_8ths)
  306. ff_opus_rc_enc_cdf(rc, f->alloc_trim, ff_celt_model_alloc_trim);
  307. /* Anti-collapse bit reservation */
  308. tbits_8ths = (f->framebits << 3) - opus_rc_tell_frac(rc) - 1;
  309. f->anticollapse_needed = 0;
  310. if (f->transient && f->size >= 2 && tbits_8ths >= ((f->size + 2) << 3))
  311. f->anticollapse_needed = 1 << 3;
  312. tbits_8ths -= f->anticollapse_needed;
  313. /* Band skip bit reservation */
  314. if (tbits_8ths >= 1 << 3)
  315. skip_bit = 1 << 3;
  316. tbits_8ths -= skip_bit;
  317. /* Intensity/dual stereo bit reservation */
  318. if (f->channels == 2) {
  319. intensitystereo_bit = ff_celt_log2_frac[f->end_band - f->start_band];
  320. if (intensitystereo_bit <= tbits_8ths) {
  321. tbits_8ths -= intensitystereo_bit;
  322. if (tbits_8ths >= 1 << 3) {
  323. dualstereo_bit = 1 << 3;
  324. tbits_8ths -= 1 << 3;
  325. }
  326. } else {
  327. intensitystereo_bit = 0;
  328. }
  329. }
  330. /* Trim offsets */
  331. for (i = f->start_band; i < f->end_band; i++) {
  332. int trim = f->alloc_trim - 5 - f->size;
  333. int band = ff_celt_freq_range[i] * (f->end_band - i - 1);
  334. int duration = f->size + 3;
  335. int scale = duration + f->channels - 1;
  336. /* PVQ minimum allocation threshold, below this value the band is
  337. * skipped */
  338. threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
  339. f->channels << 3);
  340. trim_offset[i] = trim * (band << scale) >> 6;
  341. if (ff_celt_freq_range[i] << f->size == 1)
  342. trim_offset[i] -= f->channels << 3;
  343. }
  344. /* Bisection */
  345. low = 1;
  346. high = CELT_VECTORS - 1;
  347. while (low <= high) {
  348. int center = (low + high) >> 1;
  349. done = total = 0;
  350. for (i = f->end_band - 1; i >= f->start_band; i--) {
  351. bandbits = ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]
  352. << (f->channels - 1) << f->size >> 2;
  353. if (bandbits)
  354. bandbits = FFMAX(0, bandbits + trim_offset[i]);
  355. bandbits += boost[i];
  356. if (bandbits >= threshold[i] || done) {
  357. done = 1;
  358. total += FFMIN(bandbits, cap[i]);
  359. } else if (bandbits >= f->channels << 3)
  360. total += f->channels << 3;
  361. }
  362. if (total > tbits_8ths)
  363. high = center - 1;
  364. else
  365. low = center + 1;
  366. }
  367. high = low--;
  368. /* Bisection */
  369. for (i = f->start_band; i < f->end_band; i++) {
  370. bits1[i] = ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]
  371. << (f->channels - 1) << f->size >> 2;
  372. bits2[i] = high >= CELT_VECTORS ? cap[i] :
  373. ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]
  374. << (f->channels - 1) << f->size >> 2;
  375. if (bits1[i])
  376. bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
  377. if (bits2[i])
  378. bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
  379. if (low)
  380. bits1[i] += boost[i];
  381. bits2[i] += boost[i];
  382. if (boost[i])
  383. skip_startband = i;
  384. bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
  385. }
  386. /* Bisection */
  387. low = 0;
  388. high = 1 << CELT_ALLOC_STEPS;
  389. for (i = 0; i < CELT_ALLOC_STEPS; i++) {
  390. int center = (low + high) >> 1;
  391. done = total = 0;
  392. for (j = f->end_band - 1; j >= f->start_band; j--) {
  393. bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);
  394. if (bandbits >= threshold[j] || done) {
  395. done = 1;
  396. total += FFMIN(bandbits, cap[j]);
  397. } else if (bandbits >= f->channels << 3)
  398. total += f->channels << 3;
  399. }
  400. if (total > tbits_8ths)
  401. high = center;
  402. else
  403. low = center;
  404. }
  405. /* Bisection */
  406. done = total = 0;
  407. for (i = f->end_band - 1; i >= f->start_band; i--) {
  408. bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);
  409. if (bandbits >= threshold[i] || done)
  410. done = 1;
  411. else
  412. bandbits = (bandbits >= f->channels << 3) ?
  413. f->channels << 3 : 0;
  414. bandbits = FFMIN(bandbits, cap[i]);
  415. f->pulses[i] = bandbits;
  416. total += bandbits;
  417. }
  418. /* Band skipping */
  419. for (f->coded_bands = f->end_band; ; f->coded_bands--) {
  420. int allocation;
  421. j = f->coded_bands - 1;
  422. if (j == skip_startband) {
  423. /* all remaining bands are not skipped */
  424. tbits_8ths += skip_bit;
  425. break;
  426. }
  427. /* determine the number of bits available for coding "do not skip" markers */
  428. remaining = tbits_8ths - total;
  429. bandbits = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
  430. remaining -= bandbits * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
  431. allocation = f->pulses[j] + bandbits * ff_celt_freq_range[j]
  432. + FFMAX(0, remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[f->start_band]));
  433. /* a "do not skip" marker is only coded if the allocation is
  434. above the chosen threshold */
  435. if (allocation >= FFMAX(threshold[j], (f->channels + 1) << 3)) {
  436. const int do_not_skip = f->coded_bands <= f->skip_band_floor;
  437. ff_opus_rc_enc_log(rc, do_not_skip, 1);
  438. if (do_not_skip)
  439. break;
  440. total += 1 << 3;
  441. allocation -= 1 << 3;
  442. }
  443. /* the band is skipped, so reclaim its bits */
  444. total -= f->pulses[j];
  445. if (intensitystereo_bit) {
  446. total -= intensitystereo_bit;
  447. intensitystereo_bit = ff_celt_log2_frac[j - f->start_band];
  448. total += intensitystereo_bit;
  449. }
  450. total += f->pulses[j] = (allocation >= f->channels << 3) ? f->channels << 3 : 0;
  451. }
  452. /* Encode stereo flags */
  453. if (intensitystereo_bit) {
  454. f->intensity_stereo = FFMIN(f->intensity_stereo, f->coded_bands);
  455. ff_opus_rc_enc_uint(rc, f->intensity_stereo, f->coded_bands + 1 - f->start_band);
  456. }
  457. if (f->intensity_stereo <= f->start_band)
  458. tbits_8ths += dualstereo_bit; /* no intensity stereo means no dual stereo */
  459. else if (dualstereo_bit)
  460. ff_opus_rc_enc_log(rc, f->dual_stereo, 1);
  461. /* Supply the remaining bits in this frame to lower bands */
  462. remaining = tbits_8ths - total;
  463. bandbits = remaining / (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
  464. remaining -= bandbits * (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
  465. for (i = f->start_band; i < f->coded_bands; i++) {
  466. int bits = FFMIN(remaining, ff_celt_freq_range[i]);
  467. f->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
  468. remaining -= bits;
  469. }
  470. /* Finally determine the allocation */
  471. for (i = f->start_band; i < f->coded_bands; i++) {
  472. int N = ff_celt_freq_range[i] << f->size;
  473. int prev_extra = extrabits;
  474. f->pulses[i] += extrabits;
  475. if (N > 1) {
  476. int dof; // degrees of freedom
  477. int temp; // dof * channels * log(dof)
  478. int offset; // fine energy quantization offset, i.e.
  479. // extra bits assigned over the standard
  480. // totalbits/dof
  481. int fine_bits, max_bits;
  482. extrabits = FFMAX(0, f->pulses[i] - cap[i]);
  483. f->pulses[i] -= extrabits;
  484. /* intensity stereo makes use of an extra degree of freedom */
  485. dof = N * f->channels + (f->channels == 2 && N > 2 && !f->dual_stereo && i < f->intensity_stereo);
  486. temp = dof * (ff_celt_log_freq_range[i] + (f->size << 3));
  487. offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
  488. if (N == 2) /* dof=2 is the only case that doesn't fit the model */
  489. offset += dof << 1;
  490. /* grant an additional bias for the first and second pulses */
  491. if (f->pulses[i] + offset < 2 * (dof << 3))
  492. offset += temp >> 2;
  493. else if (f->pulses[i] + offset < 3 * (dof << 3))
  494. offset += temp >> 3;
  495. fine_bits = (f->pulses[i] + offset + (dof << 2)) / (dof << 3);
  496. max_bits = FFMIN((f->pulses[i] >> 3) >> (f->channels - 1), CELT_MAX_FINE_BITS);
  497. max_bits = FFMAX(max_bits, 0);
  498. f->fine_bits[i] = av_clip(fine_bits, 0, max_bits);
  499. /* if fine_bits was rounded down or capped,
  500. give priority for the final fine energy pass */
  501. f->fine_priority[i] = (f->fine_bits[i] * (dof << 3) >= f->pulses[i] + offset);
  502. /* the remaining bits are assigned to PVQ */
  503. f->pulses[i] -= f->fine_bits[i] << (f->channels - 1) << 3;
  504. } else {
  505. /* all bits go to fine energy except for the sign bit */
  506. extrabits = FFMAX(0, f->pulses[i] - (f->channels << 3));
  507. f->pulses[i] -= extrabits;
  508. f->fine_bits[i] = 0;
  509. f->fine_priority[i] = 1;
  510. }
  511. /* hand back a limited number of extra fine energy bits to this band */
  512. if (extrabits > 0) {
  513. int fineextra = FFMIN(extrabits >> (f->channels + 2),
  514. CELT_MAX_FINE_BITS - f->fine_bits[i]);
  515. f->fine_bits[i] += fineextra;
  516. fineextra <<= f->channels + 2;
  517. f->fine_priority[i] = (fineextra >= extrabits - prev_extra);
  518. extrabits -= fineextra;
  519. }
  520. }
  521. f->remaining = extrabits;
  522. /* skipped bands dedicate all of their bits for fine energy */
  523. for (; i < f->end_band; i++) {
  524. f->fine_bits[i] = f->pulses[i] >> (f->channels - 1) >> 3;
  525. f->pulses[i] = 0;
  526. f->fine_priority[i] = f->fine_bits[i] < 1;
  527. }
  528. }
  529. static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
  530. float last_energy[][CELT_MAX_BANDS], int intra)
  531. {
  532. int i, ch;
  533. float alpha, beta, prev[2] = { 0, 0 };
  534. const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][intra];
  535. /* Inter is really just differential coding */
  536. if (opus_rc_tell(rc) + 3 <= f->framebits)
  537. ff_opus_rc_enc_log(rc, intra, 3);
  538. else
  539. intra = 0;
  540. if (intra) {
  541. alpha = 0.0f;
  542. beta = 1.0f - 4915.0f/32768.0f;
  543. } else {
  544. alpha = ff_celt_alpha_coef[f->size];
  545. beta = 1.0f - ff_celt_beta_coef[f->size];
  546. }
  547. for (i = f->start_band; i < f->end_band; i++) {
  548. for (ch = 0; ch < f->channels; ch++) {
  549. CeltBlock *block = &f->block[ch];
  550. const int left = f->framebits - opus_rc_tell(rc);
  551. const float last = FFMAX(-9.0f, last_energy[ch][i]);
  552. float diff = block->energy[i] - prev[ch] - last*alpha;
  553. int q_en = lrintf(diff);
  554. if (left >= 15) {
  555. ff_opus_rc_enc_laplace(rc, &q_en, pmod[i << 1] << 7, pmod[(i << 1) + 1] << 6);
  556. } else if (left >= 2) {
  557. q_en = av_clip(q_en, -1, 1);
  558. ff_opus_rc_enc_cdf(rc, 2*q_en + 3*(q_en < 0), ff_celt_model_energy_small);
  559. } else if (left >= 1) {
  560. q_en = av_clip(q_en, -1, 0);
  561. ff_opus_rc_enc_log(rc, (q_en & 1), 1);
  562. } else q_en = -1;
  563. block->error_energy[i] = q_en - diff;
  564. prev[ch] += beta * q_en;
  565. }
  566. }
  567. }
  568. static void celt_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
  569. float last_energy[][CELT_MAX_BANDS])
  570. {
  571. uint32_t inter, intra;
  572. OPUS_RC_CHECKPOINT_SPAWN(rc);
  573. exp_quant_coarse(rc, f, last_energy, 1);
  574. intra = OPUS_RC_CHECKPOINT_BITS(rc);
  575. OPUS_RC_CHECKPOINT_ROLLBACK(rc);
  576. exp_quant_coarse(rc, f, last_energy, 0);
  577. inter = OPUS_RC_CHECKPOINT_BITS(rc);
  578. if (inter > intra) { /* Unlikely */
  579. OPUS_RC_CHECKPOINT_ROLLBACK(rc);
  580. exp_quant_coarse(rc, f, last_energy, 1);
  581. }
  582. }
  583. static void celt_quant_fine(OpusRangeCoder *rc, CeltFrame *f)
  584. {
  585. int i, ch;
  586. for (i = f->start_band; i < f->end_band; i++) {
  587. if (!f->fine_bits[i])
  588. continue;
  589. for (ch = 0; ch < f->channels; ch++) {
  590. CeltBlock *block = &f->block[ch];
  591. int quant, lim = (1 << f->fine_bits[i]);
  592. float offset, diff = 0.5f - block->error_energy[i];
  593. quant = av_clip(floor(diff*lim), 0, lim - 1);
  594. ff_opus_rc_put_raw(rc, quant, f->fine_bits[i]);
  595. offset = 0.5f - ((quant + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f);
  596. block->error_energy[i] -= offset;
  597. }
  598. }
  599. }
  600. static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
  601. {
  602. int i, ch, priority;
  603. for (priority = 0; priority < 2; priority++) {
  604. for (i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) {
  605. if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
  606. continue;
  607. for (ch = 0; ch < f->channels; ch++) {
  608. CeltBlock *block = &f->block[ch];
  609. const float err = block->error_energy[i];
  610. const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
  611. const int sign = FFABS(err + offset) < FFABS(err - offset);
  612. ff_opus_rc_put_raw(rc, sign, 1);
  613. block->error_energy[i] -= offset*(1 - 2*sign);
  614. }
  615. }
  616. }
  617. }
  618. static void celt_quant_bands(OpusRangeCoder *rc, CeltFrame *f)
  619. {
  620. float lowband_scratch[8 * 22];
  621. float norm[2 * 8 * 100];
  622. int totalbits = (f->framebits << 3) - f->anticollapse_needed;
  623. int update_lowband = 1;
  624. int lowband_offset = 0;
  625. int i, j;
  626. for (i = f->start_band; i < f->end_band; i++) {
  627. uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
  628. int band_offset = ff_celt_freq_bands[i] << f->size;
  629. int band_size = ff_celt_freq_range[i] << f->size;
  630. float *X = f->block[0].coeffs + band_offset;
  631. float *Y = (f->channels == 2) ? f->block[1].coeffs + band_offset : NULL;
  632. int consumed = opus_rc_tell_frac(rc);
  633. float *norm2 = norm + 8 * 100;
  634. int effective_lowband = -1;
  635. int b = 0;
  636. /* Compute how many bits we want to allocate to this band */
  637. if (i != f->start_band)
  638. f->remaining -= consumed;
  639. f->remaining2 = totalbits - consumed - 1;
  640. if (i <= f->coded_bands - 1) {
  641. int curr_balance = f->remaining / FFMIN(3, f->coded_bands-i);
  642. b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[i] + curr_balance), 14);
  643. }
  644. if (ff_celt_freq_bands[i] - ff_celt_freq_range[i] >= ff_celt_freq_bands[f->start_band] &&
  645. (update_lowband || lowband_offset == 0))
  646. lowband_offset = i;
  647. /* Get a conservative estimate of the collapse_mask's for the bands we're
  648. going to be folding from. */
  649. if (lowband_offset != 0 && (f->spread != CELT_SPREAD_AGGRESSIVE ||
  650. f->blocks > 1 || f->tf_change[i] < 0)) {
  651. int foldstart, foldend;
  652. /* This ensures we never repeat spectral content within one band */
  653. effective_lowband = FFMAX(ff_celt_freq_bands[f->start_band],
  654. ff_celt_freq_bands[lowband_offset] - ff_celt_freq_range[i]);
  655. foldstart = lowband_offset;
  656. while (ff_celt_freq_bands[--foldstart] > effective_lowband);
  657. foldend = lowband_offset - 1;
  658. while (ff_celt_freq_bands[++foldend] < effective_lowband + ff_celt_freq_range[i]);
  659. cm[0] = cm[1] = 0;
  660. for (j = foldstart; j < foldend; j++) {
  661. cm[0] |= f->block[0].collapse_masks[j];
  662. cm[1] |= f->block[f->channels - 1].collapse_masks[j];
  663. }
  664. }
  665. if (f->dual_stereo && i == f->intensity_stereo) {
  666. /* Switch off dual stereo to do intensity */
  667. f->dual_stereo = 0;
  668. for (j = ff_celt_freq_bands[f->start_band] << f->size; j < band_offset; j++)
  669. norm[j] = (norm[j] + norm2[j]) / 2;
  670. }
  671. if (f->dual_stereo) {
  672. cm[0] = f->pvq->encode_band(f->pvq, f, rc, i, X, NULL, band_size, b / 2, f->blocks,
  673. effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
  674. norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]);
  675. cm[1] = f->pvq->encode_band(f->pvq, f, rc, i, Y, NULL, band_size, b / 2, f->blocks,
  676. effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL, f->size,
  677. norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]);
  678. } else {
  679. cm[0] = f->pvq->encode_band(f->pvq, f, rc, i, X, Y, band_size, b, f->blocks,
  680. effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size,
  681. norm + band_offset, 0, 1.0f, lowband_scratch, cm[0] | cm[1]);
  682. cm[1] = cm[0];
  683. }
  684. f->block[0].collapse_masks[i] = (uint8_t)cm[0];
  685. f->block[f->channels - 1].collapse_masks[i] = (uint8_t)cm[1];
  686. f->remaining += f->pulses[i] + consumed;
  687. /* Update the folding position only as long as we have 1 bit/sample depth */
  688. update_lowband = (b > band_size << 3);
  689. }
  690. }
  691. static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
  692. {
  693. int i, ch;
  694. celt_frame_setup_input(s, f);
  695. celt_apply_preemph_filter(s, f);
  696. if (f->pfilter) {
  697. /* Not implemented */
  698. }
  699. celt_frame_mdct(s, f);
  700. celt_frame_map_norm_bands(s, f);
  701. ff_opus_rc_enc_log(rc, f->silence, 15);
  702. if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
  703. ff_opus_rc_enc_log(rc, f->pfilter, 1);
  704. if (f->pfilter) {
  705. /* Not implemented */
  706. }
  707. if (f->size && opus_rc_tell(rc) + 3 <= f->framebits)
  708. ff_opus_rc_enc_log(rc, f->transient, 3);
  709. celt_quant_coarse(rc, f, s->last_quantized_energy);
  710. celt_enc_tf (rc, f);
  711. ff_celt_enc_bitalloc(rc, f);
  712. celt_quant_fine (rc, f);
  713. celt_quant_bands (rc, f);
  714. if (f->anticollapse_needed)
  715. ff_opus_rc_put_raw(rc, f->anticollapse, 1);
  716. celt_quant_final(s, rc, f);
  717. for (ch = 0; ch < f->channels; ch++) {
  718. CeltBlock *block = &f->block[ch];
  719. for (i = 0; i < CELT_MAX_BANDS; i++)
  720. s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i];
  721. }
  722. }
  723. static void ff_opus_psy_process(OpusEncContext *s, int end, int *need_more)
  724. {
  725. int max_delay_samples = (s->options.max_delay_ms*s->avctx->sample_rate)/1000;
  726. int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960);
  727. s->pkt_frames = 1;
  728. s->pkt_framesize = max_bsize;
  729. s->mode = OPUS_MODE_CELT;
  730. s->bandwidth = OPUS_BANDWIDTH_FULLBAND;
  731. *need_more = s->bufqueue.available*s->avctx->frame_size < (max_delay_samples + CELT_OVERLAP);
  732. /* Don't request more if we start being flushed with NULL frames */
  733. *need_more = !end && *need_more;
  734. }
  735. static void ff_opus_psy_celt_frame_setup(OpusEncContext *s, CeltFrame *f, int index)
  736. {
  737. int frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize);
  738. f->avctx = s->avctx;
  739. f->dsp = s->dsp;
  740. f->pvq = s->pvq;
  741. f->start_band = (s->mode == OPUS_MODE_HYBRID) ? 17 : 0;
  742. f->end_band = ff_celt_band_end[s->bandwidth];
  743. f->channels = s->channels;
  744. f->size = s->pkt_framesize;
  745. /* Decisions */
  746. f->silence = 0;
  747. f->pfilter = 0;
  748. f->transient = 0;
  749. f->tf_select = 0;
  750. f->anticollapse = 0;
  751. f->alloc_trim = 5;
  752. f->skip_band_floor = f->end_band;
  753. f->intensity_stereo = f->end_band;
  754. f->dual_stereo = 0;
  755. f->spread = CELT_SPREAD_NORMAL;
  756. memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS);
  757. memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS);
  758. f->blocks = f->transient ? frame_size/CELT_OVERLAP : 1;
  759. f->framebits = FFALIGN(lrintf((double)s->avctx->bit_rate/(s->avctx->sample_rate/frame_size)), 8);
  760. }
  761. static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
  762. {
  763. int i, offset, fsize_needed;
  764. /* Write toc */
  765. opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
  766. for (i = 0; i < s->pkt_frames; i++) {
  767. ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset, s->frame[i].framebits >> 3);
  768. offset += s->frame[i].framebits >> 3;
  769. }
  770. avpkt->size = offset;
  771. }
  772. /* Used as overlap for the first frame and padding for the last encoded packet */
  773. static AVFrame *spawn_empty_frame(OpusEncContext *s)
  774. {
  775. int i;
  776. AVFrame *f = av_frame_alloc();
  777. if (!f)
  778. return NULL;
  779. f->format = s->avctx->sample_fmt;
  780. f->nb_samples = s->avctx->frame_size;
  781. f->channel_layout = s->avctx->channel_layout;
  782. if (av_frame_get_buffer(f, 4)) {
  783. av_frame_free(&f);
  784. return NULL;
  785. }
  786. for (i = 0; i < s->channels; i++) {
  787. size_t bps = av_get_bytes_per_sample(f->format);
  788. memset(f->extended_data[i], 0, bps*f->nb_samples);
  789. }
  790. return f;
  791. }
  792. static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  793. const AVFrame *frame, int *got_packet_ptr)
  794. {
  795. OpusEncContext *s = avctx->priv_data;
  796. int i, ret, frame_size, need_more, alloc_size = 0;
  797. if (frame) { /* Add new frame to queue */
  798. if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
  799. return ret;
  800. ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
  801. } else {
  802. if (!s->afq.remaining_samples)
  803. return 0; /* We've been flushed and there's nothing left to encode */
  804. }
  805. /* Run the psychoacoustic system */
  806. ff_opus_psy_process(s, !frame, &need_more);
  807. /* Get more samples for lookahead/encoding */
  808. if (need_more)
  809. return 0;
  810. frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize);
  811. if (!frame) {
  812. /* This can go negative, that's not a problem, we only pad if positive */
  813. int pad_empty = s->pkt_frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
  814. /* Pad with empty 2.5 ms frames to whatever framesize was decided,
  815. * this should only happen at the very last flush frame. The frames
  816. * allocated here will be freed (because they have no other references)
  817. * after they get used by celt_frame_setup_input() */
  818. for (i = 0; i < pad_empty; i++) {
  819. AVFrame *empty = spawn_empty_frame(s);
  820. if (!empty)
  821. return AVERROR(ENOMEM);
  822. ff_bufqueue_add(avctx, &s->bufqueue, empty);
  823. }
  824. }
  825. for (i = 0; i < s->pkt_frames; i++) {
  826. ff_opus_rc_enc_init(&s->rc[i]);
  827. ff_opus_psy_celt_frame_setup(s, &s->frame[i], i);
  828. celt_encode_frame(s, &s->rc[i], &s->frame[i]);
  829. alloc_size += s->frame[i].framebits >> 3;
  830. }
  831. /* Worst case toc + the frame lengths if needed */
  832. alloc_size += 2 + s->pkt_frames*2;
  833. if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0)
  834. return ret;
  835. /* Assemble packet */
  836. opus_packet_assembler(s, avpkt);
  837. /* Remove samples from queue and skip if needed */
  838. ff_af_queue_remove(&s->afq, s->pkt_frames*frame_size, &avpkt->pts, &avpkt->duration);
  839. if (s->pkt_frames*frame_size > avpkt->duration) {
  840. uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
  841. if (!side)
  842. return AVERROR(ENOMEM);
  843. AV_WL32(&side[4], s->pkt_frames*frame_size - avpkt->duration + 120);
  844. }
  845. *got_packet_ptr = 1;
  846. return 0;
  847. }
  848. static av_cold int opus_encode_end(AVCodecContext *avctx)
  849. {
  850. int i;
  851. OpusEncContext *s = avctx->priv_data;
  852. for (i = 0; i < CELT_BLOCK_NB; i++)
  853. ff_mdct15_uninit(&s->mdct[i]);
  854. ff_celt_pvq_uninit(&s->pvq);
  855. av_freep(&s->dsp);
  856. av_freep(&s->frame);
  857. av_freep(&s->rc);
  858. ff_af_queue_close(&s->afq);
  859. ff_bufqueue_discard_all(&s->bufqueue);
  860. av_freep(&avctx->extradata);
  861. return 0;
  862. }
  863. static av_cold int opus_encode_init(AVCodecContext *avctx)
  864. {
  865. int i, ch, ret;
  866. OpusEncContext *s = avctx->priv_data;
  867. s->avctx = avctx;
  868. s->channels = avctx->channels;
  869. /* Opus allows us to change the framesize on each packet (and each packet may
  870. * have multiple frames in it) but we can't change the codec's frame size on
  871. * runtime, so fix it to the lowest possible number of samples and use a queue
  872. * to accumulate AVFrames until we have enough to encode whatever the encoder
  873. * decides is the best */
  874. avctx->frame_size = 120;
  875. /* Initial padding will change if SILK is ever supported */
  876. avctx->initial_padding = 120;
  877. if (!avctx->bit_rate) {
  878. int coupled = ff_opus_default_coupled_streams[s->channels - 1];
  879. avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000);
  880. } else if (avctx->bit_rate < 6000 || avctx->bit_rate > 255000 * s->channels) {
  881. int64_t clipped_rate = av_clip(avctx->bit_rate, 6000, 255000 * s->channels);
  882. av_log(avctx, AV_LOG_ERROR, "Unsupported bitrate %"PRId64" kbps, clipping to %"PRId64" kbps\n",
  883. avctx->bit_rate/1000, clipped_rate/1000);
  884. avctx->bit_rate = clipped_rate;
  885. }
  886. /* Frame structs and range coder buffers */
  887. s->frame = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(CeltFrame));
  888. if (!s->frame)
  889. return AVERROR(ENOMEM);
  890. s->rc = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(OpusRangeCoder));
  891. if (!s->rc)
  892. return AVERROR(ENOMEM);
  893. /* Extradata */
  894. avctx->extradata_size = 19;
  895. avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
  896. if (!avctx->extradata)
  897. return AVERROR(ENOMEM);
  898. opus_write_extradata(avctx);
  899. ff_af_queue_init(avctx, &s->afq);
  900. if ((ret = ff_celt_pvq_init(&s->pvq)) < 0)
  901. return ret;
  902. if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
  903. return AVERROR(ENOMEM);
  904. /* I have no idea why a base scaling factor of 68 works, could be the twiddles */
  905. for (i = 0; i < CELT_BLOCK_NB; i++)
  906. if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
  907. return AVERROR(ENOMEM);
  908. for (i = 0; i < OPUS_MAX_FRAMES_PER_PACKET; i++) {
  909. s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
  910. s->frame[i].seed = 0;
  911. }
  912. /* Zero out previous energy (matters for inter first frame) */
  913. for (ch = 0; ch < s->channels; ch++)
  914. for (i = 0; i < CELT_MAX_BANDS; i++)
  915. s->last_quantized_energy[ch][i] = 0.0f;
  916. /* Allocate an empty frame to use as overlap for the first frame of audio */
  917. ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s));
  918. if (!ff_bufqueue_peek(&s->bufqueue, 0))
  919. return AVERROR(ENOMEM);
  920. return 0;
  921. }
  922. #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
  923. static const AVOption opusenc_options[] = {
  924. { "opus_delay", "Maximum delay (and lookahead) in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS },
  925. { NULL },
  926. };
  927. static const AVClass opusenc_class = {
  928. .class_name = "Opus encoder",
  929. .item_name = av_default_item_name,
  930. .option = opusenc_options,
  931. .version = LIBAVUTIL_VERSION_INT,
  932. };
  933. static const AVCodecDefault opusenc_defaults[] = {
  934. { "b", "0" },
  935. { "compression_level", "10" },
  936. { NULL },
  937. };
  938. AVCodec ff_opus_encoder = {
  939. .name = "opus",
  940. .long_name = NULL_IF_CONFIG_SMALL("Opus"),
  941. .type = AVMEDIA_TYPE_AUDIO,
  942. .id = AV_CODEC_ID_OPUS,
  943. .defaults = opusenc_defaults,
  944. .priv_class = &opusenc_class,
  945. .priv_data_size = sizeof(OpusEncContext),
  946. .init = opus_encode_init,
  947. .encode2 = opus_encode_frame,
  948. .close = opus_encode_end,
  949. .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
  950. .capabilities = AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
  951. .supported_samplerates = (const int []){ 48000, 0 },
  952. .channel_layouts = (const uint64_t []){ AV_CH_LAYOUT_MONO,
  953. AV_CH_LAYOUT_STEREO, 0 },
  954. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
  955. AV_SAMPLE_FMT_NONE },
  956. };