You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

741 lines
26KB

  1. /*
  2. * Opus encoder
  3. * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "opusenc.h"
  22. #include "opus_pvq.h"
  23. #include "opusenc_psy.h"
  24. #include "opustab.h"
  25. #include "libavutil/float_dsp.h"
  26. #include "libavutil/mem_internal.h"
  27. #include "libavutil/opt.h"
  28. #include "internal.h"
  29. #include "bytestream.h"
  30. #include "audio_frame_queue.h"
  31. typedef struct OpusEncContext {
  32. AVClass *av_class;
  33. OpusEncOptions options;
  34. OpusPsyContext psyctx;
  35. AVCodecContext *avctx;
  36. AudioFrameQueue afq;
  37. AVFloatDSPContext *dsp;
  38. MDCT15Context *mdct[CELT_BLOCK_NB];
  39. CeltPVQ *pvq;
  40. struct FFBufQueue bufqueue;
  41. uint8_t enc_id[64];
  42. int enc_id_bits;
  43. OpusPacketInfo packet;
  44. int channels;
  45. CeltFrame *frame;
  46. OpusRangeCoder *rc;
  47. /* Actual energy the decoder will have */
  48. float last_quantized_energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];
  49. DECLARE_ALIGNED(32, float, scratch)[2048];
  50. } OpusEncContext;
  51. static void opus_write_extradata(AVCodecContext *avctx)
  52. {
  53. uint8_t *bs = avctx->extradata;
  54. bytestream_put_buffer(&bs, "OpusHead", 8);
  55. bytestream_put_byte (&bs, 0x1);
  56. bytestream_put_byte (&bs, avctx->channels);
  57. bytestream_put_le16 (&bs, avctx->initial_padding);
  58. bytestream_put_le32 (&bs, avctx->sample_rate);
  59. bytestream_put_le16 (&bs, 0x0);
  60. bytestream_put_byte (&bs, 0x0); /* Default layout */
  61. }
  62. static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed)
  63. {
  64. int tmp = 0x0, extended_toc = 0;
  65. static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = {
  66. /* Silk Hybrid Celt Layer */
  67. /* NB MB WB SWB FB NB MB WB SWB FB NB MB WB SWB FB Bandwidth */
  68. { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 17, 0, 21, 25, 29 } }, /* 2.5 ms */
  69. { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 18, 0, 22, 26, 30 } }, /* 5 ms */
  70. { { 1, 5, 9, 0, 0 }, { 0, 0, 0, 13, 15 }, { 19, 0, 23, 27, 31 } }, /* 10 ms */
  71. { { 2, 6, 10, 0, 0 }, { 0, 0, 0, 14, 16 }, { 20, 0, 24, 28, 32 } }, /* 20 ms */
  72. { { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */
  73. { { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */
  74. };
  75. int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth];
  76. *fsize_needed = 0;
  77. if (!cfg)
  78. return 1;
  79. if (s->packet.frames == 2) { /* 2 packets */
  80. if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */
  81. tmp = 0x1;
  82. } else { /* different size */
  83. tmp = 0x2;
  84. *fsize_needed = 1; /* put frame sizes in the packet */
  85. }
  86. } else if (s->packet.frames > 2) {
  87. tmp = 0x3;
  88. extended_toc = 1;
  89. }
  90. tmp |= (s->channels > 1) << 2; /* Stereo or mono */
  91. tmp |= (cfg - 1) << 3; /* codec configuration */
  92. *toc++ = tmp;
  93. if (extended_toc) {
  94. for (int i = 0; i < (s->packet.frames - 1); i++)
  95. *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
  96. tmp = (*fsize_needed) << 7; /* vbr flag */
  97. tmp |= (0) << 6; /* padding flag */
  98. tmp |= s->packet.frames;
  99. *toc++ = tmp;
  100. }
  101. *size = 1 + extended_toc;
  102. return 0;
  103. }
  104. static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
  105. {
  106. AVFrame *cur = NULL;
  107. const int subframesize = s->avctx->frame_size;
  108. int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
  109. cur = ff_bufqueue_get(&s->bufqueue);
  110. for (int ch = 0; ch < f->channels; ch++) {
  111. CeltBlock *b = &f->block[ch];
  112. const void *input = cur->extended_data[ch];
  113. size_t bps = av_get_bytes_per_sample(cur->format);
  114. memcpy(b->overlap, input, bps*cur->nb_samples);
  115. }
  116. av_frame_free(&cur);
  117. for (int sf = 0; sf < subframes; sf++) {
  118. if (sf != (subframes - 1))
  119. cur = ff_bufqueue_get(&s->bufqueue);
  120. else
  121. cur = ff_bufqueue_peek(&s->bufqueue, 0);
  122. for (int ch = 0; ch < f->channels; ch++) {
  123. CeltBlock *b = &f->block[ch];
  124. const void *input = cur->extended_data[ch];
  125. const size_t bps = av_get_bytes_per_sample(cur->format);
  126. const size_t left = (subframesize - cur->nb_samples)*bps;
  127. const size_t len = FFMIN(subframesize, cur->nb_samples)*bps;
  128. memcpy(&b->samples[sf*subframesize], input, len);
  129. memset(&b->samples[cur->nb_samples], 0, left);
  130. }
  131. /* Last frame isn't popped off and freed yet - we need it for overlap */
  132. if (sf != (subframes - 1))
  133. av_frame_free(&cur);
  134. }
  135. }
  136. /* Apply the pre emphasis filter */
  137. static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
  138. {
  139. const int subframesize = s->avctx->frame_size;
  140. const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
  141. /* Filter overlap */
  142. for (int ch = 0; ch < f->channels; ch++) {
  143. CeltBlock *b = &f->block[ch];
  144. float m = b->emph_coeff;
  145. for (int i = 0; i < CELT_OVERLAP; i++) {
  146. float sample = b->overlap[i];
  147. b->overlap[i] = sample - m;
  148. m = sample * CELT_EMPH_COEFF;
  149. }
  150. b->emph_coeff = m;
  151. }
  152. /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */
  153. for (int sf = 0; sf < subframes; sf++) {
  154. for (int ch = 0; ch < f->channels; ch++) {
  155. CeltBlock *b = &f->block[ch];
  156. float m = b->emph_coeff;
  157. for (int i = 0; i < subframesize; i++) {
  158. float sample = b->samples[sf*subframesize + i];
  159. b->samples[sf*subframesize + i] = sample - m;
  160. m = sample * CELT_EMPH_COEFF;
  161. }
  162. if (sf != (subframes - 1))
  163. b->emph_coeff = m;
  164. }
  165. }
  166. }
  167. /* Create the window and do the mdct */
  168. static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
  169. {
  170. float *win = s->scratch, *temp = s->scratch + 1920;
  171. if (f->transient) {
  172. for (int ch = 0; ch < f->channels; ch++) {
  173. CeltBlock *b = &f->block[ch];
  174. float *src1 = b->overlap;
  175. for (int t = 0; t < f->blocks; t++) {
  176. float *src2 = &b->samples[CELT_OVERLAP*t];
  177. s->dsp->vector_fmul(win, src1, ff_celt_window, 128);
  178. s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
  179. ff_celt_window - 8, 128);
  180. src1 = src2;
  181. s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
  182. }
  183. }
  184. } else {
  185. int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
  186. int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
  187. memset(win, 0, wlen*sizeof(float));
  188. for (int ch = 0; ch < f->channels; ch++) {
  189. CeltBlock *b = &f->block[ch];
  190. /* Overlap */
  191. s->dsp->vector_fmul(temp, b->overlap, ff_celt_window, 128);
  192. memcpy(win + lap_dst, temp, CELT_OVERLAP*sizeof(float));
  193. /* Samples, flat top window */
  194. memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float));
  195. /* Samples, windowed */
  196. s->dsp->vector_fmul_reverse(temp, b->samples + rwin,
  197. ff_celt_window - 8, 128);
  198. memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float));
  199. s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
  200. }
  201. }
  202. for (int ch = 0; ch < f->channels; ch++) {
  203. CeltBlock *block = &f->block[ch];
  204. for (int i = 0; i < CELT_MAX_BANDS; i++) {
  205. float ener = 0.0f;
  206. int band_offset = ff_celt_freq_bands[i] << f->size;
  207. int band_size = ff_celt_freq_range[i] << f->size;
  208. float *coeffs = &block->coeffs[band_offset];
  209. for (int j = 0; j < band_size; j++)
  210. ener += coeffs[j]*coeffs[j];
  211. block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON;
  212. ener = 1.0f/block->lin_energy[i];
  213. for (int j = 0; j < band_size; j++)
  214. coeffs[j] *= ener;
  215. block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i];
  216. /* CELT_ENERGY_SILENCE is what the decoder uses and its not -infinity */
  217. block->energy[i] = FFMAX(block->energy[i], CELT_ENERGY_SILENCE);
  218. }
  219. }
  220. }
  221. static void celt_enc_tf(CeltFrame *f, OpusRangeCoder *rc)
  222. {
  223. int tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed;
  224. int bits = f->transient ? 2 : 4;
  225. tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits));
  226. for (int i = f->start_band; i < f->end_band; i++) {
  227. if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) {
  228. const int tbit = (diff ^ 1) == f->tf_change[i];
  229. ff_opus_rc_enc_log(rc, tbit, bits);
  230. diff ^= tbit;
  231. tf_changed |= diff;
  232. }
  233. bits = f->transient ? 4 : 5;
  234. }
  235. if (tf_select_needed && ff_celt_tf_select[f->size][f->transient][0][tf_changed] !=
  236. ff_celt_tf_select[f->size][f->transient][1][tf_changed]) {
  237. ff_opus_rc_enc_log(rc, f->tf_select, 1);
  238. tf_select = f->tf_select;
  239. }
  240. for (int i = f->start_band; i < f->end_band; i++)
  241. f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
  242. }
  243. static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f)
  244. {
  245. float gain = f->pf_gain;
  246. int txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
  247. ff_opus_rc_enc_log(rc, f->pfilter, 1);
  248. if (!f->pfilter)
  249. return;
  250. /* Octave */
  251. txval = FFMIN(octave, 6);
  252. ff_opus_rc_enc_uint(rc, txval, 6);
  253. octave = txval;
  254. /* Period */
  255. txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1);
  256. ff_opus_rc_put_raw(rc, period, 4 + octave);
  257. period = txval + (16 << octave) - 1;
  258. /* Gain */
  259. txval = FFMIN(((int)(gain / 0.09375f)) - 1, 7);
  260. ff_opus_rc_put_raw(rc, txval, 3);
  261. gain = 0.09375f * (txval + 1);
  262. /* Tapset */
  263. if ((opus_rc_tell(rc) + 2) <= f->framebits)
  264. ff_opus_rc_enc_cdf(rc, tapset, ff_celt_model_tapset);
  265. else
  266. tapset = 0;
  267. /* Finally create the coeffs */
  268. for (int i = 0; i < 2; i++) {
  269. CeltBlock *block = &f->block[i];
  270. block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
  271. block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
  272. block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
  273. block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
  274. }
  275. }
  276. static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
  277. float last_energy[][CELT_MAX_BANDS], int intra)
  278. {
  279. float alpha, beta, prev[2] = { 0, 0 };
  280. const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][intra];
  281. /* Inter is really just differential coding */
  282. if (opus_rc_tell(rc) + 3 <= f->framebits)
  283. ff_opus_rc_enc_log(rc, intra, 3);
  284. else
  285. intra = 0;
  286. if (intra) {
  287. alpha = 0.0f;
  288. beta = 1.0f - (4915.0f/32768.0f);
  289. } else {
  290. alpha = ff_celt_alpha_coef[f->size];
  291. beta = ff_celt_beta_coef[f->size];
  292. }
  293. for (int i = f->start_band; i < f->end_band; i++) {
  294. for (int ch = 0; ch < f->channels; ch++) {
  295. CeltBlock *block = &f->block[ch];
  296. const int left = f->framebits - opus_rc_tell(rc);
  297. const float last = FFMAX(-9.0f, last_energy[ch][i]);
  298. float diff = block->energy[i] - prev[ch] - last*alpha;
  299. int q_en = lrintf(diff);
  300. if (left >= 15) {
  301. ff_opus_rc_enc_laplace(rc, &q_en, pmod[i << 1] << 7, pmod[(i << 1) + 1] << 6);
  302. } else if (left >= 2) {
  303. q_en = av_clip(q_en, -1, 1);
  304. ff_opus_rc_enc_cdf(rc, 2*q_en + 3*(q_en < 0), ff_celt_model_energy_small);
  305. } else if (left >= 1) {
  306. q_en = av_clip(q_en, -1, 0);
  307. ff_opus_rc_enc_log(rc, (q_en & 1), 1);
  308. } else q_en = -1;
  309. block->error_energy[i] = q_en - diff;
  310. prev[ch] += beta * q_en;
  311. }
  312. }
  313. }
  314. static void celt_quant_coarse(CeltFrame *f, OpusRangeCoder *rc,
  315. float last_energy[][CELT_MAX_BANDS])
  316. {
  317. uint32_t inter, intra;
  318. OPUS_RC_CHECKPOINT_SPAWN(rc);
  319. exp_quant_coarse(rc, f, last_energy, 1);
  320. intra = OPUS_RC_CHECKPOINT_BITS(rc);
  321. OPUS_RC_CHECKPOINT_ROLLBACK(rc);
  322. exp_quant_coarse(rc, f, last_energy, 0);
  323. inter = OPUS_RC_CHECKPOINT_BITS(rc);
  324. if (inter > intra) { /* Unlikely */
  325. OPUS_RC_CHECKPOINT_ROLLBACK(rc);
  326. exp_quant_coarse(rc, f, last_energy, 1);
  327. }
  328. }
  329. static void celt_quant_fine(CeltFrame *f, OpusRangeCoder *rc)
  330. {
  331. for (int i = f->start_band; i < f->end_band; i++) {
  332. if (!f->fine_bits[i])
  333. continue;
  334. for (int ch = 0; ch < f->channels; ch++) {
  335. CeltBlock *block = &f->block[ch];
  336. int quant, lim = (1 << f->fine_bits[i]);
  337. float offset, diff = 0.5f - block->error_energy[i];
  338. quant = av_clip(floor(diff*lim), 0, lim - 1);
  339. ff_opus_rc_put_raw(rc, quant, f->fine_bits[i]);
  340. offset = 0.5f - ((quant + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f);
  341. block->error_energy[i] -= offset;
  342. }
  343. }
  344. }
  345. static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
  346. {
  347. for (int priority = 0; priority < 2; priority++) {
  348. for (int i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) {
  349. if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
  350. continue;
  351. for (int ch = 0; ch < f->channels; ch++) {
  352. CeltBlock *block = &f->block[ch];
  353. const float err = block->error_energy[i];
  354. const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
  355. const int sign = FFABS(err + offset) < FFABS(err - offset);
  356. ff_opus_rc_put_raw(rc, sign, 1);
  357. block->error_energy[i] -= offset*(1 - 2*sign);
  358. }
  359. }
  360. }
  361. }
  362. static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc,
  363. CeltFrame *f, int index)
  364. {
  365. ff_opus_rc_enc_init(rc);
  366. ff_opus_psy_celt_frame_init(&s->psyctx, f, index);
  367. celt_frame_setup_input(s, f);
  368. if (f->silence) {
  369. if (f->framebits >= 16)
  370. ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit singalling) */
  371. for (int ch = 0; ch < s->channels; ch++)
  372. memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
  373. return;
  374. }
  375. /* Filters */
  376. celt_apply_preemph_filter(s, f);
  377. if (f->pfilter) {
  378. ff_opus_rc_enc_log(rc, 0, 15);
  379. celt_enc_quant_pfilter(rc, f);
  380. }
  381. /* Transform */
  382. celt_frame_mdct(s, f);
  383. /* Need to handle transient/non-transient switches at any point during analysis */
  384. while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index))
  385. celt_frame_mdct(s, f);
  386. ff_opus_rc_enc_init(rc);
  387. /* Silence */
  388. ff_opus_rc_enc_log(rc, 0, 15);
  389. /* Pitch filter */
  390. if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
  391. celt_enc_quant_pfilter(rc, f);
  392. /* Transient flag */
  393. if (f->size && opus_rc_tell(rc) + 3 <= f->framebits)
  394. ff_opus_rc_enc_log(rc, f->transient, 3);
  395. /* Main encoding */
  396. celt_quant_coarse (f, rc, s->last_quantized_energy);
  397. celt_enc_tf (f, rc);
  398. ff_celt_bitalloc (f, rc, 1);
  399. celt_quant_fine (f, rc);
  400. ff_celt_quant_bands(f, rc);
  401. /* Anticollapse bit */
  402. if (f->anticollapse_needed)
  403. ff_opus_rc_put_raw(rc, f->anticollapse, 1);
  404. /* Final per-band energy adjustments from leftover bits */
  405. celt_quant_final(s, rc, f);
  406. for (int ch = 0; ch < f->channels; ch++) {
  407. CeltBlock *block = &f->block[ch];
  408. for (int i = 0; i < CELT_MAX_BANDS; i++)
  409. s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i];
  410. }
  411. }
  /**
   * Write a frame length in the one or two byte Opus length coding.
   *
   * Lengths below 252 take a single byte. Larger ones are stored as a first
   * byte in 252..255 that carries the two low bits of the length, followed by
   * a second byte such that length = 4*second + first.
   *
   * Only the bytes counted by the return value are written, so a one byte
   * length never touches dst[1].
   *
   * @param dst destination; needs 1 byte for v < 252, otherwise 2
   * @param v   length to code; at most 1275 fits the two byte form
   * @return number of bytes written (1 or 2)
   */
  static inline int write_opuslacing(uint8_t *dst, int v)
  {
      if (v < 252) {
          dst[0] = (uint8_t)v;
          return 1;
      }

      dst[0] = (uint8_t)(252 + (v & 3));
      dst[1] = (uint8_t)((v - dst[0]) >> 2);
      return 2;
  }
  418. static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
  419. {
  420. int offset, fsize_needed;
  421. /* Write toc */
  422. opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
  423. /* Frame sizes if needed */
  424. if (fsize_needed) {
  425. for (int i = 0; i < s->packet.frames - 1; i++) {
  426. offset += write_opuslacing(avpkt->data + offset,
  427. s->frame[i].framebits >> 3);
  428. }
  429. }
  430. /* Packets */
  431. for (int i = 0; i < s->packet.frames; i++) {
  432. ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset,
  433. s->frame[i].framebits >> 3);
  434. offset += s->frame[i].framebits >> 3;
  435. }
  436. avpkt->size = offset;
  437. }
  438. /* Used as overlap for the first frame and padding for the last encoded packet */
  439. static AVFrame *spawn_empty_frame(OpusEncContext *s)
  440. {
  441. AVFrame *f = av_frame_alloc();
  442. if (!f)
  443. return NULL;
  444. f->format = s->avctx->sample_fmt;
  445. f->nb_samples = s->avctx->frame_size;
  446. f->channel_layout = s->avctx->channel_layout;
  447. if (av_frame_get_buffer(f, 4)) {
  448. av_frame_free(&f);
  449. return NULL;
  450. }
  451. for (int i = 0; i < s->channels; i++) {
  452. size_t bps = av_get_bytes_per_sample(f->format);
  453. memset(f->extended_data[i], 0, bps*f->nb_samples);
  454. }
  455. return f;
  456. }
  457. static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  458. const AVFrame *frame, int *got_packet_ptr)
  459. {
  460. OpusEncContext *s = avctx->priv_data;
  461. int ret, frame_size, alloc_size = 0;
  462. if (frame) { /* Add new frame to queue */
  463. if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
  464. return ret;
  465. ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
  466. } else {
  467. ff_opus_psy_signal_eof(&s->psyctx);
  468. if (!s->afq.remaining_samples || !avctx->frame_number)
  469. return 0; /* We've been flushed and there's nothing left to encode */
  470. }
  471. /* Run the psychoacoustic system */
  472. if (ff_opus_psy_process(&s->psyctx, &s->packet))
  473. return 0;
  474. frame_size = OPUS_BLOCK_SIZE(s->packet.framesize);
  475. if (!frame) {
  476. /* This can go negative, that's not a problem, we only pad if positive */
  477. int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
  478. /* Pad with empty 2.5 ms frames to whatever framesize was decided,
  479. * this should only happen at the very last flush frame. The frames
  480. * allocated here will be freed (because they have no other references)
  481. * after they get used by celt_frame_setup_input() */
  482. for (int i = 0; i < pad_empty; i++) {
  483. AVFrame *empty = spawn_empty_frame(s);
  484. if (!empty)
  485. return AVERROR(ENOMEM);
  486. ff_bufqueue_add(avctx, &s->bufqueue, empty);
  487. }
  488. }
  489. for (int i = 0; i < s->packet.frames; i++) {
  490. celt_encode_frame(s, &s->rc[i], &s->frame[i], i);
  491. alloc_size += s->frame[i].framebits >> 3;
  492. }
  493. /* Worst case toc + the frame lengths if needed */
  494. alloc_size += 2 + s->packet.frames*2;
  495. if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0)
  496. return ret;
  497. /* Assemble packet */
  498. opus_packet_assembler(s, avpkt);
  499. /* Update the psychoacoustic system */
  500. ff_opus_psy_postencode_update(&s->psyctx, s->frame, s->rc);
  501. /* Remove samples from queue and skip if needed */
  502. ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration);
  503. if (s->packet.frames*frame_size > avpkt->duration) {
  504. uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
  505. if (!side)
  506. return AVERROR(ENOMEM);
  507. AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120);
  508. }
  509. *got_packet_ptr = 1;
  510. return 0;
  511. }
  512. static av_cold int opus_encode_end(AVCodecContext *avctx)
  513. {
  514. OpusEncContext *s = avctx->priv_data;
  515. for (int i = 0; i < CELT_BLOCK_NB; i++)
  516. ff_mdct15_uninit(&s->mdct[i]);
  517. ff_celt_pvq_uninit(&s->pvq);
  518. av_freep(&s->dsp);
  519. av_freep(&s->frame);
  520. av_freep(&s->rc);
  521. ff_af_queue_close(&s->afq);
  522. ff_opus_psy_end(&s->psyctx);
  523. ff_bufqueue_discard_all(&s->bufqueue);
  524. av_freep(&avctx->extradata);
  525. return 0;
  526. }
  527. static av_cold int opus_encode_init(AVCodecContext *avctx)
  528. {
  529. int ret, max_frames;
  530. OpusEncContext *s = avctx->priv_data;
  531. s->avctx = avctx;
  532. s->channels = avctx->channels;
  533. /* Opus allows us to change the framesize on each packet (and each packet may
  534. * have multiple frames in it) but we can't change the codec's frame size on
  535. * runtime, so fix it to the lowest possible number of samples and use a queue
  536. * to accumulate AVFrames until we have enough to encode whatever the encoder
  537. * decides is the best */
  538. avctx->frame_size = 120;
  539. /* Initial padding will change if SILK is ever supported */
  540. avctx->initial_padding = 120;
  541. if (!avctx->bit_rate) {
  542. int coupled = ff_opus_default_coupled_streams[s->channels - 1];
  543. avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000);
  544. } else if (avctx->bit_rate < 6000 || avctx->bit_rate > 255000 * s->channels) {
  545. int64_t clipped_rate = av_clip(avctx->bit_rate, 6000, 255000 * s->channels);
  546. av_log(avctx, AV_LOG_ERROR, "Unsupported bitrate %"PRId64" kbps, clipping to %"PRId64" kbps\n",
  547. avctx->bit_rate/1000, clipped_rate/1000);
  548. avctx->bit_rate = clipped_rate;
  549. }
  550. /* Extradata */
  551. avctx->extradata_size = 19;
  552. avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
  553. if (!avctx->extradata)
  554. return AVERROR(ENOMEM);
  555. opus_write_extradata(avctx);
  556. ff_af_queue_init(avctx, &s->afq);
  557. if ((ret = ff_celt_pvq_init(&s->pvq, 1)) < 0)
  558. return ret;
  559. if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
  560. return AVERROR(ENOMEM);
  561. /* I have no idea why a base scaling factor of 68 works, could be the twiddles */
  562. for (int i = 0; i < CELT_BLOCK_NB; i++)
  563. if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
  564. return AVERROR(ENOMEM);
  565. /* Zero out previous energy (matters for inter first frame) */
  566. for (int ch = 0; ch < s->channels; ch++)
  567. memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
  568. /* Allocate an empty frame to use as overlap for the first frame of audio */
  569. ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s));
  570. if (!ff_bufqueue_peek(&s->bufqueue, 0))
  571. return AVERROR(ENOMEM);
  572. if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options)))
  573. return ret;
  574. /* Frame structs and range coder buffers */
  575. max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f);
  576. s->frame = av_malloc(max_frames*sizeof(CeltFrame));
  577. if (!s->frame)
  578. return AVERROR(ENOMEM);
  579. s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder));
  580. if (!s->rc)
  581. return AVERROR(ENOMEM);
  582. for (int i = 0; i < max_frames; i++) {
  583. s->frame[i].dsp = s->dsp;
  584. s->frame[i].avctx = s->avctx;
  585. s->frame[i].seed = 0;
  586. s->frame[i].pvq = s->pvq;
  587. s->frame[i].apply_phase_inv = s->options.apply_phase_inv;
  588. s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
  589. }
  590. return 0;
  591. }
  592. #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
  593. static const AVOption opusenc_options[] = {
  594. { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, "max_delay_ms" },
  595. { "apply_phase_inv", "Apply intensity stereo phase inversion", offsetof(OpusEncContext, options.apply_phase_inv), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, OPUSENC_FLAGS, "apply_phase_inv" },
  596. { NULL },
  597. };
  598. static const AVClass opusenc_class = {
  599. .class_name = "Opus encoder",
  600. .item_name = av_default_item_name,
  601. .option = opusenc_options,
  602. .version = LIBAVUTIL_VERSION_INT,
  603. };
  604. static const AVCodecDefault opusenc_defaults[] = {
  605. { "b", "0" },
  606. { "compression_level", "10" },
  607. { NULL },
  608. };
  609. AVCodec ff_opus_encoder = {
  610. .name = "opus",
  611. .long_name = NULL_IF_CONFIG_SMALL("Opus"),
  612. .type = AVMEDIA_TYPE_AUDIO,
  613. .id = AV_CODEC_ID_OPUS,
  614. .defaults = opusenc_defaults,
  615. .priv_class = &opusenc_class,
  616. .priv_data_size = sizeof(OpusEncContext),
  617. .init = opus_encode_init,
  618. .encode2 = opus_encode_frame,
  619. .close = opus_encode_end,
  620. .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
  621. .capabilities = AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
  622. .supported_samplerates = (const int []){ 48000, 0 },
  623. .channel_layouts = (const uint64_t []){ AV_CH_LAYOUT_MONO,
  624. AV_CH_LAYOUT_STEREO, 0 },
  625. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
  626. AV_SAMPLE_FMT_NONE },
  627. };