You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

450 lines
16KB

  1. /*
  2. * Opus encoder using libopus
  3. * Copyright (c) 2012 Nathan Caldwell
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include <opus.h>
  22. #include <opus_multistream.h>
  23. #include "libavutil/opt.h"
  24. #include "avcodec.h"
  25. #include "bytestream.h"
  26. #include "internal.h"
  27. #include "libopus.h"
  28. #include "vorbis.h"
  29. #include "audio_frame_queue.h"
  /**
   * Encoder settings, filled partly from the private AVOptions and partly
   * derived from generic AVCodecContext fields during init.
   */
  typedef struct LibopusEncOpts {
      int   vbr;            /* "vbr" option: 0 = off, 1 = on, 2 = constrained */
      int   application;    /* "application" option, an OPUS_APPLICATION_* value */
      int   packet_loss;    /* "packet_loss" option: expected loss percentage, 0..100 */
      int   complexity;     /* taken from avctx->compression_level, 0..10 */
      float frame_duration; /* "frame_duration" option, in milliseconds */
      int   packet_size;    /* samples per channel in one packet, at the stream sample rate */
      int   max_bandwidth;  /* OPUS_BANDWIDTH_* value chosen from avctx->cutoff */
  } LibopusEncOpts;
  39. typedef struct LibopusEncContext {
  40. AVClass *class;
  41. OpusMSEncoder *enc;
  42. int stream_count;
  43. uint8_t *samples;
  44. LibopusEncOpts opts;
  45. AudioFrameQueue afq;
  46. } LibopusEncContext;
  /* Number of coupled streams for each channel count, indexed by (channels - 1). */
  static const uint8_t opus_coupled_streams[8] = {
      [0] = 0, /* 1 channel  */
      [1] = 1, /* 2 channels */
      [2] = 1, /* 3 channels */
      [3] = 2, /* 4 channels */
      [4] = 2, /* 5 channels */
      [5] = 2, /* 6 channels */
      [6] = 2, /* 7 channels */
      [7] = 3, /* 8 channels */
  };
  /*
   * Opus internal to Vorbis channel order mapping written in the header.
   * Row (channels - 1) holds the mapping for that channel count; only the
   * first `channels` entries of a row are meaningful.
   */
  static const uint8_t opus_vorbis_channel_map[8][8] = {
      [0] = { 0 },
      [1] = { 0, 1 },
      [2] = { 0, 2, 1 },
      [3] = { 0, 1, 2, 3 },
      [4] = { 0, 4, 1, 2, 3 },
      [5] = { 0, 4, 1, 2, 3, 5 },
      [6] = { 0, 4, 1, 2, 3, 5, 6 },
      [7] = { 0, 6, 1, 2, 3, 4, 5, 7 },
  };
  /*
   * libavcodec to libopus channel order mapping, passed to libopus.
   * Row (channels - 1) holds the mapping for that channel count; only the
   * first `channels` entries of a row are meaningful.
   */
  static const uint8_t libavcodec_libopus_channel_map[8][8] = {
      [0] = { 0 },
      [1] = { 0, 1 },
      [2] = { 0, 1, 2 },
      [3] = { 0, 1, 2, 3 },
      [4] = { 0, 1, 3, 4, 2 },
      [5] = { 0, 1, 4, 5, 2, 3 },
      [6] = { 0, 1, 5, 6, 2, 4, 3 },
      [7] = { 0, 1, 6, 7, 4, 5, 2, 3 },
  };
  72. static void libopus_write_header(AVCodecContext *avctx, int stream_count,
  73. int coupled_stream_count,
  74. const uint8_t *channel_mapping)
  75. {
  76. uint8_t *p = avctx->extradata;
  77. int channels = avctx->channels;
  78. bytestream_put_buffer(&p, "OpusHead", 8);
  79. bytestream_put_byte(&p, 1); /* Version */
  80. bytestream_put_byte(&p, channels);
  81. bytestream_put_le16(&p, avctx->initial_padding); /* Lookahead samples at 48kHz */
  82. bytestream_put_le32(&p, avctx->sample_rate); /* Original sample rate */
  83. bytestream_put_le16(&p, 0); /* Gain of 0dB is recommended. */
  84. /* Channel mapping */
  85. if (channels > 2) {
  86. bytestream_put_byte(&p, channels <= 8 ? 1 : 255);
  87. bytestream_put_byte(&p, stream_count);
  88. bytestream_put_byte(&p, coupled_stream_count);
  89. bytestream_put_buffer(&p, channel_mapping, channels);
  90. } else {
  91. bytestream_put_byte(&p, 0);
  92. }
  93. }
  94. static int libopus_configure_encoder(AVCodecContext *avctx, OpusMSEncoder *enc,
  95. LibopusEncOpts *opts)
  96. {
  97. int ret;
  98. if (avctx->global_quality) {
  99. av_log(avctx, AV_LOG_ERROR,
  100. "Quality-based encoding not supported, "
  101. "please specify a bitrate and VBR setting.\n");
  102. return AVERROR(EINVAL);
  103. }
  104. ret = opus_multistream_encoder_ctl(enc, OPUS_SET_BITRATE(avctx->bit_rate));
  105. if (ret != OPUS_OK) {
  106. av_log(avctx, AV_LOG_ERROR,
  107. "Failed to set bitrate: %s\n", opus_strerror(ret));
  108. return ret;
  109. }
  110. ret = opus_multistream_encoder_ctl(enc,
  111. OPUS_SET_COMPLEXITY(opts->complexity));
  112. if (ret != OPUS_OK)
  113. av_log(avctx, AV_LOG_WARNING,
  114. "Unable to set complexity: %s\n", opus_strerror(ret));
  115. ret = opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(!!opts->vbr));
  116. if (ret != OPUS_OK)
  117. av_log(avctx, AV_LOG_WARNING,
  118. "Unable to set VBR: %s\n", opus_strerror(ret));
  119. ret = opus_multistream_encoder_ctl(enc,
  120. OPUS_SET_VBR_CONSTRAINT(opts->vbr == 2));
  121. if (ret != OPUS_OK)
  122. av_log(avctx, AV_LOG_WARNING,
  123. "Unable to set constrained VBR: %s\n", opus_strerror(ret));
  124. ret = opus_multistream_encoder_ctl(enc,
  125. OPUS_SET_PACKET_LOSS_PERC(opts->packet_loss));
  126. if (ret != OPUS_OK)
  127. av_log(avctx, AV_LOG_WARNING,
  128. "Unable to set expected packet loss percentage: %s\n",
  129. opus_strerror(ret));
  130. if (avctx->cutoff) {
  131. ret = opus_multistream_encoder_ctl(enc,
  132. OPUS_SET_MAX_BANDWIDTH(opts->max_bandwidth));
  133. if (ret != OPUS_OK)
  134. av_log(avctx, AV_LOG_WARNING,
  135. "Unable to set maximum bandwidth: %s\n", opus_strerror(ret));
  136. }
  137. return OPUS_OK;
  138. }
  139. static av_cold int libopus_encode_init(AVCodecContext *avctx)
  140. {
  141. LibopusEncContext *opus = avctx->priv_data;
  142. const uint8_t *channel_mapping;
  143. OpusMSEncoder *enc;
  144. int ret = OPUS_OK;
  145. int coupled_stream_count, header_size, frame_size;
  146. coupled_stream_count = opus_coupled_streams[avctx->channels - 1];
  147. opus->stream_count = avctx->channels - coupled_stream_count;
  148. channel_mapping = libavcodec_libopus_channel_map[avctx->channels - 1];
  149. /* FIXME: Opus can handle up to 255 channels. However, the mapping for
  150. * anything greater than 8 is undefined. */
  151. if (avctx->channels > 8) {
  152. av_log(avctx, AV_LOG_ERROR,
  153. "Channel layout undefined for %d channels.\n", avctx->channels);
  154. return AVERROR_PATCHWELCOME;
  155. }
  156. if (!avctx->bit_rate) {
  157. /* Sane default copied from opusenc */
  158. avctx->bit_rate = 64000 * opus->stream_count +
  159. 32000 * coupled_stream_count;
  160. av_log(avctx, AV_LOG_WARNING,
  161. "No bit rate set. Defaulting to %d bps.\n", avctx->bit_rate);
  162. }
  163. if (avctx->bit_rate < 500 || avctx->bit_rate > 256000 * avctx->channels) {
  164. av_log(avctx, AV_LOG_ERROR, "The bit rate %d bps is unsupported. "
  165. "Please choose a value between 500 and %d.\n", avctx->bit_rate,
  166. 256000 * avctx->channels);
  167. return AVERROR(EINVAL);
  168. }
  169. frame_size = opus->opts.frame_duration * 48000 / 1000;
  170. switch (frame_size) {
  171. case 120:
  172. case 240:
  173. if (opus->opts.application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
  174. av_log(avctx, AV_LOG_WARNING,
  175. "LPC mode cannot be used with a frame duration of less "
  176. "than 10ms. Enabling restricted low-delay mode.\n"
  177. "Use a longer frame duration if this is not what you want.\n");
  178. /* Frame sizes less than 10 ms can only use MDCT mode, so switching to
  179. * RESTRICTED_LOWDELAY avoids an unnecessary extra 2.5ms lookahead. */
  180. opus->opts.application = OPUS_APPLICATION_RESTRICTED_LOWDELAY;
  181. case 480:
  182. case 960:
  183. case 1920:
  184. case 2880:
  185. opus->opts.packet_size =
  186. avctx->frame_size = frame_size * avctx->sample_rate / 48000;
  187. break;
  188. default:
  189. av_log(avctx, AV_LOG_ERROR, "Invalid frame duration: %g.\n"
  190. "Frame duration must be exactly one of: 2.5, 5, 10, 20, 40 or 60.\n",
  191. opus->opts.frame_duration);
  192. return AVERROR(EINVAL);
  193. }
  194. if (avctx->compression_level < 0 || avctx->compression_level > 10) {
  195. av_log(avctx, AV_LOG_WARNING,
  196. "Compression level must be in the range 0 to 10. "
  197. "Defaulting to 10.\n");
  198. opus->opts.complexity = 10;
  199. } else {
  200. opus->opts.complexity = avctx->compression_level;
  201. }
  202. if (avctx->cutoff) {
  203. switch (avctx->cutoff) {
  204. case 4000:
  205. opus->opts.max_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
  206. break;
  207. case 6000:
  208. opus->opts.max_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
  209. break;
  210. case 8000:
  211. opus->opts.max_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
  212. break;
  213. case 12000:
  214. opus->opts.max_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
  215. break;
  216. case 20000:
  217. opus->opts.max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
  218. break;
  219. default:
  220. av_log(avctx, AV_LOG_WARNING,
  221. "Invalid frequency cutoff: %d. Using default maximum bandwidth.\n"
  222. "Cutoff frequency must be exactly one of: 4000, 6000, 8000, 12000 or 20000.\n",
  223. avctx->cutoff);
  224. avctx->cutoff = 0;
  225. }
  226. }
  227. enc = opus_multistream_encoder_create(avctx->sample_rate, avctx->channels,
  228. opus->stream_count,
  229. coupled_stream_count,
  230. channel_mapping,
  231. opus->opts.application, &ret);
  232. if (ret != OPUS_OK) {
  233. av_log(avctx, AV_LOG_ERROR,
  234. "Failed to create encoder: %s\n", opus_strerror(ret));
  235. return ff_opus_error_to_averror(ret);
  236. }
  237. ret = libopus_configure_encoder(avctx, enc, &opus->opts);
  238. if (ret != OPUS_OK) {
  239. ret = ff_opus_error_to_averror(ret);
  240. goto fail;
  241. }
  242. header_size = 19 + (avctx->channels > 2 ? 2 + avctx->channels : 0);
  243. avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
  244. if (!avctx->extradata) {
  245. av_log(avctx, AV_LOG_ERROR, "Failed to allocate extradata.\n");
  246. ret = AVERROR(ENOMEM);
  247. goto fail;
  248. }
  249. avctx->extradata_size = header_size;
  250. opus->samples = av_mallocz_array(frame_size, avctx->channels *
  251. av_get_bytes_per_sample(avctx->sample_fmt));
  252. if (!opus->samples) {
  253. av_log(avctx, AV_LOG_ERROR, "Failed to allocate samples buffer.\n");
  254. ret = AVERROR(ENOMEM);
  255. goto fail;
  256. }
  257. ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->initial_padding));
  258. if (ret != OPUS_OK)
  259. av_log(avctx, AV_LOG_WARNING,
  260. "Unable to get number of lookahead samples: %s\n",
  261. opus_strerror(ret));
  262. libopus_write_header(avctx, opus->stream_count, coupled_stream_count,
  263. opus_vorbis_channel_map[avctx->channels - 1]);
  264. ff_af_queue_init(avctx, &opus->afq);
  265. opus->enc = enc;
  266. return 0;
  267. fail:
  268. opus_multistream_encoder_destroy(enc);
  269. av_freep(&avctx->extradata);
  270. return ret;
  271. }
  272. static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
  273. const AVFrame *frame, int *got_packet_ptr)
  274. {
  275. LibopusEncContext *opus = avctx->priv_data;
  276. const int sample_size = avctx->channels *
  277. av_get_bytes_per_sample(avctx->sample_fmt);
  278. uint8_t *audio;
  279. int ret;
  280. int discard_padding;
  281. if (frame) {
  282. ret = ff_af_queue_add(&opus->afq, frame);
  283. if (ret < 0)
  284. return ret;
  285. if (frame->nb_samples < opus->opts.packet_size) {
  286. audio = opus->samples;
  287. memcpy(audio, frame->data[0], frame->nb_samples * sample_size);
  288. } else
  289. audio = frame->data[0];
  290. } else {
  291. if (!opus->afq.remaining_samples)
  292. return 0;
  293. audio = opus->samples;
  294. memset(audio, 0, opus->opts.packet_size * sample_size);
  295. }
  296. /* Maximum packet size taken from opusenc in opus-tools. 60ms packets
  297. * consist of 3 frames in one packet. The maximum frame size is 1275
  298. * bytes along with the largest possible packet header of 7 bytes. */
  299. if ((ret = ff_alloc_packet2(avctx, avpkt, (1275 * 3 + 7) * opus->stream_count)) < 0)
  300. return ret;
  301. if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
  302. ret = opus_multistream_encode_float(opus->enc, (float *)audio,
  303. opus->opts.packet_size,
  304. avpkt->data, avpkt->size);
  305. else
  306. ret = opus_multistream_encode(opus->enc, (opus_int16 *)audio,
  307. opus->opts.packet_size,
  308. avpkt->data, avpkt->size);
  309. if (ret < 0) {
  310. av_log(avctx, AV_LOG_ERROR,
  311. "Error encoding frame: %s\n", opus_strerror(ret));
  312. return ff_opus_error_to_averror(ret);
  313. }
  314. av_shrink_packet(avpkt, ret);
  315. ff_af_queue_remove(&opus->afq, opus->opts.packet_size,
  316. &avpkt->pts, &avpkt->duration);
  317. discard_padding = opus->opts.packet_size - avpkt->duration;
  318. // Check if subtraction resulted in an overflow
  319. if ((discard_padding < opus->opts.packet_size) != (avpkt->duration > 0)) {
  320. av_free_packet(avpkt);
  321. av_free(avpkt);
  322. return AVERROR(EINVAL);
  323. }
  324. if (discard_padding > 0) {
  325. uint8_t* side_data = av_packet_new_side_data(avpkt,
  326. AV_PKT_DATA_SKIP_SAMPLES,
  327. 10);
  328. if(!side_data) {
  329. av_free_packet(avpkt);
  330. av_free(avpkt);
  331. return AVERROR(ENOMEM);
  332. }
  333. AV_WL32(side_data + 4, discard_padding);
  334. }
  335. *got_packet_ptr = 1;
  336. return 0;
  337. }
  338. static av_cold int libopus_encode_close(AVCodecContext *avctx)
  339. {
  340. LibopusEncContext *opus = avctx->priv_data;
  341. opus_multistream_encoder_destroy(opus->enc);
  342. ff_af_queue_close(&opus->afq);
  343. av_freep(&opus->samples);
  344. av_freep(&avctx->extradata);
  345. return 0;
  346. }
  /* Byte offset of an option field inside the private context. */
  #define OFFSET(x) offsetof(LibopusEncContext, opts.x)
  /* Flags shared by every option of this encoder. The expansion is
   * parenthesised so it stays a single operand wherever it is used. */
  #define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
  349. static const AVOption libopus_options[] = {
  350. { "application", "Intended application type", OFFSET(application), AV_OPT_TYPE_INT, { .i64 = OPUS_APPLICATION_AUDIO }, OPUS_APPLICATION_VOIP, OPUS_APPLICATION_RESTRICTED_LOWDELAY, FLAGS, "application" },
  351. { "voip", "Favor improved speech intelligibility", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_VOIP }, 0, 0, FLAGS, "application" },
  352. { "audio", "Favor faithfulness to the input", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_AUDIO }, 0, 0, FLAGS, "application" },
  353. { "lowdelay", "Restrict to only the lowest delay modes", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_RESTRICTED_LOWDELAY }, 0, 0, FLAGS, "application" },
  354. { "frame_duration", "Duration of a frame in milliseconds", OFFSET(frame_duration), AV_OPT_TYPE_FLOAT, { .dbl = 20.0 }, 2.5, 60.0, FLAGS },
  355. { "packet_loss", "Expected packet loss percentage", OFFSET(packet_loss), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, FLAGS },
  356. { "vbr", "Variable bit rate mode", OFFSET(vbr), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 2, FLAGS, "vbr" },
  357. { "off", "Use constant bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "vbr" },
  358. { "on", "Use variable bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "vbr" },
  359. { "constrained", "Use constrained VBR", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "vbr" },
  360. { NULL },
  361. };
  362. static const AVClass libopus_class = {
  363. .class_name = "libopus",
  364. .item_name = av_default_item_name,
  365. .option = libopus_options,
  366. .version = LIBAVUTIL_VERSION_INT,
  367. };
  368. static const AVCodecDefault libopus_defaults[] = {
  369. { "b", "0" },
  370. { "compression_level", "10" },
  371. { NULL },
  372. };
  /* Sample rates offered by this encoder, in Hz; the list ends with 0. */
  static const int libopus_sample_rates[] = {
      48000,
      24000,
      16000,
      12000,
      8000,
      0,
  };
  376. AVCodec ff_libopus_encoder = {
  377. .name = "libopus",
  378. .long_name = NULL_IF_CONFIG_SMALL("libopus Opus"),
  379. .type = AVMEDIA_TYPE_AUDIO,
  380. .id = AV_CODEC_ID_OPUS,
  381. .priv_data_size = sizeof(LibopusEncContext),
  382. .init = libopus_encode_init,
  383. .encode2 = libopus_encode,
  384. .close = libopus_encode_close,
  385. .capabilities = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
  386. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
  387. AV_SAMPLE_FMT_FLT,
  388. AV_SAMPLE_FMT_NONE },
  389. .channel_layouts = ff_vorbis_channel_layouts,
  390. .supported_samplerates = libopus_sample_rates,
  391. .priv_class = &libopus_class,
  392. .defaults = libopus_defaults,
  393. };