You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

478 lines
18KB

  1. /*
  2. * AAC encoder wrapper
  3. * Copyright (c) 2012 Martin Storsjo
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * Permission to use, copy, modify, and/or distribute this software for any
  8. * purpose with or without fee is hereby granted, provided that the above
  9. * copyright notice and this permission notice appear in all copies.
  10. *
  11. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18. */
  19. #include <fdk-aac/aacenc_lib.h>
  20. #include "libavutil/channel_layout.h"
  21. #include "libavutil/common.h"
  22. #include "libavutil/opt.h"
  23. #include "avcodec.h"
  24. #include "audio_frame_queue.h"
  25. #include "internal.h"
  /*
   * FDKENC_VER_AT_LEAST(vl0, vl1): non-zero when the fdk-aac encoder library
   * version (AACENCODER_LIB_VL0.AACENCODER_LIB_VL1) is at least vl0.vl1.
   * When the header does not define AACENCODER_LIB_VL0 the check always
   * evaluates to 0. Arguments are parenthesized so that expression arguments
   * expand safely.
   */
  #ifdef AACENCODER_LIB_VL0
  #define FDKENC_VER_AT_LEAST(vl0, vl1) \
      ((AACENCODER_LIB_VL0 > (vl0)) || \
       (AACENCODER_LIB_VL0 == (vl0) && AACENCODER_LIB_VL1 >= (vl1)))
  #else
  #define FDKENC_VER_AT_LEAST(vl0, vl1) 0
  #endif
  33. typedef struct AACContext {
  34. const AVClass *class;
  35. HANDLE_AACENCODER handle;
  36. int afterburner;
  37. int eld_sbr;
  38. int eld_v2;
  39. int signaling;
  40. int latm;
  41. int header_period;
  42. int vbr;
  43. AudioFrameQueue afq;
  44. } AACContext;
  45. static const AVOption aac_enc_options[] = {
  46. { "afterburner", "Afterburner (improved quality)", offsetof(AACContext, afterburner), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
  47. { "eld_sbr", "Enable SBR for ELD (for SBR in other configurations, use the -profile parameter)", offsetof(AACContext, eld_sbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
  48. #if FDKENC_VER_AT_LEAST(4, 0) // 4.0.0
  49. { "eld_v2", "Enable ELDv2 (LD-MPS extension for ELD stereo signals)", offsetof(AACContext, eld_v2), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
  50. #endif
  51. { "signaling", "SBR/PS signaling style", offsetof(AACContext, signaling), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 2, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
  52. { "default", "Choose signaling implicitly (explicit hierarchical by default, implicit if global header is disabled)", 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
  53. { "implicit", "Implicit backwards compatible signaling", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
  54. { "explicit_sbr", "Explicit SBR, implicit PS signaling", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
  55. { "explicit_hierarchical", "Explicit hierarchical signaling", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
  56. { "latm", "Output LATM/LOAS encapsulated data", offsetof(AACContext, latm), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
  57. { "header_period", "StreamMuxConfig and PCE repetition period (in frames)", offsetof(AACContext, header_period), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 0xffff, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
  58. { "vbr", "VBR mode (1-5)", offsetof(AACContext, vbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 5, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
  59. { NULL }
  60. };
  61. static const AVClass aac_enc_class = {
  62. .class_name = "libfdk_aac",
  63. .item_name = av_default_item_name,
  64. .option = aac_enc_options,
  65. .version = LIBAVUTIL_VERSION_INT,
  66. };
  67. static const char *aac_get_error(AACENC_ERROR err)
  68. {
  69. switch (err) {
  70. case AACENC_OK:
  71. return "No error";
  72. case AACENC_INVALID_HANDLE:
  73. return "Invalid handle";
  74. case AACENC_MEMORY_ERROR:
  75. return "Memory allocation error";
  76. case AACENC_UNSUPPORTED_PARAMETER:
  77. return "Unsupported parameter";
  78. case AACENC_INVALID_CONFIG:
  79. return "Invalid config";
  80. case AACENC_INIT_ERROR:
  81. return "Initialization error";
  82. case AACENC_INIT_AAC_ERROR:
  83. return "AAC library initialization error";
  84. case AACENC_INIT_SBR_ERROR:
  85. return "SBR library initialization error";
  86. case AACENC_INIT_TP_ERROR:
  87. return "Transport library initialization error";
  88. case AACENC_INIT_META_ERROR:
  89. return "Metadata library initialization error";
  90. case AACENC_ENCODE_ERROR:
  91. return "Encoding error";
  92. case AACENC_ENCODE_EOF:
  93. return "End of file";
  94. default:
  95. return "Unknown error";
  96. }
  97. }
  98. static int aac_encode_close(AVCodecContext *avctx)
  99. {
  100. AACContext *s = avctx->priv_data;
  101. if (s->handle)
  102. aacEncClose(&s->handle);
  103. av_freep(&avctx->extradata);
  104. ff_af_queue_close(&s->afq);
  105. return 0;
  106. }
  107. static av_cold int aac_encode_init(AVCodecContext *avctx)
  108. {
  109. AACContext *s = avctx->priv_data;
  110. int ret = AVERROR(EINVAL);
  111. AACENC_InfoStruct info = { 0 };
  112. CHANNEL_MODE mode;
  113. AACENC_ERROR err;
  114. int aot = FF_PROFILE_AAC_LOW + 1;
  115. int sce = 0, cpe = 0;
  116. if ((err = aacEncOpen(&s->handle, 0, avctx->channels)) != AACENC_OK) {
  117. av_log(avctx, AV_LOG_ERROR, "Unable to open the encoder: %s\n",
  118. aac_get_error(err));
  119. goto error;
  120. }
  121. if (avctx->profile != FF_PROFILE_UNKNOWN)
  122. aot = avctx->profile + 1;
  123. if ((err = aacEncoder_SetParam(s->handle, AACENC_AOT, aot)) != AACENC_OK) {
  124. av_log(avctx, AV_LOG_ERROR, "Unable to set the AOT %d: %s\n",
  125. aot, aac_get_error(err));
  126. goto error;
  127. }
  128. if (aot == FF_PROFILE_AAC_ELD + 1 && s->eld_sbr) {
  129. if ((err = aacEncoder_SetParam(s->handle, AACENC_SBR_MODE,
  130. 1)) != AACENC_OK) {
  131. av_log(avctx, AV_LOG_ERROR, "Unable to enable SBR for ELD: %s\n",
  132. aac_get_error(err));
  133. goto error;
  134. }
  135. }
  136. if ((err = aacEncoder_SetParam(s->handle, AACENC_SAMPLERATE,
  137. avctx->sample_rate)) != AACENC_OK) {
  138. av_log(avctx, AV_LOG_ERROR, "Unable to set the sample rate %d: %s\n",
  139. avctx->sample_rate, aac_get_error(err));
  140. goto error;
  141. }
  142. switch (avctx->channels) {
  143. case 1: mode = MODE_1; sce = 1; cpe = 0; break;
  144. case 2:
  145. #if FDKENC_VER_AT_LEAST(4, 0) // 4.0.0
  146. // (profile + 1) to map from profile range to AOT range
  147. if (aot == FF_PROFILE_AAC_ELD + 1 && s->eld_v2) {
  148. if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELMODE,
  149. 128)) != AACENC_OK) {
  150. av_log(avctx, AV_LOG_ERROR, "Unable to enable ELDv2: %s\n",
  151. aac_get_error(err));
  152. goto error;
  153. } else {
  154. mode = MODE_212;
  155. sce = 1;
  156. cpe = 0;
  157. }
  158. } else
  159. #endif
  160. {
  161. mode = MODE_2;
  162. sce = 0;
  163. cpe = 1;
  164. }
  165. break;
  166. case 3: mode = MODE_1_2; sce = 1; cpe = 1; break;
  167. case 4: mode = MODE_1_2_1; sce = 2; cpe = 1; break;
  168. case 5: mode = MODE_1_2_2; sce = 1; cpe = 2; break;
  169. case 6: mode = MODE_1_2_2_1; sce = 2; cpe = 2; break;
  170. /* The version macro is introduced the same time as the 7.1 support, so this
  171. should suffice. */
  172. #if FDKENC_VER_AT_LEAST(3, 4) // 3.4.12
  173. case 8:
  174. sce = 2;
  175. cpe = 3;
  176. if (avctx->channel_layout == AV_CH_LAYOUT_7POINT1) {
  177. mode = MODE_7_1_REAR_SURROUND;
  178. } else {
  179. // MODE_1_2_2_2_1 and MODE_7_1_FRONT_CENTER use the same channel layout
  180. mode = MODE_7_1_FRONT_CENTER;
  181. }
  182. break;
  183. #endif
  184. default:
  185. av_log(avctx, AV_LOG_ERROR,
  186. "Unsupported number of channels %d\n", avctx->channels);
  187. goto error;
  188. }
  189. if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELMODE,
  190. mode)) != AACENC_OK) {
  191. av_log(avctx, AV_LOG_ERROR,
  192. "Unable to set channel mode %d: %s\n", mode, aac_get_error(err));
  193. goto error;
  194. }
  195. if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELORDER,
  196. 1)) != AACENC_OK) {
  197. av_log(avctx, AV_LOG_ERROR,
  198. "Unable to set wav channel order %d: %s\n",
  199. mode, aac_get_error(err));
  200. goto error;
  201. }
  202. if (avctx->flags & AV_CODEC_FLAG_QSCALE || s->vbr) {
  203. int mode = s->vbr ? s->vbr : avctx->global_quality;
  204. if (mode < 1 || mode > 5) {
  205. av_log(avctx, AV_LOG_WARNING,
  206. "VBR quality %d out of range, should be 1-5\n", mode);
  207. mode = av_clip(mode, 1, 5);
  208. }
  209. av_log(avctx, AV_LOG_WARNING,
  210. "Note, the VBR setting is unsupported and only works with "
  211. "some parameter combinations\n");
  212. if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATEMODE,
  213. mode)) != AACENC_OK) {
  214. av_log(avctx, AV_LOG_ERROR, "Unable to set the VBR bitrate mode %d: %s\n",
  215. mode, aac_get_error(err));
  216. goto error;
  217. }
  218. } else {
  219. if (avctx->bit_rate <= 0) {
  220. if (avctx->profile == FF_PROFILE_AAC_HE_V2) {
  221. sce = 1;
  222. cpe = 0;
  223. }
  224. avctx->bit_rate = (96*sce + 128*cpe) * avctx->sample_rate / 44;
  225. if (avctx->profile == FF_PROFILE_AAC_HE ||
  226. avctx->profile == FF_PROFILE_AAC_HE_V2 ||
  227. avctx->profile == FF_PROFILE_MPEG2_AAC_HE ||
  228. s->eld_sbr)
  229. avctx->bit_rate /= 2;
  230. }
  231. if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATE,
  232. avctx->bit_rate)) != AACENC_OK) {
  233. av_log(avctx, AV_LOG_ERROR, "Unable to set the bitrate %"PRId64": %s\n",
  234. avctx->bit_rate, aac_get_error(err));
  235. goto error;
  236. }
  237. }
  238. /* Choose bitstream format - if global header is requested, use
  239. * raw access units, otherwise use ADTS. */
  240. if ((err = aacEncoder_SetParam(s->handle, AACENC_TRANSMUX,
  241. avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER ? TT_MP4_RAW :
  242. s->latm ? TT_MP4_LOAS : TT_MP4_ADTS)) != AACENC_OK) {
  243. av_log(avctx, AV_LOG_ERROR, "Unable to set the transmux format: %s\n",
  244. aac_get_error(err));
  245. goto error;
  246. }
  247. if (s->latm && s->header_period) {
  248. if ((err = aacEncoder_SetParam(s->handle, AACENC_HEADER_PERIOD,
  249. s->header_period)) != AACENC_OK) {
  250. av_log(avctx, AV_LOG_ERROR, "Unable to set header period: %s\n",
  251. aac_get_error(err));
  252. goto error;
  253. }
  254. }
  255. /* If no signaling mode is chosen, use explicit hierarchical signaling
  256. * if using mp4 mode (raw access units, with global header) and
  257. * implicit signaling if using ADTS. */
  258. if (s->signaling < 0)
  259. s->signaling = avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
  260. if ((err = aacEncoder_SetParam(s->handle, AACENC_SIGNALING_MODE,
  261. s->signaling)) != AACENC_OK) {
  262. av_log(avctx, AV_LOG_ERROR, "Unable to set signaling mode %d: %s\n",
  263. s->signaling, aac_get_error(err));
  264. goto error;
  265. }
  266. if ((err = aacEncoder_SetParam(s->handle, AACENC_AFTERBURNER,
  267. s->afterburner)) != AACENC_OK) {
  268. av_log(avctx, AV_LOG_ERROR, "Unable to set afterburner to %d: %s\n",
  269. s->afterburner, aac_get_error(err));
  270. goto error;
  271. }
  272. if (avctx->cutoff > 0) {
  273. if (avctx->cutoff < (avctx->sample_rate + 255) >> 8 || avctx->cutoff > 20000) {
  274. av_log(avctx, AV_LOG_ERROR, "cutoff valid range is %d-20000\n",
  275. (avctx->sample_rate + 255) >> 8);
  276. goto error;
  277. }
  278. if ((err = aacEncoder_SetParam(s->handle, AACENC_BANDWIDTH,
  279. avctx->cutoff)) != AACENC_OK) {
  280. av_log(avctx, AV_LOG_ERROR, "Unable to set the encoder bandwidth to %d: %s\n",
  281. avctx->cutoff, aac_get_error(err));
  282. goto error;
  283. }
  284. }
  285. if ((err = aacEncEncode(s->handle, NULL, NULL, NULL, NULL)) != AACENC_OK) {
  286. av_log(avctx, AV_LOG_ERROR, "Unable to initialize the encoder: %s\n",
  287. aac_get_error(err));
  288. return AVERROR(EINVAL);
  289. }
  290. if ((err = aacEncInfo(s->handle, &info)) != AACENC_OK) {
  291. av_log(avctx, AV_LOG_ERROR, "Unable to get encoder info: %s\n",
  292. aac_get_error(err));
  293. goto error;
  294. }
  295. avctx->frame_size = info.frameLength;
  296. #if FDKENC_VER_AT_LEAST(4, 0) // 4.0.0
  297. avctx->initial_padding = info.nDelay;
  298. #else
  299. avctx->initial_padding = info.encoderDelay;
  300. #endif
  301. ff_af_queue_init(avctx, &s->afq);
  302. if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
  303. avctx->extradata_size = info.confSize;
  304. avctx->extradata = av_mallocz(avctx->extradata_size +
  305. AV_INPUT_BUFFER_PADDING_SIZE);
  306. if (!avctx->extradata) {
  307. ret = AVERROR(ENOMEM);
  308. goto error;
  309. }
  310. memcpy(avctx->extradata, info.confBuf, info.confSize);
  311. }
  312. return 0;
  313. error:
  314. aac_encode_close(avctx);
  315. return ret;
  316. }
  317. static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  318. const AVFrame *frame, int *got_packet_ptr)
  319. {
  320. AACContext *s = avctx->priv_data;
  321. AACENC_BufDesc in_buf = { 0 }, out_buf = { 0 };
  322. AACENC_InArgs in_args = { 0 };
  323. AACENC_OutArgs out_args = { 0 };
  324. int in_buffer_identifier = IN_AUDIO_DATA;
  325. int in_buffer_size, in_buffer_element_size;
  326. int out_buffer_identifier = OUT_BITSTREAM_DATA;
  327. int out_buffer_size, out_buffer_element_size;
  328. void *in_ptr, *out_ptr;
  329. int ret;
  330. uint8_t dummy_buf[1];
  331. AACENC_ERROR err;
  332. /* handle end-of-stream small frame and flushing */
  333. if (!frame) {
  334. /* Must be a non-null pointer, even if it's a dummy. We could use
  335. * the address of anything else on the stack as well. */
  336. in_ptr = dummy_buf;
  337. in_buffer_size = 0;
  338. in_args.numInSamples = -1;
  339. } else {
  340. in_ptr = frame->data[0];
  341. in_buffer_size = 2 * avctx->channels * frame->nb_samples;
  342. in_args.numInSamples = avctx->channels * frame->nb_samples;
  343. /* add current frame to the queue */
  344. if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
  345. return ret;
  346. }
  347. in_buffer_element_size = 2;
  348. in_buf.numBufs = 1;
  349. in_buf.bufs = &in_ptr;
  350. in_buf.bufferIdentifiers = &in_buffer_identifier;
  351. in_buf.bufSizes = &in_buffer_size;
  352. in_buf.bufElSizes = &in_buffer_element_size;
  353. /* The maximum packet size is 6144 bits aka 768 bytes per channel. */
  354. if ((ret = ff_alloc_packet2(avctx, avpkt, FFMAX(8192, 768 * avctx->channels), 0)) < 0)
  355. return ret;
  356. out_ptr = avpkt->data;
  357. out_buffer_size = avpkt->size;
  358. out_buffer_element_size = 1;
  359. out_buf.numBufs = 1;
  360. out_buf.bufs = &out_ptr;
  361. out_buf.bufferIdentifiers = &out_buffer_identifier;
  362. out_buf.bufSizes = &out_buffer_size;
  363. out_buf.bufElSizes = &out_buffer_element_size;
  364. if ((err = aacEncEncode(s->handle, &in_buf, &out_buf, &in_args,
  365. &out_args)) != AACENC_OK) {
  366. if (!frame && err == AACENC_ENCODE_EOF)
  367. return 0;
  368. av_log(avctx, AV_LOG_ERROR, "Unable to encode frame: %s\n",
  369. aac_get_error(err));
  370. return AVERROR(EINVAL);
  371. }
  372. if (!out_args.numOutBytes)
  373. return 0;
  374. /* Get the next frame pts & duration */
  375. ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
  376. &avpkt->duration);
  377. avpkt->size = out_args.numOutBytes;
  378. *got_packet_ptr = 1;
  379. return 0;
  380. }
  381. static const AVProfile profiles[] = {
  382. { FF_PROFILE_AAC_LOW, "LC" },
  383. { FF_PROFILE_AAC_HE, "HE-AAC" },
  384. { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
  385. { FF_PROFILE_AAC_LD, "LD" },
  386. { FF_PROFILE_AAC_ELD, "ELD" },
  387. { FF_PROFILE_UNKNOWN },
  388. };
  389. static const AVCodecDefault aac_encode_defaults[] = {
  390. { "b", "0" },
  391. { NULL }
  392. };
  393. static const uint64_t aac_channel_layout[] = {
  394. AV_CH_LAYOUT_MONO,
  395. AV_CH_LAYOUT_STEREO,
  396. AV_CH_LAYOUT_SURROUND,
  397. AV_CH_LAYOUT_4POINT0,
  398. AV_CH_LAYOUT_5POINT0_BACK,
  399. AV_CH_LAYOUT_5POINT1_BACK,
  400. #if FDKENC_VER_AT_LEAST(3, 4) // 3.4.12
  401. AV_CH_LAYOUT_7POINT1_WIDE_BACK,
  402. AV_CH_LAYOUT_7POINT1,
  403. #endif
  404. 0,
  405. };
  /** Sample rates (Hz) offered by this encoder, highest first, zero-terminated. */
  static const int aac_sample_rates[] = {
      96000, 88200, 64000,
      48000, 44100, 32000,
      24000, 22050, 16000,
      12000, 11025,  8000,
      0
  };
  410. AVCodec ff_libfdk_aac_encoder = {
  411. .name = "libfdk_aac",
  412. .long_name = NULL_IF_CONFIG_SMALL("Fraunhofer FDK AAC"),
  413. .type = AVMEDIA_TYPE_AUDIO,
  414. .id = AV_CODEC_ID_AAC,
  415. .priv_data_size = sizeof(AACContext),
  416. .init = aac_encode_init,
  417. .encode2 = aac_encode_frame,
  418. .close = aac_encode_close,
  419. .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
  420. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
  421. AV_SAMPLE_FMT_NONE },
  422. .priv_class = &aac_enc_class,
  423. .defaults = aac_encode_defaults,
  424. .profiles = profiles,
  425. .supported_samplerates = aac_sample_rates,
  426. .channel_layouts = aac_channel_layout,
  427. .wrapper_name = "libfdk",
  428. };