You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

584 lines
18KB

  1. /*
  2. * AAC decoder
  3. * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
  4. * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
  5. * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
  6. *
  7. * AAC LATM decoder
  8. * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
  9. * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
  10. *
  11. * This file is part of FFmpeg.
  12. *
  13. * FFmpeg is free software; you can redistribute it and/or
  14. * modify it under the terms of the GNU Lesser General Public
  15. * License as published by the Free Software Foundation; either
  16. * version 2.1 of the License, or (at your option) any later version.
  17. *
  18. * FFmpeg is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  21. * Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public
  24. * License along with FFmpeg; if not, write to the Free Software
  25. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  26. */
  27. /**
  28. * @file
  29. * AAC decoder
  30. * @author Oded Shimon ( ods15 ods15 dyndns org )
  31. * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  32. */
  33. #define FFT_FLOAT 1
  34. #define FFT_FIXED_32 0
  35. #define USE_FIXED 0
  36. #include "libavutil/float_dsp.h"
  37. #include "libavutil/opt.h"
  38. #include "avcodec.h"
  39. #include "internal.h"
  40. #include "get_bits.h"
  41. #include "fft.h"
  42. #include "imdct15.h"
  43. #include "lpc.h"
  44. #include "kbdwin.h"
  45. #include "sinewin.h"
  46. #include "aac.h"
  47. #include "aactab.h"
  48. #include "aacdectab.h"
  49. #include "cbrt_tablegen.h"
  50. #include "sbr.h"
  51. #include "aacsbr.h"
  52. #include "mpeg4audio.h"
  53. #include "aacadtsdec.h"
  54. #include "libavutil/intfloat.h"
  55. #include <errno.h>
  56. #include <math.h>
  57. #include <stdint.h>
  58. #include <string.h>
  59. #if ARCH_ARM
  60. # include "arm/aac.h"
  61. #elif ARCH_MIPS
  62. # include "mips/aacdec_mips.h"
  63. #endif
  64. static av_always_inline void reset_predict_state(PredictorState *ps)
  65. {
  66. ps->r0 = 0.0f;
  67. ps->r1 = 0.0f;
  68. ps->cor0 = 0.0f;
  69. ps->cor1 = 0.0f;
  70. ps->var0 = 1.0f;
  71. ps->var1 = 1.0f;
  72. }
  73. #ifndef VMUL2
  74. static inline float *VMUL2(float *dst, const float *v, unsigned idx,
  75. const float *scale)
  76. {
  77. float s = *scale;
  78. *dst++ = v[idx & 15] * s;
  79. *dst++ = v[idx>>4 & 15] * s;
  80. return dst;
  81. }
  82. #endif
  83. #ifndef VMUL4
  84. static inline float *VMUL4(float *dst, const float *v, unsigned idx,
  85. const float *scale)
  86. {
  87. float s = *scale;
  88. *dst++ = v[idx & 3] * s;
  89. *dst++ = v[idx>>2 & 3] * s;
  90. *dst++ = v[idx>>4 & 3] * s;
  91. *dst++ = v[idx>>6 & 3] * s;
  92. return dst;
  93. }
  94. #endif
  95. #ifndef VMUL2S
  96. static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
  97. unsigned sign, const float *scale)
  98. {
  99. union av_intfloat32 s0, s1;
  100. s0.f = s1.f = *scale;
  101. s0.i ^= sign >> 1 << 31;
  102. s1.i ^= sign << 31;
  103. *dst++ = v[idx & 15] * s0.f;
  104. *dst++ = v[idx>>4 & 15] * s1.f;
  105. return dst;
  106. }
  107. #endif
#ifndef VMUL4S
/**
 * Dequantize, scale and sign four codebook values, writing them to dst.
 *
 * Bits 0-7 of idx hold four 2-bit table indices (lowest pair first);
 * bits 12+ hold a non-zero bitmap (nz).  sign carries the sign bits
 * left-aligned at bit 31: after each value the bitmap flags as
 * non-zero, sign is shifted left by one so the next sign bit reaches
 * bit 31.  Signs are applied by XOR-ing bit 31 of the scale.
 *
 * @return dst advanced past the four written values
 */
static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    unsigned nz = idx >> 12;
    union av_intfloat32 s = { .f = *scale };
    union av_intfloat32 t;

    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx & 3] * t.f;

    sign <<= nz & 1;   // consume a sign bit only if this value was non-zero
    nz >>= 1;
    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx>>2 & 3] * t.f;

    sign <<= nz & 1;
    nz >>= 1;
    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx>>4 & 3] * t.f;

    sign <<= nz & 1;
    t.i = s.i ^ (sign & 1U<<31);
    *dst++ = v[idx>>6 & 3] * t.f;
    return dst;
}
#endif
  129. static av_always_inline float flt16_round(float pf)
  130. {
  131. union av_intfloat32 tmp;
  132. tmp.f = pf;
  133. tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
  134. return tmp.f;
  135. }
/**
 * Round a float to 16-bit mantissa precision with ties broken toward
 * even.
 *
 * NOTE(review): ">>" binds tighter than "&", so the tie-break term is
 * (tmp.i & 1), i.e. bit 0 — not bit 16 of tmp.i.  This matches the
 * long-standing bitstream-exact behavior of this decoder; confirm
 * against reference streams before "fixing" the precedence.
 */
static av_always_inline float flt16_even(float pf)
{
    union av_intfloat32 tmp;
    tmp.f = pf;
    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
    return tmp.f;
}
  143. static av_always_inline float flt16_trunc(float pf)
  144. {
  145. union av_intfloat32 pun;
  146. pun.f = pf;
  147. pun.i &= 0xFFFF0000U;
  148. return pun.f;
  149. }
/**
 * Second-order backward-adaptive predictor for one spectral
 * coefficient (AAC Main-profile prediction tool).  All intermediate
 * state is kept in reduced 16-bit-mantissa floats via the flt16_*
 * helpers, which is required for bit-exact decoding.
 *
 * @param ps            predictor state, updated in place
 * @param coef          spectral coefficient; the predicted value is
 *                      added to it when output_enable is set
 * @param output_enable nonzero to actually apply the prediction
 */
static av_always_inline void predict(PredictorState *ps, float *coef,
                                     int output_enable)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32
    float e0, e1;
    float pv;
    float k1, k2;
    float r0 = ps->r0, r1 = ps->r1;
    float cor0 = ps->cor0, cor1 = ps->cor1;
    float var0 = ps->var0, var1 = ps->var1;

    /* per-tap adaptive gains; taps with variance <= 1 contribute nothing */
    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;

    pv = flt16_round(k1 * r0 + k2 * r1);
    if (output_enable)
        *coef += pv;

    /* update correlations, variances and lattice history from the
     * (possibly reconstructed) coefficient; order of writes matters
     * because each uses the pre-update r0/r1 values */
    e0 = *coef;
    e1 = e0 - k1 * r0;

    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));

    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
    ps->r0 = flt16_trunc(a * e0);
}
/**
 * Apply dependent channel coupling (applied before IMDCT).
 *
 * Adds the coupling channel's spectral coefficients, scaled by the
 * per-band coupling gain, onto the target channel's coefficients.
 *
 * @param index index into coupling gain array
 */
static void apply_dependent_coupling(AACContext *ac,
                                     SingleChannelElement *target,
                                     ChannelElement *cce, int index)
{
    IndividualChannelStream *ics = &cce->ch[0].ics;
    const uint16_t *offsets = ics->swb_offset;   // scalefactor band boundaries
    float *dest = target->coeffs;
    const float *src = cce->ch[0].coeffs;
    int g, i, group, k, idx = 0;

    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
        av_log(ac->avctx, AV_LOG_ERROR,
               "Dependent coupling is not supported together with LTP\n");
        return;
    }
    /* idx walks (window group, sfb) pairs, matching the layout of
     * band_type[] and coup.gain[][] */
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb; i++, idx++) {
            if (cce->ch[0].band_type[idx] != ZERO_BT) {
                const float gain = cce->coup.gain[index][idx];
                for (group = 0; group < ics->group_len[g]; group++) {
                    for (k = offsets[i]; k < offsets[i + 1]; k++) {
                        // FIXME: SIMDify
                        dest[group * 128 + k] += gain * src[group * 128 + k];
                    }
                }
            }
        }
        /* advance past all 128-sample windows of this group */
        dest += ics->group_len[g] * 128;
        src  += ics->group_len[g] * 128;
    }
}
  210. /**
  211. * Apply independent channel coupling (applied after IMDCT).
  212. *
  213. * @param index index into coupling gain array
  214. */
  215. static void apply_independent_coupling(AACContext *ac,
  216. SingleChannelElement *target,
  217. ChannelElement *cce, int index)
  218. {
  219. int i;
  220. const float gain = cce->coup.gain[index][0];
  221. const float *src = cce->ch[0].ret;
  222. float *dest = target->ret;
  223. const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
  224. for (i = 0; i < len; i++)
  225. dest[i] += gain * src[i];
  226. }
  227. #include "aacdec_template.c"
  228. #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
/** Decoder context for LOAS/LATM-wrapped AAC streams. */
struct LATMContext {
    AACContext aac_ctx;      ///< containing AACContext
    int initialized;         ///< initialized after a valid extradata was seen

    // parser data
    int audio_mux_version_A; ///< LATM syntax version
    int frame_length_type;   ///< 0/1 variable/fixed frame length
    int frame_length;        ///< frame length for fixed frame length
};
  237. static inline uint32_t latm_get_value(GetBitContext *b)
  238. {
  239. int length = get_bits(b, 2);
  240. return get_bits_long(b, (length+1)*8);
  241. }
  242. static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
  243. GetBitContext *gb, int asclen)
  244. {
  245. AACContext *ac = &latmctx->aac_ctx;
  246. AVCodecContext *avctx = ac->avctx;
  247. MPEG4AudioConfig m4ac = { 0 };
  248. int config_start_bit = get_bits_count(gb);
  249. int sync_extension = 0;
  250. int bits_consumed, esize;
  251. if (asclen) {
  252. sync_extension = 1;
  253. asclen = FFMIN(asclen, get_bits_left(gb));
  254. } else
  255. asclen = get_bits_left(gb);
  256. if (config_start_bit % 8) {
  257. avpriv_request_sample(latmctx->aac_ctx.avctx,
  258. "Non-byte-aligned audio-specific config");
  259. return AVERROR_PATCHWELCOME;
  260. }
  261. if (asclen <= 0)
  262. return AVERROR_INVALIDDATA;
  263. bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
  264. gb->buffer + (config_start_bit / 8),
  265. asclen, sync_extension);
  266. if (bits_consumed < 0)
  267. return AVERROR_INVALIDDATA;
  268. if (!latmctx->initialized ||
  269. ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
  270. ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
  271. if(latmctx->initialized) {
  272. av_log(avctx, AV_LOG_INFO, "audio config changed\n");
  273. } else {
  274. av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
  275. }
  276. latmctx->initialized = 0;
  277. esize = (bits_consumed+7) / 8;
  278. if (avctx->extradata_size < esize) {
  279. av_free(avctx->extradata);
  280. avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
  281. if (!avctx->extradata)
  282. return AVERROR(ENOMEM);
  283. }
  284. avctx->extradata_size = esize;
  285. memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
  286. memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
  287. }
  288. skip_bits_long(gb, bits_consumed);
  289. return bits_consumed;
  290. }
/**
 * Parse a StreamMuxConfig (LATM).
 *
 * Only the subset this decoder supports is handled: a single program
 * with a single layer and audioMuxVersionA == 0; anything else returns
 * AVERROR_PATCHWELCOME.  Updates the frame-length bookkeeping in
 * latmctx and parses the embedded AudioSpecificConfig.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int read_stream_mux_config(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    int ret, audio_mux_version = get_bits(gb, 1);

    latmctx->audio_mux_version_A = 0;
    if (audio_mux_version)
        latmctx->audio_mux_version_A = get_bits(gb, 1);

    if (!latmctx->audio_mux_version_A) {

        if (audio_mux_version)
            latm_get_value(gb);                 // taraFullness

        skip_bits(gb, 1);                       // allStreamSameTimeFraming
        skip_bits(gb, 6);                       // numSubFrames
        // numPrograms
        if (get_bits(gb, 4)) {                  // numPrograms
            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
            return AVERROR_PATCHWELCOME;
        }

        // for each program (which there is only one in DVB)

        // for each layer (which there is only one in DVB)
        if (get_bits(gb, 3)) {                  // numLayer
            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
            return AVERROR_PATCHWELCOME;
        }

        // for all but first stream: use_same_config = get_bits(gb, 1);
        if (!audio_mux_version) {
            if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
                return ret;
        } else {
            int ascLen = latm_get_value(gb);
            if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
                return ret;
            /* skip any config bits the ASC parser did not consume */
            ascLen -= ret;
            skip_bits_long(gb, ascLen);
        }

        latmctx->frame_length_type = get_bits(gb, 3);
        switch (latmctx->frame_length_type) {
        case 0:
            skip_bits(gb, 8);                   // latmBufferFullness
            break;
        case 1:
            latmctx->frame_length = get_bits(gb, 9);
            break;
        case 3:
        case 4:
        case 5:
            skip_bits(gb, 6);                   // CELP frame length table index
            break;
        case 6:
        case 7:
            skip_bits(gb, 1);                   // HVXC frame length table index
            break;
        }

        if (get_bits(gb, 1)) {                  // other data
            if (audio_mux_version) {
                latm_get_value(gb);             // other_data_bits
            } else {
                int esc;
                /* otherDataLenBits: 8-bit chunks chained by an escape flag */
                do {
                    esc = get_bits(gb, 1);
                    skip_bits(gb, 8);
                } while (esc);
            }
        }

        if (get_bits(gb, 1))                    // crc present
            skip_bits(gb, 8);                   // config_crc
    }

    return 0;
}
  359. static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
  360. {
  361. uint8_t tmp;
  362. if (ctx->frame_length_type == 0) {
  363. int mux_slot_length = 0;
  364. do {
  365. tmp = get_bits(gb, 8);
  366. mux_slot_length += tmp;
  367. } while (tmp == 255);
  368. return mux_slot_length;
  369. } else if (ctx->frame_length_type == 1) {
  370. return ctx->frame_length;
  371. } else if (ctx->frame_length_type == 3 ||
  372. ctx->frame_length_type == 5 ||
  373. ctx->frame_length_type == 7) {
  374. skip_bits(gb, 2); // mux_slot_length_coded
  375. }
  376. return 0;
  377. }
/**
 * Parse an AudioMuxElement up to the start of the AAC payload.
 *
 * Reads the inline StreamMuxConfig when useSameStreamMux is 0 and
 * sanity-checks the signalled payload length against the remaining
 * bits in the packet.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int read_audio_mux_element(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    int err;
    uint8_t use_same_mux = get_bits(gb, 1);
    if (!use_same_mux) {
        if ((err = read_stream_mux_config(latmctx, gb)) < 0)
            return err;
    } else if (!latmctx->aac_ctx.avctx->extradata) {
        /* no config seen yet, neither inline nor out-of-band */
        av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
               "no decoder config found\n");
        return AVERROR(EAGAIN);
    }
    if (latmctx->audio_mux_version_A == 0) {
        int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
        if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
            return AVERROR_INVALIDDATA;
        } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
            /* payload implausibly short compared with the packet size */
            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
                   "frame length mismatch %d << %d\n",
                   mux_slot_length_bytes * 8, get_bits_left(gb));
            return AVERROR_INVALIDDATA;
        }
    }
    return 0;
}
/**
 * Decode one LOAS/LATM packet.
 *
 * Expects the packet to start with a LOAS sync word; parses the
 * AudioMuxElement header, initializes from out-of-band extradata if no
 * inline config was seen yet, then hands the payload to the regular or
 * error-resilient AAC frame decoder.
 *
 * @return number of bytes consumed (the LOAS mux length, or the whole
 *         packet when no config is available yet), or a negative
 *         AVERROR code
 */
static int latm_decode_frame(AVCodecContext *avctx, void *out,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    struct LATMContext *latmctx = avctx->priv_data;
    int muxlength, err;
    GetBitContext gb;

    if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
        return err;

    // check for LOAS sync word
    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
        return AVERROR_INVALIDDATA;

    muxlength = get_bits(&gb, 13) + 3;
    // not enough data, the parser should have sorted this out
    if (muxlength > avpkt->size)
        return AVERROR_INVALIDDATA;

    if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
        return err;

    if (!latmctx->initialized) {
        if (!avctx->extradata) {
            /* no config available yet: consume the packet, output nothing */
            *got_frame_ptr = 0;
            return avpkt->size;
        } else {
            push_output_configuration(&latmctx->aac_ctx);
            if ((err = decode_audio_specific_config(
                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
                    avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
                pop_output_configuration(&latmctx->aac_ctx);
                return err;
            }
            latmctx->initialized = 1;
        }
    }

    /* a payload starting with an ADTS sync pattern means the config
     * was misparsed upstream */
    if (show_bits(&gb, 12) == 0xfff) {
        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
               "ADTS header detected, probably as result of configuration "
               "misparsing\n");
        return AVERROR_INVALIDDATA;
    }

    switch (latmctx->aac_ctx.oc[1].m4ac.object_type) {
    case AOT_ER_AAC_LC:
    case AOT_ER_AAC_LTP:
    case AOT_ER_AAC_LD:
    case AOT_ER_AAC_ELD:
        err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
        break;
    default:
        err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
    }
    if (err < 0)
        return err;

    return muxlength;
}
  457. static av_cold int latm_decode_init(AVCodecContext *avctx)
  458. {
  459. struct LATMContext *latmctx = avctx->priv_data;
  460. int ret = aac_decode_init(avctx);
  461. if (avctx->extradata_size > 0)
  462. latmctx->initialized = !ret;
  463. return ret;
  464. }
/** Native AAC decoder (raw/ADTS input, planar float output). */
AVCodec ff_aac_decoder = {
    .name            = "aac",
    .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
    .type            = AVMEDIA_TYPE_AUDIO,
    .id              = AV_CODEC_ID_AAC,
    .priv_data_size  = sizeof(AACContext),
    .init            = aac_decode_init,
    .close           = aac_decode_close,
    .decode          = aac_decode_frame,
    .sample_fmts     = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
    },
    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
    .channel_layouts = aac_channel_layout,
    .flush           = flush,
    .priv_class      = &aac_decoder_class,
    .profiles        = profiles,
};
/*
    Note: This decoder filter is intended to decode LATM streams transferred
    in MPEG transport streams which only contain one program.
    To do more complex LATM demuxing, a separate LATM demuxer should be used.
*/
/** AAC decoder for LOAS/LATM-wrapped streams (shares the core decoder,
 *  adds LATM demultiplexing via latm_decode_frame). */
AVCodec ff_aac_latm_decoder = {
    .name            = "aac_latm",
    .long_name       = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
    .type            = AVMEDIA_TYPE_AUDIO,
    .id              = AV_CODEC_ID_AAC_LATM,
    .priv_data_size  = sizeof(struct LATMContext),
    .init            = latm_decode_init,
    .close           = aac_decode_close,
    .decode          = latm_decode_frame,
    .sample_fmts     = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
    },
    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
    .channel_layouts = aac_channel_layout,
    .flush           = flush,
    .profiles        = profiles,
};