You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

438 lines
15KB

  1. /*
  2. * AAC decoder
  3. * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
  4. * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. /**
  23. * @file aac.c
  24. * AAC decoder
  25. * @author Oded Shimon ( ods15 ods15 dyndns org )
  26. * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  27. */
  28. /*
  29. * supported tools
  30. *
  31. * Support? Name
  32. * N (code in SoC repo) gain control
  33. * Y block switching
  34. * Y window shapes - standard
  35. * N window shapes - Low Delay
  36. * Y filterbank - standard
  37. * N (code in SoC repo) filterbank - Scalable Sample Rate
  38. * Y Temporal Noise Shaping
  39. * N (code in SoC repo) Long Term Prediction
  40. * Y intensity stereo
  41. * Y channel coupling
  42. * N frequency domain prediction
  43. * Y Perceptual Noise Substitution
  44. * Y Mid/Side stereo
  45. * N Scalable Inverse AAC Quantization
  46. * N Frequency Selective Switch
  47. * N upsampling filter
  48. * Y quantization & coding - AAC
  49. * N quantization & coding - TwinVQ
  50. * N quantization & coding - BSAC
  51. * N AAC Error Resilience tools
  52. * N Error Resilience payload syntax
  53. * N Error Protection tool
  54. * N CELP
  55. * N Silence Compression
  56. * N HVXC
  57. * N HVXC 4kbits/s VR
  58. * N Structured Audio tools
  59. * N Structured Audio Sample Bank Format
  60. * N MIDI
  61. * N Harmonic and Individual Lines plus Noise
  62. * N Text-To-Speech Interface
  63. * N (in progress) Spectral Band Replication
  64. * Y (not in this code) Layer-1
  65. * Y (not in this code) Layer-2
  66. * Y (not in this code) Layer-3
  67. * N SinuSoidal Coding (Transient, Sinusoid, Noise)
  68. * N (planned) Parametric Stereo
  69. * N Direct Stream Transfer
  70. *
  71. * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  72. * - HE AAC v2 comprises LC AAC with Spectral Band Replication and
  73. Parametric Stereo.
  74. */
  75. #include "avcodec.h"
  76. #include "bitstream.h"
  77. #include "dsputil.h"
  78. #include "aac.h"
  79. #include "aactab.h"
  80. #include "aacdectab.h"
  81. #include "mpeg4audio.h"
  82. #include <assert.h>
  83. #include <errno.h>
  84. #include <math.h>
  85. #include <string.h>
  86. #ifndef CONFIG_HARDCODED_TABLES
  87. static float ff_aac_ivquant_tab[IVQUANT_SIZE];
  88. static float ff_aac_pow2sf_tab[316];
  89. #endif /* CONFIG_HARDCODED_TABLES */
  90. static VLC vlc_scalefactors;
  91. static VLC vlc_spectral[11];
  92. num_front = get_bits(gb, 4);
  93. num_side = get_bits(gb, 4);
  94. num_back = get_bits(gb, 4);
  95. num_lfe = get_bits(gb, 2);
  96. num_assoc_data = get_bits(gb, 3);
  97. num_cc = get_bits(gb, 4);
  98. if (get_bits1(gb))
  99. skip_bits(gb, 4); // mono_mixdown_tag
  100. if (get_bits1(gb))
  101. skip_bits(gb, 4); // stereo_mixdown_tag
  102. if (get_bits1(gb))
  103. skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
  104. decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_FRONT, gb, num_front);
  105. decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_SIDE, gb, num_side );
  106. decode_channel_map(new_che_pos[TYPE_CPE], new_che_pos[TYPE_SCE], AAC_CHANNEL_BACK, gb, num_back );
  107. decode_channel_map(NULL, new_che_pos[TYPE_LFE], AAC_CHANNEL_LFE, gb, num_lfe );
  108. skip_bits_long(gb, 4 * num_assoc_data);
  109. decode_channel_map(new_che_pos[TYPE_CCE], new_che_pos[TYPE_CCE], AAC_CHANNEL_CC, gb, num_cc );
  110. align_get_bits(gb);
  111. /* comment field, first byte is length */
  112. skip_bits_long(gb, 8 * get_bits(gb, 8));
  113. return 0;
  114. }
  115. static av_cold int aac_decode_init(AVCodecContext * avccontext) {
  116. AACContext * ac = avccontext->priv_data;
  117. int i;
  118. ac->avccontext = avccontext;
  119. if (avccontext->extradata_size <= 0 ||
  120. decode_audio_specific_config(ac, avccontext->extradata, avccontext->extradata_size))
  121. return -1;
  122. avccontext->sample_rate = ac->m4ac.sample_rate;
  123. avccontext->frame_size = 1024;
  124. AAC_INIT_VLC_STATIC( 0, 144);
  125. AAC_INIT_VLC_STATIC( 1, 114);
  126. AAC_INIT_VLC_STATIC( 2, 188);
  127. AAC_INIT_VLC_STATIC( 3, 180);
  128. AAC_INIT_VLC_STATIC( 4, 172);
  129. AAC_INIT_VLC_STATIC( 5, 140);
  130. AAC_INIT_VLC_STATIC( 6, 168);
  131. AAC_INIT_VLC_STATIC( 7, 114);
  132. AAC_INIT_VLC_STATIC( 8, 262);
  133. AAC_INIT_VLC_STATIC( 9, 248);
  134. AAC_INIT_VLC_STATIC(10, 384);
  135. dsputil_init(&ac->dsp, avccontext);
  136. // -1024 - Compensate wrong IMDCT method.
  137. // 32768 - Required to scale values to the correct range for the bias method
  138. // for float to int16 conversion.
  139. if(ac->dsp.float_to_int16 == ff_float_to_int16_c) {
  140. ac->add_bias = 385.0f;
  141. ac->sf_scale = 1. / (-1024. * 32768.);
  142. ac->sf_offset = 0;
  143. } else {
  144. ac->add_bias = 0.0f;
  145. ac->sf_scale = 1. / -1024.;
  146. ac->sf_offset = 60;
  147. }
  148. #ifndef CONFIG_HARDCODED_TABLES
  149. for (i = 1 - IVQUANT_SIZE/2; i < IVQUANT_SIZE/2; i++)
  150. ff_aac_ivquant_tab[i + IVQUANT_SIZE/2 - 1] = cbrt(fabs(i)) * i;
  151. for (i = 0; i < 316; i++)
  152. ff_aac_pow2sf_tab[i] = pow(2, (i - 200)/4.);
  153. #endif /* CONFIG_HARDCODED_TABLES */
  154. INIT_VLC_STATIC(&vlc_scalefactors, 7, sizeof(ff_aac_scalefactor_code)/sizeof(ff_aac_scalefactor_code[0]),
  155. ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
  156. ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
  157. 352);
  158. ff_mdct_init(&ac->mdct, 11, 1);
  159. ff_mdct_init(&ac->mdct_small, 8, 1);
  160. return 0;
  161. }
  162. int byte_align = get_bits1(gb);
  163. int count = get_bits(gb, 8);
  164. if (count == 255)
  165. count += get_bits(gb, 8);
  166. if (byte_align)
  167. align_get_bits(gb);
  168. skip_bits_long(gb, 8 * count);
  169. }
  170. /**
  171. * inverse quantization
  172. *
  173. * @param a quantized value to be dequantized
  174. * @return Returns dequantized value.
  175. */
  176. static inline float ivquant(int a) {
  177. if (a + (unsigned int)IVQUANT_SIZE/2 - 1 < (unsigned int)IVQUANT_SIZE - 1)
  178. return ff_aac_ivquant_tab[a + IVQUANT_SIZE/2 - 1];
  179. else
  180. return cbrtf(fabsf(a)) * a;
  181. }
  182. int band_type_run_end[120], GetBitContext * gb, IndividualChannelStream * ics) {
  183. int g, idx = 0;
  184. const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
  185. for (g = 0; g < ics->num_window_groups; g++) {
  186. int k = 0;
  187. while (k < ics->max_sfb) {
  188. uint8_t sect_len = k;
  189. int sect_len_incr;
  190. int sect_band_type = get_bits(gb, 4);
  191. if (sect_band_type == 12) {
  192. av_log(ac->avccontext, AV_LOG_ERROR, "invalid band type\n");
  193. return -1;
  194. }
  195. while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits)-1)
  196. sect_len += sect_len_incr;
  197. sect_len += sect_len_incr;
  198. if (sect_len > ics->max_sfb) {
  199. av_log(ac->avccontext, AV_LOG_ERROR,
  200. "Number of bands (%d) exceeds limit (%d).\n",
  201. sect_len, ics->max_sfb);
  202. return -1;
  203. }
  204. *
  205. * @param mix_gain channel gain (Not used by AAC bitstream.)
  206. * @param global_gain first scalefactor value as scalefactors are differentially coded
  207. * @param band_type array of the used band type
  208. * @param band_type_run_end array of the last scalefactor band of a band type run
  209. * @param sf array of scalefactors or intensity stereo positions
  210. *
  211. * @return Returns error status. 0 - OK, !0 - error
  212. */
  213. static int decode_scalefactors(AACContext * ac, float sf[120], GetBitContext * gb,
  214. float mix_gain, unsigned int global_gain, IndividualChannelStream * ics,
  215. enum BandType band_type[120], int band_type_run_end[120]) {
  216. const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
  217. int g, i, idx = 0;
  218. int offset[3] = { global_gain, global_gain - 90, 100 };
  219. int noise_flag = 1;
  220. static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
  221. ics->intensity_present = 0;
  222. for (g = 0; g < ics->num_window_groups; g++) {
  223. for (i = 0; i < ics->max_sfb;) {
  224. int run_end = band_type_run_end[idx];
  225. if (band_type[idx] == ZERO_BT) {
  226. for(; i < run_end; i++, idx++)
  227. sf[idx] = 0.;
  228. }else if((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
  229. ics->intensity_present = 1;
  230. for(; i < run_end; i++, idx++) {
  231. offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
  232. if(offset[2] > 255U) {
  233. av_log(ac->avccontext, AV_LOG_ERROR,
  234. "%s (%d) out of range.\n", sf_str[2], offset[2]);
  235. return -1;
  236. }
  237. sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
  238. sf[idx] *= mix_gain;
  239. }
  240. }else if(band_type[idx] == NOISE_BT) {
  241. for(; i < run_end; i++, idx++) {
  242. if(noise_flag-- > 0)
  243. offset[1] += get_bits(gb, 9) - 256;
  244. else
  245. offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
  246. if(offset[1] > 255U) {
  247. av_log(ac->avccontext, AV_LOG_ERROR,
  248. "%s (%d) out of range.\n", sf_str[1], offset[1]);
  249. return -1;
  250. }
  251. sf[idx] = -ff_aac_pow2sf_tab[ offset[1] + sf_offset];
  252. sf[idx] *= mix_gain;
  253. }
  254. }else {
  255. for(; i < run_end; i++, idx++) {
  256. offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
  257. if(offset[0] > 255U) {
  258. av_log(ac->avccontext, AV_LOG_ERROR,
  259. "%s (%d) out of range.\n", sf_str[0], offset[0]);
  260. return -1;
  261. }
  262. sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
  263. sf[idx] *= mix_gain;
  264. }
  265. }
  266. }
  267. }
  268. return 0;
  269. }
  270. /**
  271. * Decode pulse data; reference: table 4.7.
  272. */
  273. static void decode_pulses(Pulse * pulse, GetBitContext * gb) {
  274. int i;
  275. pulse->num_pulse = get_bits(gb, 2) + 1;
  276. pulse->start = get_bits(gb, 6);
  277. for (i = 0; i < pulse->num_pulse; i++) {
  278. pulse->offset[i] = get_bits(gb, 5);
  279. pulse->amp [i] = get_bits(gb, 4);
  280. }
  281. }
  282. /**
  283. * Add pulses with particular amplitudes to the quantized spectral data; reference: 4.6.3.3.
  284. *
  285. * @param pulse pointer to pulse data struct
  286. * @param icoef array of quantized spectral data
  287. */
  288. static void add_pulses(int icoef[1024], const Pulse * pulse, const IndividualChannelStream * ics) {
  289. int i, off = ics->swb_offset[pulse->start];
  290. for (i = 0; i < pulse->num_pulse; i++) {
  291. int ic;
  292. off += pulse->offset[i];
  293. ic = (icoef[off] - 1)>>31;
  294. icoef[off] += (pulse->amp[i]^ic) - ic;
  295. }
  296. }
  297. /**
  298. * Parse Spectral Band Replication extension data; reference: table 4.55.
  299. *
  300. * @param crc flag indicating the presence of CRC checksum
  301. * @param cnt length of TYPE_FIL syntactic element in bytes
  302. * @return Returns number of bytes consumed from the TYPE_FIL element.
  303. */
  304. static int decode_sbr_extension(AACContext * ac, GetBitContext * gb, int crc, int cnt) {
  305. // TODO : sbr_extension implementation
  306. av_log(ac->avccontext, AV_LOG_DEBUG, "aac: SBR not yet supported.\n");
  307. skip_bits_long(gb, 8*cnt - 4); // -4 due to reading extension type
  308. return cnt;
  309. }
  310. int crc_flag = 0;
  311. int res = cnt;
  312. switch (get_bits(gb, 4)) { // extension type
  313. case EXT_SBR_DATA_CRC:
  314. crc_flag++;
  315. case EXT_SBR_DATA:
  316. res = decode_sbr_extension(ac, gb, crc_flag, cnt);
  317. break;
  318. case EXT_DYNAMIC_RANGE:
  319. res = decode_dynamic_range(&ac->che_drc, gb, cnt);
  320. break;
  321. case EXT_FILL:
  322. case EXT_FILL_DATA:
  323. case EXT_DATA_ELEMENT:
  324. default:
  325. skip_bits_long(gb, 8*cnt - 4);
  326. break;
  327. };
  328. return res;
  329. }
  330. /**
  331. * Apply dependent channel coupling (applied before IMDCT).
  332. *
  333. * @param index index into coupling gain array
  334. */
  335. static void apply_dependent_coupling(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index) {
  336. IndividualChannelStream * ics = &cc->ch[0].ics;
  337. const uint16_t * offsets = ics->swb_offset;
  338. float * dest = sce->coeffs;
  339. const float * src = cc->ch[0].coeffs;
  340. int g, i, group, k, idx = 0;
  341. if(ac->m4ac.object_type == AOT_AAC_LTP) {
  342. av_log(ac->avccontext, AV_LOG_ERROR,
  343. "Dependent coupling is not supported together with LTP\n");
  344. return;
  345. }
  346. for (g = 0; g < ics->num_window_groups; g++) {
  347. for (i = 0; i < ics->max_sfb; i++, idx++) {
  348. if (cc->ch[0].band_type[idx] != ZERO_BT) {
  349. float gain = cc->coup.gain[index][idx] * sce->mixing_gain;
  350. for (group = 0; group < ics->group_len[g]; group++) {
  351. for (k = offsets[i]; k < offsets[i+1]; k++) {
  352. // XXX dsputil-ize
  353. dest[group*128+k] += gain * src[group*128+k];
  354. }
  355. }
  356. }
  357. }
  358. dest += ics->group_len[g]*128;
  359. src += ics->group_len[g]*128;
  360. }
  361. }
  362. /**
  363. * Apply independent channel coupling (applied after IMDCT).
  364. *
  365. * @param index index into coupling gain array
  366. */
  367. static void apply_independent_coupling(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index) {
  368. int i;
  369. float gain = cc->coup.gain[index][0] * sce->mixing_gain;
  370. for (i = 0; i < 1024; i++)
  371. sce->ret[i] += gain * (cc->ch[0].ret[i] - ac->add_bias);
  372. }
  373. static av_cold int aac_decode_close(AVCodecContext * avccontext) {
  374. AACContext * ac = avccontext->priv_data;
  375. int i, j;
  376. for (i = 0; i < MAX_ELEM_ID; i++) {
  377. for(j = 0; j < 4; j++)
  378. av_freep(&ac->che[j][i]);
  379. }
  380. ff_mdct_end(&ac->mdct);
  381. ff_mdct_end(&ac->mdct_small);
  382. return 0 ;
  383. }
  384. AVCodec aac_decoder = {
  385. "aac",
  386. CODEC_TYPE_AUDIO,
  387. CODEC_ID_AAC,
  388. sizeof(AACContext),
  389. aac_decode_init,
  390. NULL,
  391. aac_decode_close,
  392. aac_decode_frame,
  393. .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
  394. .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE},
  395. };