You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1154 lines
43KB

  1. /*
  2. * AAC encoder
  3. * Copyright (C) 2008 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC encoder
  24. */
  25. /***********************************
  26. * TODOs:
  27. * add sane pulse detection
  28. ***********************************/
  29. #include "libavutil/libm.h"
  30. #include "libavutil/thread.h"
  31. #include "libavutil/float_dsp.h"
  32. #include "libavutil/opt.h"
  33. #include "avcodec.h"
  34. #include "put_bits.h"
  35. #include "internal.h"
  36. #include "mpeg4audio.h"
  37. #include "kbdwin.h"
  38. #include "sinewin.h"
  39. #include "aac.h"
  40. #include "aactab.h"
  41. #include "aacenc.h"
  42. #include "aacenctab.h"
  43. #include "aacenc_utils.h"
  44. #include "psymodel.h"
  45. static AVOnce aac_table_init = AV_ONCE_INIT;
  46. static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
  47. {
  48. int i, j;
  49. AACEncContext *s = avctx->priv_data;
  50. AACPCEInfo *pce = &s->pce;
  51. put_bits(pb, 4, 0);
  52. put_bits(pb, 2, avctx->profile);
  53. put_bits(pb, 4, s->samplerate_index);
  54. put_bits(pb, 4, pce->num_ele[0]); /* Front */
  55. put_bits(pb, 4, pce->num_ele[1]); /* Side */
  56. put_bits(pb, 4, pce->num_ele[2]); /* Back */
  57. put_bits(pb, 2, pce->num_ele[3]); /* LFE */
  58. put_bits(pb, 3, 0); /* Assoc data */
  59. put_bits(pb, 4, 0); /* CCs */
  60. put_bits(pb, 1, 0); /* Stereo mixdown */
  61. put_bits(pb, 1, 0); /* Mono mixdown */
  62. put_bits(pb, 1, 0); /* Something else */
  63. for (i = 0; i < 4; i++) {
  64. for (j = 0; j < pce->num_ele[i]; j++) {
  65. if (i < 3)
  66. put_bits(pb, 1, pce->pairing[i][j]);
  67. put_bits(pb, 4, pce->index[i][j]);
  68. }
  69. }
  70. avpriv_align_put_bits(pb);
  71. put_bits(pb, 8, 0);
  72. }
  73. /**
  74. * Make AAC audio config object.
  75. * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
  76. */
  77. static void put_audio_specific_config(AVCodecContext *avctx)
  78. {
  79. PutBitContext pb;
  80. AACEncContext *s = avctx->priv_data;
  81. int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
  82. init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
  83. put_bits(&pb, 5, s->profile+1); //profile
  84. put_bits(&pb, 4, s->samplerate_index); //sample rate index
  85. put_bits(&pb, 4, channels);
  86. //GASpecificConfig
  87. put_bits(&pb, 1, 0); //frame length - 1024 samples
  88. put_bits(&pb, 1, 0); //does not depend on core coder
  89. put_bits(&pb, 1, 0); //is not extension
  90. if (s->needs_pce)
  91. put_pce(&pb, avctx);
  92. //Explicitly Mark SBR absent
  93. put_bits(&pb, 11, 0x2b7); //sync extension
  94. put_bits(&pb, 5, AOT_SBR);
  95. put_bits(&pb, 1, 0);
  96. flush_put_bits(&pb);
  97. }
  98. void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
  99. {
  100. ++s->quantize_band_cost_cache_generation;
  101. if (s->quantize_band_cost_cache_generation == 0) {
  102. memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache));
  103. s->quantize_band_cost_cache_generation = 1;
  104. }
  105. }
/* Expands to the common signature of the window-applying helpers below:
 * static void apply_<type>_window(AVFloatDSPContext *, SingleChannelElement *,
 *                                 const float *audio). */
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
                                    SingleChannelElement *sce, \
                                    const float *audio)
  110. WINDOW_FUNC(only_long)
  111. {
  112. const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
  113. const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
  114. float *out = sce->ret_buf;
  115. fdsp->vector_fmul (out, audio, lwindow, 1024);
  116. fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
  117. }
  118. WINDOW_FUNC(long_start)
  119. {
  120. const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
  121. const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
  122. float *out = sce->ret_buf;
  123. fdsp->vector_fmul(out, audio, lwindow, 1024);
  124. memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
  125. fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
  126. memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
  127. }
  128. WINDOW_FUNC(long_stop)
  129. {
  130. const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
  131. const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
  132. float *out = sce->ret_buf;
  133. memset(out, 0, sizeof(out[0]) * 448);
  134. fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
  135. memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
  136. fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
  137. }
  138. WINDOW_FUNC(eight_short)
  139. {
  140. const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
  141. const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
  142. const float *in = audio + 448;
  143. float *out = sce->ret_buf;
  144. int w;
  145. for (w = 0; w < 8; w++) {
  146. fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
  147. out += 128;
  148. in += 128;
  149. fdsp->vector_fmul_reverse(out, in, swindow, 128);
  150. out += 128;
  151. }
  152. }
/* Dispatch table from window_sequence value to the matching windowing
 * routine above; indexed with ics.window_sequence[0]. */
static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
                                     SingleChannelElement *sce,
                                     const float *audio) = {
    [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
    [LONG_START_SEQUENCE]  = apply_long_start_window,
    [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
    [LONG_STOP_SEQUENCE]   = apply_long_stop_window
};
  161. static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
  162. float *audio)
  163. {
  164. int i;
  165. const float *output = sce->ret_buf;
  166. apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
  167. if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
  168. s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
  169. else
  170. for (i = 0; i < 1024; i += 128)
  171. s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
  172. memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
  173. memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
  174. }
  175. /**
  176. * Encode ics_info element.
  177. * @see Table 4.6 (syntax of ics_info)
  178. */
  179. static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
  180. {
  181. int w;
  182. put_bits(&s->pb, 1, 0); // ics_reserved bit
  183. put_bits(&s->pb, 2, info->window_sequence[0]);
  184. put_bits(&s->pb, 1, info->use_kb_window[0]);
  185. if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
  186. put_bits(&s->pb, 6, info->max_sfb);
  187. put_bits(&s->pb, 1, !!info->predictor_present);
  188. } else {
  189. put_bits(&s->pb, 4, info->max_sfb);
  190. for (w = 1; w < 8; w++)
  191. put_bits(&s->pb, 1, !info->group_len[w]);
  192. }
  193. }
  194. /**
  195. * Encode MS data.
  196. * @see 4.6.8.1 "Joint Coding - M/S Stereo"
  197. */
  198. static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
  199. {
  200. int i, w;
  201. put_bits(pb, 2, cpe->ms_mode);
  202. if (cpe->ms_mode == 1)
  203. for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
  204. for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
  205. put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
  206. }
  207. /**
  208. * Produce integer coefficients from scalefactors provided by the model.
  209. */
  210. static void adjust_frame_information(ChannelElement *cpe, int chans)
  211. {
  212. int i, w, w2, g, ch;
  213. int maxsfb, cmaxsfb;
  214. for (ch = 0; ch < chans; ch++) {
  215. IndividualChannelStream *ics = &cpe->ch[ch].ics;
  216. maxsfb = 0;
  217. cpe->ch[ch].pulse.num_pulse = 0;
  218. for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
  219. for (w2 = 0; w2 < ics->group_len[w]; w2++) {
  220. for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
  221. ;
  222. maxsfb = FFMAX(maxsfb, cmaxsfb);
  223. }
  224. }
  225. ics->max_sfb = maxsfb;
  226. //adjust zero bands for window groups
  227. for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
  228. for (g = 0; g < ics->max_sfb; g++) {
  229. i = 1;
  230. for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
  231. if (!cpe->ch[ch].zeroes[w2*16 + g]) {
  232. i = 0;
  233. break;
  234. }
  235. }
  236. cpe->ch[ch].zeroes[w*16 + g] = i;
  237. }
  238. }
  239. }
  240. if (chans > 1 && cpe->common_window) {
  241. IndividualChannelStream *ics0 = &cpe->ch[0].ics;
  242. IndividualChannelStream *ics1 = &cpe->ch[1].ics;
  243. int msc = 0;
  244. ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
  245. ics1->max_sfb = ics0->max_sfb;
  246. for (w = 0; w < ics0->num_windows*16; w += 16)
  247. for (i = 0; i < ics0->max_sfb; i++)
  248. if (cpe->ms_mask[w+i])
  249. msc++;
  250. if (msc == 0 || ics0->max_sfb == 0)
  251. cpe->ms_mode = 0;
  252. else
  253. cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
  254. }
  255. }
  256. static void apply_intensity_stereo(ChannelElement *cpe)
  257. {
  258. int w, w2, g, i;
  259. IndividualChannelStream *ics = &cpe->ch[0].ics;
  260. if (!cpe->common_window)
  261. return;
  262. for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
  263. for (w2 = 0; w2 < ics->group_len[w]; w2++) {
  264. int start = (w+w2) * 128;
  265. for (g = 0; g < ics->num_swb; g++) {
  266. int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
  267. float scale = cpe->ch[0].is_ener[w*16+g];
  268. if (!cpe->is_mask[w*16 + g]) {
  269. start += ics->swb_sizes[g];
  270. continue;
  271. }
  272. if (cpe->ms_mask[w*16 + g])
  273. p *= -1;
  274. for (i = 0; i < ics->swb_sizes[g]; i++) {
  275. float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
  276. cpe->ch[0].coeffs[start+i] = sum;
  277. cpe->ch[1].coeffs[start+i] = 0.0f;
  278. }
  279. start += ics->swb_sizes[g];
  280. }
  281. }
  282. }
  283. }
  284. static void apply_mid_side_stereo(ChannelElement *cpe)
  285. {
  286. int w, w2, g, i;
  287. IndividualChannelStream *ics = &cpe->ch[0].ics;
  288. if (!cpe->common_window)
  289. return;
  290. for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
  291. for (w2 = 0; w2 < ics->group_len[w]; w2++) {
  292. int start = (w+w2) * 128;
  293. for (g = 0; g < ics->num_swb; g++) {
  294. /* ms_mask can be used for other purposes in PNS and I/S,
  295. * so must not apply M/S if any band uses either, even if
  296. * ms_mask is set.
  297. */
  298. if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
  299. || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
  300. || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
  301. start += ics->swb_sizes[g];
  302. continue;
  303. }
  304. for (i = 0; i < ics->swb_sizes[g]; i++) {
  305. float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
  306. float R = L - cpe->ch[1].coeffs[start+i];
  307. cpe->ch[0].coeffs[start+i] = L;
  308. cpe->ch[1].coeffs[start+i] = R;
  309. }
  310. start += ics->swb_sizes[g];
  311. }
  312. }
  313. }
  314. }
  315. /**
  316. * Encode scalefactor band coding type.
  317. */
  318. static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
  319. {
  320. int w;
  321. if (s->coder->set_special_band_scalefactors)
  322. s->coder->set_special_band_scalefactors(s, sce);
  323. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  324. s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
  325. }
  326. /**
  327. * Encode scalefactors.
  328. */
  329. static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
  330. SingleChannelElement *sce)
  331. {
  332. int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
  333. int off_is = 0, noise_flag = 1;
  334. int i, w;
  335. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  336. for (i = 0; i < sce->ics.max_sfb; i++) {
  337. if (!sce->zeroes[w*16 + i]) {
  338. if (sce->band_type[w*16 + i] == NOISE_BT) {
  339. diff = sce->sf_idx[w*16 + i] - off_pns;
  340. off_pns = sce->sf_idx[w*16 + i];
  341. if (noise_flag-- > 0) {
  342. put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
  343. continue;
  344. }
  345. } else if (sce->band_type[w*16 + i] == INTENSITY_BT ||
  346. sce->band_type[w*16 + i] == INTENSITY_BT2) {
  347. diff = sce->sf_idx[w*16 + i] - off_is;
  348. off_is = sce->sf_idx[w*16 + i];
  349. } else {
  350. diff = sce->sf_idx[w*16 + i] - off_sf;
  351. off_sf = sce->sf_idx[w*16 + i];
  352. }
  353. diff += SCALE_DIFF_ZERO;
  354. av_assert0(diff >= 0 && diff <= 120);
  355. put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
  356. }
  357. }
  358. }
  359. }
  360. /**
  361. * Encode pulse data.
  362. */
  363. static void encode_pulses(AACEncContext *s, Pulse *pulse)
  364. {
  365. int i;
  366. put_bits(&s->pb, 1, !!pulse->num_pulse);
  367. if (!pulse->num_pulse)
  368. return;
  369. put_bits(&s->pb, 2, pulse->num_pulse - 1);
  370. put_bits(&s->pb, 6, pulse->start);
  371. for (i = 0; i < pulse->num_pulse; i++) {
  372. put_bits(&s->pb, 5, pulse->pos[i]);
  373. put_bits(&s->pb, 4, pulse->amp[i]);
  374. }
  375. }
  376. /**
  377. * Encode spectral coefficients processed by psychoacoustic model.
  378. */
  379. static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
  380. {
  381. int start, i, w, w2;
  382. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  383. start = 0;
  384. for (i = 0; i < sce->ics.max_sfb; i++) {
  385. if (sce->zeroes[w*16 + i]) {
  386. start += sce->ics.swb_sizes[i];
  387. continue;
  388. }
  389. for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
  390. s->coder->quantize_and_encode_band(s, &s->pb,
  391. &sce->coeffs[start + w2*128],
  392. NULL, sce->ics.swb_sizes[i],
  393. sce->sf_idx[w*16 + i],
  394. sce->band_type[w*16 + i],
  395. s->lambda,
  396. sce->ics.window_clipping[w]);
  397. }
  398. start += sce->ics.swb_sizes[i];
  399. }
  400. }
  401. }
  402. /**
  403. * Downscale spectral coefficients for near-clipping windows to avoid artifacts
  404. */
  405. static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
  406. {
  407. int start, i, j, w;
  408. if (sce->ics.clip_avoidance_factor < 1.0f) {
  409. for (w = 0; w < sce->ics.num_windows; w++) {
  410. start = 0;
  411. for (i = 0; i < sce->ics.max_sfb; i++) {
  412. float *swb_coeffs = &sce->coeffs[start + w*128];
  413. for (j = 0; j < sce->ics.swb_sizes[i]; j++)
  414. swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
  415. start += sce->ics.swb_sizes[i];
  416. }
  417. }
  418. }
  419. }
  420. /**
  421. * Encode one channel of audio data.
  422. */
  423. static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
  424. SingleChannelElement *sce,
  425. int common_window)
  426. {
  427. put_bits(&s->pb, 8, sce->sf_idx[0]);
  428. if (!common_window) {
  429. put_ics_info(s, &sce->ics);
  430. if (s->coder->encode_main_pred)
  431. s->coder->encode_main_pred(s, sce);
  432. if (s->coder->encode_ltp_info)
  433. s->coder->encode_ltp_info(s, sce, 0);
  434. }
  435. encode_band_info(s, sce);
  436. encode_scale_factors(avctx, s, sce);
  437. encode_pulses(s, &sce->pulse);
  438. put_bits(&s->pb, 1, !!sce->tns.present);
  439. if (s->coder->encode_tns_info)
  440. s->coder->encode_tns_info(s, sce);
  441. put_bits(&s->pb, 1, 0); //ssr
  442. encode_spectral_coeffs(s, sce);
  443. return 0;
  444. }
/**
 * Write some auxiliary information about the created AAC file inside a
 * fill (FIL) element: the libavcodec identification string.
 */
static void put_bitstream_info(AACEncContext *s, const char *name)
{
    int i, namelen, padbits;

    /* +2 accounts for the extra padding emitted by the final put_bits */
    namelen = strlen(name) + 2;
    put_bits(&s->pb, 3, TYPE_FIL);
    put_bits(&s->pb, 4, FFMIN(namelen, 15)); /* count; 15 means escaped */
    if (namelen >= 15)
        put_bits(&s->pb, 8, namelen - 14);   /* escaped count extension */
    put_bits(&s->pb, 4, 0); //extension type - filler
    /* remember how many bits the byte alignment consumes so the tail pad
     * below can compensate for them */
    padbits = -put_bits_count(&s->pb) & 7;
    avpriv_align_put_bits(&s->pb);
    for (i = 0; i < namelen - 2; i++)
        put_bits(&s->pb, 8, name[i]);
    put_bits(&s->pb, 12 - padbits, 0);
}
  463. /*
  464. * Copy input samples.
  465. * Channels are reordered from libavcodec's default order to AAC order.
  466. */
  467. static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
  468. {
  469. int ch;
  470. int end = 2048 + (frame ? frame->nb_samples : 0);
  471. const uint8_t *channel_map = s->reorder_map;
  472. /* copy and remap input samples */
  473. for (ch = 0; ch < s->channels; ch++) {
  474. /* copy last 1024 samples of previous frame to the start of the current frame */
  475. memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
  476. /* copy new samples and zero any remaining samples */
  477. if (frame) {
  478. memcpy(&s->planar_samples[ch][2048],
  479. frame->extended_data[channel_map[ch]],
  480. frame->nb_samples * sizeof(s->planar_samples[0][0]));
  481. }
  482. memset(&s->planar_samples[ch][end], 0,
  483. (3072 - end) * sizeof(s->planar_samples[0][0]));
  484. }
  485. }
/**
 * Encode one frame (1024 samples per channel).
 *
 * Stage 1: per channel, pick the window sequence (psy model, or fixed
 * long windows for LFE), apply windowing + MDCT, and sanity-check the
 * coefficients. Stage 2: write the bitstream, iterating up to 5 times
 * while adjusting lambda until the frame size fits the rate window.
 */
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                            const AVFrame *frame, int *got_packet_ptr)
{
    AACEncContext *s = avctx->priv_data;
    float **samples = s->planar_samples, *samples2, *la, *overlap;
    ChannelElement *cpe;
    SingleChannelElement *sce;
    IndividualChannelStream *ics;
    int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
    int target_bits, rate_bits, too_many_bits, too_few_bits;
    int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
    int chan_el_counter[4];
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];

    /* add current frame to queue */
    if (frame) {
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
            return ret;
    } else {
        /* NULL frame: flushing; stop once the queue has drained */
        if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
            return 0;
    }

    copy_input_samples(s, frame);
    if (s->psypp)
        ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);

    /* the first call only primes the delay buffers; no packet yet */
    if (!avctx->frame_number)
        return 0;

    start_ch = 0;
    for (i = 0; i < s->chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
        tag      = s->chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        for (ch = 0; ch < chans; ch++) {
            int k;
            float clip_avoidance_factor;
            sce = &cpe->ch[ch];
            ics = &sce->ics;
            s->cur_channel = start_ch + ch;
            /* overlap = previous 1024 samples, samples2 = current block,
             * la = lookahead for the psy model (none while flushing) */
            overlap  = &samples[s->cur_channel][0];
            samples2 = overlap + 1024;
            la       = samples2 + (448+64);
            if (!frame)
                la = NULL;
            if (tag == TYPE_LFE) {
                /* LFE channels always use a single long sine window */
                wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
                wi[ch].window_shape   = 0;
                wi[ch].num_windows    = 1;
                wi[ch].grouping[0]    = 1;
                wi[ch].clipping[0]    = 0;
                /* Only the lowest 12 coefficients are used in a LFE channel.
                 * The expression below results in only the bottom 8 coefficients
                 * being used for 11.025kHz to 16kHz sample rates.
                 */
                ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
            } else {
                wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
                                              ics->window_sequence[0]);
            }
            /* latch the psy model's decision into the channel stream */
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[ch].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[ch].window_shape;
            ics->num_windows        = wi[ch].num_windows;
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
            ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
            ics->max_sfb            = FFMIN(ics->max_sfb, ics->num_swb);
            ics->swb_offset         = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
                                        ff_swb_offset_128 [s->samplerate_index]:
                                        ff_swb_offset_1024[s->samplerate_index];
            ics->tns_max_bands      = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
                                        ff_tns_max_bands_128 [s->samplerate_index]:
                                        ff_tns_max_bands_1024[s->samplerate_index];

            for (w = 0; w < ics->num_windows; w++)
                ics->group_len[w] = wi[ch].grouping[w];

            /* Calculate input sample maximums and evaluate clipping risk */
            clip_avoidance_factor = 0.0f;
            for (w = 0; w < ics->num_windows; w++) {
                const float *wbuf = overlap + w * 128;
                const int wlen = 2048 / ics->num_windows;
                float max = 0;
                int j;
                /* mdct input is 2 * output */
                for (j = 0; j < wlen; j++)
                    max = FFMAX(max, fabsf(wbuf[j]));
                wi[ch].clipping[w] = max;
            }
            for (w = 0; w < ics->num_windows; w++) {
                if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
                    ics->window_clipping[w] = 1;
                    clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
                } else {
                    ics->window_clipping[w] = 0;
                }
            }
            if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
                ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
            } else {
                ics->clip_avoidance_factor = 1.0f;
            }

            apply_window_and_mdct(s, sce, overlap);

            if (s->options.ltp && s->coder->update_ltp) {
                s->coder->update_ltp(s, sce);
                apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
                s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
            }

            for (k = 0; k < 1024; k++) {
                if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
                    av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
                    return AVERROR(EINVAL);
                }
            }
            avoid_clipping(s, sce);
        }
        start_ch += chans;
    }
    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
        return ret;
    frame_bits = its = 0;
    /* encode-and-retry loop: each iteration rewrites the packet from the
     * start with an adjusted lambda */
    do {
        init_put_bits(&s->pb, avpkt->data, avpkt->size);

        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
            put_bitstream_info(s, LIBAVCODEC_IDENT);
        start_ch = 0;
        target_bits = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (i = 0; i < s->chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
            const float *coeffs[2];
            tag      = s->chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            cpe->common_window = 0;
            memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
            memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);
            /* reset per-iteration channel state */
            for (ch = 0; ch < chans; ch++) {
                sce = &cpe->ch[ch];
                coeffs[ch] = sce->coeffs;
                sce->ics.predictor_present = 0;
                sce->ics.ltp.present = 0;
                memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
                memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
                memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
                for (w = 0; w < 128; w++)
                    if (sce->band_type[w] > RESERVED_BT)
                        sce->band_type[w] = 0;
            }
            s->psy.bitres.alloc = -1;
            s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
            s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
            if (s->psy.bitres.alloc > 0) {
                /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
                target_bits += s->psy.bitres.alloc
                    * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
                s->psy.bitres.alloc /= chans;
            }
            s->cur_type = tag;
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                if (s->options.pns && s->coder->mark_pns)
                    s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
            }
            /* a CPE may share one ics_info only when both channels agree on
             * window type, shape and grouping */
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (w = 0; w < wi[0].num_windows; w++) {
                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }
            for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
                sce = &cpe->ch[ch];
                s->cur_channel = start_ch + ch;
                if (s->options.tns && s->coder->search_for_tns)
                    s->coder->search_for_tns(s, sce);
                if (s->options.tns && s->coder->apply_tns_filt)
                    s->coder->apply_tns_filt(s, sce);
                if (sce->tns.present)
                    tns_mode = 1;
                if (s->options.pns && s->coder->search_for_pns)
                    s->coder->search_for_pns(s, avctx, sce);
            }
            s->cur_channel = start_ch;
            if (s->options.intensity_stereo) { /* Intensity Stereo */
                if (s->coder->search_for_is)
                    s->coder->search_for_is(s, avctx, cpe);
                if (cpe->is_mode) is_mode = 1;
                apply_intensity_stereo(cpe);
            }
            if (s->options.pred) { /* Prediction */
                for (ch = 0; ch < chans; ch++) {
                    sce = &cpe->ch[ch];
                    s->cur_channel = start_ch + ch;
                    if (s->options.pred && s->coder->search_for_pred)
                        s->coder->search_for_pred(s, sce);
                    if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
                }
                if (s->coder->adjust_common_pred)
                    s->coder->adjust_common_pred(s, cpe);
                for (ch = 0; ch < chans; ch++) {
                    sce = &cpe->ch[ch];
                    s->cur_channel = start_ch + ch;
                    if (s->options.pred && s->coder->apply_main_pred)
                        s->coder->apply_main_pred(s, sce);
                }
                s->cur_channel = start_ch;
            }
            if (s->options.mid_side) { /* Mid/Side stereo */
                /* mid_side == -1 means "auto": search per band; otherwise
                 * force M/S on every band of a common-window pair */
                if (s->options.mid_side == -1 && s->coder->search_for_ms)
                    s->coder->search_for_ms(s, cpe);
                else if (cpe->common_window)
                    memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
                apply_mid_side_stereo(cpe);
            }
            adjust_frame_information(cpe, chans);
            if (s->options.ltp) { /* LTP */
                for (ch = 0; ch < chans; ch++) {
                    sce = &cpe->ch[ch];
                    s->cur_channel = start_ch + ch;
                    if (s->coder->search_for_ltp)
                        s->coder->search_for_ltp(s, sce, cpe->common_window);
                    if (sce->ics.ltp.present) pred_mode = 1;
                }
                s->cur_channel = start_ch;
                if (s->coder->adjust_common_ltp)
                    s->coder->adjust_common_ltp(s, cpe);
            }
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    if (s->coder->encode_main_pred)
                        s->coder->encode_main_pred(s, &cpe->ch[0]);
                    if (s->coder->encode_ltp_info)
                        s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
                    encode_ms_info(&s->pb, cpe);
                    if (cpe->ms_mode) ms_mode = 1;
                }
            }
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
            }
            start_ch += chans;
        }

        if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
            /* When using a constant Q-scale, don't mess with lambda */
            break;
        }

        /* rate control stuff
         * allow between the nominal bitrate, and what psy's bit reservoir says to target
         * but drift towards the nominal bitrate always
         */
        frame_bits = put_bits_count(&s->pb);
        rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
        /* 6144 bits per channel is the AAC buffer-fullness hard limit */
        rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
        too_many_bits = FFMAX(target_bits, rate_bits);
        too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
        too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);

        /* When using ABR, be strict (but only for increasing) */
        too_few_bits = too_few_bits - too_few_bits/8;
        too_many_bits = too_many_bits + too_many_bits/2;

        if (   its == 0 /* for steady-state Q-scale tracking */
            || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
            || frame_bits >= 6144 * s->channels - 3  )
        {
            float ratio = ((float)rate_bits) / frame_bits;

            if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
                /*
                 * This path is for steady-state Q-scale tracking
                 * When frame bits fall within the stable range, we still need to adjust
                 * lambda to maintain it like so in a stable fashion (large jumps in lambda
                 * create artifacts and should be avoided), but slowly
                 */
                ratio = sqrtf(sqrtf(ratio));
                ratio = av_clipf(ratio, 0.9f, 1.1f);
            } else {
                /* Not so fast though */
                ratio = sqrtf(ratio);
            }
            s->lambda = FFMIN(s->lambda * ratio, 65536.f);

            /* Keep iterating if we must reduce and lambda is in the sky */
            if (ratio > 0.9f && ratio < 1.1f) {
                break;
            } else {
                /* the stereo/TNS/prediction tools modified coeffs in place;
                 * restore the saved copies before re-encoding */
                if (is_mode || ms_mode || tns_mode || pred_mode) {
                    for (i = 0; i < s->chan_map[0]; i++) {
                        // Must restore coeffs
                        chans = tag == TYPE_CPE ? 2 : 1;
                        cpe = &s->cpe[i];
                        for (ch = 0; ch < chans; ch++)
                            memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
                    }
                }
                its++;
            }
        } else {
            break;
        }
    } while (1);

    if (s->options.ltp && s->coder->ltp_insert_new_frame)
        s->coder->ltp_insert_new_frame(s);

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);

    /* remember this frame's size for next frame's bit reservoir seed */
    s->last_frame_pb_count = put_bits_count(&s->pb);

    s->lambda_sum += s->lambda;
    s->lambda_count++;

    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
                       &avpkt->duration);

    avpkt->size = put_bits_count(&s->pb) >> 3;
    *got_packet_ptr = 1;
    return 0;
}
/**
 * Free all encoder state: transforms, psy model, LPC, preprocessing,
 * sample buffers, channel elements, DSP context and the frame queue.
 * Also logs the average lambda (quality) used over the whole encode.
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;

    av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);

    ff_mdct_end(&s->mdct1024);
    ff_mdct_end(&s->mdct128);
    ff_psy_end(&s->psy);
    ff_lpc_end(&s->lpc);
    /* psypp is optional and may never have been created */
    if (s->psypp)
        ff_psy_preprocess_end(s->psypp);
    av_freep(&s->buffer.samples);
    av_freep(&s->cpe);
    av_freep(&s->fdsp);
    ff_af_queue_close(&s->afq);
    return 0;
}
  820. static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
  821. {
  822. int ret = 0;
  823. s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
  824. if (!s->fdsp)
  825. return AVERROR(ENOMEM);
  826. // window init
  827. ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
  828. ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
  829. ff_init_ff_sine_windows(10);
  830. ff_init_ff_sine_windows(7);
  831. if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
  832. return ret;
  833. if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
  834. return ret;
  835. return 0;
  836. }
  837. static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
  838. {
  839. int ch;
  840. FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
  841. FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
  842. FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + AV_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
  843. for(ch = 0; ch < s->channels; ch++)
  844. s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
  845. return 0;
  846. alloc_fail:
  847. return AVERROR(ENOMEM);
  848. }
/* One-time initialization of the shared AAC codebook tables; invoked
 * exactly once via ff_thread_once() from aac_encode_init(). */
static av_cold void aac_encode_init_tables(void)
{
    ff_aac_tableinit();
}
  853. static av_cold int aac_encode_init(AVCodecContext *avctx)
  854. {
  855. AACEncContext *s = avctx->priv_data;
  856. int i, ret = 0;
  857. const uint8_t *sizes[2];
  858. uint8_t grouping[AAC_MAX_CHANNELS];
  859. int lengths[2];
  860. /* Constants */
  861. s->last_frame_pb_count = 0;
  862. avctx->extradata_size = 20;
  863. avctx->frame_size = 1024;
  864. avctx->initial_padding = 1024;
  865. s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
  866. /* Channel map and unspecified bitrate guessing */
  867. s->channels = avctx->channels;
  868. s->needs_pce = 1;
  869. for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
  870. if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
  871. s->needs_pce = s->options.pce;
  872. break;
  873. }
  874. }
  875. if (s->needs_pce) {
  876. for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
  877. if (avctx->channel_layout == aac_pce_configs[i].layout)
  878. break;
  879. ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout\n");
  880. av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout\n");
  881. s->pce = aac_pce_configs[i];
  882. s->reorder_map = s->pce.reorder_map;
  883. s->chan_map = s->pce.config_map;
  884. } else {
  885. s->reorder_map = aac_chan_maps[s->channels - 1];
  886. s->chan_map = aac_chan_configs[s->channels - 1];
  887. }
  888. if (!avctx->bit_rate) {
  889. for (i = 1; i <= s->chan_map[0]; i++) {
  890. avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
  891. s->chan_map[i] == TYPE_LFE ? 16000 : /* LFE */
  892. 69000 ; /* SCE */
  893. }
  894. }
  895. /* Samplerate */
  896. for (i = 0; i < 16; i++)
  897. if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
  898. break;
  899. s->samplerate_index = i;
  900. ERROR_IF(s->samplerate_index == 16 ||
  901. s->samplerate_index >= ff_aac_swb_size_1024_len ||
  902. s->samplerate_index >= ff_aac_swb_size_128_len,
  903. "Unsupported sample rate %d\n", avctx->sample_rate);
  904. /* Bitrate limiting */
  905. WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
  906. "Too many bits %f > %d per frame requested, clamping to max\n",
  907. 1024.0 * avctx->bit_rate / avctx->sample_rate,
  908. 6144 * s->channels);
  909. avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
  910. avctx->bit_rate);
  911. /* Profile and option setting */
  912. avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
  913. avctx->profile;
  914. for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
  915. if (avctx->profile == aacenc_profiles[i])
  916. break;
  917. if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
  918. avctx->profile = FF_PROFILE_AAC_LOW;
  919. ERROR_IF(s->options.pred,
  920. "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
  921. ERROR_IF(s->options.ltp,
  922. "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
  923. WARN_IF(s->options.pns,
  924. "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
  925. s->options.pns = 0;
  926. } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
  927. s->options.ltp = 1;
  928. ERROR_IF(s->options.pred,
  929. "Main prediction unavailable in the \"aac_ltp\" profile\n");
  930. } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
  931. s->options.pred = 1;
  932. ERROR_IF(s->options.ltp,
  933. "LTP prediction unavailable in the \"aac_main\" profile\n");
  934. } else if (s->options.ltp) {
  935. avctx->profile = FF_PROFILE_AAC_LTP;
  936. WARN_IF(1,
  937. "Chainging profile to \"aac_ltp\"\n");
  938. ERROR_IF(s->options.pred,
  939. "Main prediction unavailable in the \"aac_ltp\" profile\n");
  940. } else if (s->options.pred) {
  941. avctx->profile = FF_PROFILE_AAC_MAIN;
  942. WARN_IF(1,
  943. "Chainging profile to \"aac_main\"\n");
  944. ERROR_IF(s->options.ltp,
  945. "LTP prediction unavailable in the \"aac_main\" profile\n");
  946. }
  947. s->profile = avctx->profile;
  948. /* Coder limitations */
  949. s->coder = &ff_aac_coders[s->options.coder];
  950. if (s->options.coder == AAC_CODER_ANMR) {
  951. ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
  952. "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
  953. s->options.intensity_stereo = 0;
  954. s->options.pns = 0;
  955. }
  956. ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
  957. "The LPT profile requires experimental compliance, add -strict -2 to enable!\n");
  958. /* M/S introduces horrible artifacts with multichannel files, this is temporary */
  959. if (s->channels > 3)
  960. s->options.mid_side = 0;
  961. if ((ret = dsp_init(avctx, s)) < 0)
  962. goto fail;
  963. if ((ret = alloc_buffers(avctx, s)) < 0)
  964. goto fail;
  965. put_audio_specific_config(avctx);
  966. sizes[0] = ff_aac_swb_size_1024[s->samplerate_index];
  967. sizes[1] = ff_aac_swb_size_128[s->samplerate_index];
  968. lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
  969. lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
  970. for (i = 0; i < s->chan_map[0]; i++)
  971. grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
  972. if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
  973. s->chan_map[0], grouping)) < 0)
  974. goto fail;
  975. s->psypp = ff_psy_preprocess_init(avctx);
  976. ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
  977. s->random_state = 0x1f2e3d4c;
  978. s->abs_pow34 = abs_pow34_v;
  979. s->quant_bands = quantize_bands;
  980. if (ARCH_X86)
  981. ff_aac_dsp_init_x86(s);
  982. if (HAVE_MIPSDSP)
  983. ff_aac_coder_init_mips(s);
  984. if ((ret = ff_thread_once(&aac_table_init, &aac_encode_init_tables)) != 0)
  985. return AVERROR_UNKNOWN;
  986. ff_af_queue_init(avctx, &s->afq);
  987. return 0;
  988. fail:
  989. aac_encode_end(avctx);
  990. return ret;
  991. }
#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
/* Private encoder options, exposed through aacenc_class below.
 * NOTE(review): the BOOL options default to -1 or allow -1 in their range,
 * presumably meaning "automatic" as distinct from an explicit 0/1 — the
 * interpretation lives in the coder code, not visible here; confirm there. */
static const AVOption aacenc_options[] = {
    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
    {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
    {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
    {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
    {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
    {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
    {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
    {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
    {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
    {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
    {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
    {NULL}
};
/* AVClass tying the private options table above to the encoder context. */
static const AVClass aacenc_class = {
    .class_name = "AAC encoder",
    .item_name = av_default_item_name,
    .option = aacenc_options,
    .version = LIBAVUTIL_VERSION_INT,
};
/* Default bitrate of 0 selects the per-element bitrate guessing path in
 * aac_encode_init() (128k per CPE, 16k per LFE, 69k per SCE). */
static const AVCodecDefault aac_encode_defaults[] = {
    { "b", "0" },
    { NULL }
};
/* Codec registration: native AAC encoder, planar-float input only.
 * AV_CODEC_CAP_DELAY matches the 1024-sample initial_padding set in init. */
AVCodec ff_aac_encoder = {
    .name = "aac",
    .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
    .type = AVMEDIA_TYPE_AUDIO,
    .id = AV_CODEC_ID_AAC,
    .priv_data_size = sizeof(AACEncContext),
    .init = aac_encode_init,
    .encode2 = aac_encode_frame,
    .close = aac_encode_end,
    .defaults = aac_encode_defaults,
    .supported_samplerates = mpeg4audio_sample_rates,
    .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
    .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
    .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                  AV_SAMPLE_FMT_NONE },
    .priv_class = &aacenc_class,
};