You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

629 lines
20KB

  1. /*
  2. * ALAC (Apple Lossless Audio Codec) decoder
  3. * Copyright (c) 2005 David Hammerton
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * ALAC (Apple Lossless Audio Codec) decoder
  24. * @author 2005 David Hammerton
  25. * @see http://crazney.net/programs/itunes/alac.html
  26. *
  27. * Note: This decoder expects a 36-byte QuickTime atom to be
  28. * passed through the extradata[_size] fields. This atom is tacked onto
  29. * the end of an 'alac' stsd atom and has the following format:
  30. *
  31. * 32bit atom size
  32. * 32bit tag ("alac")
  33. * 32bit tag version (0)
  34. * 32bit samples per frame (used when not set explicitly in the frames)
  35. * 8bit compatible version (0)
  36. * 8bit sample size
  37. * 8bit history mult (40)
  38. * 8bit initial history (10)
  39. * 8bit rice param limit (14)
  40. * 8bit channels
  41. * 16bit maxRun (255)
  42. * 32bit max coded frame size (0 means unknown)
  43. * 32bit average bitrate (0 means unknown)
  44. * 32bit samplerate
  45. */
  46. #include <inttypes.h>
  47. #include "libavutil/channel_layout.h"
  48. #include "libavutil/opt.h"
  49. #include "avcodec.h"
  50. #include "get_bits.h"
  51. #include "bytestream.h"
  52. #include "internal.h"
  53. #include "thread.h"
  54. #include "unary.h"
  55. #include "mathops.h"
  56. #include "alac_data.h"
  57. #include "alacdsp.h"
  58. #define ALAC_EXTRADATA_SIZE 36
  /**
   * Private decoder state, stored in AVCodecContext.priv_data.
   *
   * The three buffer arrays hold at most two entries because one syntax
   * element carries at most two channels.
   */
  typedef struct ALACContext {
      AVClass *class;          /* NOTE(review): presumably must stay first for AVOptions - confirm */
      AVCodecContext *avctx;   /* owning codec context, set in init / init_thread_copy */
      GetBitContext gb;        /* bit reader over the packet currently being decoded */
      int channels;            /* channel count, taken from the extradata or from avctx */
      int32_t *predict_error_buffer[2];  /* Rice-decoded prediction residuals */
      int32_t *output_samples_buffer[2]; /* reconstructed samples; aliases the frame data when direct_output is set */
      int32_t *extra_bits_buffer[2];     /* extra low bits read separately from the bitstream */
      uint32_t max_samples_per_frame;    /* from the extradata; also sizes the buffers above */
      uint8_t sample_size;               /* bits per sample, from the extradata */
      uint8_t rice_history_mult;         /* from the extradata */
      uint8_t rice_initial_history;      /* from the extradata */
      uint8_t rice_limit;                /* upper bound on the Rice parameter, from the extradata */
      int extra_bits; /**< number of extra bits beyond 16-bit */
      int nb_samples; /**< number of samples in the current frame */
      int direct_output;       /* non-zero when sample_size > 16: decode straight into the frame */
      int extra_bit_bug;       /* "extra_bits_bug" option: append extra bits before stereo decorrelation */
      ALACDSPContext dsp;      /* DSP function pointers, filled by ff_alacdsp_init() */
  } ALACContext;
  78. static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
  79. {
  80. unsigned int x = get_unary_0_9(gb);
  81. if (x > 8) { /* RICE THRESHOLD */
  82. /* use alternative encoding */
  83. x = get_bits_long(gb, bps);
  84. } else if (k != 1) {
  85. int extrabits = show_bits(gb, k);
  86. /* multiply x by 2^k - 1, as part of their strange algorithm */
  87. x = (x << k) - x;
  88. if (extrabits > 1) {
  89. x += extrabits - 1;
  90. skip_bits(gb, k);
  91. } else
  92. skip_bits(gb, k - 1);
  93. }
  94. return x;
  95. }
  96. static int rice_decompress(ALACContext *alac, int32_t *output_buffer,
  97. int nb_samples, int bps, int rice_history_mult)
  98. {
  99. int i;
  100. unsigned int history = alac->rice_initial_history;
  101. int sign_modifier = 0;
  102. for (i = 0; i < nb_samples; i++) {
  103. int k;
  104. unsigned int x;
  105. if(get_bits_left(&alac->gb) <= 0)
  106. return -1;
  107. /* calculate rice param and decode next value */
  108. k = av_log2((history >> 9) + 3);
  109. k = FFMIN(k, alac->rice_limit);
  110. x = decode_scalar(&alac->gb, k, bps);
  111. x += sign_modifier;
  112. sign_modifier = 0;
  113. output_buffer[i] = (x >> 1) ^ -(x & 1);
  114. /* update the history */
  115. if (x > 0xffff)
  116. history = 0xffff;
  117. else
  118. history += x * rice_history_mult -
  119. ((history * rice_history_mult) >> 9);
  120. /* special case: there may be compressed blocks of 0 */
  121. if ((history < 128) && (i + 1 < nb_samples)) {
  122. int block_size;
  123. /* calculate rice param and decode block size */
  124. k = 7 - av_log2(history) + ((history + 16) >> 6);
  125. k = FFMIN(k, alac->rice_limit);
  126. block_size = decode_scalar(&alac->gb, k, 16);
  127. if (block_size > 0) {
  128. if (block_size >= nb_samples - i) {
  129. av_log(alac->avctx, AV_LOG_ERROR,
  130. "invalid zero block size of %d %d %d\n", block_size,
  131. nb_samples, i);
  132. block_size = nb_samples - i - 1;
  133. }
  134. memset(&output_buffer[i + 1], 0,
  135. block_size * sizeof(*output_buffer));
  136. i += block_size;
  137. }
  138. if (block_size <= 0xffff)
  139. sign_modifier = 1;
  140. history = 0;
  141. }
  142. }
  143. return 0;
  144. }
  /** Return -1, 0 or 1 for a negative, zero or positive v respectively. */
  static inline int sign_only(int v)
  {
      return (v > 0) - (v < 0);
  }
  /**
   * Reconstruct samples from prediction residuals with the adaptive LPC filter.
   *
   * error_buffer and buffer_out may be the same buffer (the lpc_order == 31
   * path is called that way), so they must not be treated as non-aliasing.
   *
   * All sample arithmetic uses unsigned wrap-around: the residuals and
   * coefficients come from the bitstream, and signed overflow would be
   * undefined behaviour.
   *
   * @param error_buffer residuals, nb_samples entries
   * @param buffer_out   reconstructed samples, nb_samples entries
   * @param nb_samples   number of samples to reconstruct
   * @param bps          width passed to sign_extend() for every output sample
   * @param lpc_coefs    lpc_order coefficients, adapted in place; unused when
   *                     lpc_order is 0 or 31
   * @param lpc_order    predictor order; 0 copies the residuals, 31 selects
   *                     plain first-order prediction
   * @param lpc_quant    right shift applied to the prediction sum
   */
  static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
                             int nb_samples, int bps, int16_t *lpc_coefs,
                             int lpc_order, int lpc_quant)
  {
      /* rounding term; with lpc_quant == 0 nothing is shifted out, and
       * "1 << (lpc_quant - 1)" would be an undefined negative shift */
      const uint32_t rounding = lpc_quant > 0 ? 1U << (lpc_quant - 1) : 0;
      int32_t *pred = buffer_out;
      int i;

      /* first sample always copies */
      *buffer_out = *error_buffer;

      if (nb_samples <= 1)
          return;

      if (!lpc_order) {
          memcpy(&buffer_out[1], &error_buffer[1],
                 (nb_samples - 1) * sizeof(*buffer_out));
          return;
      }

      if (lpc_order == 31) {
          /* simple 1st-order prediction */
          for (i = 1; i < nb_samples; i++) {
              buffer_out[i] = sign_extend((uint32_t)buffer_out[i - 1] +
                                          error_buffer[i], bps);
          }
          return;
      }

      /* read warm-up samples */
      for (i = 1; i <= lpc_order && i < nb_samples; i++)
          buffer_out[i] = sign_extend((uint32_t)buffer_out[i - 1] +
                                      error_buffer[i], bps);

      /* NOTE: 4 and 8 are very common cases that could be optimized. */

      for (; i < nb_samples; i++) {
          uint32_t acc       = 0;
          uint32_t error_val = error_buffer[i];
          const uint32_t d   = *pred++;
          int error_sign;
          int val;
          int j;

          /* LPC prediction */
          for (j = 0; j < lpc_order; j++)
              acc += ((uint32_t)pred[j] - d) * lpc_coefs[j];
          val = (int32_t)(acc + rounding) >> lpc_quant;
          val = (int32_t)((uint32_t)val + d + error_val);
          buffer_out[i] = sign_extend(val, bps);

          /* adapt LPC coefficients */
          error_sign = sign_only((int32_t)error_val);
          if (error_sign) {
              for (j = 0; j < lpc_order &&
                          (int32_t)(error_val * error_sign) > 0; j++) {
                  int sign;

                  val  = (int32_t)(d - (uint32_t)pred[j]);
                  sign = sign_only(val) * error_sign;
                  lpc_coefs[j] -= sign;
                  val  = (int32_t)((uint32_t)val * sign);
                  error_val -= (uint32_t)(val >> lpc_quant) * (j + 1U);
              }
          }
      }
  }
  202. static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
  203. int channels)
  204. {
  205. ALACContext *alac = avctx->priv_data;
  206. int has_size, bps, is_compressed, decorr_shift, decorr_left_weight, ret;
  207. uint32_t output_samples;
  208. int i, ch;
  209. skip_bits(&alac->gb, 4); /* element instance tag */
  210. skip_bits(&alac->gb, 12); /* unused header bits */
  211. /* the number of output samples is stored in the frame */
  212. has_size = get_bits1(&alac->gb);
  213. alac->extra_bits = get_bits(&alac->gb, 2) << 3;
  214. bps = alac->sample_size - alac->extra_bits + channels - 1;
  215. if (bps > 32U) {
  216. av_log(avctx, AV_LOG_ERROR, "bps is unsupported: %d\n", bps);
  217. return AVERROR_PATCHWELCOME;
  218. }
  219. /* whether the frame is compressed */
  220. is_compressed = !get_bits1(&alac->gb);
  221. if (has_size)
  222. output_samples = get_bits_long(&alac->gb, 32);
  223. else
  224. output_samples = alac->max_samples_per_frame;
  225. if (!output_samples || output_samples > alac->max_samples_per_frame) {
  226. av_log(avctx, AV_LOG_ERROR, "invalid samples per frame: %"PRIu32"\n",
  227. output_samples);
  228. return AVERROR_INVALIDDATA;
  229. }
  230. if (!alac->nb_samples) {
  231. ThreadFrame tframe = { .f = frame };
  232. /* get output buffer */
  233. frame->nb_samples = output_samples;
  234. if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
  235. return ret;
  236. } else if (output_samples != alac->nb_samples) {
  237. av_log(avctx, AV_LOG_ERROR, "sample count mismatch: %"PRIu32" != %d\n",
  238. output_samples, alac->nb_samples);
  239. return AVERROR_INVALIDDATA;
  240. }
  241. alac->nb_samples = output_samples;
  242. if (alac->direct_output) {
  243. for (ch = 0; ch < channels; ch++)
  244. alac->output_samples_buffer[ch] = (int32_t *)frame->extended_data[ch_index + ch];
  245. }
  246. if (is_compressed) {
  247. int16_t lpc_coefs[2][32];
  248. int lpc_order[2];
  249. int prediction_type[2];
  250. int lpc_quant[2];
  251. int rice_history_mult[2];
  252. if (!alac->rice_limit) {
  253. avpriv_request_sample(alac->avctx,
  254. "Compression with rice limit 0");
  255. return AVERROR(ENOSYS);
  256. }
  257. decorr_shift = get_bits(&alac->gb, 8);
  258. decorr_left_weight = get_bits(&alac->gb, 8);
  259. for (ch = 0; ch < channels; ch++) {
  260. prediction_type[ch] = get_bits(&alac->gb, 4);
  261. lpc_quant[ch] = get_bits(&alac->gb, 4);
  262. rice_history_mult[ch] = get_bits(&alac->gb, 3);
  263. lpc_order[ch] = get_bits(&alac->gb, 5);
  264. if (lpc_order[ch] >= alac->max_samples_per_frame)
  265. return AVERROR_INVALIDDATA;
  266. /* read the predictor table */
  267. for (i = lpc_order[ch] - 1; i >= 0; i--)
  268. lpc_coefs[ch][i] = get_sbits(&alac->gb, 16);
  269. }
  270. if (alac->extra_bits) {
  271. for (i = 0; i < alac->nb_samples; i++) {
  272. if(get_bits_left(&alac->gb) <= 0)
  273. return -1;
  274. for (ch = 0; ch < channels; ch++)
  275. alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
  276. }
  277. }
  278. for (ch = 0; ch < channels; ch++) {
  279. int ret=rice_decompress(alac, alac->predict_error_buffer[ch],
  280. alac->nb_samples, bps,
  281. rice_history_mult[ch] * alac->rice_history_mult / 4);
  282. if(ret<0)
  283. return ret;
  284. /* adaptive FIR filter */
  285. if (prediction_type[ch] == 15) {
  286. /* Prediction type 15 runs the adaptive FIR twice.
  287. * The first pass uses the special-case coef_num = 31, while
  288. * the second pass uses the coefs from the bitstream.
  289. *
  290. * However, this prediction type is not currently used by the
  291. * reference encoder.
  292. */
  293. lpc_prediction(alac->predict_error_buffer[ch],
  294. alac->predict_error_buffer[ch],
  295. alac->nb_samples, bps, NULL, 31, 0);
  296. } else if (prediction_type[ch] > 0) {
  297. av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
  298. prediction_type[ch]);
  299. }
  300. lpc_prediction(alac->predict_error_buffer[ch],
  301. alac->output_samples_buffer[ch], alac->nb_samples,
  302. bps, lpc_coefs[ch], lpc_order[ch], lpc_quant[ch]);
  303. }
  304. } else {
  305. /* not compressed, easy case */
  306. for (i = 0; i < alac->nb_samples; i++) {
  307. if(get_bits_left(&alac->gb) <= 0)
  308. return -1;
  309. for (ch = 0; ch < channels; ch++) {
  310. alac->output_samples_buffer[ch][i] =
  311. get_sbits_long(&alac->gb, alac->sample_size);
  312. }
  313. }
  314. alac->extra_bits = 0;
  315. decorr_shift = 0;
  316. decorr_left_weight = 0;
  317. }
  318. if (channels == 2) {
  319. if (alac->extra_bits && alac->extra_bit_bug) {
  320. alac->dsp.append_extra_bits[1](alac->output_samples_buffer, alac->extra_bits_buffer,
  321. alac->extra_bits, channels, alac->nb_samples);
  322. }
  323. if (decorr_left_weight) {
  324. alac->dsp.decorrelate_stereo(alac->output_samples_buffer, alac->nb_samples,
  325. decorr_shift, decorr_left_weight);
  326. }
  327. if (alac->extra_bits && !alac->extra_bit_bug) {
  328. alac->dsp.append_extra_bits[1](alac->output_samples_buffer, alac->extra_bits_buffer,
  329. alac->extra_bits, channels, alac->nb_samples);
  330. }
  331. } else if (alac->extra_bits) {
  332. alac->dsp.append_extra_bits[0](alac->output_samples_buffer, alac->extra_bits_buffer,
  333. alac->extra_bits, channels, alac->nb_samples);
  334. }
  335. switch(alac->sample_size) {
  336. case 16: {
  337. for (ch = 0; ch < channels; ch++) {
  338. int16_t *outbuffer = (int16_t *)frame->extended_data[ch_index + ch];
  339. for (i = 0; i < alac->nb_samples; i++)
  340. *outbuffer++ = alac->output_samples_buffer[ch][i];
  341. }}
  342. break;
  343. case 24: {
  344. for (ch = 0; ch < channels; ch++) {
  345. for (i = 0; i < alac->nb_samples; i++)
  346. alac->output_samples_buffer[ch][i] <<= 8;
  347. }}
  348. break;
  349. }
  350. return 0;
  351. }
  352. static int alac_decode_frame(AVCodecContext *avctx, void *data,
  353. int *got_frame_ptr, AVPacket *avpkt)
  354. {
  355. ALACContext *alac = avctx->priv_data;
  356. AVFrame *frame = data;
  357. enum AlacRawDataBlockType element;
  358. int channels;
  359. int ch, ret, got_end;
  360. if ((ret = init_get_bits8(&alac->gb, avpkt->data, avpkt->size)) < 0)
  361. return ret;
  362. got_end = 0;
  363. alac->nb_samples = 0;
  364. ch = 0;
  365. while (get_bits_left(&alac->gb) >= 3) {
  366. element = get_bits(&alac->gb, 3);
  367. if (element == TYPE_END) {
  368. got_end = 1;
  369. break;
  370. }
  371. if (element > TYPE_CPE && element != TYPE_LFE) {
  372. av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d\n", element);
  373. return AVERROR_PATCHWELCOME;
  374. }
  375. channels = (element == TYPE_CPE) ? 2 : 1;
  376. if (ch + channels > alac->channels ||
  377. ff_alac_channel_layout_offsets[alac->channels - 1][ch] + channels > alac->channels) {
  378. av_log(avctx, AV_LOG_ERROR, "invalid element channel count\n");
  379. return AVERROR_INVALIDDATA;
  380. }
  381. ret = decode_element(avctx, frame,
  382. ff_alac_channel_layout_offsets[alac->channels - 1][ch],
  383. channels);
  384. if (ret < 0 && get_bits_left(&alac->gb))
  385. return ret;
  386. ch += channels;
  387. }
  388. if (!got_end) {
  389. av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n");
  390. return AVERROR_INVALIDDATA;
  391. }
  392. if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) {
  393. av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n",
  394. avpkt->size * 8 - get_bits_count(&alac->gb));
  395. }
  396. if (alac->channels == ch)
  397. *got_frame_ptr = 1;
  398. else
  399. av_log(avctx, AV_LOG_WARNING, "Failed to decode all channels\n");
  400. return avpkt->size;
  401. }
  402. static av_cold int alac_decode_close(AVCodecContext *avctx)
  403. {
  404. ALACContext *alac = avctx->priv_data;
  405. int ch;
  406. for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
  407. av_freep(&alac->predict_error_buffer[ch]);
  408. if (!alac->direct_output)
  409. av_freep(&alac->output_samples_buffer[ch]);
  410. av_freep(&alac->extra_bits_buffer[ch]);
  411. }
  412. return 0;
  413. }
  414. static int allocate_buffers(ALACContext *alac)
  415. {
  416. int ch;
  417. int buf_size = alac->max_samples_per_frame * sizeof(int32_t);
  418. for (ch = 0; ch < 2; ch++) {
  419. alac->predict_error_buffer[ch] = NULL;
  420. alac->output_samples_buffer[ch] = NULL;
  421. alac->extra_bits_buffer[ch] = NULL;
  422. }
  423. for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
  424. FF_ALLOC_OR_GOTO(alac->avctx, alac->predict_error_buffer[ch],
  425. buf_size, buf_alloc_fail);
  426. alac->direct_output = alac->sample_size > 16;
  427. if (!alac->direct_output) {
  428. FF_ALLOC_OR_GOTO(alac->avctx, alac->output_samples_buffer[ch],
  429. buf_size, buf_alloc_fail);
  430. }
  431. FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
  432. buf_size, buf_alloc_fail);
  433. }
  434. return 0;
  435. buf_alloc_fail:
  436. alac_decode_close(alac->avctx);
  437. return AVERROR(ENOMEM);
  438. }
  439. static int alac_set_info(ALACContext *alac)
  440. {
  441. GetByteContext gb;
  442. bytestream2_init(&gb, alac->avctx->extradata,
  443. alac->avctx->extradata_size);
  444. bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4
  445. alac->max_samples_per_frame = bytestream2_get_be32u(&gb);
  446. if (!alac->max_samples_per_frame ||
  447. alac->max_samples_per_frame > INT_MAX / sizeof(int32_t)) {
  448. av_log(alac->avctx, AV_LOG_ERROR,
  449. "max samples per frame invalid: %"PRIu32"\n",
  450. alac->max_samples_per_frame);
  451. return AVERROR_INVALIDDATA;
  452. }
  453. bytestream2_skipu(&gb, 1); // compatible version
  454. alac->sample_size = bytestream2_get_byteu(&gb);
  455. alac->rice_history_mult = bytestream2_get_byteu(&gb);
  456. alac->rice_initial_history = bytestream2_get_byteu(&gb);
  457. alac->rice_limit = bytestream2_get_byteu(&gb);
  458. alac->channels = bytestream2_get_byteu(&gb);
  459. bytestream2_get_be16u(&gb); // maxRun
  460. bytestream2_get_be32u(&gb); // max coded frame size
  461. bytestream2_get_be32u(&gb); // average bitrate
  462. bytestream2_get_be32u(&gb); // samplerate
  463. return 0;
  464. }
  465. static av_cold int alac_decode_init(AVCodecContext * avctx)
  466. {
  467. int ret;
  468. ALACContext *alac = avctx->priv_data;
  469. alac->avctx = avctx;
  470. /* initialize from the extradata */
  471. if (alac->avctx->extradata_size < ALAC_EXTRADATA_SIZE) {
  472. av_log(avctx, AV_LOG_ERROR, "extradata is too small\n");
  473. return AVERROR_INVALIDDATA;
  474. }
  475. if (alac_set_info(alac)) {
  476. av_log(avctx, AV_LOG_ERROR, "set_info failed\n");
  477. return -1;
  478. }
  479. switch (alac->sample_size) {
  480. case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
  481. break;
  482. case 24:
  483. case 32: avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
  484. break;
  485. default: avpriv_request_sample(avctx, "Sample depth %d", alac->sample_size);
  486. return AVERROR_PATCHWELCOME;
  487. }
  488. avctx->bits_per_raw_sample = alac->sample_size;
  489. if (alac->channels < 1) {
  490. av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
  491. alac->channels = avctx->channels;
  492. } else {
  493. if (alac->channels > ALAC_MAX_CHANNELS)
  494. alac->channels = avctx->channels;
  495. else
  496. avctx->channels = alac->channels;
  497. }
  498. if (avctx->channels > ALAC_MAX_CHANNELS || avctx->channels <= 0 ) {
  499. av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
  500. avctx->channels);
  501. return AVERROR_PATCHWELCOME;
  502. }
  503. avctx->channel_layout = ff_alac_channel_layouts[alac->channels - 1];
  504. if ((ret = allocate_buffers(alac)) < 0) {
  505. av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
  506. return ret;
  507. }
  508. ff_alacdsp_init(&alac->dsp);
  509. return 0;
  510. }
  #if HAVE_THREADS
  /**
   * Set up the private context of a frame-thread copy: point it at its own
   * codec context and give it its own work buffers.
   *
   * @return the result of allocate_buffers()
   */
  static int init_thread_copy(AVCodecContext *avctx)
  {
      ALACContext *thread_ctx = avctx->priv_data;

      thread_ctx->avctx = avctx;

      return allocate_buffers(thread_ctx);
  }
  #endif
  /* Decoder private options. "extra_bits_bug" is an integer flag (0..1,
   * default 0) stored in ALACContext.extra_bit_bug. */
  static const AVOption options[] = {
      { "extra_bits_bug", "Force non-standard decoding process",
        offsetof(ALACContext, extra_bit_bug), AV_OPT_TYPE_INT, { .i64 = 0 },
        0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM },
      { NULL }, /* end of table */
  };
  /* AVClass for the decoder's private context; ties the option table above to
   * the codec through ff_alac_decoder.priv_class. */
  static const AVClass alac_class = {
      .class_name = "alac",
      .item_name = av_default_item_name,
      .option = options,
      .version = LIBAVUTIL_VERSION_INT,
  };
  /* Codec registration entry for the ALAC decoder: wires the init, close and
   * decode callbacks defined in this file and declares the DR1 and
   * frame-threading capabilities. */
  AVCodec ff_alac_decoder = {
      .name = "alac",
      .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
      .type = AVMEDIA_TYPE_AUDIO,
      .id = AV_CODEC_ID_ALAC,
      .priv_data_size = sizeof(ALACContext),
      .init = alac_decode_init,
      .close = alac_decode_close,
      .decode = alac_decode_frame,
      /* per-thread context setup; only present when threads are enabled */
      .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
      .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
      .priv_class = &alac_class
  };