You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

631 lines
20KB

  1. /*
  2. * ALAC (Apple Lossless Audio Codec) decoder
  3. * Copyright (c) 2005 David Hammerton
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * ALAC (Apple Lossless Audio Codec) decoder
  24. * @author 2005 David Hammerton
  25. * @see http://crazney.net/programs/itunes/alac.html
  26. *
  27. * Note: This decoder expects a 36-byte QuickTime atom to be
  28. * passed through the extradata[_size] fields. This atom is tacked onto
  29. * the end of an 'alac' stsd atom and has the following format:
  30. *
  31. * 32bit atom size
  32. * 32bit tag ("alac")
  33. * 32bit tag version (0)
  34. * 32bit samples per frame (used when not set explicitly in the frames)
  35. * 8bit compatible version (0)
  36. * 8bit sample size
  37. * 8bit history mult (40)
  38. * 8bit initial history (14)
  39. * 8bit rice param limit (10)
  40. * 8bit channels
  41. * 16bit maxRun (255)
  42. * 32bit max coded frame size (0 means unknown)
  43. * 32bit average bitrate (0 means unknown)
  44. * 32bit samplerate
  45. */
  46. #include "avcodec.h"
  47. #include "get_bits.h"
  48. #include "bytestream.h"
  49. #include "unary.h"
  50. #include "mathops.h"
  51. #define ALAC_EXTRADATA_SIZE 36
  52. #define MAX_CHANNELS 2
  53. typedef struct {
  54. AVCodecContext *avctx;
  55. AVFrame frame;
  56. GetBitContext gb;
  57. int channels;
  58. /* buffers */
  59. int32_t *predict_error_buffer[MAX_CHANNELS];
  60. int32_t *output_samples_buffer[MAX_CHANNELS];
  61. int32_t *extra_bits_buffer[MAX_CHANNELS];
  62. uint32_t max_samples_per_frame;
  63. uint8_t sample_size;
  64. uint8_t rice_history_mult;
  65. uint8_t rice_initial_history;
  66. uint8_t rice_limit;
  67. int extra_bits; /**< number of extra bits beyond 16-bit */
  68. } ALACContext;
  69. static inline int decode_scalar(GetBitContext *gb, int k, int readsamplesize)
  70. {
  71. int x = get_unary_0_9(gb);
  72. if (x > 8) { /* RICE THRESHOLD */
  73. /* use alternative encoding */
  74. x = get_bits(gb, readsamplesize);
  75. } else if (k != 1) {
  76. int extrabits = show_bits(gb, k);
  77. /* multiply x by 2^k - 1, as part of their strange algorithm */
  78. x = (x << k) - x;
  79. if (extrabits > 1) {
  80. x += extrabits - 1;
  81. skip_bits(gb, k);
  82. } else
  83. skip_bits(gb, k - 1);
  84. }
  85. return x;
  86. }
  87. static void bastardized_rice_decompress(ALACContext *alac,
  88. int32_t *output_buffer,
  89. int output_size,
  90. int readsamplesize,
  91. int rice_history_mult)
  92. {
  93. int output_count;
  94. unsigned int history = alac->rice_initial_history;
  95. int sign_modifier = 0;
  96. for (output_count = 0; output_count < output_size; output_count++) {
  97. int x, k;
  98. /* read k, that is bits as is */
  99. k = av_log2((history >> 9) + 3);
  100. k = FFMIN(k, alac->rice_limit);
  101. x = decode_scalar(&alac->gb, k, readsamplesize);
  102. x += sign_modifier;
  103. sign_modifier = 0;
  104. output_buffer[output_count] = (x >> 1) ^ -(x & 1);
  105. /* now update the history */
  106. if (x > 0xffff)
  107. history = 0xffff;
  108. else
  109. history += x * rice_history_mult -
  110. ((history * rice_history_mult) >> 9);
  111. /* special case: there may be compressed blocks of 0 */
  112. if ((history < 128) && (output_count+1 < output_size)) {
  113. int block_size;
  114. k = 7 - av_log2(history) + ((history + 16) >> 6 /* / 64 */);
  115. k = FFMIN(k, alac->rice_limit);
  116. block_size = decode_scalar(&alac->gb, k, 16);
  117. if (block_size > 0) {
  118. if(block_size >= output_size - output_count){
  119. av_log(alac->avctx, AV_LOG_ERROR, "invalid zero block size of %d %d %d\n", block_size, output_size, output_count);
  120. block_size= output_size - output_count - 1;
  121. }
  122. memset(&output_buffer[output_count + 1], 0,
  123. block_size * sizeof(*output_buffer));
  124. output_count += block_size;
  125. }
  126. if (block_size <= 0xffff)
  127. sign_modifier = 1;
  128. history = 0;
  129. }
  130. }
  131. }
  /**
   * Return the sign of v: 1 if positive, -1 if negative, 0 if zero.
   */
  static inline int sign_only(int v)
  {
      return (v > 0) - (v < 0);
  }
  /**
   * Reconstruct samples from prediction residuals with an adaptive FIR
   * predictor.
   *
   * @param error_buffer             residuals, output_size entries
   * @param buffer_out               reconstructed samples, output_size entries;
   *                                 may be the same buffer as error_buffer when
   *                                 predictor_coef_num is 31
   * @param output_size              number of samples to reconstruct
   * @param readsamplesize           bit width samples are sign-extended from
   * @param predictor_coef_table     predictor_coef_num coefficients; adapted in
   *                                 place while decoding (unused for orders 0
   *                                 and 31)
   * @param predictor_coef_num       predictor order; 0 copies the residuals,
   *                                 31 selects simple 1st-order prediction
   * @param predictor_quantitization right shift applied to the prediction sum
   */
  static void predictor_decompress_fir_adapt(int32_t *error_buffer,
                                             int32_t *buffer_out,
                                             int output_size,
                                             int readsamplesize,
                                             int16_t *predictor_coef_table,
                                             int predictor_coef_num,
                                             int predictor_quantitization)
  {
      int i;

      /* nothing to reconstruct; do not touch the (possibly empty) buffers */
      if (output_size <= 0)
          return;

      /* first sample always copies */
      *buffer_out = *error_buffer;

      if (output_size == 1)
          return;

      if (!predictor_coef_num) {
          /* no prediction: residuals are the samples */
          memcpy(&buffer_out[1], &error_buffer[1],
                 (output_size - 1) * sizeof(*buffer_out));
          return;
      }

      if (predictor_coef_num == 31) {
          /* simple 1st-order prediction */
          for (i = 1; i < output_size; i++)
              buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                          readsamplesize);
          return;
      }

      /* read warm-up samples; bounded by output_size so that a frame shorter
       * than the predictor order cannot run past the end of the buffers */
      for (i = 1; i <= predictor_coef_num && i < output_size; i++)
          buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                      readsamplesize);

      /* NOTE: 4 and 8 are very common cases that could be optimized. */

      /* general case: buffer_out is advanced by one sample per iteration, so
       * buffer_out[0..predictor_coef_num] is always the current history window
       * and buffer_out[predictor_coef_num + 1] the sample being produced */
      for (i = predictor_coef_num + 1; i < output_size; i++) {
          int j;
          int sum = 0;
          int outval;
          int error_val = error_buffer[i];

          for (j = 0; j < predictor_coef_num; j++) {
              sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) *
                     predictor_coef_table[j];
          }

          /* NOTE(review): a predictor_quantitization of 0 makes this a shift
           * by -1; confirm whether callers can pass 0 here */
          outval = (1 << (predictor_quantitization-1)) + sum;
          outval = outval >> predictor_quantitization;
          outval = outval + buffer_out[0] + error_val;
          outval = sign_extend(outval, readsamplesize);

          buffer_out[predictor_coef_num+1] = outval;

          /* adapt the coefficients in the direction that reduces the error */
          if (error_val > 0) {
              int predictor_num = predictor_coef_num - 1;

              while (predictor_num >= 0 && error_val > 0) {
                  int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num];
                  int sign = sign_only(val);

                  predictor_coef_table[predictor_num] -= sign;

                  val *= sign; /* absolute value */

                  error_val -= ((val >> predictor_quantitization) *
                                (predictor_coef_num - predictor_num));

                  predictor_num--;
              }
          } else if (error_val < 0) {
              int predictor_num = predictor_coef_num - 1;

              while (predictor_num >= 0 && error_val < 0) {
                  int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num];
                  int sign = - sign_only(val);

                  predictor_coef_table[predictor_num] -= sign;

                  val *= sign; /* neg value */

                  error_val -= ((val >> predictor_quantitization) *
                                (predictor_coef_num - predictor_num));

                  predictor_num--;
              }
          }

          buffer_out++;
      }
  }
  217. static void decorrelate_stereo(int32_t *buffer[MAX_CHANNELS],
  218. int numsamples, uint8_t interlacing_shift,
  219. uint8_t interlacing_leftweight)
  220. {
  221. int i;
  222. for (i = 0; i < numsamples; i++) {
  223. int32_t a, b;
  224. a = buffer[0][i];
  225. b = buffer[1][i];
  226. a -= (b * interlacing_leftweight) >> interlacing_shift;
  227. b += a;
  228. buffer[0][i] = b;
  229. buffer[1][i] = a;
  230. }
  231. }
  232. static void append_extra_bits(int32_t *buffer[MAX_CHANNELS],
  233. int32_t *extra_bits_buffer[MAX_CHANNELS],
  234. int extra_bits, int numchannels, int numsamples)
  235. {
  236. int i, ch;
  237. for (ch = 0; ch < numchannels; ch++)
  238. for (i = 0; i < numsamples; i++)
  239. buffer[ch][i] = (buffer[ch][i] << extra_bits) | extra_bits_buffer[ch][i];
  240. }
  241. static void interleave_stereo_16(int32_t *buffer[MAX_CHANNELS],
  242. int16_t *buffer_out, int numsamples)
  243. {
  244. int i;
  245. for (i = 0; i < numsamples; i++) {
  246. *buffer_out++ = buffer[0][i];
  247. *buffer_out++ = buffer[1][i];
  248. }
  249. }
  250. static void interleave_stereo_24(int32_t *buffer[MAX_CHANNELS],
  251. int32_t *buffer_out, int numsamples)
  252. {
  253. int i;
  254. for (i = 0; i < numsamples; i++) {
  255. *buffer_out++ = buffer[0][i] << 8;
  256. *buffer_out++ = buffer[1][i] << 8;
  257. }
  258. }
  259. static int alac_decode_frame(AVCodecContext *avctx, void *data,
  260. int *got_frame_ptr, AVPacket *avpkt)
  261. {
  262. const uint8_t *inbuffer = avpkt->data;
  263. int input_buffer_size = avpkt->size;
  264. ALACContext *alac = avctx->priv_data;
  265. int channels;
  266. unsigned int outputsamples;
  267. int hassize;
  268. unsigned int readsamplesize;
  269. int isnotcompressed;
  270. uint8_t interlacing_shift;
  271. uint8_t interlacing_leftweight;
  272. int i, ch, ret;
  273. init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8);
  274. channels = get_bits(&alac->gb, 3) + 1;
  275. if (channels != avctx->channels) {
  276. av_log(avctx, AV_LOG_ERROR, "frame header channel count mismatch\n");
  277. return AVERROR_INVALIDDATA;
  278. }
  279. skip_bits(&alac->gb, 4); /* element instance tag */
  280. skip_bits(&alac->gb, 12); /* unused header bits */
  281. /* the number of output samples is stored in the frame */
  282. hassize = get_bits1(&alac->gb);
  283. alac->extra_bits = get_bits(&alac->gb, 2) << 3;
  284. /* whether the frame is compressed */
  285. isnotcompressed = get_bits1(&alac->gb);
  286. if (hassize) {
  287. /* now read the number of samples as a 32bit integer */
  288. outputsamples = get_bits_long(&alac->gb, 32);
  289. if (outputsamples > alac->max_samples_per_frame) {
  290. av_log(avctx, AV_LOG_ERROR, "outputsamples %d > %d\n",
  291. outputsamples, alac->max_samples_per_frame);
  292. return -1;
  293. }
  294. } else
  295. outputsamples = alac->max_samples_per_frame;
  296. /* get output buffer */
  297. if (outputsamples > INT32_MAX) {
  298. av_log(avctx, AV_LOG_ERROR, "unsupported block size: %u\n", outputsamples);
  299. return AVERROR_INVALIDDATA;
  300. }
  301. alac->frame.nb_samples = outputsamples;
  302. if ((ret = avctx->get_buffer(avctx, &alac->frame)) < 0) {
  303. av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
  304. return ret;
  305. }
  306. readsamplesize = alac->sample_size - alac->extra_bits + channels - 1;
  307. if (readsamplesize > MIN_CACHE_BITS) {
  308. av_log(avctx, AV_LOG_ERROR, "readsamplesize too big (%d)\n", readsamplesize);
  309. return -1;
  310. }
  311. if (!isnotcompressed) {
  312. /* so it is compressed */
  313. int16_t predictor_coef_table[MAX_CHANNELS][32];
  314. int predictor_coef_num[MAX_CHANNELS];
  315. int prediction_type[MAX_CHANNELS];
  316. int prediction_quantitization[MAX_CHANNELS];
  317. int ricemodifier[MAX_CHANNELS];
  318. interlacing_shift = get_bits(&alac->gb, 8);
  319. interlacing_leftweight = get_bits(&alac->gb, 8);
  320. for (ch = 0; ch < channels; ch++) {
  321. prediction_type[ch] = get_bits(&alac->gb, 4);
  322. prediction_quantitization[ch] = get_bits(&alac->gb, 4);
  323. ricemodifier[ch] = get_bits(&alac->gb, 3);
  324. predictor_coef_num[ch] = get_bits(&alac->gb, 5);
  325. /* read the predictor table */
  326. for (i = 0; i < predictor_coef_num[ch]; i++)
  327. predictor_coef_table[ch][i] = (int16_t)get_bits(&alac->gb, 16);
  328. }
  329. if (alac->extra_bits) {
  330. for (i = 0; i < outputsamples; i++) {
  331. for (ch = 0; ch < channels; ch++)
  332. alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
  333. }
  334. }
  335. for (ch = 0; ch < channels; ch++) {
  336. bastardized_rice_decompress(alac,
  337. alac->predict_error_buffer[ch],
  338. outputsamples,
  339. readsamplesize,
  340. ricemodifier[ch] * alac->rice_history_mult / 4);
  341. /* adaptive FIR filter */
  342. if (prediction_type[ch] == 15) {
  343. /* Prediction type 15 runs the adaptive FIR twice.
  344. * The first pass uses the special-case coef_num = 31, while
  345. * the second pass uses the coefs from the bitstream.
  346. *
  347. * However, this prediction type is not currently used by the
  348. * reference encoder.
  349. */
  350. predictor_decompress_fir_adapt(alac->predict_error_buffer[ch],
  351. alac->predict_error_buffer[ch],
  352. outputsamples, readsamplesize,
  353. NULL, 31, 0);
  354. } else if (prediction_type[ch] > 0) {
  355. av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
  356. prediction_type[ch]);
  357. }
  358. predictor_decompress_fir_adapt(alac->predict_error_buffer[ch],
  359. alac->output_samples_buffer[ch],
  360. outputsamples, readsamplesize,
  361. predictor_coef_table[ch],
  362. predictor_coef_num[ch],
  363. prediction_quantitization[ch]);
  364. }
  365. } else {
  366. /* not compressed, easy case */
  367. for (i = 0; i < outputsamples; i++) {
  368. for (ch = 0; ch < channels; ch++) {
  369. alac->output_samples_buffer[ch][i] = get_sbits_long(&alac->gb,
  370. alac->sample_size);
  371. }
  372. }
  373. alac->extra_bits = 0;
  374. interlacing_shift = 0;
  375. interlacing_leftweight = 0;
  376. }
  377. if (get_bits(&alac->gb, 3) != 7)
  378. av_log(avctx, AV_LOG_ERROR, "Error : Wrong End Of Frame\n");
  379. if (channels == 2 && interlacing_leftweight) {
  380. decorrelate_stereo(alac->output_samples_buffer, outputsamples,
  381. interlacing_shift, interlacing_leftweight);
  382. }
  383. if (alac->extra_bits) {
  384. append_extra_bits(alac->output_samples_buffer, alac->extra_bits_buffer,
  385. alac->extra_bits, alac->channels, outputsamples);
  386. }
  387. switch(alac->sample_size) {
  388. case 16:
  389. if (channels == 2) {
  390. interleave_stereo_16(alac->output_samples_buffer,
  391. (int16_t *)alac->frame.data[0], outputsamples);
  392. } else {
  393. int16_t *outbuffer = (int16_t *)alac->frame.data[0];
  394. for (i = 0; i < outputsamples; i++) {
  395. outbuffer[i] = alac->output_samples_buffer[0][i];
  396. }
  397. }
  398. break;
  399. case 24:
  400. if (channels == 2) {
  401. interleave_stereo_24(alac->output_samples_buffer,
  402. (int32_t *)alac->frame.data[0], outputsamples);
  403. } else {
  404. int32_t *outbuffer = (int32_t *)alac->frame.data[0];
  405. for (i = 0; i < outputsamples; i++)
  406. outbuffer[i] = alac->output_samples_buffer[0][i] << 8;
  407. }
  408. break;
  409. }
  410. if (input_buffer_size * 8 - get_bits_count(&alac->gb) > 8)
  411. av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", input_buffer_size * 8 - get_bits_count(&alac->gb));
  412. *got_frame_ptr = 1;
  413. *(AVFrame *)data = alac->frame;
  414. return input_buffer_size;
  415. }
  416. static av_cold int alac_decode_close(AVCodecContext *avctx)
  417. {
  418. ALACContext *alac = avctx->priv_data;
  419. int ch;
  420. for (ch = 0; ch < alac->channels; ch++) {
  421. av_freep(&alac->predict_error_buffer[ch]);
  422. av_freep(&alac->output_samples_buffer[ch]);
  423. av_freep(&alac->extra_bits_buffer[ch]);
  424. }
  425. return 0;
  426. }
  427. static int allocate_buffers(ALACContext *alac)
  428. {
  429. int ch;
  430. for (ch = 0; ch < alac->channels; ch++) {
  431. int buf_size = alac->max_samples_per_frame * sizeof(int32_t);
  432. FF_ALLOC_OR_GOTO(alac->avctx, alac->predict_error_buffer[ch],
  433. buf_size, buf_alloc_fail);
  434. FF_ALLOC_OR_GOTO(alac->avctx, alac->output_samples_buffer[ch],
  435. buf_size, buf_alloc_fail);
  436. FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
  437. buf_size, buf_alloc_fail);
  438. }
  439. return 0;
  440. buf_alloc_fail:
  441. alac_decode_close(alac->avctx);
  442. return AVERROR(ENOMEM);
  443. }
  444. static int alac_set_info(ALACContext *alac)
  445. {
  446. GetByteContext gb;
  447. bytestream2_init(&gb, alac->avctx->extradata,
  448. alac->avctx->extradata_size);
  449. bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4
  450. alac->max_samples_per_frame = bytestream2_get_be32u(&gb);
  451. if (alac->max_samples_per_frame >= UINT_MAX/4){
  452. av_log(alac->avctx, AV_LOG_ERROR,
  453. "max_samples_per_frame too large\n");
  454. return AVERROR_INVALIDDATA;
  455. }
  456. bytestream2_skipu(&gb, 1); // compatible version
  457. alac->sample_size = bytestream2_get_byteu(&gb);
  458. alac->rice_history_mult = bytestream2_get_byteu(&gb);
  459. alac->rice_initial_history = bytestream2_get_byteu(&gb);
  460. alac->rice_limit = bytestream2_get_byteu(&gb);
  461. alac->channels = bytestream2_get_byteu(&gb);
  462. bytestream2_get_be16u(&gb); // maxRun
  463. bytestream2_get_be32u(&gb); // max coded frame size
  464. bytestream2_get_be32u(&gb); // average bitrate
  465. bytestream2_get_be32u(&gb); // samplerate
  466. return 0;
  467. }
  468. static av_cold int alac_decode_init(AVCodecContext * avctx)
  469. {
  470. int ret;
  471. ALACContext *alac = avctx->priv_data;
  472. alac->avctx = avctx;
  473. /* initialize from the extradata */
  474. if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) {
  475. av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n",
  476. ALAC_EXTRADATA_SIZE);
  477. return -1;
  478. }
  479. if (alac_set_info(alac)) {
  480. av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n");
  481. return -1;
  482. }
  483. switch (alac->sample_size) {
  484. case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  485. break;
  486. case 24: avctx->sample_fmt = AV_SAMPLE_FMT_S32;
  487. break;
  488. default: av_log_ask_for_sample(avctx, "Sample depth %d is not supported.\n",
  489. alac->sample_size);
  490. return AVERROR_PATCHWELCOME;
  491. }
  492. if (alac->channels < 1) {
  493. av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
  494. alac->channels = avctx->channels;
  495. } else {
  496. if (alac->channels > MAX_CHANNELS)
  497. alac->channels = avctx->channels;
  498. else
  499. avctx->channels = alac->channels;
  500. }
  501. if (avctx->channels > MAX_CHANNELS) {
  502. av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
  503. avctx->channels);
  504. return AVERROR_PATCHWELCOME;
  505. }
  506. if ((ret = allocate_buffers(alac)) < 0) {
  507. av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
  508. return ret;
  509. }
  510. avcodec_get_frame_defaults(&alac->frame);
  511. avctx->coded_frame = &alac->frame;
  512. return 0;
  513. }
  514. AVCodec ff_alac_decoder = {
  515. .name = "alac",
  516. .type = AVMEDIA_TYPE_AUDIO,
  517. .id = CODEC_ID_ALAC,
  518. .priv_data_size = sizeof(ALACContext),
  519. .init = alac_decode_init,
  520. .close = alac_decode_close,
  521. .decode = alac_decode_frame,
  522. .capabilities = CODEC_CAP_DR1,
  523. .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
  524. };