You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

653 lines
22KB

  1. /*
  2. * ALAC (Apple Lossless Audio Codec) decoder
  3. * Copyright (c) 2005 David Hammerton
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * ALAC (Apple Lossless Audio Codec) decoder
  24. * @author 2005 David Hammerton
  25. * @see http://crazney.net/programs/itunes/alac.html
  26. *
  27. * Note: This decoder expects a 36-byte QuickTime atom to be
  28. * passed through the extradata[_size] fields. This atom is tacked onto
  29. * the end of an 'alac' stsd atom and has the following format:
  30. *
  31. * 32bit atom size
  32. * 32bit tag ("alac")
  33. * 32bit tag version (0)
  34. * 32bit samples per frame (used when not set explicitly in the frames)
  35. * 8bit compatible version (0)
  36. * 8bit sample size
  37. * 8bit history mult (40)
  38. * 8bit initial history (14)
  39. * 8bit kmodifier (10)
  40. * 8bit channels
  41. * 16bit maxRun (255)
  42. * 32bit max coded frame size (0 means unknown)
  43. * 32bit average bitrate (0 means unknown)
  44. * 32bit samplerate
  45. */
  46. #include "avcodec.h"
  47. #include "get_bits.h"
  48. #include "bytestream.h"
  49. #include "unary.h"
  50. #include "mathops.h"
  51. #define ALAC_EXTRADATA_SIZE 36
  52. #define MAX_CHANNELS 2
  53. typedef struct {
  54. AVCodecContext *avctx;
  55. AVFrame frame;
  56. GetBitContext gb;
  57. int numchannels;
  58. /* buffers */
  59. int32_t *predicterror_buffer[MAX_CHANNELS];
  60. int32_t *outputsamples_buffer[MAX_CHANNELS];
  61. int32_t *extra_bits_buffer[MAX_CHANNELS];
  62. uint32_t setinfo_max_samples_per_frame;
  63. uint8_t setinfo_sample_size;
  64. uint8_t setinfo_rice_historymult;
  65. uint8_t setinfo_rice_initialhistory;
  66. uint8_t setinfo_rice_kmodifier;
  67. int extra_bits; /**< number of extra bits beyond 16-bit */
  68. } ALACContext;
  69. static inline int decode_scalar(GetBitContext *gb, int k, int limit, int readsamplesize){
  70. int x = get_unary_0_9(gb);
  71. if (x > 8) { /* RICE THRESHOLD */
  72. /* use alternative encoding */
  73. x = get_bits(gb, readsamplesize);
  74. } else {
  75. if (k >= limit)
  76. k = limit;
  77. if (k != 1) {
  78. int extrabits = show_bits(gb, k);
  79. /* multiply x by 2^k - 1, as part of their strange algorithm */
  80. x = (x << k) - x;
  81. if (extrabits > 1) {
  82. x += extrabits - 1;
  83. skip_bits(gb, k);
  84. } else
  85. skip_bits(gb, k - 1);
  86. }
  87. }
  88. return x;
  89. }
  90. static void bastardized_rice_decompress(ALACContext *alac,
  91. int32_t *output_buffer,
  92. int output_size,
  93. int readsamplesize,
  94. int rice_initialhistory,
  95. int rice_kmodifier,
  96. int rice_historymult,
  97. int rice_kmodifier_mask)
  98. {
  99. int output_count;
  100. unsigned int history = rice_initialhistory;
  101. int sign_modifier = 0;
  102. for (output_count = 0; output_count < output_size; output_count++) {
  103. int32_t x;
  104. int32_t x_modified;
  105. int32_t final_val;
  106. /* standard rice encoding */
  107. int k; /* size of extra bits */
  108. /* read k, that is bits as is */
  109. k = av_log2((history >> 9) + 3);
  110. x= decode_scalar(&alac->gb, k, rice_kmodifier, readsamplesize);
  111. x_modified = sign_modifier + x;
  112. final_val = (x_modified + 1) / 2;
  113. if (x_modified & 1) final_val *= -1;
  114. output_buffer[output_count] = final_val;
  115. sign_modifier = 0;
  116. /* now update the history */
  117. history += x_modified * rice_historymult
  118. - ((history * rice_historymult) >> 9);
  119. if (x_modified > 0xffff)
  120. history = 0xffff;
  121. /* special case: there may be compressed blocks of 0 */
  122. if ((history < 128) && (output_count+1 < output_size)) {
  123. int k;
  124. unsigned int block_size;
  125. sign_modifier = 1;
  126. k = 7 - av_log2(history) + ((history + 16) >> 6 /* / 64 */);
  127. block_size= decode_scalar(&alac->gb, k, rice_kmodifier, 16);
  128. if (block_size > 0) {
  129. if(block_size >= output_size - output_count){
  130. av_log(alac->avctx, AV_LOG_ERROR, "invalid zero block size of %d %d %d\n", block_size, output_size, output_count);
  131. block_size= output_size - output_count - 1;
  132. }
  133. memset(&output_buffer[output_count+1], 0, block_size * 4);
  134. output_count += block_size;
  135. }
  136. if (block_size > 0xffff)
  137. sign_modifier = 0;
  138. history = 0;
  139. }
  140. }
  141. }
  142. static inline int sign_only(int v)
  143. {
  144. return v ? FFSIGN(v) : 0;
  145. }
  146. static void predictor_decompress_fir_adapt(int32_t *error_buffer,
  147. int32_t *buffer_out,
  148. int output_size,
  149. int readsamplesize,
  150. int16_t *predictor_coef_table,
  151. int predictor_coef_num,
  152. int predictor_quantitization)
  153. {
  154. int i;
  155. /* first sample always copies */
  156. *buffer_out = *error_buffer;
  157. if (!predictor_coef_num) {
  158. if (output_size <= 1)
  159. return;
  160. memcpy(buffer_out+1, error_buffer+1, (output_size-1) * 4);
  161. return;
  162. }
  163. if (predictor_coef_num == 31) {
  164. /* simple 1st-order prediction */
  165. if (output_size <= 1)
  166. return;
  167. for (i = 0; i < output_size - 1; i++) {
  168. int32_t prev_value;
  169. int32_t error_value;
  170. prev_value = buffer_out[i];
  171. error_value = error_buffer[i+1];
  172. buffer_out[i+1] =
  173. sign_extend((prev_value + error_value), readsamplesize);
  174. }
  175. return;
  176. }
  177. /* read warm-up samples */
  178. if (predictor_coef_num > 0)
  179. for (i = 0; i < predictor_coef_num; i++) {
  180. int32_t val;
  181. val = buffer_out[i] + error_buffer[i+1];
  182. val = sign_extend(val, readsamplesize);
  183. buffer_out[i+1] = val;
  184. }
  185. /* NOTE: 4 and 8 are very common cases that could be optimized. */
  186. /* general case */
  187. if (predictor_coef_num > 0) {
  188. for (i = predictor_coef_num + 1; i < output_size; i++) {
  189. int j;
  190. int sum = 0;
  191. int outval;
  192. int error_val = error_buffer[i];
  193. for (j = 0; j < predictor_coef_num; j++) {
  194. sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) *
  195. predictor_coef_table[j];
  196. }
  197. outval = (1 << (predictor_quantitization-1)) + sum;
  198. outval = outval >> predictor_quantitization;
  199. outval = outval + buffer_out[0] + error_val;
  200. outval = sign_extend(outval, readsamplesize);
  201. buffer_out[predictor_coef_num+1] = outval;
  202. if (error_val > 0) {
  203. int predictor_num = predictor_coef_num - 1;
  204. while (predictor_num >= 0 && error_val > 0) {
  205. int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num];
  206. int sign = sign_only(val);
  207. predictor_coef_table[predictor_num] -= sign;
  208. val *= sign; /* absolute value */
  209. error_val -= ((val >> predictor_quantitization) *
  210. (predictor_coef_num - predictor_num));
  211. predictor_num--;
  212. }
  213. } else if (error_val < 0) {
  214. int predictor_num = predictor_coef_num - 1;
  215. while (predictor_num >= 0 && error_val < 0) {
  216. int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num];
  217. int sign = - sign_only(val);
  218. predictor_coef_table[predictor_num] -= sign;
  219. val *= sign; /* neg value */
  220. error_val -= ((val >> predictor_quantitization) *
  221. (predictor_coef_num - predictor_num));
  222. predictor_num--;
  223. }
  224. }
  225. buffer_out++;
  226. }
  227. }
  228. }
  229. static void decorrelate_stereo(int32_t *buffer[MAX_CHANNELS],
  230. int numsamples, uint8_t interlacing_shift,
  231. uint8_t interlacing_leftweight)
  232. {
  233. int i;
  234. for (i = 0; i < numsamples; i++) {
  235. int32_t a, b;
  236. a = buffer[0][i];
  237. b = buffer[1][i];
  238. a -= (b * interlacing_leftweight) >> interlacing_shift;
  239. b += a;
  240. buffer[0][i] = b;
  241. buffer[1][i] = a;
  242. }
  243. }
  244. static void append_extra_bits(int32_t *buffer[MAX_CHANNELS],
  245. int32_t *extra_bits_buffer[MAX_CHANNELS],
  246. int extra_bits, int numchannels, int numsamples)
  247. {
  248. int i, ch;
  249. for (ch = 0; ch < numchannels; ch++)
  250. for (i = 0; i < numsamples; i++)
  251. buffer[ch][i] = (buffer[ch][i] << extra_bits) | extra_bits_buffer[ch][i];
  252. }
  253. static void interleave_stereo_16(int32_t *buffer[MAX_CHANNELS],
  254. int16_t *buffer_out, int numsamples)
  255. {
  256. int i;
  257. for (i = 0; i < numsamples; i++) {
  258. *buffer_out++ = buffer[0][i];
  259. *buffer_out++ = buffer[1][i];
  260. }
  261. }
  262. static void interleave_stereo_24(int32_t *buffer[MAX_CHANNELS],
  263. int32_t *buffer_out, int numsamples)
  264. {
  265. int i;
  266. for (i = 0; i < numsamples; i++) {
  267. *buffer_out++ = buffer[0][i] << 8;
  268. *buffer_out++ = buffer[1][i] << 8;
  269. }
  270. }
  271. static int alac_decode_frame(AVCodecContext *avctx, void *data,
  272. int *got_frame_ptr, AVPacket *avpkt)
  273. {
  274. const uint8_t *inbuffer = avpkt->data;
  275. int input_buffer_size = avpkt->size;
  276. ALACContext *alac = avctx->priv_data;
  277. int channels;
  278. unsigned int outputsamples;
  279. int hassize;
  280. unsigned int readsamplesize;
  281. int isnotcompressed;
  282. uint8_t interlacing_shift;
  283. uint8_t interlacing_leftweight;
  284. int i, ch, ret;
  285. init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8);
  286. channels = get_bits(&alac->gb, 3) + 1;
  287. if (channels != avctx->channels) {
  288. av_log(avctx, AV_LOG_ERROR, "frame header channel count mismatch\n");
  289. return AVERROR_INVALIDDATA;
  290. }
  291. skip_bits(&alac->gb, 4); /* element instance tag */
  292. skip_bits(&alac->gb, 12); /* unused header bits */
  293. /* the number of output samples is stored in the frame */
  294. hassize = get_bits1(&alac->gb);
  295. alac->extra_bits = get_bits(&alac->gb, 2) << 3;
  296. /* whether the frame is compressed */
  297. isnotcompressed = get_bits1(&alac->gb);
  298. if (hassize) {
  299. /* now read the number of samples as a 32bit integer */
  300. outputsamples = get_bits_long(&alac->gb, 32);
  301. if(outputsamples > alac->setinfo_max_samples_per_frame){
  302. av_log(avctx, AV_LOG_ERROR, "outputsamples %d > %d\n", outputsamples, alac->setinfo_max_samples_per_frame);
  303. return -1;
  304. }
  305. } else
  306. outputsamples = alac->setinfo_max_samples_per_frame;
  307. /* get output buffer */
  308. if (outputsamples > INT32_MAX) {
  309. av_log(avctx, AV_LOG_ERROR, "unsupported block size: %u\n", outputsamples);
  310. return AVERROR_INVALIDDATA;
  311. }
  312. alac->frame.nb_samples = outputsamples;
  313. if ((ret = avctx->get_buffer(avctx, &alac->frame)) < 0) {
  314. av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
  315. return ret;
  316. }
  317. readsamplesize = alac->setinfo_sample_size - alac->extra_bits + channels - 1;
  318. if (readsamplesize > MIN_CACHE_BITS) {
  319. av_log(avctx, AV_LOG_ERROR, "readsamplesize too big (%d)\n", readsamplesize);
  320. return -1;
  321. }
  322. if (!isnotcompressed) {
  323. /* so it is compressed */
  324. int16_t predictor_coef_table[MAX_CHANNELS][32];
  325. int predictor_coef_num[MAX_CHANNELS];
  326. int prediction_type[MAX_CHANNELS];
  327. int prediction_quantitization[MAX_CHANNELS];
  328. int ricemodifier[MAX_CHANNELS];
  329. interlacing_shift = get_bits(&alac->gb, 8);
  330. interlacing_leftweight = get_bits(&alac->gb, 8);
  331. for (ch = 0; ch < channels; ch++) {
  332. prediction_type[ch] = get_bits(&alac->gb, 4);
  333. prediction_quantitization[ch] = get_bits(&alac->gb, 4);
  334. ricemodifier[ch] = get_bits(&alac->gb, 3);
  335. predictor_coef_num[ch] = get_bits(&alac->gb, 5);
  336. /* read the predictor table */
  337. for (i = 0; i < predictor_coef_num[ch]; i++)
  338. predictor_coef_table[ch][i] = (int16_t)get_bits(&alac->gb, 16);
  339. }
  340. if (alac->extra_bits) {
  341. for (i = 0; i < outputsamples; i++) {
  342. for (ch = 0; ch < channels; ch++)
  343. alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
  344. }
  345. }
  346. for (ch = 0; ch < channels; ch++) {
  347. bastardized_rice_decompress(alac,
  348. alac->predicterror_buffer[ch],
  349. outputsamples,
  350. readsamplesize,
  351. alac->setinfo_rice_initialhistory,
  352. alac->setinfo_rice_kmodifier,
  353. ricemodifier[ch] * alac->setinfo_rice_historymult / 4,
  354. (1 << alac->setinfo_rice_kmodifier) - 1);
  355. /* adaptive FIR filter */
  356. if (prediction_type[ch] == 15) {
  357. /* Prediction type 15 runs the adaptive FIR twice.
  358. * The first pass uses the special-case coef_num = 31, while
  359. * the second pass uses the coefs from the bitstream.
  360. *
  361. * However, this prediction type is not currently used by the
  362. * reference encoder.
  363. */
  364. predictor_decompress_fir_adapt(alac->predicterror_buffer[ch],
  365. alac->predicterror_buffer[ch],
  366. outputsamples, readsamplesize,
  367. NULL, 31, 0);
  368. } else if (prediction_type[ch] > 0) {
  369. av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
  370. prediction_type[ch]);
  371. }
  372. predictor_decompress_fir_adapt(alac->predicterror_buffer[ch],
  373. alac->outputsamples_buffer[ch],
  374. outputsamples, readsamplesize,
  375. predictor_coef_table[ch],
  376. predictor_coef_num[ch],
  377. prediction_quantitization[ch]);
  378. }
  379. } else {
  380. /* not compressed, easy case */
  381. for (i = 0; i < outputsamples; i++) {
  382. for (ch = 0; ch < channels; ch++) {
  383. alac->outputsamples_buffer[ch][i] = get_sbits_long(&alac->gb,
  384. alac->setinfo_sample_size);
  385. }
  386. }
  387. alac->extra_bits = 0;
  388. interlacing_shift = 0;
  389. interlacing_leftweight = 0;
  390. }
  391. if (get_bits(&alac->gb, 3) != 7)
  392. av_log(avctx, AV_LOG_ERROR, "Error : Wrong End Of Frame\n");
  393. if (channels == 2 && interlacing_leftweight) {
  394. decorrelate_stereo(alac->outputsamples_buffer, outputsamples,
  395. interlacing_shift, interlacing_leftweight);
  396. }
  397. if (alac->extra_bits) {
  398. append_extra_bits(alac->outputsamples_buffer, alac->extra_bits_buffer,
  399. alac->extra_bits, alac->numchannels, outputsamples);
  400. }
  401. switch(alac->setinfo_sample_size) {
  402. case 16:
  403. if (channels == 2) {
  404. interleave_stereo_16(alac->outputsamples_buffer,
  405. (int16_t *)alac->frame.data[0], outputsamples);
  406. } else {
  407. int16_t *outbuffer = (int16_t *)alac->frame.data[0];
  408. for (i = 0; i < outputsamples; i++) {
  409. outbuffer[i] = alac->outputsamples_buffer[0][i];
  410. }
  411. }
  412. break;
  413. case 24:
  414. if (channels == 2) {
  415. interleave_stereo_24(alac->outputsamples_buffer,
  416. (int32_t *)alac->frame.data[0], outputsamples);
  417. } else {
  418. int32_t *outbuffer = (int32_t *)alac->frame.data[0];
  419. for (i = 0; i < outputsamples; i++)
  420. outbuffer[i] = alac->outputsamples_buffer[0][i] << 8;
  421. }
  422. break;
  423. }
  424. if (input_buffer_size * 8 - get_bits_count(&alac->gb) > 8)
  425. av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", input_buffer_size * 8 - get_bits_count(&alac->gb));
  426. *got_frame_ptr = 1;
  427. *(AVFrame *)data = alac->frame;
  428. return input_buffer_size;
  429. }
  430. static av_cold int alac_decode_close(AVCodecContext *avctx)
  431. {
  432. ALACContext *alac = avctx->priv_data;
  433. int ch;
  434. for (ch = 0; ch < alac->numchannels; ch++) {
  435. av_freep(&alac->predicterror_buffer[ch]);
  436. av_freep(&alac->outputsamples_buffer[ch]);
  437. av_freep(&alac->extra_bits_buffer[ch]);
  438. }
  439. return 0;
  440. }
  441. static int allocate_buffers(ALACContext *alac)
  442. {
  443. int ch;
  444. for (ch = 0; ch < alac->numchannels; ch++) {
  445. int buf_size = alac->setinfo_max_samples_per_frame * sizeof(int32_t);
  446. FF_ALLOC_OR_GOTO(alac->avctx, alac->predicterror_buffer[ch],
  447. buf_size, buf_alloc_fail);
  448. FF_ALLOC_OR_GOTO(alac->avctx, alac->outputsamples_buffer[ch],
  449. buf_size, buf_alloc_fail);
  450. FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
  451. buf_size, buf_alloc_fail);
  452. }
  453. return 0;
  454. buf_alloc_fail:
  455. alac_decode_close(alac->avctx);
  456. return AVERROR(ENOMEM);
  457. }
  458. static int alac_set_info(ALACContext *alac)
  459. {
  460. GetByteContext gb;
  461. bytestream2_init(&gb, alac->avctx->extradata,
  462. alac->avctx->extradata_size);
  463. bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4
  464. alac->setinfo_max_samples_per_frame = bytestream2_get_be32u(&gb);
  465. if (alac->setinfo_max_samples_per_frame >= UINT_MAX/4){
  466. av_log(alac->avctx, AV_LOG_ERROR,
  467. "setinfo_max_samples_per_frame too large\n");
  468. return AVERROR_INVALIDDATA;
  469. }
  470. bytestream2_skipu(&gb, 1); // compatible version
  471. alac->setinfo_sample_size = bytestream2_get_byteu(&gb);
  472. alac->setinfo_rice_historymult = bytestream2_get_byteu(&gb);
  473. alac->setinfo_rice_initialhistory = bytestream2_get_byteu(&gb);
  474. alac->setinfo_rice_kmodifier = bytestream2_get_byteu(&gb);
  475. alac->numchannels = bytestream2_get_byteu(&gb);
  476. bytestream2_get_be16u(&gb); // maxRun
  477. bytestream2_get_be32u(&gb); // max coded frame size
  478. bytestream2_get_be32u(&gb); // average bitrate
  479. bytestream2_get_be32u(&gb); // samplerate
  480. return 0;
  481. }
  482. static av_cold int alac_decode_init(AVCodecContext * avctx)
  483. {
  484. int ret;
  485. ALACContext *alac = avctx->priv_data;
  486. alac->avctx = avctx;
  487. /* initialize from the extradata */
  488. if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) {
  489. av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n",
  490. ALAC_EXTRADATA_SIZE);
  491. return -1;
  492. }
  493. if (alac_set_info(alac)) {
  494. av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n");
  495. return -1;
  496. }
  497. switch (alac->setinfo_sample_size) {
  498. case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  499. break;
  500. case 24: avctx->sample_fmt = AV_SAMPLE_FMT_S32;
  501. break;
  502. default: av_log_ask_for_sample(avctx, "Sample depth %d is not supported.\n",
  503. alac->setinfo_sample_size);
  504. return AVERROR_PATCHWELCOME;
  505. }
  506. if (alac->numchannels < 1) {
  507. av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
  508. alac->numchannels = avctx->channels;
  509. } else {
  510. if (alac->numchannels > MAX_CHANNELS)
  511. alac->numchannels = avctx->channels;
  512. else
  513. avctx->channels = alac->numchannels;
  514. }
  515. if (avctx->channels > MAX_CHANNELS) {
  516. av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
  517. avctx->channels);
  518. return AVERROR_PATCHWELCOME;
  519. }
  520. if ((ret = allocate_buffers(alac)) < 0) {
  521. av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
  522. return ret;
  523. }
  524. avcodec_get_frame_defaults(&alac->frame);
  525. avctx->coded_frame = &alac->frame;
  526. return 0;
  527. }
  528. AVCodec ff_alac_decoder = {
  529. .name = "alac",
  530. .type = AVMEDIA_TYPE_AUDIO,
  531. .id = CODEC_ID_ALAC,
  532. .priv_data_size = sizeof(ALACContext),
  533. .init = alac_decode_init,
  534. .close = alac_decode_close,
  535. .decode = alac_decode_frame,
  536. .capabilities = CODEC_CAP_DR1,
  537. .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
  538. };