You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

460 lines
14KB

  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "libavutil/crc.h"
  19. #include "libavutil/float_dsp.h"
  20. #include "libavutil/intreadwrite.h"
  21. #include "libavutil/tx.h"
  22. #include "avcodec.h"
  23. #include "get_bits.h"
  24. #include "internal.h"
  25. #include "hca_data.h"
  26. typedef struct ChannelContext {
  27. float base[128];
  28. DECLARE_ALIGNED(32, float, imdct_in)[128];
  29. DECLARE_ALIGNED(32, float, imdct_out)[128];
  30. DECLARE_ALIGNED(32, float, imdct_prev)[128];
  31. int8_t scale_factors[128];
  32. uint8_t scale[128];
  33. int8_t intensity[8];
  34. int8_t *hfr_scale;
  35. unsigned count;
  36. int chan_type;
  37. } ChannelContext;
  38. typedef struct HCAContext {
  39. GetBitContext gb;
  40. const AVCRC *crc_table;
  41. ChannelContext ch[16];
  42. uint8_t ath[128];
  43. int ath_type;
  44. unsigned hfr_group_count;
  45. uint8_t track_count;
  46. uint8_t channel_config;
  47. uint8_t total_band_count;
  48. uint8_t base_band_count;
  49. uint8_t stereo_band_count;
  50. uint8_t bands_per_hfr_group;
  51. av_tx_fn tx_fn;
  52. AVTXContext *tx_ctx;
  53. AVFloatDSPContext *fdsp;
  54. } HCAContext;
  55. static void ath_init1(uint8_t *ath, int sample_rate)
  56. {
  57. unsigned int index;
  58. unsigned int acc = 0;
  59. for (int i = 0; i < 128; i++) {
  60. acc += sample_rate;
  61. index = acc >> 13;
  62. if (index >= 654) {
  63. memset(ath+i, 0xFF, (128 - i));
  64. break;
  65. }
  66. ath[i] = ath_base_curve[index];
  67. }
  68. }
  69. static int ath_init(uint8_t *ath, int type, int sample_rate)
  70. {
  71. switch (type) {
  72. case 0:
  73. /* nothing to do */
  74. break;
  75. case 1:
  76. ath_init1(ath, sample_rate);
  77. break;
  78. default:
  79. return AVERROR_INVALIDDATA;
  80. }
  81. return 0;
  82. }
  83. static inline unsigned ceil2(unsigned a, unsigned b)
  84. {
  85. return (b > 0) ? (a / b + ((a % b) ? 1 : 0)) : 0;
  86. }
  87. static av_cold int decode_init(AVCodecContext *avctx)
  88. {
  89. HCAContext *c = avctx->priv_data;
  90. GetBitContext *gb = &c->gb;
  91. int8_t r[16] = { 0 };
  92. float scale = 1.f / 8.f;
  93. unsigned b, chunk;
  94. int version, ret;
  95. avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
  96. c->crc_table = av_crc_get_table(AV_CRC_16_ANSI);
  97. if (avctx->channels <= 0 || avctx->channels > 16)
  98. return AVERROR(EINVAL);
  99. ret = init_get_bits8(gb, avctx->extradata, avctx->extradata_size);
  100. if (ret < 0)
  101. return ret;
  102. skip_bits_long(gb, 32);
  103. version = get_bits(gb, 16);
  104. skip_bits_long(gb, 16);
  105. c->ath_type = version >= 0x200 ? 0 : 1;
  106. if (get_bits_long(gb, 32) != MKBETAG('f', 'm', 't', 0))
  107. return AVERROR_INVALIDDATA;
  108. skip_bits_long(gb, 32);
  109. skip_bits_long(gb, 32);
  110. skip_bits_long(gb, 32);
  111. chunk = get_bits_long(gb, 32);
  112. if (chunk == MKBETAG('c', 'o', 'm', 'p')) {
  113. skip_bits_long(gb, 16);
  114. skip_bits_long(gb, 8);
  115. skip_bits_long(gb, 8);
  116. c->track_count = get_bits(gb, 8);
  117. c->channel_config = get_bits(gb, 8);
  118. c->total_band_count = get_bits(gb, 8);
  119. c->base_band_count = get_bits(gb, 8);
  120. c->stereo_band_count = get_bits(gb, 8);
  121. c->bands_per_hfr_group = get_bits(gb, 8);
  122. } else if (chunk == MKBETAG('d', 'e', 'c', 0)) {
  123. skip_bits_long(gb, 16);
  124. skip_bits_long(gb, 8);
  125. skip_bits_long(gb, 8);
  126. c->total_band_count = get_bits(gb, 8) + 1;
  127. c->base_band_count = get_bits(gb, 8) + 1;
  128. c->track_count = get_bits(gb, 4);
  129. c->channel_config = get_bits(gb, 4);
  130. if (!get_bits(gb, 8))
  131. c->base_band_count = c->total_band_count;
  132. c->stereo_band_count = c->total_band_count - c->base_band_count;
  133. c->bands_per_hfr_group = 0;
  134. } else
  135. return AVERROR_INVALIDDATA;
  136. if (c->total_band_count > FF_ARRAY_ELEMS(c->ch->imdct_in))
  137. return AVERROR_INVALIDDATA;
  138. while (get_bits_left(gb) >= 32) {
  139. chunk = get_bits_long(gb, 32);
  140. if (chunk == MKBETAG('v', 'b', 'r', 0)) {
  141. skip_bits_long(gb, 16);
  142. skip_bits_long(gb, 16);
  143. } else if (chunk == MKBETAG('a', 't', 'h', 0)) {
  144. c->ath_type = get_bits(gb, 16);
  145. } else if (chunk == MKBETAG('r', 'v', 'a', 0)) {
  146. skip_bits_long(gb, 32);
  147. } else if (chunk == MKBETAG('c', 'o', 'm', 'm')) {
  148. skip_bits_long(gb, get_bits(gb, 8) * 8);
  149. } else if (chunk == MKBETAG('c', 'i', 'p', 'h')) {
  150. skip_bits_long(gb, 16);
  151. } else if (chunk == MKBETAG('l', 'o', 'o', 'p')) {
  152. skip_bits_long(gb, 32);
  153. skip_bits_long(gb, 32);
  154. skip_bits_long(gb, 16);
  155. skip_bits_long(gb, 16);
  156. } else if (chunk == MKBETAG('p', 'a', 'd', 0)) {
  157. break;
  158. } else {
  159. break;
  160. }
  161. }
  162. ret = ath_init(c->ath, c->ath_type, avctx->sample_rate);
  163. if (ret < 0)
  164. return ret;
  165. if (!c->track_count)
  166. c->track_count = 1;
  167. b = avctx->channels / c->track_count;
  168. if (c->stereo_band_count && b > 1) {
  169. int8_t *x = r;
  170. for (int i = 0; i < c->track_count; i++, x+=b) {
  171. switch (b) {
  172. case 2:
  173. case 3:
  174. x[0] = 1;
  175. x[1] = 2;
  176. break;
  177. case 4:
  178. x[0]=1; x[1] = 2;
  179. if (c->channel_config == 0) {
  180. x[2]=1;
  181. x[3]=2;
  182. }
  183. break;
  184. case 5:
  185. x[0]=1; x[1] = 2;
  186. if (c->channel_config <= 2) {
  187. x[3]=1;
  188. x[4]=2;
  189. }
  190. break;
  191. case 6:
  192. case 7:
  193. x[0] = 1; x[1] = 2; x[4] = 1; x[5] = 2;
  194. break;
  195. case 8:
  196. x[0] = 1; x[1] = 2; x[4] = 1; x[5] = 2; x[6] = 1; x[7] = 2;
  197. break;
  198. }
  199. }
  200. }
  201. if (c->total_band_count < c->base_band_count)
  202. return AVERROR_INVALIDDATA;
  203. c->hfr_group_count = ceil2(c->total_band_count - (c->base_band_count + c->stereo_band_count),
  204. c->bands_per_hfr_group);
  205. if (c->base_band_count + c->stereo_band_count + (unsigned long)c->hfr_group_count > 128ULL)
  206. return AVERROR_INVALIDDATA;
  207. for (int i = 0; i < avctx->channels; i++) {
  208. c->ch[i].chan_type = r[i];
  209. c->ch[i].count = c->base_band_count + ((r[i] != 2) ? c->stereo_band_count : 0);
  210. c->ch[i].hfr_scale = &c->ch[i].scale_factors[c->base_band_count + c->stereo_band_count];
  211. if (c->ch[i].count > 128)
  212. return AVERROR_INVALIDDATA;
  213. }
  214. c->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
  215. if (!c->fdsp)
  216. return AVERROR(ENOMEM);
  217. return av_tx_init(&c->tx_ctx, &c->tx_fn, AV_TX_FLOAT_MDCT, 1, 128, &scale, 0);
  218. }
  219. static void run_imdct(HCAContext *c, ChannelContext *ch, int index, float *out)
  220. {
  221. c->tx_fn(c->tx_ctx, ch->imdct_out, ch->imdct_in, sizeof(float));
  222. c->fdsp->vector_fmul_window(out, ch->imdct_prev + (128 >> 1),
  223. ch->imdct_out, window, 128 >> 1);
  224. memcpy(ch->imdct_prev, ch->imdct_out, 128 * sizeof(float));
  225. }
  226. static void apply_intensity_stereo(HCAContext *s, ChannelContext *ch1, ChannelContext *ch2,
  227. int index, unsigned band_count, unsigned base_band_count,
  228. unsigned stereo_band_count)
  229. {
  230. float ratio_l = intensity_ratio_table[ch2->intensity[index]];
  231. float ratio_r = ratio_l - 2.0f;
  232. float *c1 = &ch1->imdct_in[base_band_count];
  233. float *c2 = &ch2->imdct_in[base_band_count];
  234. if (ch1->chan_type != 1 || !stereo_band_count)
  235. return;
  236. for (int i = 0; i < band_count; i++) {
  237. *(c2++) = *c1 * ratio_r;
  238. *(c1++) *= ratio_l;
  239. }
  240. }
  241. static void reconstruct_hfr(HCAContext *s, ChannelContext *ch,
  242. unsigned hfr_group_count,
  243. unsigned bands_per_hfr_group,
  244. unsigned start_band, unsigned total_band_count)
  245. {
  246. if (ch->chan_type == 2 || !bands_per_hfr_group)
  247. return;
  248. for (int i = 0, k = start_band, l = start_band - 1; i < hfr_group_count; i++){
  249. for (int j = 0; j < bands_per_hfr_group && k < total_band_count && l >= 0; j++, k++, l--){
  250. ch->imdct_in[k] = scale_conversion_table[ scale_conv_bias +
  251. av_clip_intp2(ch->hfr_scale[i] - ch->scale_factors[l], 6) ] * ch->imdct_in[l];
  252. }
  253. }
  254. ch->imdct_in[127] = 0;
  255. }
  256. static void dequantize_coefficients(HCAContext *c, ChannelContext *ch)
  257. {
  258. GetBitContext *gb = &c->gb;
  259. for (int i = 0; i < ch->count; i++) {
  260. unsigned scale = ch->scale[i];
  261. int nb_bits = max_bits_table[scale];
  262. int value = get_bitsz(gb, nb_bits);
  263. float factor;
  264. if (scale > 7) {
  265. value = (1 - ((value & 1) << 1)) * (value >> 1);
  266. if (!value)
  267. skip_bits_long(gb, -1);
  268. factor = value;
  269. } else {
  270. value += scale << 4;
  271. skip_bits_long(gb, quant_spectrum_bits[value] - nb_bits);
  272. factor = quant_spectrum_value[value];
  273. }
  274. ch->imdct_in[i] = factor * ch->base[i];
  275. }
  276. memset(ch->imdct_in + ch->count, 0, sizeof(ch->imdct_in) - ch->count * sizeof(ch->imdct_in[0]));
  277. }
  278. static void unpack(HCAContext *c, ChannelContext *ch,
  279. unsigned hfr_group_count,
  280. int packed_noise_level,
  281. const uint8_t *ath)
  282. {
  283. GetBitContext *gb = &c->gb;
  284. int delta_bits = get_bits(gb, 3);
  285. if (delta_bits > 5) {
  286. for (int i = 0; i < ch->count; i++)
  287. ch->scale_factors[i] = get_bits(gb, 6);
  288. } else if (delta_bits) {
  289. int factor = get_bits(gb, 6);
  290. int max_value = (1 << delta_bits) - 1;
  291. int half_max = max_value >> 1;
  292. ch->scale_factors[0] = factor;
  293. for (int i = 1; i < ch->count; i++){
  294. int delta = get_bits(gb, delta_bits);
  295. if (delta == max_value) {
  296. factor = get_bits(gb, 6);
  297. } else {
  298. factor += delta - half_max;
  299. }
  300. factor = av_clip_uintp2(factor, 6);
  301. ch->scale_factors[i] = factor;
  302. }
  303. } else {
  304. memset(ch->scale_factors, 0, 128);
  305. }
  306. if (ch->chan_type == 2){
  307. ch->intensity[0] = get_bits(gb, 4);
  308. if (ch->intensity[0] < 15) {
  309. for (int i = 1; i < 8; i++)
  310. ch->intensity[i] = get_bits(gb, 4);
  311. }
  312. } else {
  313. for (int i = 0; i < hfr_group_count; i++)
  314. ch->hfr_scale[i] = get_bits(gb, 6);
  315. }
  316. for (int i = 0; i < ch->count; i++) {
  317. int scale = ch->scale_factors[i];
  318. if (scale) {
  319. scale = c->ath[i] + ((packed_noise_level + i) >> 8) - ((scale * 5) >> 1) + 2;
  320. scale = scale_table[av_clip(scale, 0, 58)];
  321. }
  322. ch->scale[i] = scale;
  323. }
  324. memset(ch->scale + ch->count, 0, sizeof(ch->scale) - ch->count);
  325. for (int i = 0; i < ch->count; i++)
  326. ch->base[i] = dequantizer_scaling_table[ch->scale_factors[i]] * quant_step_size[ch->scale[i]];
  327. }
  328. static int decode_frame(AVCodecContext *avctx, void *data,
  329. int *got_frame_ptr, AVPacket *avpkt)
  330. {
  331. AVFrame *frame = data;
  332. HCAContext *c = avctx->priv_data;
  333. int ch, ret, packed_noise_level;
  334. GetBitContext *gb = &c->gb;
  335. float **samples;
  336. if (avctx->err_recognition & AV_EF_CRCCHECK) {
  337. if (av_crc(c->crc_table, 0, avpkt->data, avpkt->size))
  338. return AVERROR_INVALIDDATA;
  339. }
  340. if ((ret = init_get_bits8(gb, avpkt->data, avpkt->size)) < 0)
  341. return ret;
  342. if (get_bits(gb, 16) != 0xFFFF)
  343. return AVERROR_INVALIDDATA;
  344. frame->nb_samples = 1024;
  345. if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
  346. return ret;
  347. samples = (float **)frame->extended_data;
  348. packed_noise_level = (get_bits(gb, 9) << 8) - get_bits(gb, 7);
  349. for (ch = 0; ch < avctx->channels; ch++)
  350. unpack(c, &c->ch[ch], c->hfr_group_count, packed_noise_level, c->ath);
  351. for (int i = 0; i < 8; i++) {
  352. for (ch = 0; ch < avctx->channels; ch++)
  353. dequantize_coefficients(c, &c->ch[ch]);
  354. for (ch = 0; ch < avctx->channels; ch++)
  355. reconstruct_hfr(c, &c->ch[ch], c->hfr_group_count, c->bands_per_hfr_group,
  356. c->stereo_band_count + c->base_band_count, c->total_band_count);
  357. for (ch = 0; ch < avctx->channels - 1; ch++)
  358. apply_intensity_stereo(c, &c->ch[ch], &c->ch[ch+1], i,
  359. c->total_band_count - c->base_band_count,
  360. c->base_band_count, c->stereo_band_count);
  361. for (ch = 0; ch < avctx->channels; ch++)
  362. run_imdct(c, &c->ch[ch], i, samples[ch] + i * 128);
  363. }
  364. *got_frame_ptr = 1;
  365. return avpkt->size;
  366. }
  367. static av_cold int decode_close(AVCodecContext *avctx)
  368. {
  369. HCAContext *c = avctx->priv_data;
  370. av_freep(&c->fdsp);
  371. av_tx_uninit(&c->tx_ctx);
  372. return 0;
  373. }
  374. AVCodec ff_hca_decoder = {
  375. .name = "hca",
  376. .long_name = NULL_IF_CONFIG_SMALL("CRI HCA"),
  377. .type = AVMEDIA_TYPE_AUDIO,
  378. .id = AV_CODEC_ID_HCA,
  379. .priv_data_size = sizeof(HCAContext),
  380. .init = decode_init,
  381. .decode = decode_frame,
  382. .close = decode_close,
  383. .capabilities = AV_CODEC_CAP_DR1,
  384. .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
  385. AV_SAMPLE_FMT_NONE },
  386. };