You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

991 lines
26KB

  1. /*
  2. * Simple free lossless/lossy audio codec
  3. * Copyright (c) 2004 Alex Beregszaszi
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "get_bits.h"
  23. #include "golomb.h"
  24. #include "internal.h"
  25. /**
  26. * @file
  27. * Simple free lossless/lossy audio codec
  28. * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
  29. * Written and designed by Alex Beregszaszi
  30. *
  31. * TODO:
  32. * - CABAC put/get_symbol
  33. * - independent quantizer for channels
  34. * - >2 channels support
  35. * - more decorrelation types
  36. * - more tap_quant tests
  37. * - selectable intlist writers/readers (bonk-style, golomb, cabac)
  38. */
  /* Upper bound on the channel count; also sizes the per-channel arrays below. */
  #define MAX_CHANNELS 2

  /* Stereo decorrelation modes (written to the header as a 2-bit field). */
  #define MID_SIDE   0
  #define LEFT_SIDE  1
  #define RIGHT_SIDE 2

  /** Private codec state shared by the Sonic encoder and decoder. */
  typedef struct SonicContext {
      int version;                     /* bitstream version, 2 bits in extradata */
      int lossless;                    /* non-zero: samples are coded without quantization */
      int decorrelation;               /* MID_SIDE, LEFT_SIDE, RIGHT_SIDE, or 3 when the stream is not stereo */

      int num_taps;                    /* number of predictor coefficients */
      int downsampling;                /* input samples summed into one coded sample */
      double quantization;             /* scales the quantizer chosen by the rate control */

      int channels;
      int samplerate;
      int block_align;                 /* coded samples per channel and frame */
      int frame_size;                  /* interleaved samples per frame, all channels */

      int *tap_quant;                  /* per-tap coefficient quantizer, num_taps entries */
      int *int_samples;                /* frame_size working samples */
      int *coded_samples[MAX_CHANNELS];/* block_align values per channel */

      /* encoder only */
      int *tail;                       /* last tail_size samples of the previous frame */
      int tail_size;
      int *window;                     /* tail + current frame + zero padding */
      int window_size;

      /* predictor */
      int *predictor_k;                /* num_taps coefficients */
      int *predictor_state[MAX_CHANNELS]; /* decoder filter history, num_taps per channel */
  } SonicContext;
  /* Fixed-point precision of the predictor coefficients. */
  #define LATTICE_SHIFT  10
  #define LATTICE_FACTOR (1 << LATTICE_SHIFT)

  /* Extra low-order bits given to samples in lossy mode. */
  #define SAMPLE_SHIFT   4
  #define SAMPLE_FACTOR  (1 << SAMPLE_SHIFT)

  /* Tuning constants of the lossy encoder's rate control. */
  #define BASE_QUANT     0.6
  #define RATE_VARIATION 3.0
  /**
   * Right shift a by b bits after adding half of the divisor (1 << (b - 1)).
   * b must be at least 1.
   */
  static inline int shift(int a, int b)
  {
      const int half = 1 << (b - 1);

      return (a + half) >> b;
  }
  /**
   * Right shift a by b bits, then add one to the result when a is negative.
   */
  static inline int shift_down(int a, int b)
  {
      int shifted = a >> b;

      if (a < 0)
          shifted++;

      return shifted;
  }
  75. #if 1
  76. static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  77. {
  78. int i;
  79. for (i = 0; i < entries; i++)
  80. set_se_golomb(pb, buf[i]);
  81. return 1;
  82. }
  83. static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  84. {
  85. int i;
  86. for (i = 0; i < entries; i++)
  87. buf[i] = get_se_golomb(gb);
  88. return 1;
  89. }
  90. #else
  91. #define ADAPT_LEVEL 8
  /**
   * Number of bits needed to represent x: the position of its highest set
   * bit plus one, or 0 when x is 0.
   */
  static int bits_to_store(uint64_t x)
  {
      int width;

      for (width = 0; x; x >>= 1)
          width++;

      return width;
  }
  102. static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
  103. {
  104. int i, bits;
  105. if (!max)
  106. return;
  107. bits = bits_to_store(max);
  108. for (i = 0; i < bits-1; i++)
  109. put_bits(pb, 1, value & (1 << i));
  110. if ( (value | (1 << (bits-1))) <= max)
  111. put_bits(pb, 1, value & (1 << (bits-1)));
  112. }
  113. static unsigned int read_uint_max(GetBitContext *gb, int max)
  114. {
  115. int i, bits, value = 0;
  116. if (!max)
  117. return 0;
  118. bits = bits_to_store(max);
  119. for (i = 0; i < bits-1; i++)
  120. if (get_bits1(gb))
  121. value += 1 << i;
  122. if ( (value | (1<<(bits-1))) <= max)
  123. if (get_bits1(gb))
  124. value += 1 << (bits-1);
  125. return value;
  126. }
  127. static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  128. {
  129. int i, j, x = 0, low_bits = 0, max = 0;
  130. int step = 256, pos = 0, dominant = 0, any = 0;
  131. int *copy, *bits;
  132. copy = av_calloc(entries, sizeof(*copy));
  133. if (!copy)
  134. return AVERROR(ENOMEM);
  135. if (base_2_part)
  136. {
  137. int energy = 0;
  138. for (i = 0; i < entries; i++)
  139. energy += abs(buf[i]);
  140. low_bits = bits_to_store(energy / (entries * 2));
  141. if (low_bits > 15)
  142. low_bits = 15;
  143. put_bits(pb, 4, low_bits);
  144. }
  145. for (i = 0; i < entries; i++)
  146. {
  147. put_bits(pb, low_bits, abs(buf[i]));
  148. copy[i] = abs(buf[i]) >> low_bits;
  149. if (copy[i] > max)
  150. max = abs(copy[i]);
  151. }
  152. bits = av_calloc(entries*max, sizeof(*bits));
  153. if (!bits)
  154. {
  155. av_free(copy);
  156. return AVERROR(ENOMEM);
  157. }
  158. for (i = 0; i <= max; i++)
  159. {
  160. for (j = 0; j < entries; j++)
  161. if (copy[j] >= i)
  162. bits[x++] = copy[j] > i;
  163. }
  164. // store bitstream
  165. while (pos < x)
  166. {
  167. int steplet = step >> 8;
  168. if (pos + steplet > x)
  169. steplet = x - pos;
  170. for (i = 0; i < steplet; i++)
  171. if (bits[i+pos] != dominant)
  172. any = 1;
  173. put_bits(pb, 1, any);
  174. if (!any)
  175. {
  176. pos += steplet;
  177. step += step / ADAPT_LEVEL;
  178. }
  179. else
  180. {
  181. int interloper = 0;
  182. while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
  183. interloper++;
  184. // note change
  185. write_uint_max(pb, interloper, (step >> 8) - 1);
  186. pos += interloper + 1;
  187. step -= step / ADAPT_LEVEL;
  188. }
  189. if (step < 256)
  190. {
  191. step = 65536 / step;
  192. dominant = !dominant;
  193. }
  194. }
  195. // store signs
  196. for (i = 0; i < entries; i++)
  197. if (buf[i])
  198. put_bits(pb, 1, buf[i] < 0);
  199. av_free(bits);
  200. av_free(copy);
  201. return 0;
  202. }
  203. static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  204. {
  205. int i, low_bits = 0, x = 0;
  206. int n_zeros = 0, step = 256, dominant = 0;
  207. int pos = 0, level = 0;
  208. int *bits = av_calloc(entries, sizeof(*bits));
  209. if (!bits)
  210. return AVERROR(ENOMEM);
  211. if (base_2_part)
  212. {
  213. low_bits = get_bits(gb, 4);
  214. if (low_bits)
  215. for (i = 0; i < entries; i++)
  216. buf[i] = get_bits(gb, low_bits);
  217. }
  218. // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
  219. while (n_zeros < entries)
  220. {
  221. int steplet = step >> 8;
  222. if (!get_bits1(gb))
  223. {
  224. for (i = 0; i < steplet; i++)
  225. bits[x++] = dominant;
  226. if (!dominant)
  227. n_zeros += steplet;
  228. step += step / ADAPT_LEVEL;
  229. }
  230. else
  231. {
  232. int actual_run = read_uint_max(gb, steplet-1);
  233. // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
  234. for (i = 0; i < actual_run; i++)
  235. bits[x++] = dominant;
  236. bits[x++] = !dominant;
  237. if (!dominant)
  238. n_zeros += actual_run;
  239. else
  240. n_zeros++;
  241. step -= step / ADAPT_LEVEL;
  242. }
  243. if (step < 256)
  244. {
  245. step = 65536 / step;
  246. dominant = !dominant;
  247. }
  248. }
  249. // reconstruct unsigned values
  250. n_zeros = 0;
  251. for (i = 0; n_zeros < entries; i++)
  252. {
  253. while(1)
  254. {
  255. if (pos >= entries)
  256. {
  257. pos = 0;
  258. level += 1 << low_bits;
  259. }
  260. if (buf[pos] >= level)
  261. break;
  262. pos++;
  263. }
  264. if (bits[i])
  265. buf[pos] += 1 << low_bits;
  266. else
  267. n_zeros++;
  268. pos++;
  269. }
  270. av_free(bits);
  271. // read signs
  272. for (i = 0; i < entries; i++)
  273. if (buf[i] && get_bits1(gb))
  274. buf[i] = -buf[i];
  275. // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
  276. return 0;
  277. }
  278. #endif
  279. static void predictor_init_state(int *k, int *state, int order)
  280. {
  281. int i;
  282. for (i = order-2; i >= 0; i--)
  283. {
  284. int j, p, x = state[i];
  285. for (j = 0, p = i+1; p < order; j++,p++)
  286. {
  287. int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
  288. state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
  289. x = tmp;
  290. }
  291. }
  292. }
  293. static int predictor_calc_error(int *k, int *state, int order, int error)
  294. {
  295. int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
  296. #if 1
  297. int *k_ptr = &(k[order-2]),
  298. *state_ptr = &(state[order-2]);
  299. for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
  300. {
  301. int k_value = *k_ptr, state_value = *state_ptr;
  302. x -= shift_down(k_value * state_value, LATTICE_SHIFT);
  303. state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
  304. }
  305. #else
  306. for (i = order-2; i >= 0; i--)
  307. {
  308. x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
  309. state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
  310. }
  311. #endif
  312. // don't drift too far, to avoid overflows
  313. if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
  314. if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
  315. state[0] = x;
  316. return x;
  317. }
  318. #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
  319. // Heavily modified Levinson-Durbin algorithm which
  320. // copes better with quantization, and calculates the
  321. // actual whitened result as it goes.
  322. static void modified_levinson_durbin(int *window, int window_entries,
  323. int *out, int out_entries, int channels, int *tap_quant)
  324. {
  325. int i;
  326. int *state = av_calloc(window_entries, sizeof(*state));
  327. memcpy(state, window, 4* window_entries);
  328. for (i = 0; i < out_entries; i++)
  329. {
  330. int step = (i+1)*channels, k, j;
  331. double xx = 0.0, xy = 0.0;
  332. #if 1
  333. int *x_ptr = &(window[step]);
  334. int *state_ptr = &(state[0]);
  335. j = window_entries - step;
  336. for (;j>0;j--,x_ptr++,state_ptr++)
  337. {
  338. double x_value = *x_ptr;
  339. double state_value = *state_ptr;
  340. xx += state_value*state_value;
  341. xy += x_value*state_value;
  342. }
  343. #else
  344. for (j = 0; j <= (window_entries - step); j++);
  345. {
  346. double stepval = window[step+j];
  347. double stateval = window[j];
  348. // xx += (double)window[j]*(double)window[j];
  349. // xy += (double)window[step+j]*(double)window[j];
  350. xx += stateval*stateval;
  351. xy += stepval*stateval;
  352. }
  353. #endif
  354. if (xx == 0.0)
  355. k = 0;
  356. else
  357. k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
  358. if (k > (LATTICE_FACTOR/tap_quant[i]))
  359. k = LATTICE_FACTOR/tap_quant[i];
  360. if (-k > (LATTICE_FACTOR/tap_quant[i]))
  361. k = -(LATTICE_FACTOR/tap_quant[i]);
  362. out[i] = k;
  363. k *= tap_quant[i];
  364. #if 1
  365. x_ptr = &(window[step]);
  366. state_ptr = &(state[0]);
  367. j = window_entries - step;
  368. for (;j>0;j--,x_ptr++,state_ptr++)
  369. {
  370. int x_value = *x_ptr;
  371. int state_value = *state_ptr;
  372. *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
  373. *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
  374. }
  375. #else
  376. for (j=0; j <= (window_entries - step); j++)
  377. {
  378. int stepval = window[step+j];
  379. int stateval=state[j];
  380. window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
  381. state[j] += shift_down(k * stepval, LATTICE_SHIFT);
  382. }
  383. #endif
  384. }
  385. av_free(state);
  386. }
  /**
   * Map a sample rate in Hz to its header code.
   *
   * @return the code (0..8), or AVERROR(EINVAL) for a rate without a code
   */
  static inline int code_samplerate(int samplerate)
  {
      static const int rates[] = {
          44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
      };
      int code;

      for (code = 0; code < (int)(sizeof(rates) / sizeof(rates[0])); code++) {
          if (rates[code] == samplerate)
              return code;
      }

      return AVERROR(EINVAL);
  }
  403. static av_cold int sonic_encode_init(AVCodecContext *avctx)
  404. {
  405. SonicContext *s = avctx->priv_data;
  406. PutBitContext pb;
  407. int i;
  408. if (avctx->channels > MAX_CHANNELS)
  409. {
  410. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  411. return AVERROR(EINVAL); /* only stereo or mono for now */
  412. }
  413. if (avctx->channels == 2)
  414. s->decorrelation = MID_SIDE;
  415. else
  416. s->decorrelation = 3;
  417. if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
  418. {
  419. s->lossless = 1;
  420. s->num_taps = 32;
  421. s->downsampling = 1;
  422. s->quantization = 0.0;
  423. }
  424. else
  425. {
  426. s->num_taps = 128;
  427. s->downsampling = 2;
  428. s->quantization = 1.0;
  429. }
  430. // max tap 2048
  431. if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
  432. av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
  433. return AVERROR_INVALIDDATA;
  434. }
  435. // generate taps
  436. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  437. for (i = 0; i < s->num_taps; i++)
  438. s->tap_quant[i] = ff_sqrt(i+1);
  439. s->channels = avctx->channels;
  440. s->samplerate = avctx->sample_rate;
  441. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  442. s->frame_size = s->channels*s->block_align*s->downsampling;
  443. s->tail_size = s->num_taps*s->channels;
  444. s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
  445. if (!s->tail)
  446. return AVERROR(ENOMEM);
  447. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
  448. if (!s->predictor_k)
  449. return AVERROR(ENOMEM);
  450. for (i = 0; i < s->channels; i++)
  451. {
  452. s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
  453. if (!s->coded_samples[i])
  454. return AVERROR(ENOMEM);
  455. }
  456. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  457. s->window_size = ((2*s->tail_size)+s->frame_size);
  458. s->window = av_calloc(s->window_size, sizeof(*s->window));
  459. if (!s->window)
  460. return AVERROR(ENOMEM);
  461. avctx->extradata = av_mallocz(16);
  462. if (!avctx->extradata)
  463. return AVERROR(ENOMEM);
  464. init_put_bits(&pb, avctx->extradata, 16*8);
  465. put_bits(&pb, 2, s->version); // version
  466. if (s->version == 1)
  467. {
  468. put_bits(&pb, 2, s->channels);
  469. put_bits(&pb, 4, code_samplerate(s->samplerate));
  470. }
  471. put_bits(&pb, 1, s->lossless);
  472. if (!s->lossless)
  473. put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
  474. put_bits(&pb, 2, s->decorrelation);
  475. put_bits(&pb, 2, s->downsampling);
  476. put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
  477. put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
  478. flush_put_bits(&pb);
  479. avctx->extradata_size = put_bits_count(&pb)/8;
  480. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  481. s->version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  482. avctx->frame_size = s->block_align*s->downsampling;
  483. return 0;
  484. }
  485. static av_cold int sonic_encode_close(AVCodecContext *avctx)
  486. {
  487. SonicContext *s = avctx->priv_data;
  488. int i;
  489. for (i = 0; i < s->channels; i++)
  490. av_freep(&s->coded_samples[i]);
  491. av_freep(&s->predictor_k);
  492. av_freep(&s->tail);
  493. av_freep(&s->tap_quant);
  494. av_freep(&s->window);
  495. av_freep(&s->int_samples);
  496. return 0;
  497. }
  498. static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  499. const AVFrame *frame, int *got_packet_ptr)
  500. {
  501. SonicContext *s = avctx->priv_data;
  502. PutBitContext pb;
  503. int i, j, ch, quant = 0, x = 0;
  504. int ret;
  505. const short *samples = (const int16_t*)frame->data[0];
  506. if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
  507. return ret;
  508. init_put_bits(&pb, avpkt->data, avpkt->size);
  509. // short -> internal
  510. for (i = 0; i < s->frame_size; i++)
  511. s->int_samples[i] = samples[i];
  512. if (!s->lossless)
  513. for (i = 0; i < s->frame_size; i++)
  514. s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
  515. switch(s->decorrelation)
  516. {
  517. case MID_SIDE:
  518. for (i = 0; i < s->frame_size; i += s->channels)
  519. {
  520. s->int_samples[i] += s->int_samples[i+1];
  521. s->int_samples[i+1] -= shift(s->int_samples[i], 1);
  522. }
  523. break;
  524. case LEFT_SIDE:
  525. for (i = 0; i < s->frame_size; i += s->channels)
  526. s->int_samples[i+1] -= s->int_samples[i];
  527. break;
  528. case RIGHT_SIDE:
  529. for (i = 0; i < s->frame_size; i += s->channels)
  530. s->int_samples[i] -= s->int_samples[i+1];
  531. break;
  532. }
  533. memset(s->window, 0, 4* s->window_size);
  534. for (i = 0; i < s->tail_size; i++)
  535. s->window[x++] = s->tail[i];
  536. for (i = 0; i < s->frame_size; i++)
  537. s->window[x++] = s->int_samples[i];
  538. for (i = 0; i < s->tail_size; i++)
  539. s->window[x++] = 0;
  540. for (i = 0; i < s->tail_size; i++)
  541. s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
  542. // generate taps
  543. modified_levinson_durbin(s->window, s->window_size,
  544. s->predictor_k, s->num_taps, s->channels, s->tap_quant);
  545. if ((ret = intlist_write(&pb, s->predictor_k, s->num_taps, 0)) < 0)
  546. return ret;
  547. for (ch = 0; ch < s->channels; ch++)
  548. {
  549. x = s->tail_size+ch;
  550. for (i = 0; i < s->block_align; i++)
  551. {
  552. int sum = 0;
  553. for (j = 0; j < s->downsampling; j++, x += s->channels)
  554. sum += s->window[x];
  555. s->coded_samples[ch][i] = sum;
  556. }
  557. }
  558. // simple rate control code
  559. if (!s->lossless)
  560. {
  561. double energy1 = 0.0, energy2 = 0.0;
  562. for (ch = 0; ch < s->channels; ch++)
  563. {
  564. for (i = 0; i < s->block_align; i++)
  565. {
  566. double sample = s->coded_samples[ch][i];
  567. energy2 += sample*sample;
  568. energy1 += fabs(sample);
  569. }
  570. }
  571. energy2 = sqrt(energy2/(s->channels*s->block_align));
  572. energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
  573. // increase bitrate when samples are like a gaussian distribution
  574. // reduce bitrate when samples are like a two-tailed exponential distribution
  575. if (energy2 > energy1)
  576. energy2 += (energy2-energy1)*RATE_VARIATION;
  577. quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
  578. // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
  579. quant = av_clip(quant, 1, 65534);
  580. set_ue_golomb(&pb, quant);
  581. quant *= SAMPLE_FACTOR;
  582. }
  583. // write out coded samples
  584. for (ch = 0; ch < s->channels; ch++)
  585. {
  586. if (!s->lossless)
  587. for (i = 0; i < s->block_align; i++)
  588. s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
  589. if ((ret = intlist_write(&pb, s->coded_samples[ch], s->block_align, 1)) < 0)
  590. return ret;
  591. }
  592. // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
  593. flush_put_bits(&pb);
  594. avpkt->size = (put_bits_count(&pb)+7)/8;
  595. *got_packet_ptr = 1;
  596. return 0;
  597. }
  598. #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
  599. #if CONFIG_SONIC_DECODER
  /* Sample rates in Hz, indexed by the rate code of the version 1 header. */
  static const int samplerate_table[] = {
      44100, 22050, 11025,
      96000, 48000, 32000,
      24000, 16000,  8000,
  };
  602. static av_cold int sonic_decode_init(AVCodecContext *avctx)
  603. {
  604. SonicContext *s = avctx->priv_data;
  605. GetBitContext gb;
  606. int i;
  607. s->channels = avctx->channels;
  608. s->samplerate = avctx->sample_rate;
  609. if (!avctx->extradata)
  610. {
  611. av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
  612. return AVERROR_INVALIDDATA;
  613. }
  614. init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
  615. s->version = get_bits(&gb, 2);
  616. if (s->version > 1)
  617. {
  618. av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
  619. return AVERROR_INVALIDDATA;
  620. }
  621. if (s->version == 1)
  622. {
  623. s->channels = get_bits(&gb, 2);
  624. s->samplerate = samplerate_table[get_bits(&gb, 4)];
  625. av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
  626. s->channels, s->samplerate);
  627. }
  628. if (s->channels > MAX_CHANNELS)
  629. {
  630. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  631. return AVERROR_INVALIDDATA;
  632. }
  633. s->lossless = get_bits1(&gb);
  634. if (!s->lossless)
  635. skip_bits(&gb, 3); // XXX FIXME
  636. s->decorrelation = get_bits(&gb, 2);
  637. if (s->decorrelation != 3 && s->channels != 2) {
  638. av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
  639. return AVERROR_INVALIDDATA;
  640. }
  641. s->downsampling = get_bits(&gb, 2);
  642. if (!s->downsampling) {
  643. av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
  644. return AVERROR_INVALIDDATA;
  645. }
  646. s->num_taps = (get_bits(&gb, 5)+1)<<5;
  647. if (get_bits1(&gb)) // XXX FIXME
  648. av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
  649. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  650. s->frame_size = s->channels*s->block_align*s->downsampling;
  651. // avctx->frame_size = s->block_align;
  652. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  653. s->version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  654. // generate taps
  655. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  656. for (i = 0; i < s->num_taps; i++)
  657. s->tap_quant[i] = ff_sqrt(i+1);
  658. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
  659. for (i = 0; i < s->channels; i++)
  660. {
  661. s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state));
  662. if (!s->predictor_state[i])
  663. return AVERROR(ENOMEM);
  664. }
  665. for (i = 0; i < s->channels; i++)
  666. {
  667. s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
  668. if (!s->coded_samples[i])
  669. return AVERROR(ENOMEM);
  670. }
  671. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  672. avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  673. return 0;
  674. }
  675. static av_cold int sonic_decode_close(AVCodecContext *avctx)
  676. {
  677. SonicContext *s = avctx->priv_data;
  678. int i;
  679. av_freep(&s->int_samples);
  680. av_freep(&s->tap_quant);
  681. av_freep(&s->predictor_k);
  682. for (i = 0; i < s->channels; i++)
  683. {
  684. av_freep(&s->predictor_state[i]);
  685. av_freep(&s->coded_samples[i]);
  686. }
  687. return 0;
  688. }
  689. static int sonic_decode_frame(AVCodecContext *avctx,
  690. void *data, int *got_frame_ptr,
  691. AVPacket *avpkt)
  692. {
  693. const uint8_t *buf = avpkt->data;
  694. int buf_size = avpkt->size;
  695. SonicContext *s = avctx->priv_data;
  696. GetBitContext gb;
  697. int i, quant, ch, j, ret;
  698. int16_t *samples;
  699. AVFrame *frame = data;
  700. if (buf_size == 0) return 0;
  701. frame->nb_samples = s->frame_size / avctx->channels;
  702. if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
  703. return ret;
  704. samples = (int16_t *)frame->data[0];
  705. // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
  706. init_get_bits8(&gb, buf, buf_size);
  707. intlist_read(&gb, s->predictor_k, s->num_taps, 0);
  708. // dequantize
  709. for (i = 0; i < s->num_taps; i++)
  710. s->predictor_k[i] *= s->tap_quant[i];
  711. if (s->lossless)
  712. quant = 1;
  713. else
  714. quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
  715. // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
  716. for (ch = 0; ch < s->channels; ch++)
  717. {
  718. int x = ch;
  719. predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
  720. intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
  721. for (i = 0; i < s->block_align; i++)
  722. {
  723. for (j = 0; j < s->downsampling - 1; j++)
  724. {
  725. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
  726. x += s->channels;
  727. }
  728. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
  729. x += s->channels;
  730. }
  731. for (i = 0; i < s->num_taps; i++)
  732. s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
  733. }
  734. switch(s->decorrelation)
  735. {
  736. case MID_SIDE:
  737. for (i = 0; i < s->frame_size; i += s->channels)
  738. {
  739. s->int_samples[i+1] += shift(s->int_samples[i], 1);
  740. s->int_samples[i] -= s->int_samples[i+1];
  741. }
  742. break;
  743. case LEFT_SIDE:
  744. for (i = 0; i < s->frame_size; i += s->channels)
  745. s->int_samples[i+1] += s->int_samples[i];
  746. break;
  747. case RIGHT_SIDE:
  748. for (i = 0; i < s->frame_size; i += s->channels)
  749. s->int_samples[i] += s->int_samples[i+1];
  750. break;
  751. }
  752. if (!s->lossless)
  753. for (i = 0; i < s->frame_size; i++)
  754. s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
  755. // internal -> short
  756. for (i = 0; i < s->frame_size; i++)
  757. samples[i] = av_clip_int16(s->int_samples[i]);
  758. align_get_bits(&gb);
  759. *got_frame_ptr = 1;
  760. return (get_bits_count(&gb)+7)/8;
  761. }
  762. AVCodec ff_sonic_decoder = {
  763. .name = "sonic",
  764. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  765. .type = AVMEDIA_TYPE_AUDIO,
  766. .id = AV_CODEC_ID_SONIC,
  767. .priv_data_size = sizeof(SonicContext),
  768. .init = sonic_decode_init,
  769. .close = sonic_decode_close,
  770. .decode = sonic_decode_frame,
  771. .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
  772. };
  773. #endif /* CONFIG_SONIC_DECODER */
  774. #if CONFIG_SONIC_ENCODER
  775. AVCodec ff_sonic_encoder = {
  776. .name = "sonic",
  777. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  778. .type = AVMEDIA_TYPE_AUDIO,
  779. .id = AV_CODEC_ID_SONIC,
  780. .priv_data_size = sizeof(SonicContext),
  781. .init = sonic_encode_init,
  782. .encode2 = sonic_encode_frame,
  783. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  784. .capabilities = CODEC_CAP_EXPERIMENTAL,
  785. .close = sonic_encode_close,
  786. };
  787. #endif
  788. #if CONFIG_SONIC_LS_ENCODER
  789. AVCodec ff_sonic_ls_encoder = {
  790. .name = "sonicls",
  791. .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
  792. .type = AVMEDIA_TYPE_AUDIO,
  793. .id = AV_CODEC_ID_SONIC_LS,
  794. .priv_data_size = sizeof(SonicContext),
  795. .init = sonic_encode_init,
  796. .encode2 = sonic_encode_frame,
  797. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  798. .capabilities = CODEC_CAP_EXPERIMENTAL,
  799. .close = sonic_encode_close,
  800. };
  801. #endif