You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

975 lines
22KB

  1. /*
  2. * Simple free lossless/lossy audio codec
  3. * Copyright (c) 2004 Alex Beregszaszi
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. */
  19. #include "avcodec.h"
  20. #include "golomb.h"
  21. /**
  22. * @file sonic.c
  23. * Simple free lossless/lossy audio codec
  24. * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
  25. * Written and designed by Alex Beregszaszi
  26. *
  27. * TODO:
  28. * - CABAC put/get_symbol
  29. * - independent quantizer for channels
  30. * - >2 channels support
  31. * - more decorrelation types
  32. * - more tap_quant tests
  33. * - selectable intlist writers/readers (bonk-style, golomb, cabac)
  34. */
  /* Upper bound on the channel count; sizes the per-channel pointer arrays. */
  #define MAX_CHANNELS 2

  /* Values of SonicContext.decorrelation (stored as a 2-bit header field). */
  #define MID_SIDE   0
  #define LEFT_SIDE  1
  #define RIGHT_SIDE 2

  /**
   * Private codec state, used by both the encoder and the decoder.
   */
  typedef struct SonicContext {
      int lossless;       // 1 for the lossless variant, 0 for lossy
      int decorrelation;  // MID_SIDE, LEFT_SIDE or RIGHT_SIDE; other values match no case

      int num_taps;       // number of predictor coefficients
      int downsampling;   // input samples per channel merged into one coded sample

      double quantization; // scales the quantizer picked by the rate control

      int channels;
      int samplerate;
      int block_align;    // coded samples per channel in one frame
      int frame_size;     // channels * block_align * downsampling

      int *tap_quant;     // per-tap quantization step, num_taps entries
      int *int_samples;   // frame_size interleaved working samples
      int *coded_samples[MAX_CHANNELS]; // block_align entries per channel

      // for encoding
      int *tail;          // last tail_size samples of the previous frame
      int tail_size;      // num_taps * channels
      int *window;        // tail + current frame + zero padding
      int window_size;    // 2 * tail_size + frame_size

      // for decoding (predictor_k is also filled by the encoder)
      int *predictor_k;   // num_taps predictor coefficients
      int *predictor_state[MAX_CHANNELS]; // num_taps state entries per channel
  } SonicContext;
  /* Fixed-point scaling of the predictor coefficients: products are shifted
   * back down by LATTICE_SHIFT bits. */
  #define LATTICE_SHIFT   10
  #define LATTICE_FACTOR  (1 << LATTICE_SHIFT)

  /* Extra precision given to samples in lossy mode. */
  #define SAMPLE_SHIFT    4
  #define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)

  /* Tuning constants of the encoder's rate control. */
  #define BASE_QUANT      0.6
  #define RATE_VARIATION  3.0
  /**
   * Divide a by b with rounding: half of b is added to the magnitude of a
   * before the integer division, and the sign of a is restored afterwards.
   */
  static inline int divide(int a, int b)
  {
      const int negative = a < 0;
      const int magnitude = negative ? -a : a;
      const int quotient = (magnitude + b/2) / b;

      return negative ? -quotient : quotient;
  }
  /**
   * Shift a right by b bits after adding half of the divisor (1 << (b-1)),
   * i.e. a rounding right shift. b must be at least 1.
   */
  static inline int shift(int a, int b)
  {
      const int rounding = 1 << (b - 1);

      return (a + rounding) >> b;
  }
  /**
   * Shift a right by b bits and add one to the result when a is negative.
   */
  static inline int shift_down(int a, int b)
  {
      int shifted = a >> b;

      if (a < 0)
          shifted++;
      return shifted;
  }
  77. #if 1
  78. static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  79. {
  80. int i;
  81. for (i = 0; i < entries; i++)
  82. set_se_golomb(pb, buf[i]);
  83. return 1;
  84. }
  85. static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  86. {
  87. int i;
  88. for (i = 0; i < entries; i++)
  89. buf[i] = get_se_golomb(gb);
  90. return 1;
  91. }
  92. #else
  /* Adaptation rate of the run-length coder below: after every run the
   * step grows or shrinks by step / ADAPT_LEVEL. */
  #define ADAPT_LEVEL 8
  /**
   * Number of bits needed to represent x: the position of its highest set
   * bit, counted from 1. Returns 0 for x == 0.
   */
  static int bits_to_store(uint64_t x)
  {
      int count;

      for (count = 0; x != 0; x >>= 1)
          count++;

      return count;
  }
  104. static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
  105. {
  106. int i, bits;
  107. if (!max)
  108. return;
  109. bits = bits_to_store(max);
  110. for (i = 0; i < bits-1; i++)
  111. put_bits(pb, 1, value & (1 << i));
  112. if ( (value | (1 << (bits-1))) <= max)
  113. put_bits(pb, 1, value & (1 << (bits-1)));
  114. }
  115. static unsigned int read_uint_max(GetBitContext *gb, int max)
  116. {
  117. int i, bits, value = 0;
  118. if (!max)
  119. return 0;
  120. bits = bits_to_store(max);
  121. for (i = 0; i < bits-1; i++)
  122. if (get_bits1(gb))
  123. value += 1 << i;
  124. if ( (value | (1<<(bits-1))) <= max)
  125. if (get_bits1(gb))
  126. value += 1 << (bits-1);
  127. return value;
  128. }
  129. static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  130. {
  131. int i, j, x = 0, low_bits = 0, max = 0;
  132. int step = 256, pos = 0, dominant = 0, any = 0;
  133. int *copy, *bits;
  134. copy = av_mallocz(4* entries);
  135. if (!copy)
  136. return -1;
  137. if (base_2_part)
  138. {
  139. int energy = 0;
  140. for (i = 0; i < entries; i++)
  141. energy += abs(buf[i]);
  142. low_bits = bits_to_store(energy / (entries * 2));
  143. if (low_bits > 15)
  144. low_bits = 15;
  145. put_bits(pb, 4, low_bits);
  146. }
  147. for (i = 0; i < entries; i++)
  148. {
  149. put_bits(pb, low_bits, abs(buf[i]));
  150. copy[i] = abs(buf[i]) >> low_bits;
  151. if (copy[i] > max)
  152. max = abs(copy[i]);
  153. }
  154. bits = av_mallocz(4* entries*max);
  155. if (!bits)
  156. {
  157. // av_free(copy);
  158. return -1;
  159. }
  160. for (i = 0; i <= max; i++)
  161. {
  162. for (j = 0; j < entries; j++)
  163. if (copy[j] >= i)
  164. bits[x++] = copy[j] > i;
  165. }
  166. // store bitstream
  167. while (pos < x)
  168. {
  169. int steplet = step >> 8;
  170. if (pos + steplet > x)
  171. steplet = x - pos;
  172. for (i = 0; i < steplet; i++)
  173. if (bits[i+pos] != dominant)
  174. any = 1;
  175. put_bits(pb, 1, any);
  176. if (!any)
  177. {
  178. pos += steplet;
  179. step += step / ADAPT_LEVEL;
  180. }
  181. else
  182. {
  183. int interloper = 0;
  184. while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
  185. interloper++;
  186. // note change
  187. write_uint_max(pb, interloper, (step >> 8) - 1);
  188. pos += interloper + 1;
  189. step -= step / ADAPT_LEVEL;
  190. }
  191. if (step < 256)
  192. {
  193. step = 65536 / step;
  194. dominant = !dominant;
  195. }
  196. }
  197. // store signs
  198. for (i = 0; i < entries; i++)
  199. if (buf[i])
  200. put_bits(pb, 1, buf[i] < 0);
  201. // av_free(bits);
  202. // av_free(copy);
  203. return 0;
  204. }
  205. static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  206. {
  207. int i, low_bits = 0, x = 0;
  208. int n_zeros = 0, step = 256, dominant = 0;
  209. int pos = 0, level = 0;
  210. int *bits = av_mallocz(4* entries);
  211. if (!bits)
  212. return -1;
  213. if (base_2_part)
  214. {
  215. low_bits = get_bits(gb, 4);
  216. if (low_bits)
  217. for (i = 0; i < entries; i++)
  218. buf[i] = get_bits(gb, low_bits);
  219. }
  220. // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
  221. while (n_zeros < entries)
  222. {
  223. int steplet = step >> 8;
  224. if (!get_bits1(gb))
  225. {
  226. for (i = 0; i < steplet; i++)
  227. bits[x++] = dominant;
  228. if (!dominant)
  229. n_zeros += steplet;
  230. step += step / ADAPT_LEVEL;
  231. }
  232. else
  233. {
  234. int actual_run = read_uint_max(gb, steplet-1);
  235. // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
  236. for (i = 0; i < actual_run; i++)
  237. bits[x++] = dominant;
  238. bits[x++] = !dominant;
  239. if (!dominant)
  240. n_zeros += actual_run;
  241. else
  242. n_zeros++;
  243. step -= step / ADAPT_LEVEL;
  244. }
  245. if (step < 256)
  246. {
  247. step = 65536 / step;
  248. dominant = !dominant;
  249. }
  250. }
  251. // reconstruct unsigned values
  252. n_zeros = 0;
  253. for (i = 0; n_zeros < entries; i++)
  254. {
  255. while(1)
  256. {
  257. if (pos >= entries)
  258. {
  259. pos = 0;
  260. level += 1 << low_bits;
  261. }
  262. if (buf[pos] >= level)
  263. break;
  264. pos++;
  265. }
  266. if (bits[i])
  267. buf[pos] += 1 << low_bits;
  268. else
  269. n_zeros++;
  270. pos++;
  271. }
  272. // av_free(bits);
  273. // read signs
  274. for (i = 0; i < entries; i++)
  275. if (buf[i] && get_bits1(gb))
  276. buf[i] = -buf[i];
  277. // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
  278. return 0;
  279. }
  280. #endif
  281. static void predictor_init_state(int *k, int *state, int order)
  282. {
  283. int i;
  284. for (i = order-2; i >= 0; i--)
  285. {
  286. int j, p, x = state[i];
  287. for (j = 0, p = i+1; p < order; j++,p++)
  288. {
  289. int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
  290. state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
  291. x = tmp;
  292. }
  293. }
  294. }
  295. static int predictor_calc_error(int *k, int *state, int order, int error)
  296. {
  297. int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
  298. #if 1
  299. int *k_ptr = &(k[order-2]),
  300. *state_ptr = &(state[order-2]);
  301. for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
  302. {
  303. int k_value = *k_ptr, state_value = *state_ptr;
  304. x -= shift_down(k_value * state_value, LATTICE_SHIFT);
  305. state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
  306. }
  307. #else
  308. for (i = order-2; i >= 0; i--)
  309. {
  310. x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
  311. state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
  312. }
  313. #endif
  314. // don't drift too far, to avoid overflows
  315. if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
  316. if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
  317. state[0] = x;
  318. return x;
  319. }
  320. // Heavily modified Levinson-Durbin algorithm which
  321. // copes better with quantization, and calculates the
  322. // actual whitened result as it goes.
  323. static void modified_levinson_durbin(int *window, int window_entries,
  324. int *out, int out_entries, int channels, int *tap_quant)
  325. {
  326. int i;
  327. int *state = av_mallocz(4* window_entries);
  328. memcpy(state, window, 4* window_entries);
  329. for (i = 0; i < out_entries; i++)
  330. {
  331. int step = (i+1)*channels, k, j;
  332. double xx = 0.0, xy = 0.0;
  333. #if 1
  334. int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
  335. j = window_entries - step;
  336. for (;j>=0;j--,x_ptr++,state_ptr++)
  337. {
  338. double x_value = *x_ptr, state_value = *state_ptr;
  339. xx += state_value*state_value;
  340. xy += x_value*state_value;
  341. }
  342. #else
  343. for (j = 0; j <= (window_entries - step); j++);
  344. {
  345. double stepval = window[step+j], stateval = window[j];
  346. // xx += (double)window[j]*(double)window[j];
  347. // xy += (double)window[step+j]*(double)window[j];
  348. xx += stateval*stateval;
  349. xy += stepval*stateval;
  350. }
  351. #endif
  352. if (xx == 0.0)
  353. k = 0;
  354. else
  355. k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
  356. if (k > (LATTICE_FACTOR/tap_quant[i]))
  357. k = LATTICE_FACTOR/tap_quant[i];
  358. if (-k > (LATTICE_FACTOR/tap_quant[i]))
  359. k = -(LATTICE_FACTOR/tap_quant[i]);
  360. out[i] = k;
  361. k *= tap_quant[i];
  362. #if 1
  363. x_ptr = &(window[step]);
  364. state_ptr = &(state[0]);
  365. j = window_entries - step;
  366. for (;j>=0;j--,x_ptr++,state_ptr++)
  367. {
  368. int x_value = *x_ptr, state_value = *state_ptr;
  369. *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
  370. *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
  371. }
  372. #else
  373. for (j=0; j <= (window_entries - step); j++)
  374. {
  375. int stepval = window[step+j], stateval=state[j];
  376. window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
  377. state[j] += shift_down(k * stepval, LATTICE_SHIFT);
  378. }
  379. #endif
  380. }
  381. av_free(state);
  382. }
  /* Sample rates selectable by the 4-bit code of a version 1 header; the
   * position in this table is the code. */
  static int samplerate_table[] = {
      44100, 22050, 11025,
      96000, 48000, 32000,
      24000, 16000,  8000
  };
  385. #ifdef CONFIG_ENCODERS
  /**
   * Map a sample rate to its header code.
   *
   * @param samplerate sample rate in Hz
   * @return code 0..8 for the nine supported rates, -1 for any other rate
   */
  static inline int code_samplerate(int samplerate)
  {
      static const int rates[] = {
          44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
      };
      const int count = (int)(sizeof(rates) / sizeof(rates[0]));
      int code;

      for (code = 0; code < count; code++)
          if (rates[code] == samplerate)
              return code;

      return -1;
  }
  402. static int sonic_encode_init(AVCodecContext *avctx)
  403. {
  404. SonicContext *s = avctx->priv_data;
  405. PutBitContext pb;
  406. int i, version = 0;
  407. if (avctx->channels > MAX_CHANNELS)
  408. {
  409. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  410. return -1; /* only stereo or mono for now */
  411. }
  412. if (avctx->channels == 2)
  413. s->decorrelation = MID_SIDE;
  414. if (avctx->codec->id == CODEC_ID_SONIC_LS)
  415. {
  416. s->lossless = 1;
  417. s->num_taps = 32;
  418. s->downsampling = 1;
  419. s->quantization = 0.0;
  420. }
  421. else
  422. {
  423. s->num_taps = 128;
  424. s->downsampling = 2;
  425. s->quantization = 1.0;
  426. }
  427. // max tap 2048
  428. if ((s->num_taps < 32) || (s->num_taps > 1024) ||
  429. ((s->num_taps>>5)<<5 != s->num_taps))
  430. {
  431. av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
  432. return -1;
  433. }
  434. // generate taps
  435. s->tap_quant = av_mallocz(4* s->num_taps);
  436. for (i = 0; i < s->num_taps; i++)
  437. s->tap_quant[i] = (int)(sqrt(i+1));
  438. s->channels = avctx->channels;
  439. s->samplerate = avctx->sample_rate;
  440. s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
  441. s->frame_size = s->channels*s->block_align*s->downsampling;
  442. s->tail = av_mallocz(4* s->num_taps*s->channels);
  443. if (!s->tail)
  444. return -1;
  445. s->tail_size = s->num_taps*s->channels;
  446. s->predictor_k = av_mallocz(4 * s->num_taps);
  447. if (!s->predictor_k)
  448. return -1;
  449. for (i = 0; i < s->channels; i++)
  450. {
  451. s->coded_samples[i] = av_mallocz(4* s->block_align);
  452. if (!s->coded_samples[i])
  453. return -1;
  454. }
  455. s->int_samples = av_mallocz(4* s->frame_size);
  456. s->window_size = ((2*s->tail_size)+s->frame_size);
  457. s->window = av_mallocz(4* s->window_size);
  458. if (!s->window)
  459. return -1;
  460. avctx->extradata = av_mallocz(16);
  461. if (!avctx->extradata)
  462. return -1;
  463. init_put_bits(&pb, avctx->extradata, 16*8);
  464. put_bits(&pb, 2, version); // version
  465. if (version == 1)
  466. {
  467. put_bits(&pb, 2, s->channels);
  468. put_bits(&pb, 4, code_samplerate(s->samplerate));
  469. }
  470. put_bits(&pb, 1, s->lossless);
  471. if (!s->lossless)
  472. put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
  473. put_bits(&pb, 2, s->decorrelation);
  474. put_bits(&pb, 2, s->downsampling);
  475. put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
  476. put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
  477. flush_put_bits(&pb);
  478. avctx->extradata_size = put_bits_count(&pb)/8;
  479. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  480. version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  481. avctx->coded_frame = avcodec_alloc_frame();
  482. if (!avctx->coded_frame)
  483. return -ENOMEM;
  484. avctx->coded_frame->key_frame = 1;
  485. avctx->frame_size = s->block_align*s->downsampling;
  486. return 0;
  487. }
  488. static int sonic_encode_close(AVCodecContext *avctx)
  489. {
  490. SonicContext *s = avctx->priv_data;
  491. int i;
  492. av_freep(&avctx->coded_frame);
  493. for (i = 0; i < s->channels; i++)
  494. av_free(s->coded_samples[i]);
  495. av_free(s->predictor_k);
  496. av_free(s->tail);
  497. av_free(s->tap_quant);
  498. av_free(s->window);
  499. av_free(s->int_samples);
  500. return 0;
  501. }
  502. static int sonic_encode_frame(AVCodecContext *avctx,
  503. uint8_t *buf, int buf_size, void *data)
  504. {
  505. SonicContext *s = avctx->priv_data;
  506. PutBitContext pb;
  507. int i, j, ch, quant = 0, x = 0;
  508. short *samples = data;
  509. init_put_bits(&pb, buf, buf_size*8);
  510. // short -> internal
  511. for (i = 0; i < s->frame_size; i++)
  512. s->int_samples[i] = samples[i];
  513. if (!s->lossless)
  514. for (i = 0; i < s->frame_size; i++)
  515. s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
  516. switch(s->decorrelation)
  517. {
  518. case MID_SIDE:
  519. for (i = 0; i < s->frame_size; i += s->channels)
  520. {
  521. s->int_samples[i] += s->int_samples[i+1];
  522. s->int_samples[i+1] -= shift(s->int_samples[i], 1);
  523. }
  524. break;
  525. case LEFT_SIDE:
  526. for (i = 0; i < s->frame_size; i += s->channels)
  527. s->int_samples[i+1] -= s->int_samples[i];
  528. break;
  529. case RIGHT_SIDE:
  530. for (i = 0; i < s->frame_size; i += s->channels)
  531. s->int_samples[i] -= s->int_samples[i+1];
  532. break;
  533. }
  534. memset(s->window, 0, 4* s->window_size);
  535. for (i = 0; i < s->tail_size; i++)
  536. s->window[x++] = s->tail[i];
  537. for (i = 0; i < s->frame_size; i++)
  538. s->window[x++] = s->int_samples[i];
  539. for (i = 0; i < s->tail_size; i++)
  540. s->window[x++] = 0;
  541. for (i = 0; i < s->tail_size; i++)
  542. s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
  543. // generate taps
  544. modified_levinson_durbin(s->window, s->window_size,
  545. s->predictor_k, s->num_taps, s->channels, s->tap_quant);
  546. if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
  547. return -1;
  548. for (ch = 0; ch < s->channels; ch++)
  549. {
  550. x = s->tail_size+ch;
  551. for (i = 0; i < s->block_align; i++)
  552. {
  553. int sum = 0;
  554. for (j = 0; j < s->downsampling; j++, x += s->channels)
  555. sum += s->window[x];
  556. s->coded_samples[ch][i] = sum;
  557. }
  558. }
  559. // simple rate control code
  560. if (!s->lossless)
  561. {
  562. double energy1 = 0.0, energy2 = 0.0;
  563. for (ch = 0; ch < s->channels; ch++)
  564. {
  565. for (i = 0; i < s->block_align; i++)
  566. {
  567. double sample = s->coded_samples[ch][i];
  568. energy2 += sample*sample;
  569. energy1 += fabs(sample);
  570. }
  571. }
  572. energy2 = sqrt(energy2/(s->channels*s->block_align));
  573. energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
  574. // increase bitrate when samples are like a gaussian distribution
  575. // reduce bitrate when samples are like a two-tailed exponential distribution
  576. if (energy2 > energy1)
  577. energy2 += (energy2-energy1)*RATE_VARIATION;
  578. quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
  579. // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
  580. if (quant < 1)
  581. quant = 1;
  582. if (quant > 65535)
  583. quant = 65535;
  584. set_ue_golomb(&pb, quant);
  585. quant *= SAMPLE_FACTOR;
  586. }
  587. // write out coded samples
  588. for (ch = 0; ch < s->channels; ch++)
  589. {
  590. if (!s->lossless)
  591. for (i = 0; i < s->block_align; i++)
  592. s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
  593. if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
  594. return -1;
  595. }
  596. // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
  597. flush_put_bits(&pb);
  598. return (put_bits_count(&pb)+7)/8;
  599. }
  600. #endif //CONFIG_ENCODERS
  601. static int sonic_decode_init(AVCodecContext *avctx)
  602. {
  603. SonicContext *s = avctx->priv_data;
  604. GetBitContext gb;
  605. int i, version;
  606. s->channels = avctx->channels;
  607. s->samplerate = avctx->sample_rate;
  608. if (!avctx->extradata)
  609. {
  610. av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
  611. return -1;
  612. }
  613. init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
  614. version = get_bits(&gb, 2);
  615. if (version > 1)
  616. {
  617. av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
  618. return -1;
  619. }
  620. if (version == 1)
  621. {
  622. s->channels = get_bits(&gb, 2);
  623. s->samplerate = samplerate_table[get_bits(&gb, 4)];
  624. av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
  625. s->channels, s->samplerate);
  626. }
  627. if (s->channels > MAX_CHANNELS)
  628. {
  629. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  630. return -1;
  631. }
  632. s->lossless = get_bits1(&gb);
  633. if (!s->lossless)
  634. skip_bits(&gb, 3); // XXX FIXME
  635. s->decorrelation = get_bits(&gb, 2);
  636. s->downsampling = get_bits(&gb, 2);
  637. s->num_taps = (get_bits(&gb, 5)+1)<<5;
  638. if (get_bits1(&gb)) // XXX FIXME
  639. av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
  640. s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling;
  641. s->frame_size = s->channels*s->block_align*s->downsampling;
  642. // avctx->frame_size = s->block_align;
  643. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  644. version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  645. // generate taps
  646. s->tap_quant = av_mallocz(4* s->num_taps);
  647. for (i = 0; i < s->num_taps; i++)
  648. s->tap_quant[i] = (int)(sqrt(i+1));
  649. s->predictor_k = av_mallocz(4* s->num_taps);
  650. for (i = 0; i < s->channels; i++)
  651. {
  652. s->predictor_state[i] = av_mallocz(4* s->num_taps);
  653. if (!s->predictor_state[i])
  654. return -1;
  655. }
  656. for (i = 0; i < s->channels; i++)
  657. {
  658. s->coded_samples[i] = av_mallocz(4* s->block_align);
  659. if (!s->coded_samples[i])
  660. return -1;
  661. }
  662. s->int_samples = av_mallocz(4* s->frame_size);
  663. return 0;
  664. }
  665. static int sonic_decode_close(AVCodecContext *avctx)
  666. {
  667. SonicContext *s = avctx->priv_data;
  668. int i;
  669. av_free(s->int_samples);
  670. av_free(s->tap_quant);
  671. av_free(s->predictor_k);
  672. for (i = 0; i < s->channels; i++)
  673. {
  674. av_free(s->predictor_state[i]);
  675. av_free(s->coded_samples[i]);
  676. }
  677. return 0;
  678. }
  679. static int sonic_decode_frame(AVCodecContext *avctx,
  680. int16_t *data, int *data_size,
  681. uint8_t *buf, int buf_size)
  682. {
  683. SonicContext *s = avctx->priv_data;
  684. GetBitContext gb;
  685. int i, quant, ch, j;
  686. short *samples = data;
  687. if (buf_size == 0) return 0;
  688. // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
  689. init_get_bits(&gb, buf, buf_size*8);
  690. intlist_read(&gb, s->predictor_k, s->num_taps, 0);
  691. // dequantize
  692. for (i = 0; i < s->num_taps; i++)
  693. s->predictor_k[i] *= s->tap_quant[i];
  694. if (s->lossless)
  695. quant = 1;
  696. else
  697. quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
  698. // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
  699. for (ch = 0; ch < s->channels; ch++)
  700. {
  701. int x = ch;
  702. predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
  703. intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
  704. for (i = 0; i < s->block_align; i++)
  705. {
  706. for (j = 0; j < s->downsampling - 1; j++)
  707. {
  708. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
  709. x += s->channels;
  710. }
  711. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
  712. x += s->channels;
  713. }
  714. for (i = 0; i < s->num_taps; i++)
  715. s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
  716. }
  717. switch(s->decorrelation)
  718. {
  719. case MID_SIDE:
  720. for (i = 0; i < s->frame_size; i += s->channels)
  721. {
  722. s->int_samples[i+1] += shift(s->int_samples[i], 1);
  723. s->int_samples[i] -= s->int_samples[i+1];
  724. }
  725. break;
  726. case LEFT_SIDE:
  727. for (i = 0; i < s->frame_size; i += s->channels)
  728. s->int_samples[i+1] += s->int_samples[i];
  729. break;
  730. case RIGHT_SIDE:
  731. for (i = 0; i < s->frame_size; i += s->channels)
  732. s->int_samples[i] += s->int_samples[i+1];
  733. break;
  734. }
  735. if (!s->lossless)
  736. for (i = 0; i < s->frame_size; i++)
  737. s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
  738. // internal -> short
  739. for (i = 0; i < s->frame_size; i++)
  740. {
  741. if (s->int_samples[i] > 32767)
  742. samples[i] = 32767;
  743. else if (s->int_samples[i] < -32768)
  744. samples[i] = -32768;
  745. else
  746. samples[i] = s->int_samples[i];
  747. }
  748. align_get_bits(&gb);
  749. *data_size = s->frame_size * 2;
  750. return (get_bits_count(&gb)+7)/8;
  751. }
  752. #ifdef CONFIG_ENCODERS
  753. AVCodec sonic_encoder = {
  754. "sonic",
  755. CODEC_TYPE_AUDIO,
  756. CODEC_ID_SONIC,
  757. sizeof(SonicContext),
  758. sonic_encode_init,
  759. sonic_encode_frame,
  760. sonic_encode_close,
  761. NULL,
  762. };
  763. AVCodec sonic_ls_encoder = {
  764. "sonicls",
  765. CODEC_TYPE_AUDIO,
  766. CODEC_ID_SONIC_LS,
  767. sizeof(SonicContext),
  768. sonic_encode_init,
  769. sonic_encode_frame,
  770. sonic_encode_close,
  771. NULL,
  772. };
  773. #endif
  774. #ifdef CONFIG_DECODERS
  775. AVCodec sonic_decoder = {
  776. "sonic",
  777. CODEC_TYPE_AUDIO,
  778. CODEC_ID_SONIC,
  779. sizeof(SonicContext),
  780. sonic_decode_init,
  781. NULL,
  782. sonic_decode_close,
  783. sonic_decode_frame,
  784. };
  785. #endif