You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1002 lines
26KB

  1. /*
  2. * Simple free lossless/lossy audio codec
  3. * Copyright (c) 2004 Alex Beregszaszi
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "get_bits.h"
  23. #include "golomb.h"
  24. #include "internal.h"
  25. /**
  26. * @file
  27. * Simple free lossless/lossy audio codec
  28. * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
  29. * Written and designed by Alex Beregszaszi
  30. *
  31. * TODO:
  32. * - CABAC put/get_symbol
  33. * - independent quantizer for channels
  34. * - >2 channels support
  35. * - more decorrelation types
  36. * - more tap_quant tests
  37. * - selectable intlist writers/readers (bonk-style, golomb, cabac)
  38. */
  // Largest channel count accepted by the encoder and decoder init functions.
  #define MAX_CHANNELS 2
  // Inter-channel decorrelation modes stored in SonicContext.decorrelation
  // and written to the stream header as a 2-bit field.
  #define MID_SIDE 0
  #define LEFT_SIDE 1
  #define RIGHT_SIDE 2
  43. typedef struct SonicContext {
  44. AVFrame frame;
  45. int lossless, decorrelation;
  46. int num_taps, downsampling;
  47. double quantization;
  48. int channels, samplerate, block_align, frame_size;
  49. int *tap_quant;
  50. int *int_samples;
  51. int *coded_samples[MAX_CHANNELS];
  52. // for encoding
  53. int *tail;
  54. int tail_size;
  55. int *window;
  56. int window_size;
  57. // for decoding
  58. int *predictor_k;
  59. int *predictor_state[MAX_CHANNELS];
  60. } SonicContext;
  // Fixed-point precision (in bits) of the predictor coefficients: products of
  // a coefficient and a sample are shifted down by this amount.
  #define LATTICE_SHIFT 10
  // Extra fixed-point bits given to samples in lossy mode.
  #define SAMPLE_SHIFT 4
  #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
  #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
  // Tuning constants of the lossy encoder's rate control.
  #define BASE_QUANT 0.6
  #define RATE_VARIATION 3.0
  /**
   * Divide a by b, rounding the magnitude of the quotient to the nearest
   * integer (b/2 is added to |a| before the integer division) and then
   * restoring the sign of a.
   */
  static inline int divide(int a, int b)
  {
      const int half = b / 2;
      const int magnitude = a < 0 ? -a : a;
      const int quotient = (magnitude + half) / b;

      return a < 0 ? -quotient : quotient;
  }
  /**
   * Shift a right by b bits after adding half of the divisor (1 << (b-1)),
   * i.e. a rounding right shift. b must be at least 1.
   */
  static inline int shift(int a,int b)
  {
      const int rounding = 1 << (b - 1);

      return (a + rounding) >> b;
  }
  /**
   * Shift a right by b bits and add 1 to the result when a is negative.
   */
  static inline int shift_down(int a,int b)
  {
      int shifted = a >> b;

      if (a < 0)
          shifted++;

      return shifted;
  }
  82. #if 1
  83. static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  84. {
  85. int i;
  86. for (i = 0; i < entries; i++)
  87. set_se_golomb(pb, buf[i]);
  88. return 1;
  89. }
  90. static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  91. {
  92. int i;
  93. for (i = 0; i < entries; i++)
  94. buf[i] = get_se_golomb(gb);
  95. return 1;
  96. }
  97. #else
  // Adaptation rate of the run-length coder below: after every run the step
  // grows or shrinks by step / ADAPT_LEVEL.
  #define ADAPT_LEVEL 8
  /**
   * Return the number of bits needed to represent x (0 for x == 0).
   */
  static int bits_to_store(uint64_t x)
  {
      int count;

      for (count = 0; x; x >>= 1)
          count++;

      return count;
  }
  109. static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
  110. {
  111. int i, bits;
  112. if (!max)
  113. return;
  114. bits = bits_to_store(max);
  115. for (i = 0; i < bits-1; i++)
  116. put_bits(pb, 1, value & (1 << i));
  117. if ( (value | (1 << (bits-1))) <= max)
  118. put_bits(pb, 1, value & (1 << (bits-1)));
  119. }
  120. static unsigned int read_uint_max(GetBitContext *gb, int max)
  121. {
  122. int i, bits, value = 0;
  123. if (!max)
  124. return 0;
  125. bits = bits_to_store(max);
  126. for (i = 0; i < bits-1; i++)
  127. if (get_bits1(gb))
  128. value += 1 << i;
  129. if ( (value | (1<<(bits-1))) <= max)
  130. if (get_bits1(gb))
  131. value += 1 << (bits-1);
  132. return value;
  133. }
  134. static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  135. {
  136. int i, j, x = 0, low_bits = 0, max = 0;
  137. int step = 256, pos = 0, dominant = 0, any = 0;
  138. int *copy, *bits;
  139. copy = av_mallocz(4* entries);
  140. if (!copy)
  141. return -1;
  142. if (base_2_part)
  143. {
  144. int energy = 0;
  145. for (i = 0; i < entries; i++)
  146. energy += abs(buf[i]);
  147. low_bits = bits_to_store(energy / (entries * 2));
  148. if (low_bits > 15)
  149. low_bits = 15;
  150. put_bits(pb, 4, low_bits);
  151. }
  152. for (i = 0; i < entries; i++)
  153. {
  154. put_bits(pb, low_bits, abs(buf[i]));
  155. copy[i] = abs(buf[i]) >> low_bits;
  156. if (copy[i] > max)
  157. max = abs(copy[i]);
  158. }
  159. bits = av_mallocz(4* entries*max);
  160. if (!bits)
  161. {
  162. // av_free(copy);
  163. return -1;
  164. }
  165. for (i = 0; i <= max; i++)
  166. {
  167. for (j = 0; j < entries; j++)
  168. if (copy[j] >= i)
  169. bits[x++] = copy[j] > i;
  170. }
  171. // store bitstream
  172. while (pos < x)
  173. {
  174. int steplet = step >> 8;
  175. if (pos + steplet > x)
  176. steplet = x - pos;
  177. for (i = 0; i < steplet; i++)
  178. if (bits[i+pos] != dominant)
  179. any = 1;
  180. put_bits(pb, 1, any);
  181. if (!any)
  182. {
  183. pos += steplet;
  184. step += step / ADAPT_LEVEL;
  185. }
  186. else
  187. {
  188. int interloper = 0;
  189. while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
  190. interloper++;
  191. // note change
  192. write_uint_max(pb, interloper, (step >> 8) - 1);
  193. pos += interloper + 1;
  194. step -= step / ADAPT_LEVEL;
  195. }
  196. if (step < 256)
  197. {
  198. step = 65536 / step;
  199. dominant = !dominant;
  200. }
  201. }
  202. // store signs
  203. for (i = 0; i < entries; i++)
  204. if (buf[i])
  205. put_bits(pb, 1, buf[i] < 0);
  206. // av_free(bits);
  207. // av_free(copy);
  208. return 0;
  209. }
  210. static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  211. {
  212. int i, low_bits = 0, x = 0;
  213. int n_zeros = 0, step = 256, dominant = 0;
  214. int pos = 0, level = 0;
  215. int *bits = av_mallocz(4* entries);
  216. if (!bits)
  217. return -1;
  218. if (base_2_part)
  219. {
  220. low_bits = get_bits(gb, 4);
  221. if (low_bits)
  222. for (i = 0; i < entries; i++)
  223. buf[i] = get_bits(gb, low_bits);
  224. }
  225. // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
  226. while (n_zeros < entries)
  227. {
  228. int steplet = step >> 8;
  229. if (!get_bits1(gb))
  230. {
  231. for (i = 0; i < steplet; i++)
  232. bits[x++] = dominant;
  233. if (!dominant)
  234. n_zeros += steplet;
  235. step += step / ADAPT_LEVEL;
  236. }
  237. else
  238. {
  239. int actual_run = read_uint_max(gb, steplet-1);
  240. // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
  241. for (i = 0; i < actual_run; i++)
  242. bits[x++] = dominant;
  243. bits[x++] = !dominant;
  244. if (!dominant)
  245. n_zeros += actual_run;
  246. else
  247. n_zeros++;
  248. step -= step / ADAPT_LEVEL;
  249. }
  250. if (step < 256)
  251. {
  252. step = 65536 / step;
  253. dominant = !dominant;
  254. }
  255. }
  256. // reconstruct unsigned values
  257. n_zeros = 0;
  258. for (i = 0; n_zeros < entries; i++)
  259. {
  260. while(1)
  261. {
  262. if (pos >= entries)
  263. {
  264. pos = 0;
  265. level += 1 << low_bits;
  266. }
  267. if (buf[pos] >= level)
  268. break;
  269. pos++;
  270. }
  271. if (bits[i])
  272. buf[pos] += 1 << low_bits;
  273. else
  274. n_zeros++;
  275. pos++;
  276. }
  277. // av_free(bits);
  278. // read signs
  279. for (i = 0; i < entries; i++)
  280. if (buf[i] && get_bits1(gb))
  281. buf[i] = -buf[i];
  282. // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
  283. return 0;
  284. }
  285. #endif
  286. static void predictor_init_state(int *k, int *state, int order)
  287. {
  288. int i;
  289. for (i = order-2; i >= 0; i--)
  290. {
  291. int j, p, x = state[i];
  292. for (j = 0, p = i+1; p < order; j++,p++)
  293. {
  294. int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
  295. state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
  296. x = tmp;
  297. }
  298. }
  299. }
  300. static int predictor_calc_error(int *k, int *state, int order, int error)
  301. {
  302. int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
  303. #if 1
  304. int *k_ptr = &(k[order-2]),
  305. *state_ptr = &(state[order-2]);
  306. for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
  307. {
  308. int k_value = *k_ptr, state_value = *state_ptr;
  309. x -= shift_down(k_value * state_value, LATTICE_SHIFT);
  310. state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
  311. }
  312. #else
  313. for (i = order-2; i >= 0; i--)
  314. {
  315. x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
  316. state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
  317. }
  318. #endif
  319. // don't drift too far, to avoid overflows
  320. if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
  321. if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
  322. state[0] = x;
  323. return x;
  324. }
  325. #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
  326. // Heavily modified Levinson-Durbin algorithm which
  327. // copes better with quantization, and calculates the
  328. // actual whitened result as it goes.
  329. static void modified_levinson_durbin(int *window, int window_entries,
  330. int *out, int out_entries, int channels, int *tap_quant)
  331. {
  332. int i;
  333. int *state = av_mallocz(4* window_entries);
  334. memcpy(state, window, 4* window_entries);
  335. for (i = 0; i < out_entries; i++)
  336. {
  337. int step = (i+1)*channels, k, j;
  338. double xx = 0.0, xy = 0.0;
  339. #if 1
  340. int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
  341. j = window_entries - step;
  342. for (;j>=0;j--,x_ptr++,state_ptr++)
  343. {
  344. double x_value = *x_ptr, state_value = *state_ptr;
  345. xx += state_value*state_value;
  346. xy += x_value*state_value;
  347. }
  348. #else
  349. for (j = 0; j <= (window_entries - step); j++);
  350. {
  351. double stepval = window[step+j], stateval = window[j];
  352. // xx += (double)window[j]*(double)window[j];
  353. // xy += (double)window[step+j]*(double)window[j];
  354. xx += stateval*stateval;
  355. xy += stepval*stateval;
  356. }
  357. #endif
  358. if (xx == 0.0)
  359. k = 0;
  360. else
  361. k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
  362. if (k > (LATTICE_FACTOR/tap_quant[i]))
  363. k = LATTICE_FACTOR/tap_quant[i];
  364. if (-k > (LATTICE_FACTOR/tap_quant[i]))
  365. k = -(LATTICE_FACTOR/tap_quant[i]);
  366. out[i] = k;
  367. k *= tap_quant[i];
  368. #if 1
  369. x_ptr = &(window[step]);
  370. state_ptr = &(state[0]);
  371. j = window_entries - step;
  372. for (;j>=0;j--,x_ptr++,state_ptr++)
  373. {
  374. int x_value = *x_ptr, state_value = *state_ptr;
  375. *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
  376. *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
  377. }
  378. #else
  379. for (j=0; j <= (window_entries - step); j++)
  380. {
  381. int stepval = window[step+j], stateval=state[j];
  382. window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
  383. state[j] += shift_down(k * stepval, LATTICE_SHIFT);
  384. }
  385. #endif
  386. }
  387. av_free(state);
  388. }
  /**
   * Map a sample rate in Hz to its stream header code.
   *
   * @return the code (0..8), or -1 if the rate has none
   */
  static inline int code_samplerate(int samplerate)
  {
      static const int rates[] = {
          44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
      };
      const int count = (int)(sizeof(rates) / sizeof(rates[0]));
      int code;

      for (code = 0; code < count; code++) {
          if (rates[code] == samplerate)
              return code;
      }

      return -1;
  }
  405. static av_cold int sonic_encode_init(AVCodecContext *avctx)
  406. {
  407. SonicContext *s = avctx->priv_data;
  408. PutBitContext pb;
  409. int i, version = 0;
  410. if (avctx->channels > MAX_CHANNELS)
  411. {
  412. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  413. return -1; /* only stereo or mono for now */
  414. }
  415. if (avctx->channels == 2)
  416. s->decorrelation = MID_SIDE;
  417. if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
  418. {
  419. s->lossless = 1;
  420. s->num_taps = 32;
  421. s->downsampling = 1;
  422. s->quantization = 0.0;
  423. }
  424. else
  425. {
  426. s->num_taps = 128;
  427. s->downsampling = 2;
  428. s->quantization = 1.0;
  429. }
  430. // max tap 2048
  431. if ((s->num_taps < 32) || (s->num_taps > 1024) ||
  432. ((s->num_taps>>5)<<5 != s->num_taps))
  433. {
  434. av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
  435. return -1;
  436. }
  437. // generate taps
  438. s->tap_quant = av_mallocz(4* s->num_taps);
  439. for (i = 0; i < s->num_taps; i++)
  440. s->tap_quant[i] = (int)(sqrt(i+1));
  441. s->channels = avctx->channels;
  442. s->samplerate = avctx->sample_rate;
  443. s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
  444. s->frame_size = s->channels*s->block_align*s->downsampling;
  445. s->tail_size = s->num_taps*s->channels;
  446. s->tail = av_mallocz(4 * s->tail_size);
  447. if (!s->tail)
  448. return -1;
  449. s->predictor_k = av_mallocz(4 * s->num_taps);
  450. if (!s->predictor_k)
  451. return -1;
  452. for (i = 0; i < s->channels; i++)
  453. {
  454. s->coded_samples[i] = av_mallocz(4* s->block_align);
  455. if (!s->coded_samples[i])
  456. return -1;
  457. }
  458. s->int_samples = av_mallocz(4* s->frame_size);
  459. s->window_size = ((2*s->tail_size)+s->frame_size);
  460. s->window = av_mallocz(4* s->window_size);
  461. if (!s->window)
  462. return -1;
  463. avctx->extradata = av_mallocz(16);
  464. if (!avctx->extradata)
  465. return -1;
  466. init_put_bits(&pb, avctx->extradata, 16*8);
  467. put_bits(&pb, 2, version); // version
  468. if (version == 1)
  469. {
  470. put_bits(&pb, 2, s->channels);
  471. put_bits(&pb, 4, code_samplerate(s->samplerate));
  472. }
  473. put_bits(&pb, 1, s->lossless);
  474. if (!s->lossless)
  475. put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
  476. put_bits(&pb, 2, s->decorrelation);
  477. put_bits(&pb, 2, s->downsampling);
  478. put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
  479. put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
  480. flush_put_bits(&pb);
  481. avctx->extradata_size = put_bits_count(&pb)/8;
  482. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  483. version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  484. avctx->coded_frame = avcodec_alloc_frame();
  485. if (!avctx->coded_frame)
  486. return AVERROR(ENOMEM);
  487. avctx->coded_frame->key_frame = 1;
  488. avctx->frame_size = s->block_align*s->downsampling;
  489. return 0;
  490. }
  491. static av_cold int sonic_encode_close(AVCodecContext *avctx)
  492. {
  493. SonicContext *s = avctx->priv_data;
  494. int i;
  495. av_freep(&avctx->coded_frame);
  496. for (i = 0; i < s->channels; i++)
  497. av_free(s->coded_samples[i]);
  498. av_free(s->predictor_k);
  499. av_free(s->tail);
  500. av_free(s->tap_quant);
  501. av_free(s->window);
  502. av_free(s->int_samples);
  503. return 0;
  504. }
  505. static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  506. const AVFrame *frame, int *got_packet_ptr)
  507. {
  508. SonicContext *s = avctx->priv_data;
  509. PutBitContext pb;
  510. int i, j, ch, quant = 0, x = 0;
  511. int ret;
  512. const short *samples = (const int16_t*)frame->data[0];
  513. if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)))
  514. return ret;
  515. init_put_bits(&pb, avpkt->data, avpkt->size);
  516. // short -> internal
  517. for (i = 0; i < s->frame_size; i++)
  518. s->int_samples[i] = samples[i];
  519. if (!s->lossless)
  520. for (i = 0; i < s->frame_size; i++)
  521. s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
  522. switch(s->decorrelation)
  523. {
  524. case MID_SIDE:
  525. for (i = 0; i < s->frame_size; i += s->channels)
  526. {
  527. s->int_samples[i] += s->int_samples[i+1];
  528. s->int_samples[i+1] -= shift(s->int_samples[i], 1);
  529. }
  530. break;
  531. case LEFT_SIDE:
  532. for (i = 0; i < s->frame_size; i += s->channels)
  533. s->int_samples[i+1] -= s->int_samples[i];
  534. break;
  535. case RIGHT_SIDE:
  536. for (i = 0; i < s->frame_size; i += s->channels)
  537. s->int_samples[i] -= s->int_samples[i+1];
  538. break;
  539. }
  540. memset(s->window, 0, 4* s->window_size);
  541. for (i = 0; i < s->tail_size; i++)
  542. s->window[x++] = s->tail[i];
  543. for (i = 0; i < s->frame_size; i++)
  544. s->window[x++] = s->int_samples[i];
  545. for (i = 0; i < s->tail_size; i++)
  546. s->window[x++] = 0;
  547. for (i = 0; i < s->tail_size; i++)
  548. s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
  549. // generate taps
  550. modified_levinson_durbin(s->window, s->window_size,
  551. s->predictor_k, s->num_taps, s->channels, s->tap_quant);
  552. if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
  553. return -1;
  554. for (ch = 0; ch < s->channels; ch++)
  555. {
  556. x = s->tail_size+ch;
  557. for (i = 0; i < s->block_align; i++)
  558. {
  559. int sum = 0;
  560. for (j = 0; j < s->downsampling; j++, x += s->channels)
  561. sum += s->window[x];
  562. s->coded_samples[ch][i] = sum;
  563. }
  564. }
  565. // simple rate control code
  566. if (!s->lossless)
  567. {
  568. double energy1 = 0.0, energy2 = 0.0;
  569. for (ch = 0; ch < s->channels; ch++)
  570. {
  571. for (i = 0; i < s->block_align; i++)
  572. {
  573. double sample = s->coded_samples[ch][i];
  574. energy2 += sample*sample;
  575. energy1 += fabs(sample);
  576. }
  577. }
  578. energy2 = sqrt(energy2/(s->channels*s->block_align));
  579. energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
  580. // increase bitrate when samples are like a gaussian distribution
  581. // reduce bitrate when samples are like a two-tailed exponential distribution
  582. if (energy2 > energy1)
  583. energy2 += (energy2-energy1)*RATE_VARIATION;
  584. quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
  585. // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
  586. if (quant < 1)
  587. quant = 1;
  588. if (quant > 65534)
  589. quant = 65534;
  590. set_ue_golomb(&pb, quant);
  591. quant *= SAMPLE_FACTOR;
  592. }
  593. // write out coded samples
  594. for (ch = 0; ch < s->channels; ch++)
  595. {
  596. if (!s->lossless)
  597. for (i = 0; i < s->block_align; i++)
  598. s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
  599. if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
  600. return -1;
  601. }
  602. // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
  603. flush_put_bits(&pb);
  604. avpkt->size = (put_bits_count(&pb)+7)/8;
  605. *got_packet_ptr = 1;
  606. return 0;
  607. }
  608. #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
  609. #if CONFIG_SONIC_DECODER
  // Sample rates in Hz, indexed by the 4-bit rate code of version 1 headers.
  static const int samplerate_table[] = {
      44100, 22050, 11025,
      96000, 48000, 32000,
      24000, 16000,  8000,
  };
  612. static av_cold int sonic_decode_init(AVCodecContext *avctx)
  613. {
  614. SonicContext *s = avctx->priv_data;
  615. GetBitContext gb;
  616. int i, version;
  617. s->channels = avctx->channels;
  618. s->samplerate = avctx->sample_rate;
  619. avcodec_get_frame_defaults(&s->frame);
  620. avctx->coded_frame = &s->frame;
  621. if (!avctx->extradata)
  622. {
  623. av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
  624. return -1;
  625. }
  626. init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
  627. version = get_bits(&gb, 2);
  628. if (version > 1)
  629. {
  630. av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
  631. return -1;
  632. }
  633. if (version == 1)
  634. {
  635. s->channels = get_bits(&gb, 2);
  636. s->samplerate = samplerate_table[get_bits(&gb, 4)];
  637. av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
  638. s->channels, s->samplerate);
  639. }
  640. if (s->channels > MAX_CHANNELS)
  641. {
  642. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  643. return -1;
  644. }
  645. s->lossless = get_bits1(&gb);
  646. if (!s->lossless)
  647. skip_bits(&gb, 3); // XXX FIXME
  648. s->decorrelation = get_bits(&gb, 2);
  649. s->downsampling = get_bits(&gb, 2);
  650. if (!s->downsampling) {
  651. av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
  652. return AVERROR_INVALIDDATA;
  653. }
  654. s->num_taps = (get_bits(&gb, 5)+1)<<5;
  655. if (get_bits1(&gb)) // XXX FIXME
  656. av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
  657. s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
  658. s->frame_size = s->channels*s->block_align*s->downsampling;
  659. // avctx->frame_size = s->block_align;
  660. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  661. version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  662. // generate taps
  663. s->tap_quant = av_mallocz(4* s->num_taps);
  664. for (i = 0; i < s->num_taps; i++)
  665. s->tap_quant[i] = (int)(sqrt(i+1));
  666. s->predictor_k = av_mallocz(4* s->num_taps);
  667. for (i = 0; i < s->channels; i++)
  668. {
  669. s->predictor_state[i] = av_mallocz(4* s->num_taps);
  670. if (!s->predictor_state[i])
  671. return -1;
  672. }
  673. for (i = 0; i < s->channels; i++)
  674. {
  675. s->coded_samples[i] = av_mallocz(4* s->block_align);
  676. if (!s->coded_samples[i])
  677. return -1;
  678. }
  679. s->int_samples = av_mallocz(4* s->frame_size);
  680. avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  681. return 0;
  682. }
  683. static av_cold int sonic_decode_close(AVCodecContext *avctx)
  684. {
  685. SonicContext *s = avctx->priv_data;
  686. int i;
  687. av_free(s->int_samples);
  688. av_free(s->tap_quant);
  689. av_free(s->predictor_k);
  690. for (i = 0; i < s->channels; i++)
  691. {
  692. av_free(s->predictor_state[i]);
  693. av_free(s->coded_samples[i]);
  694. }
  695. return 0;
  696. }
  697. static int sonic_decode_frame(AVCodecContext *avctx,
  698. void *data, int *got_frame_ptr,
  699. AVPacket *avpkt)
  700. {
  701. const uint8_t *buf = avpkt->data;
  702. int buf_size = avpkt->size;
  703. SonicContext *s = avctx->priv_data;
  704. GetBitContext gb;
  705. int i, quant, ch, j, ret;
  706. int16_t *samples;
  707. if (buf_size == 0) return 0;
  708. s->frame.nb_samples = s->frame_size;
  709. if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
  710. av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
  711. return ret;
  712. }
  713. samples = (int16_t *)s->frame.data[0];
  714. // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
  715. init_get_bits(&gb, buf, buf_size*8);
  716. intlist_read(&gb, s->predictor_k, s->num_taps, 0);
  717. // dequantize
  718. for (i = 0; i < s->num_taps; i++)
  719. s->predictor_k[i] *= s->tap_quant[i];
  720. if (s->lossless)
  721. quant = 1;
  722. else
  723. quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
  724. // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
  725. for (ch = 0; ch < s->channels; ch++)
  726. {
  727. int x = ch;
  728. predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
  729. intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
  730. for (i = 0; i < s->block_align; i++)
  731. {
  732. for (j = 0; j < s->downsampling - 1; j++)
  733. {
  734. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
  735. x += s->channels;
  736. }
  737. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
  738. x += s->channels;
  739. }
  740. for (i = 0; i < s->num_taps; i++)
  741. s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
  742. }
  743. switch(s->decorrelation)
  744. {
  745. case MID_SIDE:
  746. for (i = 0; i < s->frame_size; i += s->channels)
  747. {
  748. s->int_samples[i+1] += shift(s->int_samples[i], 1);
  749. s->int_samples[i] -= s->int_samples[i+1];
  750. }
  751. break;
  752. case LEFT_SIDE:
  753. for (i = 0; i < s->frame_size; i += s->channels)
  754. s->int_samples[i+1] += s->int_samples[i];
  755. break;
  756. case RIGHT_SIDE:
  757. for (i = 0; i < s->frame_size; i += s->channels)
  758. s->int_samples[i] += s->int_samples[i+1];
  759. break;
  760. }
  761. if (!s->lossless)
  762. for (i = 0; i < s->frame_size; i++)
  763. s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
  764. // internal -> short
  765. for (i = 0; i < s->frame_size; i++)
  766. samples[i] = av_clip_int16(s->int_samples[i]);
  767. align_get_bits(&gb);
  768. *got_frame_ptr = 1;
  769. *(AVFrame*)data = s->frame;
  770. return (get_bits_count(&gb)+7)/8;
  771. }
  772. AVCodec ff_sonic_decoder = {
  773. .name = "sonic",
  774. .type = AVMEDIA_TYPE_AUDIO,
  775. .id = AV_CODEC_ID_SONIC,
  776. .priv_data_size = sizeof(SonicContext),
  777. .init = sonic_decode_init,
  778. .close = sonic_decode_close,
  779. .decode = sonic_decode_frame,
  780. .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
  781. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  782. };
  783. #endif /* CONFIG_SONIC_DECODER */
  784. #if CONFIG_SONIC_ENCODER
  785. AVCodec ff_sonic_encoder = {
  786. .name = "sonic",
  787. .type = AVMEDIA_TYPE_AUDIO,
  788. .id = AV_CODEC_ID_SONIC,
  789. .priv_data_size = sizeof(SonicContext),
  790. .init = sonic_encode_init,
  791. .encode2 = sonic_encode_frame,
  792. .capabilities = CODEC_CAP_EXPERIMENTAL,
  793. .close = sonic_encode_close,
  794. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  795. };
  796. #endif
  797. #if CONFIG_SONIC_LS_ENCODER
  798. AVCodec ff_sonic_ls_encoder = {
  799. .name = "sonicls",
  800. .type = AVMEDIA_TYPE_AUDIO,
  801. .id = AV_CODEC_ID_SONIC_LS,
  802. .priv_data_size = sizeof(SonicContext),
  803. .init = sonic_encode_init,
  804. .encode2 = sonic_encode_frame,
  805. .capabilities = CODEC_CAP_EXPERIMENTAL,
  806. .close = sonic_encode_close,
  807. .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
  808. };
  809. #endif