You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1118 lines
30KB

  1. /*
  2. * Simple free lossless/lossy audio codec
  3. * Copyright (c) 2004 Alex Beregszaszi
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "get_bits.h"
  23. #include "golomb.h"
  24. #include "internal.h"
  25. #include "rangecoder.h"
  26. /**
  27. * @file
  28. * Simple free lossless/lossy audio codec
  29. * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
  30. * Written and designed by Alex Beregszaszi
  31. *
  32. * TODO:
  33. * - CABAC put/get_symbol
  34. * - independent quantizer for channels
  35. * - >2 channels support
  36. * - more decorrelation types
  37. * - more tap_quant tests
  38. * - selectable intlist writers/readers (bonk-style, golomb, cabac)
  39. */
  /* Upper bound on the number of channels; sizes the per-channel arrays below. */
  #define MAX_CHANNELS 2

  /* Values of SonicContext.decorrelation handled by the frame coders. */
  #define MID_SIDE 0
  #define LEFT_SIDE 1
  #define RIGHT_SIDE 2

  /**
   * Private codec state shared by the Sonic encoder and decoder.
   */
  typedef struct SonicContext {
      /* stream format version (major / minor) as written to the extradata */
      int version;
      int minor_version;

      /* coding mode flags / selectors stored in the extradata */
      int lossless;
      int decorrelation;

      /* predictor order and downsampling factor */
      int num_taps;
      int downsampling;

      /* rate-control scale factor applied by the lossy encoder */
      double quantization;

      /* stream geometry */
      int channels;
      int samplerate;
      int block_align;
      int frame_size;

      /* per-tap quantizer steps (num_taps entries) */
      int *tap_quant;
      /* interleaved working copy of one frame (frame_size entries) */
      int *int_samples;
      /* per-channel residual buffers (block_align entries each) */
      int *coded_samples[MAX_CHANNELS];

      // for encoding
      int *tail;
      int tail_size;
      int *window;
      int window_size;

      // for decoding
      int *predictor_k;
      int *predictor_state[MAX_CHANNELS];
  } SonicContext;
  /* Fixed-point precision (in bits) of the lattice predictor coefficients. */
  #define LATTICE_SHIFT 10
  #define LATTICE_FACTOR (1 << LATTICE_SHIFT)

  /* Extra fractional bits given to samples in lossy mode. */
  #define SAMPLE_SHIFT 4
  #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)

  /* Constants of the lossy encoder's rate control. */
  #define BASE_QUANT 0.6
  #define RATE_VARIATION 3.0
  /**
   * Right-shift a by b bits after adding half of the divisor (1 << (b-1)),
   * i.e. a rounding shift.
   *
   * The addition is carried out in unsigned arithmetic so that values of a
   * close to INT_MAX wrap around instead of triggering signed-overflow
   * undefined behaviour.
   *
   * @param a value to shift
   * @param b shift amount, must be at least 1
   * @return the rounded, shifted value
   */
  static inline int shift(int a,int b)
  {
      return (int)(a + (1U << (b - 1))) >> b;
  }
  /**
   * Right-shift a by b bits and add one to the result when a is negative.
   *
   * @param a value to shift
   * @param b shift amount
   * @return (a >> b), incremented by one for negative a
   */
  static inline int shift_down(int a,int b)
  {
      int shifted = a >> b;

      if (a < 0)
          shifted++;

      return shifted;
  }
  77. static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
  78. int i;
  79. #define put_rac(C,S,B) \
  80. do{\
  81. if(rc_stat){\
  82. rc_stat[*(S)][B]++;\
  83. rc_stat2[(S)-state][B]++;\
  84. }\
  85. put_rac(C,S,B);\
  86. }while(0)
  87. if(v){
  88. const int a= FFABS(v);
  89. const int e= av_log2(a);
  90. put_rac(c, state+0, 0);
  91. if(e<=9){
  92. for(i=0; i<e; i++){
  93. put_rac(c, state+1+i, 1); //1..10
  94. }
  95. put_rac(c, state+1+i, 0);
  96. for(i=e-1; i>=0; i--){
  97. put_rac(c, state+22+i, (a>>i)&1); //22..31
  98. }
  99. if(is_signed)
  100. put_rac(c, state+11 + e, v < 0); //11..21
  101. }else{
  102. for(i=0; i<e; i++){
  103. put_rac(c, state+1+FFMIN(i,9), 1); //1..10
  104. }
  105. put_rac(c, state+1+9, 0);
  106. for(i=e-1; i>=0; i--){
  107. put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
  108. }
  109. if(is_signed)
  110. put_rac(c, state+11 + 10, v < 0); //11..21
  111. }
  112. }else{
  113. put_rac(c, state+0, 1);
  114. }
  115. #undef put_rac
  116. }
  117. static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
  118. if(get_rac(c, state+0))
  119. return 0;
  120. else{
  121. int i, e;
  122. unsigned a;
  123. e= 0;
  124. while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
  125. e++;
  126. if (e > 31)
  127. return AVERROR_INVALIDDATA;
  128. }
  129. a= 1;
  130. for(i=e-1; i>=0; i--){
  131. a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
  132. }
  133. e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
  134. return (a^e)-e;
  135. }
  136. }
  137. #if 1
  138. static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
  139. {
  140. int i;
  141. for (i = 0; i < entries; i++)
  142. put_symbol(c, state, buf[i], 1, NULL, NULL);
  143. return 1;
  144. }
  145. static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
  146. {
  147. int i;
  148. for (i = 0; i < entries; i++)
  149. buf[i] = get_symbol(c, state, 1);
  150. return 1;
  151. }
  152. #elif 1
  153. static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  154. {
  155. int i;
  156. for (i = 0; i < entries; i++)
  157. set_se_golomb(pb, buf[i]);
  158. return 1;
  159. }
  160. static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  161. {
  162. int i;
  163. for (i = 0; i < entries; i++)
  164. buf[i] = get_se_golomb(gb);
  165. return 1;
  166. }
  167. #else
  168. #define ADAPT_LEVEL 8
  /**
   * Count the bits needed to represent x, i.e. the position of its highest
   * set bit plus one.
   *
   * @param x value to measure
   * @return number of significant bits, 0 for x == 0
   */
  static int bits_to_store(uint64_t x)
  {
      int width;

      for (width = 0; x != 0; x >>= 1)
          width++;

      return width;
  }
  179. static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
  180. {
  181. int i, bits;
  182. if (!max)
  183. return;
  184. bits = bits_to_store(max);
  185. for (i = 0; i < bits-1; i++)
  186. put_bits(pb, 1, value & (1 << i));
  187. if ( (value | (1 << (bits-1))) <= max)
  188. put_bits(pb, 1, value & (1 << (bits-1)));
  189. }
  190. static unsigned int read_uint_max(GetBitContext *gb, int max)
  191. {
  192. int i, bits, value = 0;
  193. if (!max)
  194. return 0;
  195. bits = bits_to_store(max);
  196. for (i = 0; i < bits-1; i++)
  197. if (get_bits1(gb))
  198. value += 1 << i;
  199. if ( (value | (1<<(bits-1))) <= max)
  200. if (get_bits1(gb))
  201. value += 1 << (bits-1);
  202. return value;
  203. }
  204. static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  205. {
  206. int i, j, x = 0, low_bits = 0, max = 0;
  207. int step = 256, pos = 0, dominant = 0, any = 0;
  208. int *copy, *bits;
  209. copy = av_calloc(entries, sizeof(*copy));
  210. if (!copy)
  211. return AVERROR(ENOMEM);
  212. if (base_2_part)
  213. {
  214. int energy = 0;
  215. for (i = 0; i < entries; i++)
  216. energy += abs(buf[i]);
  217. low_bits = bits_to_store(energy / (entries * 2));
  218. if (low_bits > 15)
  219. low_bits = 15;
  220. put_bits(pb, 4, low_bits);
  221. }
  222. for (i = 0; i < entries; i++)
  223. {
  224. put_bits(pb, low_bits, abs(buf[i]));
  225. copy[i] = abs(buf[i]) >> low_bits;
  226. if (copy[i] > max)
  227. max = abs(copy[i]);
  228. }
  229. bits = av_calloc(entries*max, sizeof(*bits));
  230. if (!bits)
  231. {
  232. av_free(copy);
  233. return AVERROR(ENOMEM);
  234. }
  235. for (i = 0; i <= max; i++)
  236. {
  237. for (j = 0; j < entries; j++)
  238. if (copy[j] >= i)
  239. bits[x++] = copy[j] > i;
  240. }
  241. // store bitstream
  242. while (pos < x)
  243. {
  244. int steplet = step >> 8;
  245. if (pos + steplet > x)
  246. steplet = x - pos;
  247. for (i = 0; i < steplet; i++)
  248. if (bits[i+pos] != dominant)
  249. any = 1;
  250. put_bits(pb, 1, any);
  251. if (!any)
  252. {
  253. pos += steplet;
  254. step += step / ADAPT_LEVEL;
  255. }
  256. else
  257. {
  258. int interloper = 0;
  259. while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
  260. interloper++;
  261. // note change
  262. write_uint_max(pb, interloper, (step >> 8) - 1);
  263. pos += interloper + 1;
  264. step -= step / ADAPT_LEVEL;
  265. }
  266. if (step < 256)
  267. {
  268. step = 65536 / step;
  269. dominant = !dominant;
  270. }
  271. }
  272. // store signs
  273. for (i = 0; i < entries; i++)
  274. if (buf[i])
  275. put_bits(pb, 1, buf[i] < 0);
  276. av_free(bits);
  277. av_free(copy);
  278. return 0;
  279. }
  /**
   * Bonk-style list reader (compiled only when both other variants are
   * disabled).
   *
   * Reads the optional raw low bits, then the run-length coded bit sequence
   * into a temporary "bits" array, rebuilds the magnitudes from it level by
   * level, and finally applies the sign bits.
   *
   * Returns 0 on success or AVERROR(ENOMEM) if the temporary array cannot be
   * allocated.
   *
   * NOTE(review): "bits" is allocated with `entries` elements, but x is
   * advanced by the decoded run lengths without any bound check, so it looks
   * like x can run past the allocation - confirm and add a limit.
   * NOTE(review): buf[] is only written here when base_2_part is set and
   * low_bits is non-zero, yet the reconstruction loop reads and increments
   * buf[pos]; presumably the caller must pass a zeroed buffer - verify.
   */
  280. static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  281. {
  282. int i, low_bits = 0, x = 0;
  283. int n_zeros = 0, step = 256, dominant = 0;
  284. int pos = 0, level = 0;
  285. int *bits = av_calloc(entries, sizeof(*bits));
  286. if (!bits)
  287. return AVERROR(ENOMEM);
  288. if (base_2_part)
  289. {
  /* 4-bit count of raw low bits, followed by that many bits per entry */
  290. low_bits = get_bits(gb, 4);
  291. if (low_bits)
  292. for (i = 0; i < entries; i++)
  293. buf[i] = get_bits(gb, low_bits);
  294. }
  295. // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
  /* expand the run-length code until one 0 bit per entry has been seen */
  296. while (n_zeros < entries)
  297. {
  298. int steplet = step >> 8;
  299. if (!get_bits1(gb))
  300. {
  /* flag 0: a full step of dominant bits; the step grows */
  301. for (i = 0; i < steplet; i++)
  302. bits[x++] = dominant;
  303. if (!dominant)
  304. n_zeros += steplet;
  305. step += step / ADAPT_LEVEL;
  306. }
  307. else
  308. {
  /* flag 1: a shorter run of dominant bits ended by one opposite bit; the step shrinks */
  309. int actual_run = read_uint_max(gb, steplet-1);
  310. // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
  311. for (i = 0; i < actual_run; i++)
  312. bits[x++] = dominant;
  313. bits[x++] = !dominant;
  314. if (!dominant)
  315. n_zeros += actual_run;
  316. else
  317. n_zeros++;
  318. step -= step / ADAPT_LEVEL;
  319. }
  /* once the step drops below 256 the dominant bit value is flipped */
  320. if (step < 256)
  321. {
  322. step = 65536 / step;
  323. dominant = !dominant;
  324. }
  325. }
  326. // reconstruct unsigned values
  327. n_zeros = 0;
  328. for (i = 0; n_zeros < entries; i++)
  329. {
  /* find the next entry that is still at or above the current level, wrapping to the next level at the end of the list */
  330. while(1)
  331. {
  332. if (pos >= entries)
  333. {
  334. pos = 0;
  335. level += 1 << low_bits;
  336. }
  337. if (buf[pos] >= level)
  338. break;
  339. pos++;
  340. }
  /* a 1 bit raises the entry by one level, a 0 bit finishes it */
  341. if (bits[i])
  342. buf[pos] += 1 << low_bits;
  343. else
  344. n_zeros++;
  345. pos++;
  346. }
  347. av_free(bits);
  348. // read signs
  349. for (i = 0; i < entries; i++)
  350. if (buf[i] && get_bits1(gb))
  351. buf[i] = -buf[i];
  352. // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
  353. return 0;
  354. }
  355. #endif
  356. static void predictor_init_state(int *k, int *state, int order)
  357. {
  358. int i;
  359. for (i = order-2; i >= 0; i--)
  360. {
  361. int j, p, x = state[i];
  362. for (j = 0, p = i+1; p < order; j++,p++)
  363. {
  364. int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT);
  365. state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT);
  366. x = tmp;
  367. }
  368. }
  369. }
  370. static int predictor_calc_error(int *k, int *state, int order, int error)
  371. {
  372. int i, x = error - shift_down(k[order-1] * (unsigned)state[order-1], LATTICE_SHIFT);
  373. #if 1
  374. int *k_ptr = &(k[order-2]),
  375. *state_ptr = &(state[order-2]);
  376. for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
  377. {
  378. int k_value = *k_ptr, state_value = *state_ptr;
  379. x -= (unsigned)shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
  380. state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
  381. }
  382. #else
  383. for (i = order-2; i >= 0; i--)
  384. {
  385. x -= (unsigned)shift_down(k[i] * state[i], LATTICE_SHIFT);
  386. state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
  387. }
  388. #endif
  389. // don't drift too far, to avoid overflows
  390. if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
  391. if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
  392. state[0] = x;
  393. return x;
  394. }
  395. #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
  396. // Heavily modified Levinson-Durbin algorithm which
  397. // copes better with quantization, and calculates the
  398. // actual whitened result as it goes.
  399. static void modified_levinson_durbin(int *window, int window_entries,
  400. int *out, int out_entries, int channels, int *tap_quant)
  401. {
  402. int i;
  403. int *state = window + window_entries;
  404. memcpy(state, window, window_entries * sizeof(*state));
  405. for (i = 0; i < out_entries; i++)
  406. {
  407. int step = (i+1)*channels, k, j;
  408. double xx = 0.0, xy = 0.0;
  409. #if 1
  410. int *x_ptr = &(window[step]);
  411. int *state_ptr = &(state[0]);
  412. j = window_entries - step;
  413. for (;j>0;j--,x_ptr++,state_ptr++)
  414. {
  415. double x_value = *x_ptr;
  416. double state_value = *state_ptr;
  417. xx += state_value*state_value;
  418. xy += x_value*state_value;
  419. }
  420. #else
  421. for (j = 0; j <= (window_entries - step); j++);
  422. {
  423. double stepval = window[step+j];
  424. double stateval = window[j];
  425. // xx += (double)window[j]*(double)window[j];
  426. // xy += (double)window[step+j]*(double)window[j];
  427. xx += stateval*stateval;
  428. xy += stepval*stateval;
  429. }
  430. #endif
  431. if (xx == 0.0)
  432. k = 0;
  433. else
  434. k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
  435. if (k > (LATTICE_FACTOR/tap_quant[i]))
  436. k = LATTICE_FACTOR/tap_quant[i];
  437. if (-k > (LATTICE_FACTOR/tap_quant[i]))
  438. k = -(LATTICE_FACTOR/tap_quant[i]);
  439. out[i] = k;
  440. k *= tap_quant[i];
  441. #if 1
  442. x_ptr = &(window[step]);
  443. state_ptr = &(state[0]);
  444. j = window_entries - step;
  445. for (;j>0;j--,x_ptr++,state_ptr++)
  446. {
  447. int x_value = *x_ptr;
  448. int state_value = *state_ptr;
  449. *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
  450. *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
  451. }
  452. #else
  453. for (j=0; j <= (window_entries - step); j++)
  454. {
  455. int stepval = window[step+j];
  456. int stateval=state[j];
  457. window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
  458. state[j] += shift_down(k * stepval, LATTICE_SHIFT);
  459. }
  460. #endif
  461. }
  462. }
  /**
   * Map a sample rate to the index stored in the stream header.
   *
   * @param samplerate sample rate in Hz
   * @return index 0..8 for a supported rate, AVERROR(EINVAL) otherwise
   */
  static inline int code_samplerate(int samplerate)
  {
      /* position in this list is the code written to the header */
      static const int rates[] = {
          44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
      };
      int idx;

      for (idx = 0; idx < (int)(sizeof(rates) / sizeof(rates[0])); idx++) {
          if (rates[idx] == samplerate)
              return idx;
      }

      return AVERROR(EINVAL);
  }
  479. static av_cold int sonic_encode_init(AVCodecContext *avctx)
  480. {
  481. SonicContext *s = avctx->priv_data;
  482. int *coded_samples;
  483. PutBitContext pb;
  484. int i;
  485. s->version = 2;
  486. if (avctx->channels > MAX_CHANNELS)
  487. {
  488. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  489. return AVERROR(EINVAL); /* only stereo or mono for now */
  490. }
  491. if (avctx->channels == 2)
  492. s->decorrelation = MID_SIDE;
  493. else
  494. s->decorrelation = 3;
  495. if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
  496. {
  497. s->lossless = 1;
  498. s->num_taps = 32;
  499. s->downsampling = 1;
  500. s->quantization = 0.0;
  501. }
  502. else
  503. {
  504. s->num_taps = 128;
  505. s->downsampling = 2;
  506. s->quantization = 1.0;
  507. }
  508. // max tap 2048
  509. if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
  510. av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
  511. return AVERROR_INVALIDDATA;
  512. }
  513. // generate taps
  514. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  515. if (!s->tap_quant)
  516. return AVERROR(ENOMEM);
  517. for (i = 0; i < s->num_taps; i++)
  518. s->tap_quant[i] = ff_sqrt(i+1);
  519. s->channels = avctx->channels;
  520. s->samplerate = avctx->sample_rate;
  521. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  522. s->frame_size = s->channels*s->block_align*s->downsampling;
  523. s->tail_size = s->num_taps*s->channels;
  524. s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
  525. if (!s->tail)
  526. return AVERROR(ENOMEM);
  527. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
  528. if (!s->predictor_k)
  529. return AVERROR(ENOMEM);
  530. coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
  531. if (!coded_samples)
  532. return AVERROR(ENOMEM);
  533. for (i = 0; i < s->channels; i++, coded_samples += s->block_align)
  534. s->coded_samples[i] = coded_samples;
  535. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  536. s->window_size = ((2*s->tail_size)+s->frame_size);
  537. s->window = av_calloc(s->window_size, 2 * sizeof(*s->window));
  538. if (!s->window || !s->int_samples)
  539. return AVERROR(ENOMEM);
  540. avctx->extradata = av_mallocz(16);
  541. if (!avctx->extradata)
  542. return AVERROR(ENOMEM);
  543. init_put_bits(&pb, avctx->extradata, 16*8);
  544. put_bits(&pb, 2, s->version); // version
  545. if (s->version >= 1)
  546. {
  547. if (s->version >= 2) {
  548. put_bits(&pb, 8, s->version);
  549. put_bits(&pb, 8, s->minor_version);
  550. }
  551. put_bits(&pb, 2, s->channels);
  552. put_bits(&pb, 4, code_samplerate(s->samplerate));
  553. }
  554. put_bits(&pb, 1, s->lossless);
  555. if (!s->lossless)
  556. put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
  557. put_bits(&pb, 2, s->decorrelation);
  558. put_bits(&pb, 2, s->downsampling);
  559. put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
  560. put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
  561. flush_put_bits(&pb);
  562. avctx->extradata_size = put_bits_count(&pb)/8;
  563. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  564. s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  565. avctx->frame_size = s->block_align*s->downsampling;
  566. return 0;
  567. }
  568. static av_cold int sonic_encode_close(AVCodecContext *avctx)
  569. {
  570. SonicContext *s = avctx->priv_data;
  571. av_freep(&s->coded_samples[0]);
  572. av_freep(&s->predictor_k);
  573. av_freep(&s->tail);
  574. av_freep(&s->tap_quant);
  575. av_freep(&s->window);
  576. av_freep(&s->int_samples);
  577. return 0;
  578. }
  579. static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  580. const AVFrame *frame, int *got_packet_ptr)
  581. {
  582. SonicContext *s = avctx->priv_data;
  583. RangeCoder c;
  584. int i, j, ch, quant = 0, x = 0;
  585. int ret;
  586. const short *samples = (const int16_t*)frame->data[0];
  587. uint8_t state[32];
  588. if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000, 0)) < 0)
  589. return ret;
  590. ff_init_range_encoder(&c, avpkt->data, avpkt->size);
  591. ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
  592. memset(state, 128, sizeof(state));
  593. // short -> internal
  594. for (i = 0; i < s->frame_size; i++)
  595. s->int_samples[i] = samples[i];
  596. if (!s->lossless)
  597. for (i = 0; i < s->frame_size; i++)
  598. s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
  599. switch(s->decorrelation)
  600. {
  601. case MID_SIDE:
  602. for (i = 0; i < s->frame_size; i += s->channels)
  603. {
  604. s->int_samples[i] += s->int_samples[i+1];
  605. s->int_samples[i+1] -= shift(s->int_samples[i], 1);
  606. }
  607. break;
  608. case LEFT_SIDE:
  609. for (i = 0; i < s->frame_size; i += s->channels)
  610. s->int_samples[i+1] -= s->int_samples[i];
  611. break;
  612. case RIGHT_SIDE:
  613. for (i = 0; i < s->frame_size; i += s->channels)
  614. s->int_samples[i] -= s->int_samples[i+1];
  615. break;
  616. }
  617. memset(s->window, 0, s->window_size * sizeof(*s->window));
  618. for (i = 0; i < s->tail_size; i++)
  619. s->window[x++] = s->tail[i];
  620. for (i = 0; i < s->frame_size; i++)
  621. s->window[x++] = s->int_samples[i];
  622. for (i = 0; i < s->tail_size; i++)
  623. s->window[x++] = 0;
  624. for (i = 0; i < s->tail_size; i++)
  625. s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
  626. // generate taps
  627. modified_levinson_durbin(s->window, s->window_size,
  628. s->predictor_k, s->num_taps, s->channels, s->tap_quant);
  629. if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
  630. return ret;
  631. for (ch = 0; ch < s->channels; ch++)
  632. {
  633. x = s->tail_size+ch;
  634. for (i = 0; i < s->block_align; i++)
  635. {
  636. int sum = 0;
  637. for (j = 0; j < s->downsampling; j++, x += s->channels)
  638. sum += s->window[x];
  639. s->coded_samples[ch][i] = sum;
  640. }
  641. }
  642. // simple rate control code
  643. if (!s->lossless)
  644. {
  645. double energy1 = 0.0, energy2 = 0.0;
  646. for (ch = 0; ch < s->channels; ch++)
  647. {
  648. for (i = 0; i < s->block_align; i++)
  649. {
  650. double sample = s->coded_samples[ch][i];
  651. energy2 += sample*sample;
  652. energy1 += fabs(sample);
  653. }
  654. }
  655. energy2 = sqrt(energy2/(s->channels*s->block_align));
  656. energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
  657. // increase bitrate when samples are like a gaussian distribution
  658. // reduce bitrate when samples are like a two-tailed exponential distribution
  659. if (energy2 > energy1)
  660. energy2 += (energy2-energy1)*RATE_VARIATION;
  661. quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
  662. // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
  663. quant = av_clip(quant, 1, 65534);
  664. put_symbol(&c, state, quant, 0, NULL, NULL);
  665. quant *= SAMPLE_FACTOR;
  666. }
  667. // write out coded samples
  668. for (ch = 0; ch < s->channels; ch++)
  669. {
  670. if (!s->lossless)
  671. for (i = 0; i < s->block_align; i++)
  672. s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
  673. if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
  674. return ret;
  675. }
  676. avpkt->size = ff_rac_terminate(&c, 0);
  677. *got_packet_ptr = 1;
  678. return 0;
  679. }
  680. #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
  681. #if CONFIG_SONIC_DECODER
  /* Sample rates in Hz, indexed by the 4-bit code read from the stream header. */
  static const int samplerate_table[] = {
      44100,
      22050,
      11025,
      96000,
      48000,
      32000,
      24000,
      16000,
      8000,
  };
  684. static av_cold int sonic_decode_init(AVCodecContext *avctx)
  685. {
  686. SonicContext *s = avctx->priv_data;
  687. int *tmp;
  688. GetBitContext gb;
  689. int i;
  690. int ret;
  691. s->channels = avctx->channels;
  692. s->samplerate = avctx->sample_rate;
  693. if (!avctx->extradata)
  694. {
  695. av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
  696. return AVERROR_INVALIDDATA;
  697. }
  698. ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
  699. if (ret < 0)
  700. return ret;
  701. s->version = get_bits(&gb, 2);
  702. if (s->version >= 2) {
  703. s->version = get_bits(&gb, 8);
  704. s->minor_version = get_bits(&gb, 8);
  705. }
  706. if (s->version != 2)
  707. {
  708. av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
  709. return AVERROR_INVALIDDATA;
  710. }
  711. if (s->version >= 1)
  712. {
  713. int sample_rate_index;
  714. s->channels = get_bits(&gb, 2);
  715. sample_rate_index = get_bits(&gb, 4);
  716. if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
  717. av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
  718. return AVERROR_INVALIDDATA;
  719. }
  720. s->samplerate = samplerate_table[sample_rate_index];
  721. av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
  722. s->channels, s->samplerate);
  723. }
  724. if (s->channels > MAX_CHANNELS || s->channels < 1)
  725. {
  726. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  727. return AVERROR_INVALIDDATA;
  728. }
  729. avctx->channels = s->channels;
  730. s->lossless = get_bits1(&gb);
  731. if (!s->lossless)
  732. skip_bits(&gb, 3); // XXX FIXME
  733. s->decorrelation = get_bits(&gb, 2);
  734. if (s->decorrelation != 3 && s->channels != 2) {
  735. av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
  736. return AVERROR_INVALIDDATA;
  737. }
  738. s->downsampling = get_bits(&gb, 2);
  739. if (!s->downsampling) {
  740. av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
  741. return AVERROR_INVALIDDATA;
  742. }
  743. s->num_taps = (get_bits(&gb, 5)+1)<<5;
  744. if (get_bits1(&gb)) // XXX FIXME
  745. av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
  746. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  747. s->frame_size = s->channels*s->block_align*s->downsampling;
  748. // avctx->frame_size = s->block_align;
  749. if (s->num_taps * s->channels > s->frame_size) {
  750. av_log(avctx, AV_LOG_ERROR,
  751. "number of taps times channels (%d * %d) larger than frame size %d\n",
  752. s->num_taps, s->channels, s->frame_size);
  753. return AVERROR_INVALIDDATA;
  754. }
  755. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  756. s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  757. // generate taps
  758. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  759. if (!s->tap_quant)
  760. return AVERROR(ENOMEM);
  761. for (i = 0; i < s->num_taps; i++)
  762. s->tap_quant[i] = ff_sqrt(i+1);
  763. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
  764. tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state));
  765. if (!tmp)
  766. return AVERROR(ENOMEM);
  767. for (i = 0; i < s->channels; i++, tmp += s->num_taps)
  768. s->predictor_state[i] = tmp;
  769. tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
  770. if (!tmp)
  771. return AVERROR(ENOMEM);
  772. for (i = 0; i < s->channels; i++, tmp += s->block_align)
  773. s->coded_samples[i] = tmp;
  774. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  775. if (!s->int_samples)
  776. return AVERROR(ENOMEM);
  777. avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  778. return 0;
  779. }
  780. static av_cold int sonic_decode_close(AVCodecContext *avctx)
  781. {
  782. SonicContext *s = avctx->priv_data;
  783. av_freep(&s->int_samples);
  784. av_freep(&s->tap_quant);
  785. av_freep(&s->predictor_k);
  786. av_freep(&s->predictor_state[0]);
  787. av_freep(&s->coded_samples[0]);
  788. return 0;
  789. }
  790. static int sonic_decode_frame(AVCodecContext *avctx,
  791. void *data, int *got_frame_ptr,
  792. AVPacket *avpkt)
  793. {
  794. const uint8_t *buf = avpkt->data;
  795. int buf_size = avpkt->size;
  796. SonicContext *s = avctx->priv_data;
  797. RangeCoder c;
  798. uint8_t state[32];
  799. int i, quant, ch, j, ret;
  800. int16_t *samples;
  801. AVFrame *frame = data;
  802. if (buf_size == 0) return 0;
  803. frame->nb_samples = s->frame_size / avctx->channels;
  804. if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
  805. return ret;
  806. samples = (int16_t *)frame->data[0];
  807. // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
  808. memset(state, 128, sizeof(state));
  809. ff_init_range_decoder(&c, buf, buf_size);
  810. ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
  811. intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
  812. // dequantize
  813. for (i = 0; i < s->num_taps; i++)
  814. s->predictor_k[i] *= s->tap_quant[i];
  815. if (s->lossless)
  816. quant = 1;
  817. else
  818. quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
  819. // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
  820. for (ch = 0; ch < s->channels; ch++)
  821. {
  822. int x = ch;
  823. if (c.overread > MAX_OVERREAD)
  824. return AVERROR_INVALIDDATA;
  825. predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
  826. intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
  827. for (i = 0; i < s->block_align; i++)
  828. {
  829. for (j = 0; j < s->downsampling - 1; j++)
  830. {
  831. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
  832. x += s->channels;
  833. }
  834. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
  835. x += s->channels;
  836. }
  837. for (i = 0; i < s->num_taps; i++)
  838. s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
  839. }
  840. switch(s->decorrelation)
  841. {
  842. case MID_SIDE:
  843. for (i = 0; i < s->frame_size; i += s->channels)
  844. {
  845. s->int_samples[i+1] += shift(s->int_samples[i], 1);
  846. s->int_samples[i] -= s->int_samples[i+1];
  847. }
  848. break;
  849. case LEFT_SIDE:
  850. for (i = 0; i < s->frame_size; i += s->channels)
  851. s->int_samples[i+1] += s->int_samples[i];
  852. break;
  853. case RIGHT_SIDE:
  854. for (i = 0; i < s->frame_size; i += s->channels)
  855. s->int_samples[i] += s->int_samples[i+1];
  856. break;
  857. }
  858. if (!s->lossless)
  859. for (i = 0; i < s->frame_size; i++)
  860. s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
  861. // internal -> short
  862. for (i = 0; i < s->frame_size; i++)
  863. samples[i] = av_clip_int16(s->int_samples[i]);
  864. *got_frame_ptr = 1;
  865. return buf_size;
  866. }
  867. AVCodec ff_sonic_decoder = {
  868. .name = "sonic",
  869. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  870. .type = AVMEDIA_TYPE_AUDIO,
  871. .id = AV_CODEC_ID_SONIC,
  872. .priv_data_size = sizeof(SonicContext),
  873. .init = sonic_decode_init,
  874. .close = sonic_decode_close,
  875. .decode = sonic_decode_frame,
  876. .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_CHANNEL_CONF,
  877. .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
  878. };
  879. #endif /* CONFIG_SONIC_DECODER */
  880. #if CONFIG_SONIC_ENCODER
  881. AVCodec ff_sonic_encoder = {
  882. .name = "sonic",
  883. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  884. .type = AVMEDIA_TYPE_AUDIO,
  885. .id = AV_CODEC_ID_SONIC,
  886. .priv_data_size = sizeof(SonicContext),
  887. .init = sonic_encode_init,
  888. .encode2 = sonic_encode_frame,
  889. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  890. .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
  891. .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
  892. .close = sonic_encode_close,
  893. };
  894. #endif
  895. #if CONFIG_SONIC_LS_ENCODER
  896. AVCodec ff_sonic_ls_encoder = {
  897. .name = "sonicls",
  898. .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
  899. .type = AVMEDIA_TYPE_AUDIO,
  900. .id = AV_CODEC_ID_SONIC_LS,
  901. .priv_data_size = sizeof(SonicContext),
  902. .init = sonic_encode_init,
  903. .encode2 = sonic_encode_frame,
  904. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  905. .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
  906. .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
  907. .close = sonic_encode_close,
  908. };
  909. #endif