You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1127 lines
30KB

  1. /*
  2. * Simple free lossless/lossy audio codec
  3. * Copyright (c) 2004 Alex Beregszaszi
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "get_bits.h"
  23. #include "golomb.h"
  24. #include "internal.h"
  25. #include "rangecoder.h"
  26. /**
  27. * @file
  28. * Simple free lossless/lossy audio codec
  29. * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
  30. * Written and designed by Alex Beregszaszi
  31. *
  32. * TODO:
  33. * - CABAC put/get_symbol
  34. * - independent quantizer for channels
  35. * - >2 channels support
  36. * - more decorrelation types
  37. * - more tap_quant tests
  38. * - selectable intlist writers/readers (bonk-style, golomb, cabac)
  39. */
  /* Upper bound on the channel count; sizes the per-channel pointer arrays. */
  #define MAX_CHANNELS 2

  /* Stereo decorrelation modes stored in SonicContext.decorrelation.
   * The encoder stores the value 3 for mono input (no case below matches it). */
  #define MID_SIDE 0
  #define LEFT_SIDE 1
  #define RIGHT_SIDE 2

  /**
   * Private codec state shared by the Sonic encoder and decoder.
   */
  typedef struct SonicContext {
      /* bitstream version, written to / parsed from the extradata header */
      int version;
      int minor_version;

      int lossless;       /* non-zero for the lossless variant */
      int decorrelation;  /* MID_SIDE / LEFT_SIDE / RIGHT_SIDE, or 3 */

      int num_taps;       /* number of predictor coefficients */
      int downsampling;   /* input samples summed into one coded sample */
      double quantization; /* scale applied by the encoder's rate control */

      int channels;
      int samplerate;
      int block_align;    /* coded samples per channel per frame */
      int frame_size;     /* channels * block_align * downsampling */

      int *tap_quant;     /* per-tap quantiser, num_taps entries */
      int *int_samples;   /* working sample buffer, frame_size entries */
      int *coded_samples[MAX_CHANNELS]; /* block_align entries per channel */

      // for encoding
      int *tail;          /* last tail_size samples of the previous frame */
      int tail_size;      /* num_taps * channels */
      int *window;        /* tail + frame + zero padding */
      int window_size;    /* 2 * tail_size + frame_size */

      // for decoding (predictor_k is also allocated and used by the encoder)
      int *predictor_k;
      int *predictor_state[MAX_CHANNELS];
  } SonicContext;
  /* Right-shift applied to coefficient * state products in the lattice filter. */
  #define LATTICE_SHIFT   10
  /* Left-shift applied to samples in lossy mode (undone by the decoder). */
  #define SAMPLE_SHIFT    4
  #define LATTICE_FACTOR  (1 << LATTICE_SHIFT)
  #define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)

  /* Tuning constants used by the encoder's rate control. */
  #define BASE_QUANT      0.6
  #define RATE_VARIATION  3.0
  /**
   * Arithmetic right shift of a by b bits with rounding: half of the
   * divisor is added before shifting. b must be at least 1.
   */
  static inline int shift(int a,int b)
  {
      const int half = 1 << (b - 1);

      return (a + half) >> b;
  }
  /**
   * Right shift of a by b bits; for negative a the shifted value is
   * incremented by one.
   */
  static inline int shift_down(int a,int b)
  {
      int shifted = a >> b;

      if (a < 0)
          shifted++;
      return shifted;
  }
  77. static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
  78. int i;
  79. #define put_rac(C,S,B) \
  80. do{\
  81. if(rc_stat){\
  82. rc_stat[*(S)][B]++;\
  83. rc_stat2[(S)-state][B]++;\
  84. }\
  85. put_rac(C,S,B);\
  86. }while(0)
  87. if(v){
  88. const int a= FFABS(v);
  89. const int e= av_log2(a);
  90. put_rac(c, state+0, 0);
  91. if(e<=9){
  92. for(i=0; i<e; i++){
  93. put_rac(c, state+1+i, 1); //1..10
  94. }
  95. put_rac(c, state+1+i, 0);
  96. for(i=e-1; i>=0; i--){
  97. put_rac(c, state+22+i, (a>>i)&1); //22..31
  98. }
  99. if(is_signed)
  100. put_rac(c, state+11 + e, v < 0); //11..21
  101. }else{
  102. for(i=0; i<e; i++){
  103. put_rac(c, state+1+FFMIN(i,9), 1); //1..10
  104. }
  105. put_rac(c, state+1+9, 0);
  106. for(i=e-1; i>=0; i--){
  107. put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
  108. }
  109. if(is_signed)
  110. put_rac(c, state+11 + 10, v < 0); //11..21
  111. }
  112. }else{
  113. put_rac(c, state+0, 1);
  114. }
  115. #undef put_rac
  116. }
  117. static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
  118. if(get_rac(c, state+0))
  119. return 0;
  120. else{
  121. int i, e, a;
  122. e= 0;
  123. while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
  124. e++;
  125. }
  126. a= 1;
  127. for(i=e-1; i>=0; i--){
  128. a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
  129. }
  130. e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
  131. return (a^e)-e;
  132. }
  133. }
  134. #if 1
  135. static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
  136. {
  137. int i;
  138. for (i = 0; i < entries; i++)
  139. put_symbol(c, state, buf[i], 1, NULL, NULL);
  140. return 1;
  141. }
  142. static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
  143. {
  144. int i;
  145. for (i = 0; i < entries; i++)
  146. buf[i] = get_symbol(c, state, 1);
  147. return 1;
  148. }
  149. #elif 1
  150. static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  151. {
  152. int i;
  153. for (i = 0; i < entries; i++)
  154. set_se_golomb(pb, buf[i]);
  155. return 1;
  156. }
  157. static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  158. {
  159. int i;
  160. for (i = 0; i < entries; i++)
  161. buf[i] = get_se_golomb(gb);
  162. return 1;
  163. }
  164. #else
  /* Divisor controlling how fast the run-length step adapts in the
   * bonk-style list coder below. */
  #define ADAPT_LEVEL 8

  /**
   * Number of bits needed to represent x, i.e. the position of its highest
   * set bit plus one; 0 for x == 0.
   */
  static int bits_to_store(uint64_t x)
  {
      int count;

      for (count = 0; x != 0; x >>= 1)
          count++;

      return count;
  }
  176. static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
  177. {
  178. int i, bits;
  179. if (!max)
  180. return;
  181. bits = bits_to_store(max);
  182. for (i = 0; i < bits-1; i++)
  183. put_bits(pb, 1, value & (1 << i));
  184. if ( (value | (1 << (bits-1))) <= max)
  185. put_bits(pb, 1, value & (1 << (bits-1)));
  186. }
  187. static unsigned int read_uint_max(GetBitContext *gb, int max)
  188. {
  189. int i, bits, value = 0;
  190. if (!max)
  191. return 0;
  192. bits = bits_to_store(max);
  193. for (i = 0; i < bits-1; i++)
  194. if (get_bits1(gb))
  195. value += 1 << i;
  196. if ( (value | (1<<(bits-1))) <= max)
  197. if (get_bits1(gb))
  198. value += 1 << (bits-1);
  199. return value;
  200. }
  201. static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  202. {
  203. int i, j, x = 0, low_bits = 0, max = 0;
  204. int step = 256, pos = 0, dominant = 0, any = 0;
  205. int *copy, *bits;
  206. copy = av_calloc(entries, sizeof(*copy));
  207. if (!copy)
  208. return AVERROR(ENOMEM);
  209. if (base_2_part)
  210. {
  211. int energy = 0;
  212. for (i = 0; i < entries; i++)
  213. energy += abs(buf[i]);
  214. low_bits = bits_to_store(energy / (entries * 2));
  215. if (low_bits > 15)
  216. low_bits = 15;
  217. put_bits(pb, 4, low_bits);
  218. }
  219. for (i = 0; i < entries; i++)
  220. {
  221. put_bits(pb, low_bits, abs(buf[i]));
  222. copy[i] = abs(buf[i]) >> low_bits;
  223. if (copy[i] > max)
  224. max = abs(copy[i]);
  225. }
  226. bits = av_calloc(entries*max, sizeof(*bits));
  227. if (!bits)
  228. {
  229. av_free(copy);
  230. return AVERROR(ENOMEM);
  231. }
  232. for (i = 0; i <= max; i++)
  233. {
  234. for (j = 0; j < entries; j++)
  235. if (copy[j] >= i)
  236. bits[x++] = copy[j] > i;
  237. }
  238. // store bitstream
  239. while (pos < x)
  240. {
  241. int steplet = step >> 8;
  242. if (pos + steplet > x)
  243. steplet = x - pos;
  244. for (i = 0; i < steplet; i++)
  245. if (bits[i+pos] != dominant)
  246. any = 1;
  247. put_bits(pb, 1, any);
  248. if (!any)
  249. {
  250. pos += steplet;
  251. step += step / ADAPT_LEVEL;
  252. }
  253. else
  254. {
  255. int interloper = 0;
  256. while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
  257. interloper++;
  258. // note change
  259. write_uint_max(pb, interloper, (step >> 8) - 1);
  260. pos += interloper + 1;
  261. step -= step / ADAPT_LEVEL;
  262. }
  263. if (step < 256)
  264. {
  265. step = 65536 / step;
  266. dominant = !dominant;
  267. }
  268. }
  269. // store signs
  270. for (i = 0; i < entries; i++)
  271. if (buf[i])
  272. put_bits(pb, 1, buf[i] < 0);
  273. av_free(bits);
  274. av_free(copy);
  275. return 0;
  276. }
  277. static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  278. {
  279. int i, low_bits = 0, x = 0;
  280. int n_zeros = 0, step = 256, dominant = 0;
  281. int pos = 0, level = 0;
  282. int *bits = av_calloc(entries, sizeof(*bits));
  283. if (!bits)
  284. return AVERROR(ENOMEM);
  285. if (base_2_part)
  286. {
  287. low_bits = get_bits(gb, 4);
  288. if (low_bits)
  289. for (i = 0; i < entries; i++)
  290. buf[i] = get_bits(gb, low_bits);
  291. }
  292. // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
  293. while (n_zeros < entries)
  294. {
  295. int steplet = step >> 8;
  296. if (!get_bits1(gb))
  297. {
  298. for (i = 0; i < steplet; i++)
  299. bits[x++] = dominant;
  300. if (!dominant)
  301. n_zeros += steplet;
  302. step += step / ADAPT_LEVEL;
  303. }
  304. else
  305. {
  306. int actual_run = read_uint_max(gb, steplet-1);
  307. // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
  308. for (i = 0; i < actual_run; i++)
  309. bits[x++] = dominant;
  310. bits[x++] = !dominant;
  311. if (!dominant)
  312. n_zeros += actual_run;
  313. else
  314. n_zeros++;
  315. step -= step / ADAPT_LEVEL;
  316. }
  317. if (step < 256)
  318. {
  319. step = 65536 / step;
  320. dominant = !dominant;
  321. }
  322. }
  323. // reconstruct unsigned values
  324. n_zeros = 0;
  325. for (i = 0; n_zeros < entries; i++)
  326. {
  327. while(1)
  328. {
  329. if (pos >= entries)
  330. {
  331. pos = 0;
  332. level += 1 << low_bits;
  333. }
  334. if (buf[pos] >= level)
  335. break;
  336. pos++;
  337. }
  338. if (bits[i])
  339. buf[pos] += 1 << low_bits;
  340. else
  341. n_zeros++;
  342. pos++;
  343. }
  344. av_free(bits);
  345. // read signs
  346. for (i = 0; i < entries; i++)
  347. if (buf[i] && get_bits1(gb))
  348. buf[i] = -buf[i];
  349. // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
  350. return 0;
  351. }
  352. #endif
  353. static void predictor_init_state(int *k, int *state, int order)
  354. {
  355. int i;
  356. for (i = order-2; i >= 0; i--)
  357. {
  358. int j, p, x = state[i];
  359. for (j = 0, p = i+1; p < order; j++,p++)
  360. {
  361. int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
  362. state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
  363. x = tmp;
  364. }
  365. }
  366. }
  367. static int predictor_calc_error(int *k, int *state, int order, int error)
  368. {
  369. int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
  370. #if 1
  371. int *k_ptr = &(k[order-2]),
  372. *state_ptr = &(state[order-2]);
  373. for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
  374. {
  375. int k_value = *k_ptr, state_value = *state_ptr;
  376. x -= shift_down(k_value * state_value, LATTICE_SHIFT);
  377. state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
  378. }
  379. #else
  380. for (i = order-2; i >= 0; i--)
  381. {
  382. x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
  383. state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
  384. }
  385. #endif
  386. // don't drift too far, to avoid overflows
  387. if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
  388. if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
  389. state[0] = x;
  390. return x;
  391. }
  392. #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
  393. // Heavily modified Levinson-Durbin algorithm which
  394. // copes better with quantization, and calculates the
  395. // actual whitened result as it goes.
  396. static int modified_levinson_durbin(int *window, int window_entries,
  397. int *out, int out_entries, int channels, int *tap_quant)
  398. {
  399. int i;
  400. int *state = av_calloc(window_entries, sizeof(*state));
  401. if (!state)
  402. return AVERROR(ENOMEM);
  403. memcpy(state, window, 4* window_entries);
  404. for (i = 0; i < out_entries; i++)
  405. {
  406. int step = (i+1)*channels, k, j;
  407. double xx = 0.0, xy = 0.0;
  408. #if 1
  409. int *x_ptr = &(window[step]);
  410. int *state_ptr = &(state[0]);
  411. j = window_entries - step;
  412. for (;j>0;j--,x_ptr++,state_ptr++)
  413. {
  414. double x_value = *x_ptr;
  415. double state_value = *state_ptr;
  416. xx += state_value*state_value;
  417. xy += x_value*state_value;
  418. }
  419. #else
  420. for (j = 0; j <= (window_entries - step); j++);
  421. {
  422. double stepval = window[step+j];
  423. double stateval = window[j];
  424. // xx += (double)window[j]*(double)window[j];
  425. // xy += (double)window[step+j]*(double)window[j];
  426. xx += stateval*stateval;
  427. xy += stepval*stateval;
  428. }
  429. #endif
  430. if (xx == 0.0)
  431. k = 0;
  432. else
  433. k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
  434. if (k > (LATTICE_FACTOR/tap_quant[i]))
  435. k = LATTICE_FACTOR/tap_quant[i];
  436. if (-k > (LATTICE_FACTOR/tap_quant[i]))
  437. k = -(LATTICE_FACTOR/tap_quant[i]);
  438. out[i] = k;
  439. k *= tap_quant[i];
  440. #if 1
  441. x_ptr = &(window[step]);
  442. state_ptr = &(state[0]);
  443. j = window_entries - step;
  444. for (;j>0;j--,x_ptr++,state_ptr++)
  445. {
  446. int x_value = *x_ptr;
  447. int state_value = *state_ptr;
  448. *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
  449. *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
  450. }
  451. #else
  452. for (j=0; j <= (window_entries - step); j++)
  453. {
  454. int stepval = window[step+j];
  455. int stateval=state[j];
  456. window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
  457. state[j] += shift_down(k * stepval, LATTICE_SHIFT);
  458. }
  459. #endif
  460. }
  461. av_free(state);
  462. return 0;
  463. }
  /**
   * Map a sample rate in Hz to the index stored in the stream header.
   *
   * @return the index (0..8), or AVERROR(EINVAL) if the rate has no code
   */
  static inline int code_samplerate(int samplerate)
  {
      /* position in this list is the header code */
      static const int known_rates[] = {
          44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
      };
      const int count = (int)(sizeof(known_rates) / sizeof(known_rates[0]));
      int code;

      for (code = 0; code < count; code++) {
          if (known_rates[code] == samplerate)
              return code;
      }

      return AVERROR(EINVAL);
  }
  480. static av_cold int sonic_encode_init(AVCodecContext *avctx)
  481. {
  482. SonicContext *s = avctx->priv_data;
  483. PutBitContext pb;
  484. int i;
  485. s->version = 2;
  486. if (avctx->channels > MAX_CHANNELS)
  487. {
  488. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  489. return AVERROR(EINVAL); /* only stereo or mono for now */
  490. }
  491. if (avctx->channels == 2)
  492. s->decorrelation = MID_SIDE;
  493. else
  494. s->decorrelation = 3;
  495. if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
  496. {
  497. s->lossless = 1;
  498. s->num_taps = 32;
  499. s->downsampling = 1;
  500. s->quantization = 0.0;
  501. }
  502. else
  503. {
  504. s->num_taps = 128;
  505. s->downsampling = 2;
  506. s->quantization = 1.0;
  507. }
  508. // max tap 2048
  509. if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
  510. av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
  511. return AVERROR_INVALIDDATA;
  512. }
  513. // generate taps
  514. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  515. if (!s->tap_quant)
  516. return AVERROR(ENOMEM);
  517. for (i = 0; i < s->num_taps; i++)
  518. s->tap_quant[i] = ff_sqrt(i+1);
  519. s->channels = avctx->channels;
  520. s->samplerate = avctx->sample_rate;
  521. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  522. s->frame_size = s->channels*s->block_align*s->downsampling;
  523. s->tail_size = s->num_taps*s->channels;
  524. s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
  525. if (!s->tail)
  526. return AVERROR(ENOMEM);
  527. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
  528. if (!s->predictor_k)
  529. return AVERROR(ENOMEM);
  530. for (i = 0; i < s->channels; i++)
  531. {
  532. s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
  533. if (!s->coded_samples[i])
  534. return AVERROR(ENOMEM);
  535. }
  536. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  537. s->window_size = ((2*s->tail_size)+s->frame_size);
  538. s->window = av_calloc(s->window_size, sizeof(*s->window));
  539. if (!s->window || !s->int_samples)
  540. return AVERROR(ENOMEM);
  541. avctx->extradata = av_mallocz(16);
  542. if (!avctx->extradata)
  543. return AVERROR(ENOMEM);
  544. init_put_bits(&pb, avctx->extradata, 16*8);
  545. put_bits(&pb, 2, s->version); // version
  546. if (s->version >= 1)
  547. {
  548. if (s->version >= 2) {
  549. put_bits(&pb, 8, s->version);
  550. put_bits(&pb, 8, s->minor_version);
  551. }
  552. put_bits(&pb, 2, s->channels);
  553. put_bits(&pb, 4, code_samplerate(s->samplerate));
  554. }
  555. put_bits(&pb, 1, s->lossless);
  556. if (!s->lossless)
  557. put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
  558. put_bits(&pb, 2, s->decorrelation);
  559. put_bits(&pb, 2, s->downsampling);
  560. put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
  561. put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
  562. flush_put_bits(&pb);
  563. avctx->extradata_size = put_bits_count(&pb)/8;
  564. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  565. s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  566. avctx->frame_size = s->block_align*s->downsampling;
  567. return 0;
  568. }
  569. static av_cold int sonic_encode_close(AVCodecContext *avctx)
  570. {
  571. SonicContext *s = avctx->priv_data;
  572. int i;
  573. for (i = 0; i < s->channels; i++)
  574. av_freep(&s->coded_samples[i]);
  575. av_freep(&s->predictor_k);
  576. av_freep(&s->tail);
  577. av_freep(&s->tap_quant);
  578. av_freep(&s->window);
  579. av_freep(&s->int_samples);
  580. return 0;
  581. }
  582. static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  583. const AVFrame *frame, int *got_packet_ptr)
  584. {
  585. SonicContext *s = avctx->priv_data;
  586. RangeCoder c;
  587. int i, j, ch, quant = 0, x = 0;
  588. int ret;
  589. const short *samples = (const int16_t*)frame->data[0];
  590. uint8_t state[32];
  591. if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000, 0)) < 0)
  592. return ret;
  593. ff_init_range_encoder(&c, avpkt->data, avpkt->size);
  594. ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
  595. memset(state, 128, sizeof(state));
  596. // short -> internal
  597. for (i = 0; i < s->frame_size; i++)
  598. s->int_samples[i] = samples[i];
  599. if (!s->lossless)
  600. for (i = 0; i < s->frame_size; i++)
  601. s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
  602. switch(s->decorrelation)
  603. {
  604. case MID_SIDE:
  605. for (i = 0; i < s->frame_size; i += s->channels)
  606. {
  607. s->int_samples[i] += s->int_samples[i+1];
  608. s->int_samples[i+1] -= shift(s->int_samples[i], 1);
  609. }
  610. break;
  611. case LEFT_SIDE:
  612. for (i = 0; i < s->frame_size; i += s->channels)
  613. s->int_samples[i+1] -= s->int_samples[i];
  614. break;
  615. case RIGHT_SIDE:
  616. for (i = 0; i < s->frame_size; i += s->channels)
  617. s->int_samples[i] -= s->int_samples[i+1];
  618. break;
  619. }
  620. memset(s->window, 0, 4* s->window_size);
  621. for (i = 0; i < s->tail_size; i++)
  622. s->window[x++] = s->tail[i];
  623. for (i = 0; i < s->frame_size; i++)
  624. s->window[x++] = s->int_samples[i];
  625. for (i = 0; i < s->tail_size; i++)
  626. s->window[x++] = 0;
  627. for (i = 0; i < s->tail_size; i++)
  628. s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
  629. // generate taps
  630. ret = modified_levinson_durbin(s->window, s->window_size,
  631. s->predictor_k, s->num_taps, s->channels, s->tap_quant);
  632. if (ret < 0)
  633. return ret;
  634. if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
  635. return ret;
  636. for (ch = 0; ch < s->channels; ch++)
  637. {
  638. x = s->tail_size+ch;
  639. for (i = 0; i < s->block_align; i++)
  640. {
  641. int sum = 0;
  642. for (j = 0; j < s->downsampling; j++, x += s->channels)
  643. sum += s->window[x];
  644. s->coded_samples[ch][i] = sum;
  645. }
  646. }
  647. // simple rate control code
  648. if (!s->lossless)
  649. {
  650. double energy1 = 0.0, energy2 = 0.0;
  651. for (ch = 0; ch < s->channels; ch++)
  652. {
  653. for (i = 0; i < s->block_align; i++)
  654. {
  655. double sample = s->coded_samples[ch][i];
  656. energy2 += sample*sample;
  657. energy1 += fabs(sample);
  658. }
  659. }
  660. energy2 = sqrt(energy2/(s->channels*s->block_align));
  661. energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
  662. // increase bitrate when samples are like a gaussian distribution
  663. // reduce bitrate when samples are like a two-tailed exponential distribution
  664. if (energy2 > energy1)
  665. energy2 += (energy2-energy1)*RATE_VARIATION;
  666. quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
  667. // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
  668. quant = av_clip(quant, 1, 65534);
  669. put_symbol(&c, state, quant, 0, NULL, NULL);
  670. quant *= SAMPLE_FACTOR;
  671. }
  672. // write out coded samples
  673. for (ch = 0; ch < s->channels; ch++)
  674. {
  675. if (!s->lossless)
  676. for (i = 0; i < s->block_align; i++)
  677. s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
  678. if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
  679. return ret;
  680. }
  681. // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
  682. avpkt->size = ff_rac_terminate(&c, 0);
  683. *got_packet_ptr = 1;
  684. return 0;
  685. }
  686. #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
  687. #if CONFIG_SONIC_DECODER
  /* Sample rates in Hz, indexed by the 4-bit code read from the stream header. */
  static const int samplerate_table[] = {
      44100, 22050, 11025,
      96000, 48000, 32000,
      24000, 16000,  8000,
  };
  690. static av_cold int sonic_decode_init(AVCodecContext *avctx)
  691. {
  692. SonicContext *s = avctx->priv_data;
  693. GetBitContext gb;
  694. int i;
  695. int ret;
  696. s->channels = avctx->channels;
  697. s->samplerate = avctx->sample_rate;
  698. if (!avctx->extradata)
  699. {
  700. av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
  701. return AVERROR_INVALIDDATA;
  702. }
  703. ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
  704. if (ret < 0)
  705. return ret;
  706. s->version = get_bits(&gb, 2);
  707. if (s->version >= 2) {
  708. s->version = get_bits(&gb, 8);
  709. s->minor_version = get_bits(&gb, 8);
  710. }
  711. if (s->version != 2)
  712. {
  713. av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
  714. return AVERROR_INVALIDDATA;
  715. }
  716. if (s->version >= 1)
  717. {
  718. int sample_rate_index;
  719. s->channels = get_bits(&gb, 2);
  720. sample_rate_index = get_bits(&gb, 4);
  721. if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
  722. av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
  723. return AVERROR_INVALIDDATA;
  724. }
  725. s->samplerate = samplerate_table[sample_rate_index];
  726. av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
  727. s->channels, s->samplerate);
  728. }
  729. if (s->channels > MAX_CHANNELS || s->channels < 1)
  730. {
  731. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  732. return AVERROR_INVALIDDATA;
  733. }
  734. avctx->channels = s->channels;
  735. s->lossless = get_bits1(&gb);
  736. if (!s->lossless)
  737. skip_bits(&gb, 3); // XXX FIXME
  738. s->decorrelation = get_bits(&gb, 2);
  739. if (s->decorrelation != 3 && s->channels != 2) {
  740. av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
  741. return AVERROR_INVALIDDATA;
  742. }
  743. s->downsampling = get_bits(&gb, 2);
  744. if (!s->downsampling) {
  745. av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
  746. return AVERROR_INVALIDDATA;
  747. }
  748. s->num_taps = (get_bits(&gb, 5)+1)<<5;
  749. if (get_bits1(&gb)) // XXX FIXME
  750. av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
  751. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  752. s->frame_size = s->channels*s->block_align*s->downsampling;
  753. // avctx->frame_size = s->block_align;
  754. if (s->num_taps * s->channels > s->frame_size) {
  755. av_log(avctx, AV_LOG_ERROR,
  756. "number of taps times channels (%d * %d) larger than frame size %d\n",
  757. s->num_taps, s->channels, s->frame_size);
  758. return AVERROR_INVALIDDATA;
  759. }
  760. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  761. s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  762. // generate taps
  763. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  764. if (!s->tap_quant)
  765. return AVERROR(ENOMEM);
  766. for (i = 0; i < s->num_taps; i++)
  767. s->tap_quant[i] = ff_sqrt(i+1);
  768. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
  769. for (i = 0; i < s->channels; i++)
  770. {
  771. s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state));
  772. if (!s->predictor_state[i])
  773. return AVERROR(ENOMEM);
  774. }
  775. for (i = 0; i < s->channels; i++)
  776. {
  777. s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
  778. if (!s->coded_samples[i])
  779. return AVERROR(ENOMEM);
  780. }
  781. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  782. if (!s->int_samples)
  783. return AVERROR(ENOMEM);
  784. avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  785. return 0;
  786. }
  787. static av_cold int sonic_decode_close(AVCodecContext *avctx)
  788. {
  789. SonicContext *s = avctx->priv_data;
  790. int i;
  791. av_freep(&s->int_samples);
  792. av_freep(&s->tap_quant);
  793. av_freep(&s->predictor_k);
  794. for (i = 0; i < s->channels; i++)
  795. {
  796. av_freep(&s->predictor_state[i]);
  797. av_freep(&s->coded_samples[i]);
  798. }
  799. return 0;
  800. }
  801. static int sonic_decode_frame(AVCodecContext *avctx,
  802. void *data, int *got_frame_ptr,
  803. AVPacket *avpkt)
  804. {
  805. const uint8_t *buf = avpkt->data;
  806. int buf_size = avpkt->size;
  807. SonicContext *s = avctx->priv_data;
  808. RangeCoder c;
  809. uint8_t state[32];
  810. int i, quant, ch, j, ret;
  811. int16_t *samples;
  812. AVFrame *frame = data;
  813. if (buf_size == 0) return 0;
  814. frame->nb_samples = s->frame_size / avctx->channels;
  815. if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
  816. return ret;
  817. samples = (int16_t *)frame->data[0];
  818. // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
  819. memset(state, 128, sizeof(state));
  820. ff_init_range_decoder(&c, buf, buf_size);
  821. ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
  822. intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
  823. // dequantize
  824. for (i = 0; i < s->num_taps; i++)
  825. s->predictor_k[i] *= s->tap_quant[i];
  826. if (s->lossless)
  827. quant = 1;
  828. else
  829. quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
  830. // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
  831. for (ch = 0; ch < s->channels; ch++)
  832. {
  833. int x = ch;
  834. predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
  835. intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
  836. for (i = 0; i < s->block_align; i++)
  837. {
  838. for (j = 0; j < s->downsampling - 1; j++)
  839. {
  840. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
  841. x += s->channels;
  842. }
  843. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
  844. x += s->channels;
  845. }
  846. for (i = 0; i < s->num_taps; i++)
  847. s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
  848. }
  849. switch(s->decorrelation)
  850. {
  851. case MID_SIDE:
  852. for (i = 0; i < s->frame_size; i += s->channels)
  853. {
  854. s->int_samples[i+1] += shift(s->int_samples[i], 1);
  855. s->int_samples[i] -= s->int_samples[i+1];
  856. }
  857. break;
  858. case LEFT_SIDE:
  859. for (i = 0; i < s->frame_size; i += s->channels)
  860. s->int_samples[i+1] += s->int_samples[i];
  861. break;
  862. case RIGHT_SIDE:
  863. for (i = 0; i < s->frame_size; i += s->channels)
  864. s->int_samples[i] += s->int_samples[i+1];
  865. break;
  866. }
  867. if (!s->lossless)
  868. for (i = 0; i < s->frame_size; i++)
  869. s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
  870. // internal -> short
  871. for (i = 0; i < s->frame_size; i++)
  872. samples[i] = av_clip_int16(s->int_samples[i]);
  873. *got_frame_ptr = 1;
  874. return buf_size;
  875. }
  876. AVCodec ff_sonic_decoder = {
  877. .name = "sonic",
  878. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  879. .type = AVMEDIA_TYPE_AUDIO,
  880. .id = AV_CODEC_ID_SONIC,
  881. .priv_data_size = sizeof(SonicContext),
  882. .init = sonic_decode_init,
  883. .close = sonic_decode_close,
  884. .decode = sonic_decode_frame,
  885. .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL,
  886. };
  887. #endif /* CONFIG_SONIC_DECODER */
  888. #if CONFIG_SONIC_ENCODER
  889. AVCodec ff_sonic_encoder = {
  890. .name = "sonic",
  891. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  892. .type = AVMEDIA_TYPE_AUDIO,
  893. .id = AV_CODEC_ID_SONIC,
  894. .priv_data_size = sizeof(SonicContext),
  895. .init = sonic_encode_init,
  896. .encode2 = sonic_encode_frame,
  897. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  898. .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
  899. .close = sonic_encode_close,
  900. };
  901. #endif
  902. #if CONFIG_SONIC_LS_ENCODER
  903. AVCodec ff_sonic_ls_encoder = {
  904. .name = "sonicls",
  905. .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
  906. .type = AVMEDIA_TYPE_AUDIO,
  907. .id = AV_CODEC_ID_SONIC_LS,
  908. .priv_data_size = sizeof(SonicContext),
  909. .init = sonic_encode_init,
  910. .encode2 = sonic_encode_frame,
  911. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  912. .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
  913. .close = sonic_encode_close,
  914. };
  915. #endif