You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1128 lines
30KB

  1. /*
  2. * Simple free lossless/lossy audio codec
  3. * Copyright (c) 2004 Alex Beregszaszi
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "get_bits.h"
  23. #include "golomb.h"
  24. #include "internal.h"
  25. #include "rangecoder.h"
  26. /**
  27. * @file
  28. * Simple free lossless/lossy audio codec
  29. * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
  30. * Written and designed by Alex Beregszaszi
  31. *
  32. * TODO:
  33. * - CABAC put/get_symbol
  34. * - independent quantizer for channels
  35. * - >2 channels support
  36. * - more decorrelation types
  37. * - more tap_quant tests
  38. * - selectable intlist writers/readers (bonk-style, golomb, cabac)
  39. */
  40. #define MAX_CHANNELS 2
  41. #define MID_SIDE 0
  42. #define LEFT_SIDE 1
  43. #define RIGHT_SIDE 2
  44. typedef struct SonicContext {
  45. int version;
  46. int minor_version;
  47. int lossless, decorrelation;
  48. int num_taps, downsampling;
  49. double quantization;
  50. int channels, samplerate, block_align, frame_size;
  51. int *tap_quant;
  52. int *int_samples;
  53. int *coded_samples[MAX_CHANNELS];
  54. // for encoding
  55. int *tail;
  56. int tail_size;
  57. int *window;
  58. int window_size;
  59. // for decoding
  60. int *predictor_k;
  61. int *predictor_state[MAX_CHANNELS];
  62. } SonicContext;
  63. #define LATTICE_SHIFT 10
  64. #define SAMPLE_SHIFT 4
  65. #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
  66. #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
  67. #define BASE_QUANT 0.6
  68. #define RATE_VARIATION 3.0
  69. static inline int shift(int a,int b)
  70. {
  71. return (a+(1<<(b-1))) >> b;
  72. }
  73. static inline int shift_down(int a,int b)
  74. {
  75. return (a>>b)+(a<0);
  76. }
  77. static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
  78. int i;
  79. #define put_rac(C,S,B) \
  80. do{\
  81. if(rc_stat){\
  82. rc_stat[*(S)][B]++;\
  83. rc_stat2[(S)-state][B]++;\
  84. }\
  85. put_rac(C,S,B);\
  86. }while(0)
  87. if(v){
  88. const int a= FFABS(v);
  89. const int e= av_log2(a);
  90. put_rac(c, state+0, 0);
  91. if(e<=9){
  92. for(i=0; i<e; i++){
  93. put_rac(c, state+1+i, 1); //1..10
  94. }
  95. put_rac(c, state+1+i, 0);
  96. for(i=e-1; i>=0; i--){
  97. put_rac(c, state+22+i, (a>>i)&1); //22..31
  98. }
  99. if(is_signed)
  100. put_rac(c, state+11 + e, v < 0); //11..21
  101. }else{
  102. for(i=0; i<e; i++){
  103. put_rac(c, state+1+FFMIN(i,9), 1); //1..10
  104. }
  105. put_rac(c, state+1+9, 0);
  106. for(i=e-1; i>=0; i--){
  107. put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
  108. }
  109. if(is_signed)
  110. put_rac(c, state+11 + 10, v < 0); //11..21
  111. }
  112. }else{
  113. put_rac(c, state+0, 1);
  114. }
  115. #undef put_rac
  116. }
  117. static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
  118. if(get_rac(c, state+0))
  119. return 0;
  120. else{
  121. int i, e;
  122. unsigned a;
  123. e= 0;
  124. while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
  125. e++;
  126. if (e > 31)
  127. return AVERROR_INVALIDDATA;
  128. }
  129. a= 1;
  130. for(i=e-1; i>=0; i--){
  131. a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
  132. }
  133. e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
  134. return (a^e)-e;
  135. }
  136. }
  137. #if 1
  138. static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
  139. {
  140. int i;
  141. for (i = 0; i < entries; i++)
  142. put_symbol(c, state, buf[i], 1, NULL, NULL);
  143. return 1;
  144. }
  145. static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
  146. {
  147. int i;
  148. for (i = 0; i < entries; i++)
  149. buf[i] = get_symbol(c, state, 1);
  150. return 1;
  151. }
  152. #elif 1
  153. static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  154. {
  155. int i;
  156. for (i = 0; i < entries; i++)
  157. set_se_golomb(pb, buf[i]);
  158. return 1;
  159. }
  160. static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  161. {
  162. int i;
  163. for (i = 0; i < entries; i++)
  164. buf[i] = get_se_golomb(gb);
  165. return 1;
  166. }
  167. #else
  168. #define ADAPT_LEVEL 8
  169. static int bits_to_store(uint64_t x)
  170. {
  171. int res = 0;
  172. while(x)
  173. {
  174. res++;
  175. x >>= 1;
  176. }
  177. return res;
  178. }
  179. static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
  180. {
  181. int i, bits;
  182. if (!max)
  183. return;
  184. bits = bits_to_store(max);
  185. for (i = 0; i < bits-1; i++)
  186. put_bits(pb, 1, value & (1 << i));
  187. if ( (value | (1 << (bits-1))) <= max)
  188. put_bits(pb, 1, value & (1 << (bits-1)));
  189. }
  190. static unsigned int read_uint_max(GetBitContext *gb, int max)
  191. {
  192. int i, bits, value = 0;
  193. if (!max)
  194. return 0;
  195. bits = bits_to_store(max);
  196. for (i = 0; i < bits-1; i++)
  197. if (get_bits1(gb))
  198. value += 1 << i;
  199. if ( (value | (1<<(bits-1))) <= max)
  200. if (get_bits1(gb))
  201. value += 1 << (bits-1);
  202. return value;
  203. }
  204. static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  205. {
  206. int i, j, x = 0, low_bits = 0, max = 0;
  207. int step = 256, pos = 0, dominant = 0, any = 0;
  208. int *copy, *bits;
  209. copy = av_calloc(entries, sizeof(*copy));
  210. if (!copy)
  211. return AVERROR(ENOMEM);
  212. if (base_2_part)
  213. {
  214. int energy = 0;
  215. for (i = 0; i < entries; i++)
  216. energy += abs(buf[i]);
  217. low_bits = bits_to_store(energy / (entries * 2));
  218. if (low_bits > 15)
  219. low_bits = 15;
  220. put_bits(pb, 4, low_bits);
  221. }
  222. for (i = 0; i < entries; i++)
  223. {
  224. put_bits(pb, low_bits, abs(buf[i]));
  225. copy[i] = abs(buf[i]) >> low_bits;
  226. if (copy[i] > max)
  227. max = abs(copy[i]);
  228. }
  229. bits = av_calloc(entries*max, sizeof(*bits));
  230. if (!bits)
  231. {
  232. av_free(copy);
  233. return AVERROR(ENOMEM);
  234. }
  235. for (i = 0; i <= max; i++)
  236. {
  237. for (j = 0; j < entries; j++)
  238. if (copy[j] >= i)
  239. bits[x++] = copy[j] > i;
  240. }
  241. // store bitstream
  242. while (pos < x)
  243. {
  244. int steplet = step >> 8;
  245. if (pos + steplet > x)
  246. steplet = x - pos;
  247. for (i = 0; i < steplet; i++)
  248. if (bits[i+pos] != dominant)
  249. any = 1;
  250. put_bits(pb, 1, any);
  251. if (!any)
  252. {
  253. pos += steplet;
  254. step += step / ADAPT_LEVEL;
  255. }
  256. else
  257. {
  258. int interloper = 0;
  259. while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
  260. interloper++;
  261. // note change
  262. write_uint_max(pb, interloper, (step >> 8) - 1);
  263. pos += interloper + 1;
  264. step -= step / ADAPT_LEVEL;
  265. }
  266. if (step < 256)
  267. {
  268. step = 65536 / step;
  269. dominant = !dominant;
  270. }
  271. }
  272. // store signs
  273. for (i = 0; i < entries; i++)
  274. if (buf[i])
  275. put_bits(pb, 1, buf[i] < 0);
  276. av_free(bits);
  277. av_free(copy);
  278. return 0;
  279. }
  280. static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  281. {
  282. int i, low_bits = 0, x = 0;
  283. int n_zeros = 0, step = 256, dominant = 0;
  284. int pos = 0, level = 0;
  285. int *bits = av_calloc(entries, sizeof(*bits));
  286. if (!bits)
  287. return AVERROR(ENOMEM);
  288. if (base_2_part)
  289. {
  290. low_bits = get_bits(gb, 4);
  291. if (low_bits)
  292. for (i = 0; i < entries; i++)
  293. buf[i] = get_bits(gb, low_bits);
  294. }
  295. // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
  296. while (n_zeros < entries)
  297. {
  298. int steplet = step >> 8;
  299. if (!get_bits1(gb))
  300. {
  301. for (i = 0; i < steplet; i++)
  302. bits[x++] = dominant;
  303. if (!dominant)
  304. n_zeros += steplet;
  305. step += step / ADAPT_LEVEL;
  306. }
  307. else
  308. {
  309. int actual_run = read_uint_max(gb, steplet-1);
  310. // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
  311. for (i = 0; i < actual_run; i++)
  312. bits[x++] = dominant;
  313. bits[x++] = !dominant;
  314. if (!dominant)
  315. n_zeros += actual_run;
  316. else
  317. n_zeros++;
  318. step -= step / ADAPT_LEVEL;
  319. }
  320. if (step < 256)
  321. {
  322. step = 65536 / step;
  323. dominant = !dominant;
  324. }
  325. }
  326. // reconstruct unsigned values
  327. n_zeros = 0;
  328. for (i = 0; n_zeros < entries; i++)
  329. {
  330. while(1)
  331. {
  332. if (pos >= entries)
  333. {
  334. pos = 0;
  335. level += 1 << low_bits;
  336. }
  337. if (buf[pos] >= level)
  338. break;
  339. pos++;
  340. }
  341. if (bits[i])
  342. buf[pos] += 1 << low_bits;
  343. else
  344. n_zeros++;
  345. pos++;
  346. }
  347. av_free(bits);
  348. // read signs
  349. for (i = 0; i < entries; i++)
  350. if (buf[i] && get_bits1(gb))
  351. buf[i] = -buf[i];
  352. // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
  353. return 0;
  354. }
  355. #endif
  356. static void predictor_init_state(int *k, int *state, int order)
  357. {
  358. int i;
  359. for (i = order-2; i >= 0; i--)
  360. {
  361. int j, p, x = state[i];
  362. for (j = 0, p = i+1; p < order; j++,p++)
  363. {
  364. int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
  365. state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
  366. x = tmp;
  367. }
  368. }
  369. }
  370. static int predictor_calc_error(int *k, int *state, int order, int error)
  371. {
  372. int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
  373. #if 1
  374. int *k_ptr = &(k[order-2]),
  375. *state_ptr = &(state[order-2]);
  376. for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
  377. {
  378. int k_value = *k_ptr, state_value = *state_ptr;
  379. x -= shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
  380. state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
  381. }
  382. #else
  383. for (i = order-2; i >= 0; i--)
  384. {
  385. x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
  386. state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
  387. }
  388. #endif
  389. // don't drift too far, to avoid overflows
  390. if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
  391. if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
  392. state[0] = x;
  393. return x;
  394. }
  395. #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
  396. // Heavily modified Levinson-Durbin algorithm which
  397. // copes better with quantization, and calculates the
  398. // actual whitened result as it goes.
  399. static int modified_levinson_durbin(int *window, int window_entries,
  400. int *out, int out_entries, int channels, int *tap_quant)
  401. {
  402. int i;
  403. int *state = av_calloc(window_entries, sizeof(*state));
  404. if (!state)
  405. return AVERROR(ENOMEM);
  406. memcpy(state, window, 4* window_entries);
  407. for (i = 0; i < out_entries; i++)
  408. {
  409. int step = (i+1)*channels, k, j;
  410. double xx = 0.0, xy = 0.0;
  411. #if 1
  412. int *x_ptr = &(window[step]);
  413. int *state_ptr = &(state[0]);
  414. j = window_entries - step;
  415. for (;j>0;j--,x_ptr++,state_ptr++)
  416. {
  417. double x_value = *x_ptr;
  418. double state_value = *state_ptr;
  419. xx += state_value*state_value;
  420. xy += x_value*state_value;
  421. }
  422. #else
  423. for (j = 0; j <= (window_entries - step); j++);
  424. {
  425. double stepval = window[step+j];
  426. double stateval = window[j];
  427. // xx += (double)window[j]*(double)window[j];
  428. // xy += (double)window[step+j]*(double)window[j];
  429. xx += stateval*stateval;
  430. xy += stepval*stateval;
  431. }
  432. #endif
  433. if (xx == 0.0)
  434. k = 0;
  435. else
  436. k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
  437. if (k > (LATTICE_FACTOR/tap_quant[i]))
  438. k = LATTICE_FACTOR/tap_quant[i];
  439. if (-k > (LATTICE_FACTOR/tap_quant[i]))
  440. k = -(LATTICE_FACTOR/tap_quant[i]);
  441. out[i] = k;
  442. k *= tap_quant[i];
  443. #if 1
  444. x_ptr = &(window[step]);
  445. state_ptr = &(state[0]);
  446. j = window_entries - step;
  447. for (;j>0;j--,x_ptr++,state_ptr++)
  448. {
  449. int x_value = *x_ptr;
  450. int state_value = *state_ptr;
  451. *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
  452. *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
  453. }
  454. #else
  455. for (j=0; j <= (window_entries - step); j++)
  456. {
  457. int stepval = window[step+j];
  458. int stateval=state[j];
  459. window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
  460. state[j] += shift_down(k * stepval, LATTICE_SHIFT);
  461. }
  462. #endif
  463. }
  464. av_free(state);
  465. return 0;
  466. }
  467. static inline int code_samplerate(int samplerate)
  468. {
  469. switch (samplerate)
  470. {
  471. case 44100: return 0;
  472. case 22050: return 1;
  473. case 11025: return 2;
  474. case 96000: return 3;
  475. case 48000: return 4;
  476. case 32000: return 5;
  477. case 24000: return 6;
  478. case 16000: return 7;
  479. case 8000: return 8;
  480. }
  481. return AVERROR(EINVAL);
  482. }
  483. static av_cold int sonic_encode_init(AVCodecContext *avctx)
  484. {
  485. SonicContext *s = avctx->priv_data;
  486. PutBitContext pb;
  487. int i;
  488. s->version = 2;
  489. if (avctx->channels > MAX_CHANNELS)
  490. {
  491. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  492. return AVERROR(EINVAL); /* only stereo or mono for now */
  493. }
  494. if (avctx->channels == 2)
  495. s->decorrelation = MID_SIDE;
  496. else
  497. s->decorrelation = 3;
  498. if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
  499. {
  500. s->lossless = 1;
  501. s->num_taps = 32;
  502. s->downsampling = 1;
  503. s->quantization = 0.0;
  504. }
  505. else
  506. {
  507. s->num_taps = 128;
  508. s->downsampling = 2;
  509. s->quantization = 1.0;
  510. }
  511. // max tap 2048
  512. if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
  513. av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
  514. return AVERROR_INVALIDDATA;
  515. }
  516. // generate taps
  517. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  518. if (!s->tap_quant)
  519. return AVERROR(ENOMEM);
  520. for (i = 0; i < s->num_taps; i++)
  521. s->tap_quant[i] = ff_sqrt(i+1);
  522. s->channels = avctx->channels;
  523. s->samplerate = avctx->sample_rate;
  524. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  525. s->frame_size = s->channels*s->block_align*s->downsampling;
  526. s->tail_size = s->num_taps*s->channels;
  527. s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
  528. if (!s->tail)
  529. return AVERROR(ENOMEM);
  530. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
  531. if (!s->predictor_k)
  532. return AVERROR(ENOMEM);
  533. for (i = 0; i < s->channels; i++)
  534. {
  535. s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
  536. if (!s->coded_samples[i])
  537. return AVERROR(ENOMEM);
  538. }
  539. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  540. s->window_size = ((2*s->tail_size)+s->frame_size);
  541. s->window = av_calloc(s->window_size, sizeof(*s->window));
  542. if (!s->window || !s->int_samples)
  543. return AVERROR(ENOMEM);
  544. avctx->extradata = av_mallocz(16);
  545. if (!avctx->extradata)
  546. return AVERROR(ENOMEM);
  547. init_put_bits(&pb, avctx->extradata, 16*8);
  548. put_bits(&pb, 2, s->version); // version
  549. if (s->version >= 1)
  550. {
  551. if (s->version >= 2) {
  552. put_bits(&pb, 8, s->version);
  553. put_bits(&pb, 8, s->minor_version);
  554. }
  555. put_bits(&pb, 2, s->channels);
  556. put_bits(&pb, 4, code_samplerate(s->samplerate));
  557. }
  558. put_bits(&pb, 1, s->lossless);
  559. if (!s->lossless)
  560. put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
  561. put_bits(&pb, 2, s->decorrelation);
  562. put_bits(&pb, 2, s->downsampling);
  563. put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
  564. put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
  565. flush_put_bits(&pb);
  566. avctx->extradata_size = put_bits_count(&pb)/8;
  567. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  568. s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  569. avctx->frame_size = s->block_align*s->downsampling;
  570. return 0;
  571. }
  572. static av_cold int sonic_encode_close(AVCodecContext *avctx)
  573. {
  574. SonicContext *s = avctx->priv_data;
  575. int i;
  576. for (i = 0; i < s->channels; i++)
  577. av_freep(&s->coded_samples[i]);
  578. av_freep(&s->predictor_k);
  579. av_freep(&s->tail);
  580. av_freep(&s->tap_quant);
  581. av_freep(&s->window);
  582. av_freep(&s->int_samples);
  583. return 0;
  584. }
  585. static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  586. const AVFrame *frame, int *got_packet_ptr)
  587. {
  588. SonicContext *s = avctx->priv_data;
  589. RangeCoder c;
  590. int i, j, ch, quant = 0, x = 0;
  591. int ret;
  592. const short *samples = (const int16_t*)frame->data[0];
  593. uint8_t state[32];
  594. if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000, 0)) < 0)
  595. return ret;
  596. ff_init_range_encoder(&c, avpkt->data, avpkt->size);
  597. ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
  598. memset(state, 128, sizeof(state));
  599. // short -> internal
  600. for (i = 0; i < s->frame_size; i++)
  601. s->int_samples[i] = samples[i];
  602. if (!s->lossless)
  603. for (i = 0; i < s->frame_size; i++)
  604. s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
  605. switch(s->decorrelation)
  606. {
  607. case MID_SIDE:
  608. for (i = 0; i < s->frame_size; i += s->channels)
  609. {
  610. s->int_samples[i] += s->int_samples[i+1];
  611. s->int_samples[i+1] -= shift(s->int_samples[i], 1);
  612. }
  613. break;
  614. case LEFT_SIDE:
  615. for (i = 0; i < s->frame_size; i += s->channels)
  616. s->int_samples[i+1] -= s->int_samples[i];
  617. break;
  618. case RIGHT_SIDE:
  619. for (i = 0; i < s->frame_size; i += s->channels)
  620. s->int_samples[i] -= s->int_samples[i+1];
  621. break;
  622. }
  623. memset(s->window, 0, 4* s->window_size);
  624. for (i = 0; i < s->tail_size; i++)
  625. s->window[x++] = s->tail[i];
  626. for (i = 0; i < s->frame_size; i++)
  627. s->window[x++] = s->int_samples[i];
  628. for (i = 0; i < s->tail_size; i++)
  629. s->window[x++] = 0;
  630. for (i = 0; i < s->tail_size; i++)
  631. s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
  632. // generate taps
  633. ret = modified_levinson_durbin(s->window, s->window_size,
  634. s->predictor_k, s->num_taps, s->channels, s->tap_quant);
  635. if (ret < 0)
  636. return ret;
  637. if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
  638. return ret;
  639. for (ch = 0; ch < s->channels; ch++)
  640. {
  641. x = s->tail_size+ch;
  642. for (i = 0; i < s->block_align; i++)
  643. {
  644. int sum = 0;
  645. for (j = 0; j < s->downsampling; j++, x += s->channels)
  646. sum += s->window[x];
  647. s->coded_samples[ch][i] = sum;
  648. }
  649. }
  650. // simple rate control code
  651. if (!s->lossless)
  652. {
  653. double energy1 = 0.0, energy2 = 0.0;
  654. for (ch = 0; ch < s->channels; ch++)
  655. {
  656. for (i = 0; i < s->block_align; i++)
  657. {
  658. double sample = s->coded_samples[ch][i];
  659. energy2 += sample*sample;
  660. energy1 += fabs(sample);
  661. }
  662. }
  663. energy2 = sqrt(energy2/(s->channels*s->block_align));
  664. energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
  665. // increase bitrate when samples are like a gaussian distribution
  666. // reduce bitrate when samples are like a two-tailed exponential distribution
  667. if (energy2 > energy1)
  668. energy2 += (energy2-energy1)*RATE_VARIATION;
  669. quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
  670. // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
  671. quant = av_clip(quant, 1, 65534);
  672. put_symbol(&c, state, quant, 0, NULL, NULL);
  673. quant *= SAMPLE_FACTOR;
  674. }
  675. // write out coded samples
  676. for (ch = 0; ch < s->channels; ch++)
  677. {
  678. if (!s->lossless)
  679. for (i = 0; i < s->block_align; i++)
  680. s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
  681. if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
  682. return ret;
  683. }
  684. // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
  685. avpkt->size = ff_rac_terminate(&c, 0);
  686. *got_packet_ptr = 1;
  687. return 0;
  688. }
  689. #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
  690. #if CONFIG_SONIC_DECODER
  691. static const int samplerate_table[] =
  692. { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
  693. static av_cold int sonic_decode_init(AVCodecContext *avctx)
  694. {
  695. SonicContext *s = avctx->priv_data;
  696. GetBitContext gb;
  697. int i;
  698. int ret;
  699. s->channels = avctx->channels;
  700. s->samplerate = avctx->sample_rate;
  701. if (!avctx->extradata)
  702. {
  703. av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
  704. return AVERROR_INVALIDDATA;
  705. }
  706. ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
  707. if (ret < 0)
  708. return ret;
  709. s->version = get_bits(&gb, 2);
  710. if (s->version >= 2) {
  711. s->version = get_bits(&gb, 8);
  712. s->minor_version = get_bits(&gb, 8);
  713. }
  714. if (s->version != 2)
  715. {
  716. av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
  717. return AVERROR_INVALIDDATA;
  718. }
  719. if (s->version >= 1)
  720. {
  721. int sample_rate_index;
  722. s->channels = get_bits(&gb, 2);
  723. sample_rate_index = get_bits(&gb, 4);
  724. if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
  725. av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
  726. return AVERROR_INVALIDDATA;
  727. }
  728. s->samplerate = samplerate_table[sample_rate_index];
  729. av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
  730. s->channels, s->samplerate);
  731. }
  732. if (s->channels > MAX_CHANNELS || s->channels < 1)
  733. {
  734. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  735. return AVERROR_INVALIDDATA;
  736. }
  737. avctx->channels = s->channels;
  738. s->lossless = get_bits1(&gb);
  739. if (!s->lossless)
  740. skip_bits(&gb, 3); // XXX FIXME
  741. s->decorrelation = get_bits(&gb, 2);
  742. if (s->decorrelation != 3 && s->channels != 2) {
  743. av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
  744. return AVERROR_INVALIDDATA;
  745. }
  746. s->downsampling = get_bits(&gb, 2);
  747. if (!s->downsampling) {
  748. av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
  749. return AVERROR_INVALIDDATA;
  750. }
  751. s->num_taps = (get_bits(&gb, 5)+1)<<5;
  752. if (get_bits1(&gb)) // XXX FIXME
  753. av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
  754. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  755. s->frame_size = s->channels*s->block_align*s->downsampling;
  756. // avctx->frame_size = s->block_align;
  757. if (s->num_taps * s->channels > s->frame_size) {
  758. av_log(avctx, AV_LOG_ERROR,
  759. "number of taps times channels (%d * %d) larger than frame size %d\n",
  760. s->num_taps, s->channels, s->frame_size);
  761. return AVERROR_INVALIDDATA;
  762. }
  763. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  764. s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  765. // generate taps
  766. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  767. if (!s->tap_quant)
  768. return AVERROR(ENOMEM);
  769. for (i = 0; i < s->num_taps; i++)
  770. s->tap_quant[i] = ff_sqrt(i+1);
  771. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
  772. for (i = 0; i < s->channels; i++)
  773. {
  774. s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state));
  775. if (!s->predictor_state[i])
  776. return AVERROR(ENOMEM);
  777. }
  778. for (i = 0; i < s->channels; i++)
  779. {
  780. s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
  781. if (!s->coded_samples[i])
  782. return AVERROR(ENOMEM);
  783. }
  784. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  785. if (!s->int_samples)
  786. return AVERROR(ENOMEM);
  787. avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  788. return 0;
  789. }
  790. static av_cold int sonic_decode_close(AVCodecContext *avctx)
  791. {
  792. SonicContext *s = avctx->priv_data;
  793. int i;
  794. av_freep(&s->int_samples);
  795. av_freep(&s->tap_quant);
  796. av_freep(&s->predictor_k);
  797. for (i = 0; i < MAX_CHANNELS; i++) {
  798. av_freep(&s->predictor_state[i]);
  799. av_freep(&s->coded_samples[i]);
  800. }
  801. return 0;
  802. }
  803. static int sonic_decode_frame(AVCodecContext *avctx,
  804. void *data, int *got_frame_ptr,
  805. AVPacket *avpkt)
  806. {
  807. const uint8_t *buf = avpkt->data;
  808. int buf_size = avpkt->size;
  809. SonicContext *s = avctx->priv_data;
  810. RangeCoder c;
  811. uint8_t state[32];
  812. int i, quant, ch, j, ret;
  813. int16_t *samples;
  814. AVFrame *frame = data;
  815. if (buf_size == 0) return 0;
  816. frame->nb_samples = s->frame_size / avctx->channels;
  817. if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
  818. return ret;
  819. samples = (int16_t *)frame->data[0];
  820. // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
  821. memset(state, 128, sizeof(state));
  822. ff_init_range_decoder(&c, buf, buf_size);
  823. ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
  824. intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
  825. // dequantize
  826. for (i = 0; i < s->num_taps; i++)
  827. s->predictor_k[i] *= s->tap_quant[i];
  828. if (s->lossless)
  829. quant = 1;
  830. else
  831. quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
  832. // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
  833. for (ch = 0; ch < s->channels; ch++)
  834. {
  835. int x = ch;
  836. predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
  837. intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
  838. for (i = 0; i < s->block_align; i++)
  839. {
  840. for (j = 0; j < s->downsampling - 1; j++)
  841. {
  842. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
  843. x += s->channels;
  844. }
  845. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
  846. x += s->channels;
  847. }
  848. for (i = 0; i < s->num_taps; i++)
  849. s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
  850. }
  851. switch(s->decorrelation)
  852. {
  853. case MID_SIDE:
  854. for (i = 0; i < s->frame_size; i += s->channels)
  855. {
  856. s->int_samples[i+1] += shift(s->int_samples[i], 1);
  857. s->int_samples[i] -= s->int_samples[i+1];
  858. }
  859. break;
  860. case LEFT_SIDE:
  861. for (i = 0; i < s->frame_size; i += s->channels)
  862. s->int_samples[i+1] += s->int_samples[i];
  863. break;
  864. case RIGHT_SIDE:
  865. for (i = 0; i < s->frame_size; i += s->channels)
  866. s->int_samples[i] += s->int_samples[i+1];
  867. break;
  868. }
  869. if (!s->lossless)
  870. for (i = 0; i < s->frame_size; i++)
  871. s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
  872. // internal -> short
  873. for (i = 0; i < s->frame_size; i++)
  874. samples[i] = av_clip_int16(s->int_samples[i]);
  875. *got_frame_ptr = 1;
  876. return buf_size;
  877. }
  878. AVCodec ff_sonic_decoder = {
  879. .name = "sonic",
  880. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  881. .type = AVMEDIA_TYPE_AUDIO,
  882. .id = AV_CODEC_ID_SONIC,
  883. .priv_data_size = sizeof(SonicContext),
  884. .init = sonic_decode_init,
  885. .close = sonic_decode_close,
  886. .decode = sonic_decode_frame,
  887. .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL,
  888. };
  889. #endif /* CONFIG_SONIC_DECODER */
  890. #if CONFIG_SONIC_ENCODER
  891. AVCodec ff_sonic_encoder = {
  892. .name = "sonic",
  893. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  894. .type = AVMEDIA_TYPE_AUDIO,
  895. .id = AV_CODEC_ID_SONIC,
  896. .priv_data_size = sizeof(SonicContext),
  897. .init = sonic_encode_init,
  898. .encode2 = sonic_encode_frame,
  899. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  900. .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
  901. .close = sonic_encode_close,
  902. };
  903. #endif
  904. #if CONFIG_SONIC_LS_ENCODER
  905. AVCodec ff_sonic_ls_encoder = {
  906. .name = "sonicls",
  907. .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
  908. .type = AVMEDIA_TYPE_AUDIO,
  909. .id = AV_CODEC_ID_SONIC_LS,
  910. .priv_data_size = sizeof(SonicContext),
  911. .init = sonic_encode_init,
  912. .encode2 = sonic_encode_frame,
  913. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  914. .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
  915. .close = sonic_encode_close,
  916. };
  917. #endif