You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

713 lines
18KB

  1. /*
  2. * AMR Audio decoder stub
  3. * Copyright (c) 2003 the ffmpeg project
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /** @file
  22. * Adaptive Multi-Rate (AMR) Audio decoder stub.
  23. *
  24. * This code implements both an AMR-NarrowBand (AMR-NB) and an AMR-WideBand
  25. * (AMR-WB) audio encoder/decoder through external reference code from
  26. * http://www.3gpp.org/. The license of the code from 3gpp is unclear so you
  27. * have to download the code separately. Two versions exist: one fixed-point
  28. * and one floating-point. For some reason the float encoder is significantly
  29. * faster at least on a P4 1.5GHz (0.9s instead of 9.9s on a 30s audio clip
  30. * at MR102). Both float and fixed point are supported for AMR-NB, but only
  31. * float for AMR-WB.
  32. *
  33. * \section AMR-NB
  34. *
  35. * \subsection Float
  36. * The float version (default) can be downloaded from:
  37. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.104/26104-610.zip
  38. *
  39. * \subsection Fixed-point
  40. * The fixed-point (TS26.073) can be downloaded from:
  41. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.073/26073-600.zip
  42. *
  43. * \subsection Specification
  44. * The specification for AMR-NB can be found in TS 26.071
  45. * (http://www.3gpp.org/ftp/Specs/html-info/26071.htm) and some other
  46. * info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm.
  47. *
  48. * \section AMR-WB
  49. *
  50. * \subsection Float
  51. * The reference code can be downloaded from:
  52. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-600.zip
  53. *
  54. * \subsection Fixed-point
  55. * If someone wants to use the fixed point version it can be downloaded from:
  56. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.173/26173-571.zip.
  57. *
  58. * \subsection Specification
  59. * The specification for AMR-WB can be found in TS 26.171
  60. * (http://www.3gpp.org/ftp/Specs/html-info/26171.htm) and some other
  61. * info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm.
  62. *
  63. */
  64. #include "avcodec.h"
  65. #ifdef CONFIG_LIBAMR_NB_FIXED
  66. #define MMS_IO
  67. #include "amr/sp_dec.h"
  68. #include "amr/d_homing.h"
  69. #include "amr/typedef.h"
  70. #include "amr/sp_enc.h"
  71. #include "amr/sid_sync.h"
  72. #include "amr/e_homing.h"
  73. #else
  74. #include <amrnb/interf_dec.h>
  75. #include <amrnb/interf_enc.h>
  76. #endif
  /* Message logged when the requested bitrate is not one of the AMR-NB rates. */
  static const char *nb_bitrate_unsupported =
      "bitrate not supported: use one of "
      "4.75k, 5.15k, 5.9k, 6.7k, 7.4k, 7.95k, 10.2k or 12.2k\n";

  /* Message logged when the requested bitrate is not one of the AMR-WB rates. */
  static const char *wb_bitrate_unsupported =
      "bitrate not supported: use one of "
      "6.6k, 8.85k, 12.65k, 14.25k, 15.85k, 18.25k, 19.85k, 23.05k, or 23.85k\n";
  81. /* Common code for fixed and float version*/
  82. typedef struct AMR_bitrates
  83. {
  84. int rate;
  85. enum Mode mode;
  86. } AMR_bitrates;
  87. /* Match desired bitrate */
  88. static int getBitrateMode(int bitrate)
  89. {
  90. /* make the correspondance between bitrate and mode */
  91. AMR_bitrates rates[]={ {4750,MR475},
  92. {5150,MR515},
  93. {5900,MR59},
  94. {6700,MR67},
  95. {7400,MR74},
  96. {7950,MR795},
  97. {10200,MR102},
  98. {12200,MR122},
  99. };
  100. int i;
  101. for(i=0;i<8;i++)
  102. {
  103. if(rates[i].rate==bitrate)
  104. {
  105. return(rates[i].mode);
  106. }
  107. }
  108. /* no bitrate matching, return an error */
  109. return -1;
  110. }
  111. static void amr_decode_fix_avctx(AVCodecContext * avctx)
  112. {
  113. const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
  114. if(avctx->sample_rate == 0)
  115. {
  116. avctx->sample_rate = 8000 * is_amr_wb;
  117. }
  118. if(avctx->channels == 0)
  119. {
  120. avctx->channels = 1;
  121. }
  122. avctx->frame_size = 160 * is_amr_wb;
  123. }
  124. #ifdef CONFIG_LIBAMR_NB_FIXED
  125. /* fixed point version*/
  126. /* frame size in serial bitstream file (frame type + serial stream + flags) */
  127. #define SERIAL_FRAMESIZE (1+MAX_SERIAL_SIZE+5)
  128. typedef struct AMRContext {
  129. int frameCount;
  130. Speech_Decode_FrameState *speech_decoder_state;
  131. enum RXFrameType rx_type;
  132. enum Mode mode;
  133. Word16 reset_flag;
  134. Word16 reset_flag_old;
  135. int enc_bitrate;
  136. Speech_Encode_FrameState *enstate;
  137. sid_syncState *sidstate;
  138. enum TXFrameType tx_frametype;
  139. } AMRContext;
  140. static int amr_nb_decode_init(AVCodecContext * avctx)
  141. {
  142. AMRContext *s = avctx->priv_data;
  143. s->frameCount=0;
  144. s->speech_decoder_state=NULL;
  145. s->rx_type = (enum RXFrameType)0;
  146. s->mode= (enum Mode)0;
  147. s->reset_flag=0;
  148. s->reset_flag_old=1;
  149. if(Speech_Decode_Frame_init(&s->speech_decoder_state, "Decoder"))
  150. {
  151. av_log(avctx, AV_LOG_ERROR, "Speech_Decode_Frame_init error\n");
  152. return -1;
  153. }
  154. amr_decode_fix_avctx(avctx);
  155. if(avctx->channels > 1)
  156. {
  157. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  158. return -1;
  159. }
  160. return 0;
  161. }
  162. static int amr_nb_encode_init(AVCodecContext * avctx)
  163. {
  164. AMRContext *s = avctx->priv_data;
  165. s->frameCount=0;
  166. s->speech_decoder_state=NULL;
  167. s->rx_type = (enum RXFrameType)0;
  168. s->mode= (enum Mode)0;
  169. s->reset_flag=0;
  170. s->reset_flag_old=1;
  171. if(avctx->sample_rate!=8000)
  172. {
  173. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  174. return -1;
  175. }
  176. if(avctx->channels!=1)
  177. {
  178. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  179. return -1;
  180. }
  181. avctx->frame_size=160;
  182. avctx->coded_frame= avcodec_alloc_frame();
  183. if(Speech_Encode_Frame_init(&s->enstate, 0, "encoder") || sid_sync_init (&s->sidstate))
  184. {
  185. av_log(avctx, AV_LOG_ERROR, "Speech_Encode_Frame_init error\n");
  186. return -1;
  187. }
  188. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  189. {
  190. av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
  191. return -1;
  192. }
  193. return 0;
  194. }
  195. static int amr_nb_encode_close(AVCodecContext * avctx)
  196. {
  197. AMRContext *s = avctx->priv_data;
  198. Speech_Encode_Frame_exit(&s->enstate);
  199. sid_sync_exit (&s->sidstate);
  200. av_freep(&avctx->coded_frame);
  201. return 0;
  202. }
  203. static int amr_nb_decode_close(AVCodecContext * avctx)
  204. {
  205. AMRContext *s = avctx->priv_data;
  206. Speech_Decode_Frame_exit(&s->speech_decoder_state);
  207. return 0;
  208. }
  209. static int amr_nb_decode_frame(AVCodecContext * avctx,
  210. void *data, int *data_size,
  211. uint8_t * buf, int buf_size)
  212. {
  213. AMRContext *s = avctx->priv_data;
  214. uint8_t*amrData=buf;
  215. int offset=0;
  216. UWord8 toc, q, ft;
  217. Word16 serial[SERIAL_FRAMESIZE]; /* coded bits */
  218. Word16 *synth;
  219. UWord8 *packed_bits;
  220. static Word16 packed_size[16] = {12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0};
  221. int i;
  222. //printf("amr_decode_frame data_size=%i buf=0x%X buf_size=%d frameCount=%d!!\n",*data_size,buf,buf_size,s->frameCount);
  223. synth=data;
  224. toc=amrData[offset];
  225. /* read rest of the frame based on ToC byte */
  226. q = (toc >> 2) & 0x01;
  227. ft = (toc >> 3) & 0x0F;
  228. //printf("offset=%d, packet_size=%d amrData= 0x%X %X %X %X\n",offset,packed_size[ft],amrData[offset],amrData[offset+1],amrData[offset+2],amrData[offset+3]);
  229. offset++;
  230. packed_bits=amrData+offset;
  231. offset+=packed_size[ft];
  232. //Unsort and unpack bits
  233. s->rx_type = UnpackBits(q, ft, packed_bits, &s->mode, &serial[1]);
  234. //We have a new frame
  235. s->frameCount++;
  236. if (s->rx_type == RX_NO_DATA)
  237. {
  238. s->mode = s->speech_decoder_state->prev_mode;
  239. }
  240. else {
  241. s->speech_decoder_state->prev_mode = s->mode;
  242. }
  243. /* if homed: check if this frame is another homing frame */
  244. if (s->reset_flag_old == 1)
  245. {
  246. /* only check until end of first subframe */
  247. s->reset_flag = decoder_homing_frame_test_first(&serial[1], s->mode);
  248. }
  249. /* produce encoder homing frame if homed & input=decoder homing frame */
  250. if ((s->reset_flag != 0) && (s->reset_flag_old != 0))
  251. {
  252. for (i = 0; i < L_FRAME; i++)
  253. {
  254. synth[i] = EHF_MASK;
  255. }
  256. }
  257. else
  258. {
  259. /* decode frame */
  260. Speech_Decode_Frame(s->speech_decoder_state, s->mode, &serial[1], s->rx_type, synth);
  261. }
  262. //Each AMR-frame results in 160 16-bit samples
  263. *data_size=160*2;
  264. /* if not homed: check whether current frame is a homing frame */
  265. if (s->reset_flag_old == 0)
  266. {
  267. /* check whole frame */
  268. s->reset_flag = decoder_homing_frame_test(&serial[1], s->mode);
  269. }
  270. /* reset decoder if current frame is a homing frame */
  271. if (s->reset_flag != 0)
  272. {
  273. Speech_Decode_Frame_reset(s->speech_decoder_state);
  274. }
  275. s->reset_flag_old = s->reset_flag;
  276. return offset;
  277. }
  278. static int amr_nb_encode_frame(AVCodecContext *avctx,
  279. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  280. {
  281. short serial_data[250] = {0};
  282. AMRContext *s = avctx->priv_data;
  283. int written;
  284. s->reset_flag = encoder_homing_frame_test(data);
  285. Speech_Encode_Frame(s->enstate, s->enc_bitrate, data, &serial_data[1], &s->mode);
  286. /* add frame type and mode */
  287. sid_sync (s->sidstate, s->mode, &s->tx_frametype);
  288. written = PackBits(s->mode, s->enc_bitrate, s->tx_frametype, &serial_data[1], frame);
  289. if (s->reset_flag != 0)
  290. {
  291. Speech_Encode_Frame_reset(s->enstate);
  292. sid_sync_reset(s->sidstate);
  293. }
  294. return written;
  295. }
  296. #elif defined(CONFIG_LIBAMR_NB) /* Float point version*/
  /** Private codec state shared by the floating-point AMR-NB decoder and encoder. */
  typedef struct AMRContext {
      int   frameCount;   /* incremented once per decoded frame */
      void *decState;     /* handle returned by Decoder_Interface_init() */
      int  *enstate;      /* handle returned by Encoder_Interface_init() */
      int   enc_bitrate;  /* mode chosen by getBitrateMode() for encoding */
  } AMRContext;
  303. static int amr_nb_decode_init(AVCodecContext * avctx)
  304. {
  305. AMRContext *s = avctx->priv_data;
  306. s->frameCount=0;
  307. s->decState=Decoder_Interface_init();
  308. if(!s->decState)
  309. {
  310. av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n");
  311. return -1;
  312. }
  313. amr_decode_fix_avctx(avctx);
  314. if(avctx->channels > 1)
  315. {
  316. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  317. return -1;
  318. }
  319. return 0;
  320. }
  321. static int amr_nb_encode_init(AVCodecContext * avctx)
  322. {
  323. AMRContext *s = avctx->priv_data;
  324. s->frameCount=0;
  325. if(avctx->sample_rate!=8000)
  326. {
  327. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  328. return -1;
  329. }
  330. if(avctx->channels!=1)
  331. {
  332. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  333. return -1;
  334. }
  335. avctx->frame_size=160;
  336. avctx->coded_frame= avcodec_alloc_frame();
  337. s->enstate=Encoder_Interface_init(0);
  338. if(!s->enstate)
  339. {
  340. av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
  341. return -1;
  342. }
  343. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  344. {
  345. av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
  346. return -1;
  347. }
  348. return 0;
  349. }
  350. static int amr_nb_decode_close(AVCodecContext * avctx)
  351. {
  352. AMRContext *s = avctx->priv_data;
  353. Decoder_Interface_exit(s->decState);
  354. return 0;
  355. }
  356. static int amr_nb_encode_close(AVCodecContext * avctx)
  357. {
  358. AMRContext *s = avctx->priv_data;
  359. Encoder_Interface_exit(s->enstate);
  360. av_freep(&avctx->coded_frame);
  361. return 0;
  362. }
  363. static int amr_nb_decode_frame(AVCodecContext * avctx,
  364. void *data, int *data_size,
  365. uint8_t * buf, int buf_size)
  366. {
  367. AMRContext *s = avctx->priv_data;
  368. uint8_t*amrData=buf;
  369. static const uint8_t block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
  370. enum Mode dec_mode;
  371. int packet_size;
  372. /* av_log(NULL,AV_LOG_DEBUG,"amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",buf,buf_size,s->frameCount); */
  373. dec_mode = (buf[0] >> 3) & 0x000F;
  374. packet_size = block_size[dec_mode]+1;
  375. if(packet_size > buf_size) {
  376. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size);
  377. return -1;
  378. }
  379. s->frameCount++;
  380. /* av_log(NULL,AV_LOG_DEBUG,"packet_size=%d amrData= 0x%X %X %X %X\n",packet_size,amrData[0],amrData[1],amrData[2],amrData[3]); */
  381. /* call decoder */
  382. Decoder_Interface_Decode(s->decState, amrData, data, 0);
  383. *data_size=160*2;
  384. return packet_size;
  385. }
  386. static int amr_nb_encode_frame(AVCodecContext *avctx,
  387. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  388. {
  389. AMRContext *s = avctx->priv_data;
  390. int written;
  391. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  392. {
  393. av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
  394. return -1;
  395. }
  396. written = Encoder_Interface_Encode(s->enstate,
  397. s->enc_bitrate,
  398. data,
  399. frame,
  400. 0);
  401. /* av_log(NULL,AV_LOG_DEBUG,"amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",written, s->enc_bitrate, frame[0] ); */
  402. return written;
  403. }
  404. #endif
  405. #if defined(CONFIG_LIBAMR_NB) || defined(CONFIG_LIBAMR_NB_FIXED)
  406. AVCodec libamr_nb_decoder =
  407. {
  408. "libamr_nb",
  409. CODEC_TYPE_AUDIO,
  410. CODEC_ID_AMR_NB,
  411. sizeof(AMRContext),
  412. amr_nb_decode_init,
  413. NULL,
  414. amr_nb_decode_close,
  415. amr_nb_decode_frame,
  416. };
  417. AVCodec libamr_nb_encoder =
  418. {
  419. "libamr_nb",
  420. CODEC_TYPE_AUDIO,
  421. CODEC_ID_AMR_NB,
  422. sizeof(AMRContext),
  423. amr_nb_encode_init,
  424. amr_nb_encode_frame,
  425. amr_nb_encode_close,
  426. NULL,
  427. };
  428. #endif
  429. /* -----------AMR wideband ------------*/
  430. #ifdef CONFIG_LIBAMR_WB
  431. #ifdef _TYPEDEF_H
  432. //To avoid duplicate typedefs from typedef in amr-nb
  433. #define typedef_h
  434. #endif
  435. #include <amrwb/enc_if.h>
  436. #include <amrwb/dec_if.h>
  437. #include <amrwb/if_rom.h>
  438. /* Common code for fixed and float version*/
  /** One row of the AMR-WB lookup table: a bitrate and the mode index it selects. */
  typedef struct AMRWB_bitrates
  {
      int rate;
      int mode;
  } AMRWB_bitrates;

  /**
   * Find the AMR-WB mode index that corresponds exactly to a bitrate.
   *
   * @param bitrate requested bitrate; must equal one of the table entries
   * @return mode index 0..8, or -1 when no entry matches
   */
  static int getWBBitrateMode(int bitrate)
  {
      /* correspondence between bitrate and mode */
      static const AMRWB_bitrates table[] = {
          {  6600, 0 },
          {  8850, 1 },
          { 12650, 2 },
          { 14250, 3 },
          { 15850, 4 },
          { 18250, 5 },
          { 19850, 6 },
          { 23050, 7 },
          { 23850, 8 },
      };
      const AMRWB_bitrates *const end = table + sizeof(table) / sizeof(table[0]);
      const AMRWB_bitrates *entry;

      for (entry = table; entry != end; entry++)
      {
          if (entry->rate == bitrate)
              return entry->mode;
      }

      /* nothing matched: report an error */
      return -1;
  }
  468. typedef struct AMRWBContext {
  469. int frameCount;
  470. void *state;
  471. int mode;
  472. Word16 allow_dtx;
  473. } AMRWBContext;
  474. static int amr_wb_encode_init(AVCodecContext * avctx)
  475. {
  476. AMRWBContext *s = avctx->priv_data;
  477. s->frameCount=0;
  478. if(avctx->sample_rate!=16000)
  479. {
  480. av_log(avctx, AV_LOG_ERROR, "Only 16000Hz sample rate supported\n");
  481. return -1;
  482. }
  483. if(avctx->channels!=1)
  484. {
  485. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  486. return -1;
  487. }
  488. if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
  489. {
  490. av_log(avctx, AV_LOG_ERROR, wb_bitrate_unsupported);
  491. return -1;
  492. }
  493. avctx->frame_size=320;
  494. avctx->coded_frame= avcodec_alloc_frame();
  495. s->state = E_IF_init();
  496. s->allow_dtx=0;
  497. return 0;
  498. }
  499. static int amr_wb_encode_close(AVCodecContext * avctx)
  500. {
  501. AMRWBContext *s = avctx->priv_data;
  502. E_IF_exit(s->state);
  503. av_freep(&avctx->coded_frame);
  504. s->frameCount++;
  505. return 0;
  506. }
  507. static int amr_wb_encode_frame(AVCodecContext *avctx,
  508. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  509. {
  510. AMRWBContext *s = avctx->priv_data;
  511. int size;
  512. if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
  513. {
  514. av_log(avctx, AV_LOG_ERROR, wb_bitrate_unsupported);
  515. return -1;
  516. }
  517. size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
  518. return size;
  519. }
  520. static int amr_wb_decode_init(AVCodecContext * avctx)
  521. {
  522. AMRWBContext *s = avctx->priv_data;
  523. s->frameCount=0;
  524. s->state = D_IF_init();
  525. amr_decode_fix_avctx(avctx);
  526. if(avctx->channels > 1)
  527. {
  528. av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
  529. return -1;
  530. }
  531. return 0;
  532. }
  533. static int amr_wb_decode_frame(AVCodecContext * avctx,
  534. void *data, int *data_size,
  535. uint8_t * buf, int buf_size)
  536. {
  537. AMRWBContext *s = avctx->priv_data;
  538. uint8_t*amrData=buf;
  539. int mode;
  540. int packet_size;
  541. static const uint8_t block_size[16] = {18, 23, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1};
  542. if(buf_size==0) {
  543. /* nothing to do */
  544. return 0;
  545. }
  546. mode = (amrData[0] >> 3) & 0x000F;
  547. packet_size = block_size[mode];
  548. if(packet_size > buf_size) {
  549. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size+1);
  550. return -1;
  551. }
  552. s->frameCount++;
  553. D_IF_decode( s->state, amrData, data, _good_frame);
  554. *data_size=320*2;
  555. return packet_size;
  556. }
  557. static int amr_wb_decode_close(AVCodecContext * avctx)
  558. {
  559. AMRWBContext *s = avctx->priv_data;
  560. D_IF_exit(s->state);
  561. return 0;
  562. }
  563. AVCodec libamr_wb_decoder =
  564. {
  565. "libamr_wb",
  566. CODEC_TYPE_AUDIO,
  567. CODEC_ID_AMR_WB,
  568. sizeof(AMRWBContext),
  569. amr_wb_decode_init,
  570. NULL,
  571. amr_wb_decode_close,
  572. amr_wb_decode_frame,
  573. };
  574. AVCodec libamr_wb_encoder =
  575. {
  576. "libamr_wb",
  577. CODEC_TYPE_AUDIO,
  578. CODEC_ID_AMR_WB,
  579. sizeof(AMRWBContext),
  580. amr_wb_encode_init,
  581. amr_wb_encode_frame,
  582. amr_wb_encode_close,
  583. NULL,
  584. };
  585. #endif //CONFIG_LIBAMR_WB