You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

718 lines
18KB

  1. /*
  2. * AMR Audio decoder stub
  3. * Copyright (c) 2003 the ffmpeg project
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /** @file
  22. * Adaptive Multi-Rate (AMR) Audio decoder stub.
  23. *
  24. * This code implements both an AMR-NarrowBand (AMR-NB) and an AMR-WideBand
  25. * (AMR-WB) audio encoder/decoder through external reference code from
  26. * http://www.3gpp.org/. The license of the code from 3gpp is unclear so you
  27. * have to download the code separately. Two versions exist: one fixed-point
  28. * and one with floats. For some reason the float encoder is significantly faster,
  29. * at least on a P4 1.5GHz (0.9s instead of 9.9s on a 30s audio clip at MR102).
  30. * Both float and fixed point are supported for AMR-NB, but only float for
  31. * AMR-WB.
  32. *
  33. * \section AMR-NB
  34. *
  35. * \subsection Float
  36. * The float version (default) can be downloaded from:
  37. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.104/26104-610.zip
  38. * Extract the source into \c "ffmpeg/libavcodec/amr_float".
  39. * Enable it by passing \c "--enable-amr-nb" to \c "./configure".
  40. *
  41. * \subsection Fixed-point
  42. * The fixed-point (TS26.073) can be downloaded from:
  43. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.073/26073-510.zip.
  44. * Extract the source into \c "ffmpeg/libavcodec/amr".
  45. * Enable it by passing \c "--enable-amr-nb-fixed" to \c "./configure".
  46. *
  47. * \subsection Specification
  48. * The specification for AMR-NB can be found in TS 26.071
  49. * (http://www.3gpp.org/ftp/Specs/html-info/26071.htm) and some other
  50. * info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm.
  51. *
  52. * \section AMR-WB
  53. * \subsection Float
  54. * The reference code can be downloaded from:
  55. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-600.zip
  56. * It should be extracted to \c "ffmpeg/libavcodec/amrwb_float".
  57. * Enable it by passing \c "--enable-amr-wb" to \c "./configure".
  58. *
  59. * \subsection Fixed-point
  60. * If someone wants to use the fixed point version it can be downloaded from:
  61. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.173/26173-571.zip.
  62. *
  63. * \subsection Specification
  64. * The specification for AMR-WB can be found in TS 26.171
  65. * (http://www.3gpp.org/ftp/Specs/html-info/26171.htm) and some other
  66. * info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm.
  67. *
  68. */
  69. #include "avcodec.h"
  70. #ifdef CONFIG_AMR_NB_FIXED
  71. #define MMS_IO
  72. #include "amr/sp_dec.h"
  73. #include "amr/d_homing.h"
  74. #include "amr/typedef.h"
  75. #include "amr/sp_enc.h"
  76. #include "amr/sid_sync.h"
  77. #include "amr/e_homing.h"
  78. #else
  79. #include "amr_float/interf_dec.h"
  80. #include "amr_float/interf_enc.h"
  81. #endif
  82. static const char *nb_bitrate_unsupported =
  83. "bitrate not supported: use one of 4.75k, 5.15k, 5.9k, 6.7k, 7.4k, 7.95k, 10.2k or 12.2k\n";
  84. static const char *wb_bitrate_unsupported =
  85. "bitrate not supported: use one of 6.6k, 8.85k, 12.65k, 14.25k, 15.85k, 18.25k, 19.85k, 23.05k, or 23.85k\n";
  86. /* Common code for fixed and float version*/
  87. typedef struct AMR_bitrates
  88. {
  89. int rate;
  90. enum Mode mode;
  91. } AMR_bitrates;
  92. /* Match desired bitrate */
  93. static int getBitrateMode(int bitrate)
  94. {
  95. /* make the correspondance between bitrate and mode */
  96. AMR_bitrates rates[]={ {4750,MR475},
  97. {5150,MR515},
  98. {5900,MR59},
  99. {6700,MR67},
  100. {7400,MR74},
  101. {7950,MR795},
  102. {10200,MR102},
  103. {12200,MR122},
  104. };
  105. int i;
  106. for(i=0;i<8;i++)
  107. {
  108. if(rates[i].rate==bitrate)
  109. {
  110. return(rates[i].mode);
  111. }
  112. }
  113. /* no bitrate matching, return an error */
  114. return -1;
  115. }
  116. static void amr_decode_fix_avctx(AVCodecContext * avctx)
  117. {
  118. const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
  119. if(avctx->sample_rate == 0)
  120. {
  121. avctx->sample_rate = 8000 * is_amr_wb;
  122. }
  123. if(avctx->channels == 0)
  124. {
  125. avctx->channels = 1;
  126. }
  127. avctx->frame_size = 160 * is_amr_wb;
  128. }
  129. #ifdef CONFIG_AMR_NB_FIXED
  130. /* fixed point version*/
  131. /* frame size in serial bitstream file (frame type + serial stream + flags) */
  132. #define SERIAL_FRAMESIZE (1+MAX_SERIAL_SIZE+5)
  133. typedef struct AMRContext {
  134. int frameCount;
  135. Speech_Decode_FrameState *speech_decoder_state;
  136. enum RXFrameType rx_type;
  137. enum Mode mode;
  138. Word16 reset_flag;
  139. Word16 reset_flag_old;
  140. int enc_bitrate;
  141. Speech_Encode_FrameState *enstate;
  142. sid_syncState *sidstate;
  143. enum TXFrameType tx_frametype;
  144. } AMRContext;
  145. static int amr_nb_decode_init(AVCodecContext * avctx)
  146. {
  147. AMRContext *s = avctx->priv_data;
  148. s->frameCount=0;
  149. s->speech_decoder_state=NULL;
  150. s->rx_type = (enum RXFrameType)0;
  151. s->mode= (enum Mode)0;
  152. s->reset_flag=0;
  153. s->reset_flag_old=1;
  154. if(Speech_Decode_Frame_init(&s->speech_decoder_state, "Decoder"))
  155. {
  156. av_log(avctx, AV_LOG_ERROR, "Speech_Decode_Frame_init error\n");
  157. return -1;
  158. }
  159. amr_decode_fix_avctx(avctx);
  160. if(avctx->channels > 1)
  161. {
  162. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  163. return -1;
  164. }
  165. return 0;
  166. }
  167. static int amr_nb_encode_init(AVCodecContext * avctx)
  168. {
  169. AMRContext *s = avctx->priv_data;
  170. s->frameCount=0;
  171. s->speech_decoder_state=NULL;
  172. s->rx_type = (enum RXFrameType)0;
  173. s->mode= (enum Mode)0;
  174. s->reset_flag=0;
  175. s->reset_flag_old=1;
  176. if(avctx->sample_rate!=8000)
  177. {
  178. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  179. return -1;
  180. }
  181. if(avctx->channels!=1)
  182. {
  183. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  184. return -1;
  185. }
  186. avctx->frame_size=160;
  187. avctx->coded_frame= avcodec_alloc_frame();
  188. if(Speech_Encode_Frame_init(&s->enstate, 0, "encoder") || sid_sync_init (&s->sidstate))
  189. {
  190. av_log(avctx, AV_LOG_ERROR, "Speech_Encode_Frame_init error\n");
  191. return -1;
  192. }
  193. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  194. {
  195. av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
  196. return -1;
  197. }
  198. return 0;
  199. }
  200. static int amr_nb_encode_close(AVCodecContext * avctx)
  201. {
  202. AMRContext *s = avctx->priv_data;
  203. Speech_Encode_Frame_exit(&s->enstate);
  204. sid_sync_exit (&s->sidstate);
  205. av_freep(&avctx->coded_frame);
  206. return 0;
  207. }
  208. static int amr_nb_decode_close(AVCodecContext * avctx)
  209. {
  210. AMRContext *s = avctx->priv_data;
  211. Speech_Decode_Frame_exit(&s->speech_decoder_state);
  212. return 0;
  213. }
  214. static int amr_nb_decode_frame(AVCodecContext * avctx,
  215. void *data, int *data_size,
  216. uint8_t * buf, int buf_size)
  217. {
  218. AMRContext *s = avctx->priv_data;
  219. uint8_t*amrData=buf;
  220. int offset=0;
  221. UWord8 toc, q, ft;
  222. Word16 serial[SERIAL_FRAMESIZE]; /* coded bits */
  223. Word16 *synth;
  224. UWord8 *packed_bits;
  225. static Word16 packed_size[16] = {12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0};
  226. int i;
  227. //printf("amr_decode_frame data_size=%i buf=0x%X buf_size=%d frameCount=%d!!\n",*data_size,buf,buf_size,s->frameCount);
  228. synth=data;
  229. toc=amrData[offset];
  230. /* read rest of the frame based on ToC byte */
  231. q = (toc >> 2) & 0x01;
  232. ft = (toc >> 3) & 0x0F;
  233. //printf("offset=%d, packet_size=%d amrData= 0x%X %X %X %X\n",offset,packed_size[ft],amrData[offset],amrData[offset+1],amrData[offset+2],amrData[offset+3]);
  234. offset++;
  235. packed_bits=amrData+offset;
  236. offset+=packed_size[ft];
  237. //Unsort and unpack bits
  238. s->rx_type = UnpackBits(q, ft, packed_bits, &s->mode, &serial[1]);
  239. //We have a new frame
  240. s->frameCount++;
  241. if (s->rx_type == RX_NO_DATA)
  242. {
  243. s->mode = s->speech_decoder_state->prev_mode;
  244. }
  245. else {
  246. s->speech_decoder_state->prev_mode = s->mode;
  247. }
  248. /* if homed: check if this frame is another homing frame */
  249. if (s->reset_flag_old == 1)
  250. {
  251. /* only check until end of first subframe */
  252. s->reset_flag = decoder_homing_frame_test_first(&serial[1], s->mode);
  253. }
  254. /* produce encoder homing frame if homed & input=decoder homing frame */
  255. if ((s->reset_flag != 0) && (s->reset_flag_old != 0))
  256. {
  257. for (i = 0; i < L_FRAME; i++)
  258. {
  259. synth[i] = EHF_MASK;
  260. }
  261. }
  262. else
  263. {
  264. /* decode frame */
  265. Speech_Decode_Frame(s->speech_decoder_state, s->mode, &serial[1], s->rx_type, synth);
  266. }
  267. //Each AMR-frame results in 160 16-bit samples
  268. *data_size=160*2;
  269. /* if not homed: check whether current frame is a homing frame */
  270. if (s->reset_flag_old == 0)
  271. {
  272. /* check whole frame */
  273. s->reset_flag = decoder_homing_frame_test(&serial[1], s->mode);
  274. }
  275. /* reset decoder if current frame is a homing frame */
  276. if (s->reset_flag != 0)
  277. {
  278. Speech_Decode_Frame_reset(s->speech_decoder_state);
  279. }
  280. s->reset_flag_old = s->reset_flag;
  281. return offset;
  282. }
  283. static int amr_nb_encode_frame(AVCodecContext *avctx,
  284. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  285. {
  286. short serial_data[250] = {0};
  287. AMRContext *s = avctx->priv_data;
  288. int written;
  289. s->reset_flag = encoder_homing_frame_test(data);
  290. Speech_Encode_Frame(s->enstate, s->enc_bitrate, data, &serial_data[1], &s->mode);
  291. /* add frame type and mode */
  292. sid_sync (s->sidstate, s->mode, &s->tx_frametype);
  293. written = PackBits(s->mode, s->enc_bitrate, s->tx_frametype, &serial_data[1], frame);
  294. if (s->reset_flag != 0)
  295. {
  296. Speech_Encode_Frame_reset(s->enstate);
  297. sid_sync_reset(s->sidstate);
  298. }
  299. return written;
  300. }
  301. #elif defined(CONFIG_AMR_NB) /* Float point version*/
  302. typedef struct AMRContext {
  303. int frameCount;
  304. void * decState;
  305. int *enstate;
  306. int enc_bitrate;
  307. } AMRContext;
  308. static int amr_nb_decode_init(AVCodecContext * avctx)
  309. {
  310. AMRContext *s = avctx->priv_data;
  311. s->frameCount=0;
  312. s->decState=Decoder_Interface_init();
  313. if(!s->decState)
  314. {
  315. av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n");
  316. return -1;
  317. }
  318. amr_decode_fix_avctx(avctx);
  319. if(avctx->channels > 1)
  320. {
  321. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  322. return -1;
  323. }
  324. return 0;
  325. }
  326. static int amr_nb_encode_init(AVCodecContext * avctx)
  327. {
  328. AMRContext *s = avctx->priv_data;
  329. s->frameCount=0;
  330. if(avctx->sample_rate!=8000)
  331. {
  332. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  333. return -1;
  334. }
  335. if(avctx->channels!=1)
  336. {
  337. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  338. return -1;
  339. }
  340. avctx->frame_size=160;
  341. avctx->coded_frame= avcodec_alloc_frame();
  342. s->enstate=Encoder_Interface_init(0);
  343. if(!s->enstate)
  344. {
  345. av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
  346. return -1;
  347. }
  348. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  349. {
  350. av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
  351. return -1;
  352. }
  353. return 0;
  354. }
  355. static int amr_nb_decode_close(AVCodecContext * avctx)
  356. {
  357. AMRContext *s = avctx->priv_data;
  358. Decoder_Interface_exit(s->decState);
  359. return 0;
  360. }
  361. static int amr_nb_encode_close(AVCodecContext * avctx)
  362. {
  363. AMRContext *s = avctx->priv_data;
  364. Encoder_Interface_exit(s->enstate);
  365. av_freep(&avctx->coded_frame);
  366. return 0;
  367. }
  368. static int amr_nb_decode_frame(AVCodecContext * avctx,
  369. void *data, int *data_size,
  370. uint8_t * buf, int buf_size)
  371. {
  372. AMRContext *s = avctx->priv_data;
  373. uint8_t*amrData=buf;
  374. static short block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
  375. enum Mode dec_mode;
  376. int packet_size;
  377. /* av_log(NULL,AV_LOG_DEBUG,"amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",buf,buf_size,s->frameCount); */
  378. dec_mode = (buf[0] >> 3) & 0x000F;
  379. packet_size = block_size[dec_mode]+1;
  380. if(packet_size > buf_size) {
  381. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size);
  382. return -1;
  383. }
  384. s->frameCount++;
  385. /* av_log(NULL,AV_LOG_DEBUG,"packet_size=%d amrData= 0x%X %X %X %X\n",packet_size,amrData[0],amrData[1],amrData[2],amrData[3]); */
  386. /* call decoder */
  387. Decoder_Interface_Decode(s->decState, amrData, data, 0);
  388. *data_size=160*2;
  389. return packet_size;
  390. }
  391. static int amr_nb_encode_frame(AVCodecContext *avctx,
  392. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  393. {
  394. AMRContext *s = avctx->priv_data;
  395. int written;
  396. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  397. {
  398. av_log(avctx, AV_LOG_ERROR, nb_bitrate_unsupported);
  399. return -1;
  400. }
  401. written = Encoder_Interface_Encode(s->enstate,
  402. s->enc_bitrate,
  403. data,
  404. frame,
  405. 0);
  406. /* av_log(NULL,AV_LOG_DEBUG,"amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",written, s->enc_bitrate, frame[0] ); */
  407. return written;
  408. }
  409. #endif
  410. #if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED)
  411. AVCodec amr_nb_decoder =
  412. {
  413. "amr_nb",
  414. CODEC_TYPE_AUDIO,
  415. CODEC_ID_AMR_NB,
  416. sizeof(AMRContext),
  417. amr_nb_decode_init,
  418. NULL,
  419. amr_nb_decode_close,
  420. amr_nb_decode_frame,
  421. };
  422. AVCodec amr_nb_encoder =
  423. {
  424. "amr_nb",
  425. CODEC_TYPE_AUDIO,
  426. CODEC_ID_AMR_NB,
  427. sizeof(AMRContext),
  428. amr_nb_encode_init,
  429. amr_nb_encode_frame,
  430. amr_nb_encode_close,
  431. NULL,
  432. };
  433. #endif
  434. /* -----------AMR wideband ------------*/
  435. #ifdef CONFIG_AMR_WB
  436. #ifdef _TYPEDEF_H
  437. //To avoid duplicate typedefs from typdef in amr-nb
  438. #define typedef_h
  439. #endif
  440. #include "amrwb_float/enc_if.h"
  441. #include "amrwb_float/dec_if.h"
  442. /* Common code for fixed and float version*/
  443. typedef struct AMRWB_bitrates
  444. {
  445. int rate;
  446. int mode;
  447. } AMRWB_bitrates;
  448. static int getWBBitrateMode(int bitrate)
  449. {
  450. /* make the correspondance between bitrate and mode */
  451. AMRWB_bitrates rates[]={ {6600,0},
  452. {8850,1},
  453. {12650,2},
  454. {14250,3},
  455. {15850,4},
  456. {18250,5},
  457. {19850,6},
  458. {23050,7},
  459. {23850,8},
  460. };
  461. int i;
  462. for(i=0;i<9;i++)
  463. {
  464. if(rates[i].rate==bitrate)
  465. {
  466. return(rates[i].mode);
  467. }
  468. }
  469. /* no bitrate matching, return an error */
  470. return -1;
  471. }
  472. typedef struct AMRWBContext {
  473. int frameCount;
  474. void *state;
  475. int mode;
  476. Word16 allow_dtx;
  477. } AMRWBContext;
  478. static int amr_wb_encode_init(AVCodecContext * avctx)
  479. {
  480. AMRWBContext *s = avctx->priv_data;
  481. s->frameCount=0;
  482. if(avctx->sample_rate!=16000)
  483. {
  484. av_log(avctx, AV_LOG_ERROR, "Only 16000Hz sample rate supported\n");
  485. return -1;
  486. }
  487. if(avctx->channels!=1)
  488. {
  489. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  490. return -1;
  491. }
  492. if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
  493. {
  494. av_log(avctx, AV_LOG_ERROR, wb_bitrate_unsupported);
  495. return -1;
  496. }
  497. avctx->frame_size=320;
  498. avctx->coded_frame= avcodec_alloc_frame();
  499. s->state = E_IF_init();
  500. s->allow_dtx=0;
  501. return 0;
  502. }
  503. static int amr_wb_encode_close(AVCodecContext * avctx)
  504. {
  505. AMRWBContext *s = avctx->priv_data;
  506. E_IF_exit(s->state);
  507. av_freep(&avctx->coded_frame);
  508. s->frameCount++;
  509. return 0;
  510. }
  511. static int amr_wb_encode_frame(AVCodecContext *avctx,
  512. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  513. {
  514. AMRWBContext *s = avctx->priv_data;
  515. int size;
  516. if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
  517. {
  518. av_log(avctx, AV_LOG_ERROR, wb_bitrate_unsupported);
  519. return -1;
  520. }
  521. size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
  522. return size;
  523. }
  524. static int amr_wb_decode_init(AVCodecContext * avctx)
  525. {
  526. AMRWBContext *s = avctx->priv_data;
  527. s->frameCount=0;
  528. s->state = D_IF_init();
  529. amr_decode_fix_avctx(avctx);
  530. if(avctx->channels > 1)
  531. {
  532. av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
  533. return -1;
  534. }
  535. return 0;
  536. }
  537. extern const UWord8 block_size[];
  538. static int amr_wb_decode_frame(AVCodecContext * avctx,
  539. void *data, int *data_size,
  540. uint8_t * buf, int buf_size)
  541. {
  542. AMRWBContext *s = avctx->priv_data;
  543. uint8_t*amrData=buf;
  544. int mode;
  545. int packet_size;
  546. if(buf_size==0) {
  547. /* nothing to do */
  548. return 0;
  549. }
  550. mode = (amrData[0] >> 3) & 0x000F;
  551. packet_size = block_size[mode];
  552. if(packet_size > buf_size) {
  553. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size+1);
  554. return -1;
  555. }
  556. s->frameCount++;
  557. D_IF_decode( s->state, amrData, data, _good_frame);
  558. *data_size=320*2;
  559. return packet_size;
  560. }
  561. static int amr_wb_decode_close(AVCodecContext * avctx)
  562. {
  563. AMRWBContext *s = avctx->priv_data;
  564. D_IF_exit(s->state);
  565. return 0;
  566. }
  567. AVCodec amr_wb_decoder =
  568. {
  569. "amr_wb",
  570. CODEC_TYPE_AUDIO,
  571. CODEC_ID_AMR_WB,
  572. sizeof(AMRWBContext),
  573. amr_wb_decode_init,
  574. NULL,
  575. amr_wb_decode_close,
  576. amr_wb_decode_frame,
  577. };
  578. AVCodec amr_wb_encoder =
  579. {
  580. "amr_wb",
  581. CODEC_TYPE_AUDIO,
  582. CODEC_ID_AMR_WB,
  583. sizeof(AMRWBContext),
  584. amr_wb_encode_init,
  585. amr_wb_encode_frame,
  586. amr_wb_encode_close,
  587. NULL,
  588. };
  589. #endif //CONFIG_AMR_WB