You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

716 lines
18KB

  1. /*
  2. * AMR Audio decoder stub
  3. * Copyright (c) 2003 the ffmpeg project
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /** @file
  22. * Adaptive Multi-Rate (AMR) Audio decoder stub.
  23. *
  24. * This code implements both an AMR-NarrowBand (AMR-NB) and an AMR-WideBand
  25. * (AMR-WB) audio encoder/decoder through external reference code from
  26. * http://www.3gpp.org/. The license of the code from 3gpp is unclear so you
  * have to download the code separately. Two versions exist: one fixed-point
  * and one with floats. For some reason the float encoder is significantly faster,
  * at least on a P4 1.5GHz (0.9s instead of 9.9s on a 30s audio clip at MR102).
  30. * Both float and fixed point are supported for AMR-NB, but only float for
  31. * AMR-WB.
  32. *
  33. * \section AMR-NB
  34. *
  35. * \subsection Float
  36. * The float version (default) can be downloaded from:
  37. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.104/26104-610.zip
  38. * Extract the source into \c "ffmpeg/libavcodec/amr_float".
  39. *
  40. * \subsection Fixed-point
  41. * The fixed-point (TS26.073) can be downloaded from:
  42. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.073/26073-510.zip.
  43. * Extract the source into \c "ffmpeg/libavcodec/amr".
  44. * To use the fixed version run \c "./configure" with \c "--enable-amr_nb-fixed".
  45. *
  46. * \subsection Specification
  47. * The specification for AMR-NB can be found in TS 26.071
  48. * (http://www.3gpp.org/ftp/Specs/html-info/26071.htm) and some other
  49. * info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm.
  50. *
  51. * \section AMR-WB
  52. * \subsection Float
  53. * The reference code can be downloaded from:
  54. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-600.zip
  55. * It should be extracted to \c "ffmpeg/libavcodec/amrwb_float". Enable it with
  56. * \c "--enable-amr_wb".
  57. *
  58. * \subsection Fixed-point
  59. * If someone wants to use the fixed point version it can be downloaded from:
  60. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.173/26173-571.zip.
  61. *
  62. * \subsection Specification
  63. * The specification for AMR-WB can be found in TS 26.171
  64. * (http://www.3gpp.org/ftp/Specs/html-info/26171.htm) and some other
  65. * info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm.
  66. *
  67. */
  68. #include "avcodec.h"
  69. #ifdef CONFIG_AMR_NB_FIXED
  70. #define MMS_IO
  71. #include "amr/sp_dec.h"
  72. #include "amr/d_homing.h"
  73. #include "amr/typedef.h"
  74. #include "amr/sp_enc.h"
  75. #include "amr/sid_sync.h"
  76. #include "amr/e_homing.h"
  77. #else
  78. #include "amr_float/interf_dec.h"
  79. #include "amr_float/interf_enc.h"
  80. #endif
  81. /* Common code for fixed and float version*/
  82. typedef struct AMR_bitrates
  83. {
  84. int rate;
  85. enum Mode mode;
  86. } AMR_bitrates;
  87. /* Match desired bitrate */
  88. static int getBitrateMode(int bitrate)
  89. {
  90. /* make the correspondance between bitrate and mode */
  91. AMR_bitrates rates[]={ {4750,MR475},
  92. {5150,MR515},
  93. {5900,MR59},
  94. {6700,MR67},
  95. {7400,MR74},
  96. {7950,MR795},
  97. {10200,MR102},
  98. {12200,MR122},
  99. };
  100. int i;
  101. for(i=0;i<8;i++)
  102. {
  103. if(rates[i].rate==bitrate)
  104. {
  105. return(rates[i].mode);
  106. }
  107. }
  108. /* no bitrate matching, return an error */
  109. return -1;
  110. }
  111. static void amr_decode_fix_avctx(AVCodecContext * avctx)
  112. {
  113. const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
  114. if(avctx->sample_rate == 0)
  115. {
  116. avctx->sample_rate = 8000 * is_amr_wb;
  117. }
  118. if(avctx->channels == 0)
  119. {
  120. avctx->channels = 1;
  121. }
  122. avctx->frame_size = 160 * is_amr_wb;
  123. }
  124. #ifdef CONFIG_AMR_NB_FIXED
  125. /* fixed point version*/
  126. /* frame size in serial bitstream file (frame type + serial stream + flags) */
  127. #define SERIAL_FRAMESIZE (1+MAX_SERIAL_SIZE+5)
  128. typedef struct AMRContext {
  129. int frameCount;
  130. Speech_Decode_FrameState *speech_decoder_state;
  131. enum RXFrameType rx_type;
  132. enum Mode mode;
  133. Word16 reset_flag;
  134. Word16 reset_flag_old;
  135. int enc_bitrate;
  136. Speech_Encode_FrameState *enstate;
  137. sid_syncState *sidstate;
  138. enum TXFrameType tx_frametype;
  139. } AMRContext;
  140. static int amr_nb_decode_init(AVCodecContext * avctx)
  141. {
  142. AMRContext *s = avctx->priv_data;
  143. s->frameCount=0;
  144. s->speech_decoder_state=NULL;
  145. s->rx_type = (enum RXFrameType)0;
  146. s->mode= (enum Mode)0;
  147. s->reset_flag=0;
  148. s->reset_flag_old=1;
  149. if(Speech_Decode_Frame_init(&s->speech_decoder_state, "Decoder"))
  150. {
  151. av_log(avctx, AV_LOG_ERROR, "Speech_Decode_Frame_init error\n");
  152. return -1;
  153. }
  154. amr_decode_fix_avctx(avctx);
  155. if(avctx->channels > 1)
  156. {
  157. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  158. return -1;
  159. }
  160. return 0;
  161. }
  162. static int amr_nb_encode_init(AVCodecContext * avctx)
  163. {
  164. AMRContext *s = avctx->priv_data;
  165. s->frameCount=0;
  166. s->speech_decoder_state=NULL;
  167. s->rx_type = (enum RXFrameType)0;
  168. s->mode= (enum Mode)0;
  169. s->reset_flag=0;
  170. s->reset_flag_old=1;
  171. if(avctx->sample_rate!=8000)
  172. {
  173. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  174. return -1;
  175. }
  176. if(avctx->channels!=1)
  177. {
  178. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  179. return -1;
  180. }
  181. avctx->frame_size=160;
  182. avctx->coded_frame= avcodec_alloc_frame();
  183. if(Speech_Encode_Frame_init(&s->enstate, 0, "encoder") || sid_sync_init (&s->sidstate))
  184. {
  185. av_log(avctx, AV_LOG_ERROR, "Speech_Encode_Frame_init error\n");
  186. return -1;
  187. }
  188. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  189. {
  190. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  191. return -1;
  192. }
  193. return 0;
  194. }
  195. static int amr_nb_encode_close(AVCodecContext * avctx)
  196. {
  197. AMRContext *s = avctx->priv_data;
  198. Speech_Encode_Frame_exit(&s->enstate);
  199. sid_sync_exit (&s->sidstate);
  200. av_freep(&avctx->coded_frame);
  201. return 0;
  202. }
  203. static int amr_nb_decode_close(AVCodecContext * avctx)
  204. {
  205. AMRContext *s = avctx->priv_data;
  206. Speech_Decode_Frame_exit(&s->speech_decoder_state);
  207. return 0;
  208. }
  209. static int amr_nb_decode_frame(AVCodecContext * avctx,
  210. void *data, int *data_size,
  211. uint8_t * buf, int buf_size)
  212. {
  213. AMRContext *s = avctx->priv_data;
  214. uint8_t*amrData=buf;
  215. int offset=0;
  216. UWord8 toc, q, ft;
  217. Word16 serial[SERIAL_FRAMESIZE]; /* coded bits */
  218. Word16 *synth;
  219. UWord8 *packed_bits;
  220. static Word16 packed_size[16] = {12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0};
  221. int i;
  222. //printf("amr_decode_frame data_size=%i buf=0x%X buf_size=%d frameCount=%d!!\n",*data_size,buf,buf_size,s->frameCount);
  223. synth=data;
  224. // while(offset<buf_size)
  225. {
  226. toc=amrData[offset];
  227. /* read rest of the frame based on ToC byte */
  228. q = (toc >> 2) & 0x01;
  229. ft = (toc >> 3) & 0x0F;
  230. //printf("offset=%d, packet_size=%d amrData= 0x%X %X %X %X\n",offset,packed_size[ft],amrData[offset],amrData[offset+1],amrData[offset+2],amrData[offset+3]);
  231. offset++;
  232. packed_bits=amrData+offset;
  233. offset+=packed_size[ft];
  234. //Unsort and unpack bits
  235. s->rx_type = UnpackBits(q, ft, packed_bits, &s->mode, &serial[1]);
  236. //We have a new frame
  237. s->frameCount++;
  238. if (s->rx_type == RX_NO_DATA)
  239. {
  240. s->mode = s->speech_decoder_state->prev_mode;
  241. }
  242. else {
  243. s->speech_decoder_state->prev_mode = s->mode;
  244. }
  245. /* if homed: check if this frame is another homing frame */
  246. if (s->reset_flag_old == 1)
  247. {
  248. /* only check until end of first subframe */
  249. s->reset_flag = decoder_homing_frame_test_first(&serial[1], s->mode);
  250. }
  251. /* produce encoder homing frame if homed & input=decoder homing frame */
  252. if ((s->reset_flag != 0) && (s->reset_flag_old != 0))
  253. {
  254. for (i = 0; i < L_FRAME; i++)
  255. {
  256. synth[i] = EHF_MASK;
  257. }
  258. }
  259. else
  260. {
  261. /* decode frame */
  262. Speech_Decode_Frame(s->speech_decoder_state, s->mode, &serial[1], s->rx_type, synth);
  263. }
  264. //Each AMR-frame results in 160 16-bit samples
  265. *data_size+=160*2;
  266. synth+=160;
  267. /* if not homed: check whether current frame is a homing frame */
  268. if (s->reset_flag_old == 0)
  269. {
  270. /* check whole frame */
  271. s->reset_flag = decoder_homing_frame_test(&serial[1], s->mode);
  272. }
  273. /* reset decoder if current frame is a homing frame */
  274. if (s->reset_flag != 0)
  275. {
  276. Speech_Decode_Frame_reset(s->speech_decoder_state);
  277. }
  278. s->reset_flag_old = s->reset_flag;
  279. }
  280. return offset;
  281. }
  282. static int amr_nb_encode_frame(AVCodecContext *avctx,
  283. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  284. {
  285. short serial_data[250] = {0};
  286. AMRContext *s = avctx->priv_data;
  287. int written;
  288. s->reset_flag = encoder_homing_frame_test(data);
  289. Speech_Encode_Frame(s->enstate, s->enc_bitrate, data, &serial_data[1], &s->mode);
  290. /* add frame type and mode */
  291. sid_sync (s->sidstate, s->mode, &s->tx_frametype);
  292. written = PackBits(s->mode, s->enc_bitrate, s->tx_frametype, &serial_data[1], frame);
  293. if (s->reset_flag != 0)
  294. {
  295. Speech_Encode_Frame_reset(s->enstate);
  296. sid_sync_reset(s->sidstate);
  297. }
  298. return written;
  299. }
  300. #elif defined(CONFIG_AMR_NB) /* Float point version*/
  /** Per-instance state shared by the float AMR-NB decoder and encoder. */
  typedef struct AMRContext {
      int   frameCount;   /* bumped once per frame in amr_nb_decode_frame() */
      void *decState;     /* handle returned by Decoder_Interface_init() */
      int  *enstate;      /* handle returned by Encoder_Interface_init() */
      int   enc_bitrate;  /* value returned by getBitrateMode() */
  } AMRContext;
  307. static int amr_nb_decode_init(AVCodecContext * avctx)
  308. {
  309. AMRContext *s = avctx->priv_data;
  310. s->frameCount=0;
  311. s->decState=Decoder_Interface_init();
  312. if(!s->decState)
  313. {
  314. av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n");
  315. return -1;
  316. }
  317. amr_decode_fix_avctx(avctx);
  318. if(avctx->channels > 1)
  319. {
  320. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  321. return -1;
  322. }
  323. return 0;
  324. }
  325. static int amr_nb_encode_init(AVCodecContext * avctx)
  326. {
  327. AMRContext *s = avctx->priv_data;
  328. s->frameCount=0;
  329. if(avctx->sample_rate!=8000)
  330. {
  331. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  332. return -1;
  333. }
  334. if(avctx->channels!=1)
  335. {
  336. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  337. return -1;
  338. }
  339. avctx->frame_size=160;
  340. avctx->coded_frame= avcodec_alloc_frame();
  341. s->enstate=Encoder_Interface_init(0);
  342. if(!s->enstate)
  343. {
  344. av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
  345. return -1;
  346. }
  347. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  348. {
  349. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  350. return -1;
  351. }
  352. return 0;
  353. }
  354. static int amr_nb_decode_close(AVCodecContext * avctx)
  355. {
  356. AMRContext *s = avctx->priv_data;
  357. Decoder_Interface_exit(s->decState);
  358. return 0;
  359. }
  360. static int amr_nb_encode_close(AVCodecContext * avctx)
  361. {
  362. AMRContext *s = avctx->priv_data;
  363. Encoder_Interface_exit(s->enstate);
  364. av_freep(&avctx->coded_frame);
  365. return 0;
  366. }
  367. static int amr_nb_decode_frame(AVCodecContext * avctx,
  368. void *data, int *data_size,
  369. uint8_t * buf, int buf_size)
  370. {
  371. AMRContext *s = avctx->priv_data;
  372. uint8_t*amrData=buf;
  373. static short block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
  374. enum Mode dec_mode;
  375. int packet_size;
  376. /* av_log(NULL,AV_LOG_DEBUG,"amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",buf,buf_size,s->frameCount); */
  377. dec_mode = (buf[0] >> 3) & 0x000F;
  378. packet_size = block_size[dec_mode]+1;
  379. if(packet_size > buf_size) {
  380. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size);
  381. return -1;
  382. }
  383. s->frameCount++;
  384. /* av_log(NULL,AV_LOG_DEBUG,"packet_size=%d amrData= 0x%X %X %X %X\n",packet_size,amrData[0],amrData[1],amrData[2],amrData[3]); */
  385. /* call decoder */
  386. Decoder_Interface_Decode(s->decState, amrData, data, 0);
  387. *data_size=160*2;
  388. return packet_size;
  389. }
  390. static int amr_nb_encode_frame(AVCodecContext *avctx,
  391. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  392. {
  393. AMRContext *s = avctx->priv_data;
  394. int written;
  395. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  396. {
  397. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  398. return -1;
  399. }
  400. written = Encoder_Interface_Encode(s->enstate,
  401. s->enc_bitrate,
  402. data,
  403. frame,
  404. 0);
  405. /* av_log(NULL,AV_LOG_DEBUG,"amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",written, s->enc_bitrate, frame[0] ); */
  406. return written;
  407. }
  408. #endif
  409. #if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED)
  410. AVCodec amr_nb_decoder =
  411. {
  412. "amr_nb",
  413. CODEC_TYPE_AUDIO,
  414. CODEC_ID_AMR_NB,
  415. sizeof(AMRContext),
  416. amr_nb_decode_init,
  417. NULL,
  418. amr_nb_decode_close,
  419. amr_nb_decode_frame,
  420. };
  421. AVCodec amr_nb_encoder =
  422. {
  423. "amr_nb",
  424. CODEC_TYPE_AUDIO,
  425. CODEC_ID_AMR_NB,
  426. sizeof(AMRContext),
  427. amr_nb_encode_init,
  428. amr_nb_encode_frame,
  429. amr_nb_encode_close,
  430. NULL,
  431. };
  432. #endif
  433. /* -----------AMR wideband ------------*/
  434. #ifdef CONFIG_AMR_WB
  435. #ifdef _TYPEDEF_H
  436. //To avoid duplicate typedefs from typdef in amr-nb
  437. #define typedef_h
  438. #endif
  439. #include "amrwb_float/enc_if.h"
  440. #include "amrwb_float/dec_if.h"
  441. /* Common code for fixed and float version*/
  /** One row of the AMR-WB lookup used by getWBBitrateMode(): a rate and its mode index. */
  typedef struct AMRWB_bitrates
  {
      int rate;
      int mode;
  } AMRWB_bitrates;

  /**
   * Match a desired bitrate to an AMR-WB mode index.
   *
   * @param bitrate requested bit rate; it must equal one of the table rates
   *                exactly, no rounding to the nearest rate is performed
   * @return the mode index (0..8) paired with that rate, or -1 if none matches
   */
  static int getWBBitrateMode(int bitrate)
  {
      /* correspondence between bitrate and mode; read-only, so built once */
      static const AMRWB_bitrates rates[] = {
          { 6600, 0},
          { 8850, 1},
          {12650, 2},
          {14250, 3},
          {15850, 4},
          {18250, 5},
          {19850, 6},
          {23050, 7},
          {23850, 8},
      };
      /* derive the bound from the table so the two cannot drift apart */
      const int count = (int)(sizeof(rates) / sizeof(rates[0]));
      int i;

      for (i = 0; i < count; i++) {
          if (rates[i].rate == bitrate)
              return rates[i].mode;
      }
      /* no bitrate matching, return an error */
      return -1;
  }
  471. typedef struct AMRWBContext {
  472. int frameCount;
  473. void *state;
  474. int mode;
  475. Word16 allow_dtx;
  476. } AMRWBContext;
  477. static int amr_wb_encode_init(AVCodecContext * avctx)
  478. {
  479. AMRWBContext *s = avctx->priv_data;
  480. s->frameCount=0;
  481. if(avctx->sample_rate!=16000)
  482. {
  483. av_log(avctx, AV_LOG_ERROR, "Only 16000Hz sample rate supported\n");
  484. return -1;
  485. }
  486. if(avctx->channels!=1)
  487. {
  488. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  489. return -1;
  490. }
  491. if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
  492. {
  493. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  494. return -1;
  495. }
  496. avctx->frame_size=320;
  497. avctx->coded_frame= avcodec_alloc_frame();
  498. s->state = E_IF_init();
  499. s->allow_dtx=0;
  500. return 0;
  501. }
  502. static int amr_wb_encode_close(AVCodecContext * avctx)
  503. {
  504. AMRWBContext *s = avctx->priv_data;
  505. E_IF_exit(s->state);
  506. av_freep(&avctx->coded_frame);
  507. s->frameCount++;
  508. return 0;
  509. }
  510. static int amr_wb_encode_frame(AVCodecContext *avctx,
  511. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  512. {
  513. AMRWBContext *s = avctx->priv_data;
  514. int size;
  515. if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
  516. {
  517. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  518. return -1;
  519. }
  520. size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
  521. return size;
  522. }
  523. static int amr_wb_decode_init(AVCodecContext * avctx)
  524. {
  525. AMRWBContext *s = avctx->priv_data;
  526. s->frameCount=0;
  527. s->state = D_IF_init();
  528. amr_decode_fix_avctx(avctx);
  529. if(avctx->channels > 1)
  530. {
  531. av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
  532. return -1;
  533. }
  534. return 0;
  535. }
  536. extern const UWord8 block_size[];
  537. static int amr_wb_decode_frame(AVCodecContext * avctx,
  538. void *data, int *data_size,
  539. uint8_t * buf, int buf_size)
  540. {
  541. AMRWBContext *s = avctx->priv_data;
  542. uint8_t*amrData=buf;
  543. int mode;
  544. int packet_size;
  545. if(buf_size==0) {
  546. /* nothing to do */
  547. return 0;
  548. }
  549. mode = (amrData[0] >> 3) & 0x000F;
  550. packet_size = block_size[mode];
  551. if(packet_size > buf_size) {
  552. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size+1);
  553. return -1;
  554. }
  555. s->frameCount++;
  556. D_IF_decode( s->state, amrData, data, _good_frame);
  557. *data_size=320*2;
  558. return packet_size;
  559. }
  560. static int amr_wb_decode_close(AVCodecContext * avctx)
  561. {
  562. AMRWBContext *s = avctx->priv_data;
  563. D_IF_exit(s->state);
  564. return 0;
  565. }
  566. AVCodec amr_wb_decoder =
  567. {
  568. "amr_wb",
  569. CODEC_TYPE_AUDIO,
  570. CODEC_ID_AMR_WB,
  571. sizeof(AMRWBContext),
  572. amr_wb_decode_init,
  573. NULL,
  574. amr_wb_decode_close,
  575. amr_wb_decode_frame,
  576. };
  577. AVCodec amr_wb_encoder =
  578. {
  579. "amr_wb",
  580. CODEC_TYPE_AUDIO,
  581. CODEC_ID_AMR_WB,
  582. sizeof(AMRWBContext),
  583. amr_wb_encode_init,
  584. amr_wb_encode_frame,
  585. amr_wb_encode_close,
  586. NULL,
  587. };
  588. #endif //CONFIG_AMR_WB