You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

715 lines
18KB

  1. /*
  2. * AMR Audio decoder stub
  3. * Copyright (c) 2003 the ffmpeg project
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /** @file
  22. * Adaptive Multi-Rate (AMR) Audio decoder stub.
  23. *
  24. * This code implements both an AMR-NarrowBand (AMR-NB) and an AMR-WideBand
  25. * (AMR-WB) audio encoder/decoder through external reference code from
  26. * http://www.3gpp.org/. The license of the code from 3gpp is unclear so you
  27. * have to download the code separately. Two versions exist: one fixed-point
  28. * and one with floats. For some reason the float encoder is significantly faster
  29. * at least on a P4 1.5GHz (0.9s instead of 9.9s on a 30s audio clip at MR102).
  30. * Both float and fixed point are supported for AMR-NB, but only float for
  31. * AMR-WB.
  32. *
  33. * \section AMR-NB
  34. *
  35. * \subsection Float
  36. * The float version (default) can be downloaded from:
  37. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.104/26104-510.zip
  38. * Extract the source into \c "ffmpeg/libavcodec/amr_float".
  39. *
  40. * \subsection Fixed-point
  41. * The fixed-point (TS26.073) can be downloaded from:
  42. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.073/26073-510.zip.
  43. * Extract the source into \c "ffmpeg/libavcodec/amr".
  44. * To use the fixed version run \c "./configure" with \c "--enable-amr_nb-fixed".
  45. *
  46. * \subsection Specification
  47. * The specification for AMR-NB can be found in TS 26.071
  48. * (http://www.3gpp.org/ftp/Specs/html-info/26071.htm) and some other
  49. * info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm.
  50. *
  51. * \section AMR-WB
  52. * \subsection Float
  53. * The reference code can be downloaded from:
  54. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-510.zip
  55. * It should be extracted to \c "ffmpeg/libavcodec/amrwb_float". Enable it with
  56. * \c "--enable-amr_wb".
  57. *
  58. * \subsection Fixed-point
  59. * If someone wants to use the fixed point version it can be downloaded from:
  60. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.173/26173-571.zip.
  61. *
  62. * \subsection Specification
  63. * The specification for AMR-WB can be downloaded from:
  64. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.171/26171-500.zip.
  65. *
  66. */
  67. #include "avcodec.h"
  68. #ifdef CONFIG_AMR_NB_FIXED
  69. #define MMS_IO
  70. #include "amr/sp_dec.h"
  71. #include "amr/d_homing.h"
  72. #include "amr/typedef.h"
  73. #include "amr/sp_enc.h"
  74. #include "amr/sid_sync.h"
  75. #include "amr/e_homing.h"
  76. #else
  77. #include "amr_float/interf_dec.h"
  78. #include "amr_float/interf_enc.h"
  79. #endif
  80. /* Common code for fixed and float version*/
  81. typedef struct AMR_bitrates
  82. {
  83. int rate;
  84. enum Mode mode;
  85. } AMR_bitrates;
  86. /* Match desired bitrate */
  87. static int getBitrateMode(int bitrate)
  88. {
  89. /* make the correspondance between bitrate and mode */
  90. AMR_bitrates rates[]={ {4750,MR475},
  91. {5150,MR515},
  92. {5900,MR59},
  93. {6700,MR67},
  94. {7400,MR74},
  95. {7950,MR795},
  96. {10200,MR102},
  97. {12200,MR122},
  98. };
  99. int i;
  100. for(i=0;i<8;i++)
  101. {
  102. if(rates[i].rate==bitrate)
  103. {
  104. return(rates[i].mode);
  105. }
  106. }
  107. /* no bitrate matching, return an error */
  108. return -1;
  109. }
  110. static void amr_decode_fix_avctx(AVCodecContext * avctx)
  111. {
  112. const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
  113. if(avctx->sample_rate == 0)
  114. {
  115. avctx->sample_rate = 8000 * is_amr_wb;
  116. }
  117. if(avctx->channels == 0)
  118. {
  119. avctx->channels = 1;
  120. }
  121. avctx->frame_size = 160 * is_amr_wb;
  122. }
  123. #ifdef CONFIG_AMR_NB_FIXED
  124. /* fixed point version*/
  125. /* frame size in serial bitstream file (frame type + serial stream + flags) */
  126. #define SERIAL_FRAMESIZE (1+MAX_SERIAL_SIZE+5)
  127. typedef struct AMRContext {
  128. int frameCount;
  129. Speech_Decode_FrameState *speech_decoder_state;
  130. enum RXFrameType rx_type;
  131. enum Mode mode;
  132. Word16 reset_flag;
  133. Word16 reset_flag_old;
  134. int enc_bitrate;
  135. Speech_Encode_FrameState *enstate;
  136. sid_syncState *sidstate;
  137. enum TXFrameType tx_frametype;
  138. } AMRContext;
  139. static int amr_nb_decode_init(AVCodecContext * avctx)
  140. {
  141. AMRContext *s = avctx->priv_data;
  142. s->frameCount=0;
  143. s->speech_decoder_state=NULL;
  144. s->rx_type = (enum RXFrameType)0;
  145. s->mode= (enum Mode)0;
  146. s->reset_flag=0;
  147. s->reset_flag_old=1;
  148. if(Speech_Decode_Frame_init(&s->speech_decoder_state, "Decoder"))
  149. {
  150. av_log(avctx, AV_LOG_ERROR, "Speech_Decode_Frame_init error\n");
  151. return -1;
  152. }
  153. amr_decode_fix_avctx(avctx);
  154. if(avctx->channels > 1)
  155. {
  156. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  157. return -1;
  158. }
  159. return 0;
  160. }
  161. static int amr_nb_encode_init(AVCodecContext * avctx)
  162. {
  163. AMRContext *s = avctx->priv_data;
  164. s->frameCount=0;
  165. s->speech_decoder_state=NULL;
  166. s->rx_type = (enum RXFrameType)0;
  167. s->mode= (enum Mode)0;
  168. s->reset_flag=0;
  169. s->reset_flag_old=1;
  170. if(avctx->sample_rate!=8000)
  171. {
  172. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  173. return -1;
  174. }
  175. if(avctx->channels!=1)
  176. {
  177. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  178. return -1;
  179. }
  180. avctx->frame_size=160;
  181. avctx->coded_frame= avcodec_alloc_frame();
  182. if(Speech_Encode_Frame_init(&s->enstate, 0, "encoder") || sid_sync_init (&s->sidstate))
  183. {
  184. av_log(avctx, AV_LOG_ERROR, "Speech_Encode_Frame_init error\n");
  185. return -1;
  186. }
  187. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  188. {
  189. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  190. return -1;
  191. }
  192. return 0;
  193. }
  194. static int amr_nb_encode_close(AVCodecContext * avctx)
  195. {
  196. AMRContext *s = avctx->priv_data;
  197. Speech_Encode_Frame_exit(&s->enstate);
  198. sid_sync_exit (&s->sidstate);
  199. av_freep(&avctx->coded_frame);
  200. return 0;
  201. }
  202. static int amr_nb_decode_close(AVCodecContext * avctx)
  203. {
  204. AMRContext *s = avctx->priv_data;
  205. Speech_Decode_Frame_exit(&s->speech_decoder_state);
  206. return 0;
  207. }
  208. static int amr_nb_decode_frame(AVCodecContext * avctx,
  209. void *data, int *data_size,
  210. uint8_t * buf, int buf_size)
  211. {
  212. AMRContext *s = avctx->priv_data;
  213. uint8_t*amrData=buf;
  214. int offset=0;
  215. UWord8 toc, q, ft;
  216. Word16 serial[SERIAL_FRAMESIZE]; /* coded bits */
  217. Word16 *synth;
  218. UWord8 *packed_bits;
  219. static Word16 packed_size[16] = {12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0};
  220. int i;
  221. //printf("amr_decode_frame data_size=%i buf=0x%X buf_size=%d frameCount=%d!!\n",*data_size,buf,buf_size,s->frameCount);
  222. synth=data;
  223. // while(offset<buf_size)
  224. {
  225. toc=amrData[offset];
  226. /* read rest of the frame based on ToC byte */
  227. q = (toc >> 2) & 0x01;
  228. ft = (toc >> 3) & 0x0F;
  229. //printf("offset=%d, packet_size=%d amrData= 0x%X %X %X %X\n",offset,packed_size[ft],amrData[offset],amrData[offset+1],amrData[offset+2],amrData[offset+3]);
  230. offset++;
  231. packed_bits=amrData+offset;
  232. offset+=packed_size[ft];
  233. //Unsort and unpack bits
  234. s->rx_type = UnpackBits(q, ft, packed_bits, &s->mode, &serial[1]);
  235. //We have a new frame
  236. s->frameCount++;
  237. if (s->rx_type == RX_NO_DATA)
  238. {
  239. s->mode = s->speech_decoder_state->prev_mode;
  240. }
  241. else {
  242. s->speech_decoder_state->prev_mode = s->mode;
  243. }
  244. /* if homed: check if this frame is another homing frame */
  245. if (s->reset_flag_old == 1)
  246. {
  247. /* only check until end of first subframe */
  248. s->reset_flag = decoder_homing_frame_test_first(&serial[1], s->mode);
  249. }
  250. /* produce encoder homing frame if homed & input=decoder homing frame */
  251. if ((s->reset_flag != 0) && (s->reset_flag_old != 0))
  252. {
  253. for (i = 0; i < L_FRAME; i++)
  254. {
  255. synth[i] = EHF_MASK;
  256. }
  257. }
  258. else
  259. {
  260. /* decode frame */
  261. Speech_Decode_Frame(s->speech_decoder_state, s->mode, &serial[1], s->rx_type, synth);
  262. }
  263. //Each AMR-frame results in 160 16-bit samples
  264. *data_size+=160*2;
  265. synth+=160;
  266. /* if not homed: check whether current frame is a homing frame */
  267. if (s->reset_flag_old == 0)
  268. {
  269. /* check whole frame */
  270. s->reset_flag = decoder_homing_frame_test(&serial[1], s->mode);
  271. }
  272. /* reset decoder if current frame is a homing frame */
  273. if (s->reset_flag != 0)
  274. {
  275. Speech_Decode_Frame_reset(s->speech_decoder_state);
  276. }
  277. s->reset_flag_old = s->reset_flag;
  278. }
  279. return offset;
  280. }
  281. static int amr_nb_encode_frame(AVCodecContext *avctx,
  282. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  283. {
  284. short serial_data[250] = {0};
  285. AMRContext *s = avctx->priv_data;
  286. int written;
  287. s->reset_flag = encoder_homing_frame_test(data);
  288. Speech_Encode_Frame(s->enstate, s->enc_bitrate, data, &serial_data[1], &s->mode);
  289. /* add frame type and mode */
  290. sid_sync (s->sidstate, s->mode, &s->tx_frametype);
  291. written = PackBits(s->mode, s->enc_bitrate, s->tx_frametype, &serial_data[1], frame);
  292. if (s->reset_flag != 0)
  293. {
  294. Speech_Encode_Frame_reset(s->enstate);
  295. sid_sync_reset(s->sidstate);
  296. }
  297. return written;
  298. }
  299. #elif defined(CONFIG_AMR_NB) /* Float point version*/
  300. typedef struct AMRContext {
  301. int frameCount;
  302. void * decState;
  303. int *enstate;
  304. int enc_bitrate;
  305. } AMRContext;
  306. static int amr_nb_decode_init(AVCodecContext * avctx)
  307. {
  308. AMRContext *s = avctx->priv_data;
  309. s->frameCount=0;
  310. s->decState=Decoder_Interface_init();
  311. if(!s->decState)
  312. {
  313. av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n");
  314. return -1;
  315. }
  316. amr_decode_fix_avctx(avctx);
  317. if(avctx->channels > 1)
  318. {
  319. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  320. return -1;
  321. }
  322. return 0;
  323. }
  324. static int amr_nb_encode_init(AVCodecContext * avctx)
  325. {
  326. AMRContext *s = avctx->priv_data;
  327. s->frameCount=0;
  328. if(avctx->sample_rate!=8000)
  329. {
  330. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  331. return -1;
  332. }
  333. if(avctx->channels!=1)
  334. {
  335. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  336. return -1;
  337. }
  338. avctx->frame_size=160;
  339. avctx->coded_frame= avcodec_alloc_frame();
  340. s->enstate=Encoder_Interface_init(0);
  341. if(!s->enstate)
  342. {
  343. av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
  344. return -1;
  345. }
  346. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  347. {
  348. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  349. return -1;
  350. }
  351. return 0;
  352. }
  353. static int amr_nb_decode_close(AVCodecContext * avctx)
  354. {
  355. AMRContext *s = avctx->priv_data;
  356. Decoder_Interface_exit(s->decState);
  357. return 0;
  358. }
  359. static int amr_nb_encode_close(AVCodecContext * avctx)
  360. {
  361. AMRContext *s = avctx->priv_data;
  362. Encoder_Interface_exit(s->enstate);
  363. av_freep(&avctx->coded_frame);
  364. return 0;
  365. }
  366. static int amr_nb_decode_frame(AVCodecContext * avctx,
  367. void *data, int *data_size,
  368. uint8_t * buf, int buf_size)
  369. {
  370. AMRContext *s = avctx->priv_data;
  371. uint8_t*amrData=buf;
  372. static short block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
  373. enum Mode dec_mode;
  374. int packet_size;
  375. /* av_log(NULL,AV_LOG_DEBUG,"amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",buf,buf_size,s->frameCount); */
  376. dec_mode = (buf[0] >> 3) & 0x000F;
  377. packet_size = block_size[dec_mode]+1;
  378. if(packet_size > buf_size) {
  379. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size);
  380. return -1;
  381. }
  382. s->frameCount++;
  383. /* av_log(NULL,AV_LOG_DEBUG,"packet_size=%d amrData= 0x%X %X %X %X\n",packet_size,amrData[0],amrData[1],amrData[2],amrData[3]); */
  384. /* call decoder */
  385. Decoder_Interface_Decode(s->decState, amrData, data, 0);
  386. *data_size=160*2;
  387. return packet_size;
  388. }
  389. static int amr_nb_encode_frame(AVCodecContext *avctx,
  390. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  391. {
  392. AMRContext *s = avctx->priv_data;
  393. int written;
  394. if((s->enc_bitrate=getBitrateMode(avctx->bit_rate))<0)
  395. {
  396. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  397. return -1;
  398. }
  399. written = Encoder_Interface_Encode(s->enstate,
  400. s->enc_bitrate,
  401. data,
  402. frame,
  403. 0);
  404. /* av_log(NULL,AV_LOG_DEBUG,"amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",written, s->enc_bitrate, frame[0] ); */
  405. return written;
  406. }
  407. #endif
  408. #if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED)
  409. AVCodec amr_nb_decoder =
  410. {
  411. "amr_nb",
  412. CODEC_TYPE_AUDIO,
  413. CODEC_ID_AMR_NB,
  414. sizeof(AMRContext),
  415. amr_nb_decode_init,
  416. NULL,
  417. amr_nb_decode_close,
  418. amr_nb_decode_frame,
  419. };
  420. AVCodec amr_nb_encoder =
  421. {
  422. "amr_nb",
  423. CODEC_TYPE_AUDIO,
  424. CODEC_ID_AMR_NB,
  425. sizeof(AMRContext),
  426. amr_nb_encode_init,
  427. amr_nb_encode_frame,
  428. amr_nb_encode_close,
  429. NULL,
  430. };
  431. #endif
  432. /* -----------AMR wideband ------------*/
  433. #ifdef CONFIG_AMR_WB
  434. #ifdef _TYPEDEF_H
  435. //To avoid duplicate typedefs from typdef in amr-nb
  436. #define typedef_h
  437. #endif
  438. #include "amrwb_float/enc_if.h"
  439. #include "amrwb_float/dec_if.h"
  440. /* Common code for fixed and float version*/
  441. typedef struct AMRWB_bitrates
  442. {
  443. int rate;
  444. int mode;
  445. } AMRWB_bitrates;
  446. static int getWBBitrateMode(int bitrate)
  447. {
  448. /* make the correspondance between bitrate and mode */
  449. AMRWB_bitrates rates[]={ {6600,0},
  450. {8850,1},
  451. {12650,2},
  452. {14250,3},
  453. {15850,4},
  454. {18250,5},
  455. {19850,6},
  456. {23050,7},
  457. {23850,8},
  458. };
  459. int i;
  460. for(i=0;i<9;i++)
  461. {
  462. if(rates[i].rate==bitrate)
  463. {
  464. return(rates[i].mode);
  465. }
  466. }
  467. /* no bitrate matching, return an error */
  468. return -1;
  469. }
  470. typedef struct AMRWBContext {
  471. int frameCount;
  472. void *state;
  473. int mode;
  474. Word16 allow_dtx;
  475. } AMRWBContext;
  476. static int amr_wb_encode_init(AVCodecContext * avctx)
  477. {
  478. AMRWBContext *s = avctx->priv_data;
  479. s->frameCount=0;
  480. if(avctx->sample_rate!=16000)
  481. {
  482. av_log(avctx, AV_LOG_ERROR, "Only 16000Hz sample rate supported\n");
  483. return -1;
  484. }
  485. if(avctx->channels!=1)
  486. {
  487. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  488. return -1;
  489. }
  490. if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
  491. {
  492. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  493. return -1;
  494. }
  495. avctx->frame_size=320;
  496. avctx->coded_frame= avcodec_alloc_frame();
  497. s->state = E_IF_init();
  498. s->allow_dtx=0;
  499. return 0;
  500. }
  501. static int amr_wb_encode_close(AVCodecContext * avctx)
  502. {
  503. AMRWBContext *s = avctx->priv_data;
  504. E_IF_exit(s->state);
  505. av_freep(&avctx->coded_frame);
  506. s->frameCount++;
  507. return 0;
  508. }
  509. static int amr_wb_encode_frame(AVCodecContext *avctx,
  510. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  511. {
  512. AMRWBContext *s = avctx->priv_data;
  513. int size;
  514. if((s->mode=getWBBitrateMode(avctx->bit_rate))<0)
  515. {
  516. av_log(avctx, AV_LOG_ERROR, "bitrate not supported\n");
  517. return -1;
  518. }
  519. size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
  520. return size;
  521. }
  522. static int amr_wb_decode_init(AVCodecContext * avctx)
  523. {
  524. AMRWBContext *s = avctx->priv_data;
  525. s->frameCount=0;
  526. s->state = D_IF_init();
  527. amr_decode_fix_avctx(avctx);
  528. if(avctx->channels > 1)
  529. {
  530. av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
  531. return -1;
  532. }
  533. return 0;
  534. }
  535. extern const UWord8 block_size[];
  536. static int amr_wb_decode_frame(AVCodecContext * avctx,
  537. void *data, int *data_size,
  538. uint8_t * buf, int buf_size)
  539. {
  540. AMRWBContext *s = avctx->priv_data;
  541. uint8_t*amrData=buf;
  542. int mode;
  543. int packet_size;
  544. if(buf_size==0) {
  545. /* nothing to do */
  546. return 0;
  547. }
  548. mode = (amrData[0] >> 3) & 0x000F;
  549. packet_size = block_size[mode];
  550. if(packet_size > buf_size) {
  551. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size+1);
  552. return -1;
  553. }
  554. s->frameCount++;
  555. D_IF_decode( s->state, amrData, data, _good_frame);
  556. *data_size=320*2;
  557. return packet_size;
  558. }
  559. static int amr_wb_decode_close(AVCodecContext * avctx)
  560. {
  561. AMRWBContext *s = avctx->priv_data;
  562. D_IF_exit(s->state);
  563. return 0;
  564. }
  565. AVCodec amr_wb_decoder =
  566. {
  567. "amr_wb",
  568. CODEC_TYPE_AUDIO,
  569. CODEC_ID_AMR_WB,
  570. sizeof(AMRWBContext),
  571. amr_wb_decode_init,
  572. NULL,
  573. amr_wb_decode_close,
  574. amr_wb_decode_frame,
  575. };
  576. AVCodec amr_wb_encoder =
  577. {
  578. "amr_wb",
  579. CODEC_TYPE_AUDIO,
  580. CODEC_ID_AMR_WB,
  581. sizeof(AMRWBContext),
  582. amr_wb_encode_init,
  583. amr_wb_encode_frame,
  584. amr_wb_encode_close,
  585. NULL,
  586. };
  587. #endif //CONFIG_AMR_WB