You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

698 lines
18KB

  1. /*
  2. * AMR Audio decoder stub
  3. * Copyright (c) 2003 the ffmpeg project
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /** @file
  22. * Adaptive Multi-Rate (AMR) Audio decoder stub.
  23. *
  24. * This code implements both an AMR-NarrowBand (AMR-NB) and an AMR-WideBand
  25. * (AMR-WB) audio encoder/decoder through external reference code from
  26. * http://www.3gpp.org/. The license of the code from 3gpp is unclear so you
  27. * have to download the code separately. Two versions exist: one fixed-point
  28. * and one with floats. For some reason the float encoder is significantly faster,
  29. * at least on a P4 1.5GHz (0.9s instead of 9.9s on a 30s audio clip at MR102).
  30. * Both float and fixed point are supported for AMR-NB, but only float for
  31. * AMR-WB.
  32. *
  33. * \section AMR-NB
  34. *
  35. * \subsection Float
  36. * The float version (default) can be downloaded from:
  37. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.104/26104-510.zip
  38. * Extract the source into \c "ffmpeg/libavcodec/amr_float".
  39. *
  40. * \subsection Fixed-point
  41. * The fixed-point (TS26.073) can be downloaded from:
  42. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.073/26073-510.zip.
  43. * Extract the source into \c "ffmpeg/libavcodec/amr".
  44. * To use the fixed version run \c "./configure" with \c "--enable-amr_nb-fixed".
  45. *
  46. * \subsection Specification
  47. * The specification for AMR-NB can be found in TS 26.071
  48. * (http://www.3gpp.org/ftp/Specs/html-info/26071.htm) and some other
  49. * info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm.
  50. *
  51. * \section AMR-WB
  52. * \subsection Float
  53. * The reference code can be downloaded from:
  54. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-510.zip
  55. * It should be extracted to \c "ffmpeg/libavcodec/amrwb_float". Enable it with
  56. * \c "--enable-amr_wb".
  57. *
  58. * \subsection Fixed-point
  59. * If someone wants to use the fixed point version it can be downloaded from:
  60. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.173/26173-571.zip.
  61. *
  62. * \subsection Specification
  63. * The specification for AMR-WB can be downloaded from:
  64. * http://www.3gpp.org/ftp/Specs/archive/26_series/26.171/26171-500.zip.
  65. *
  66. */
  67. #include "avcodec.h"
  68. #ifdef CONFIG_AMR_NB_FIXED
  69. #define MMS_IO
  70. #include "amr/sp_dec.h"
  71. #include "amr/d_homing.h"
  72. #include "amr/typedef.h"
  73. #include "amr/sp_enc.h"
  74. #include "amr/sid_sync.h"
  75. #include "amr/e_homing.h"
  76. #else
  77. #include "amr_float/interf_dec.h"
  78. #include "amr_float/interf_enc.h"
  79. #endif
  80. /* Common code for fixed and float version*/
  81. typedef struct AMR_bitrates
  82. {
  83. int startrate;
  84. int stoprate;
  85. enum Mode mode;
  86. } AMR_bitrates;
  87. /* Match desired bitrate with closest one*/
  88. static enum Mode getBitrateMode(int bitrate)
  89. {
  90. /* Adjusted so that all bitrates can be used from commandline where
  91. only a multiple of 1000 can be specified*/
  92. AMR_bitrates rates[]={ {0,4999,MR475}, //4
  93. {5000,5899,MR515},//5
  94. {5900,6699,MR59},//6
  95. {6700,7000,MR67},//7
  96. {7001,7949,MR74},//8
  97. {7950,9999,MR795},//9
  98. {10000,11999,MR102},//10
  99. {12000,64000,MR122},//12
  100. };
  101. int i;
  102. for(i=0;i<8;i++)
  103. {
  104. if(rates[i].startrate<=bitrate && rates[i].stoprate>=bitrate)
  105. {
  106. return(rates[i].mode);
  107. }
  108. }
  109. /*Return highest possible*/
  110. return(MR122);
  111. }
  112. static void amr_decode_fix_avctx(AVCodecContext * avctx)
  113. {
  114. const int is_amr_wb = 1 + (avctx->codec_id == CODEC_ID_AMR_WB);
  115. if(avctx->sample_rate == 0)
  116. {
  117. avctx->sample_rate = 8000 * is_amr_wb;
  118. }
  119. if(avctx->channels == 0)
  120. {
  121. avctx->channels = 1;
  122. }
  123. avctx->frame_size = 160 * is_amr_wb;
  124. }
  125. #ifdef CONFIG_AMR_NB_FIXED
  126. /* fixed point version*/
  /* Size of one frame in the serial bitstream format:
   * frame type (1) + serial stream (MAX_SERIAL_SIZE) + flags (5). */
  #define SERIAL_FRAMESIZE (1 + MAX_SERIAL_SIZE + 5)
  129. typedef struct AMRContext {
  130. int frameCount;
  131. Speech_Decode_FrameState *speech_decoder_state;
  132. enum RXFrameType rx_type;
  133. enum Mode mode;
  134. Word16 reset_flag;
  135. Word16 reset_flag_old;
  136. enum Mode enc_bitrate;
  137. Speech_Encode_FrameState *enstate;
  138. sid_syncState *sidstate;
  139. enum TXFrameType tx_frametype;
  140. } AMRContext;
  141. static int amr_nb_decode_init(AVCodecContext * avctx)
  142. {
  143. AMRContext *s = avctx->priv_data;
  144. s->frameCount=0;
  145. s->speech_decoder_state=NULL;
  146. s->rx_type = (enum RXFrameType)0;
  147. s->mode= (enum Mode)0;
  148. s->reset_flag=0;
  149. s->reset_flag_old=1;
  150. if(Speech_Decode_Frame_init(&s->speech_decoder_state, "Decoder"))
  151. {
  152. av_log(avctx, AV_LOG_ERROR, "Speech_Decode_Frame_init error\n");
  153. return -1;
  154. }
  155. amr_decode_fix_avctx(avctx);
  156. if(avctx->channels > 1)
  157. {
  158. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  159. return -1;
  160. }
  161. return 0;
  162. }
  163. static int amr_nb_encode_init(AVCodecContext * avctx)
  164. {
  165. AMRContext *s = avctx->priv_data;
  166. s->frameCount=0;
  167. s->speech_decoder_state=NULL;
  168. s->rx_type = (enum RXFrameType)0;
  169. s->mode= (enum Mode)0;
  170. s->reset_flag=0;
  171. s->reset_flag_old=1;
  172. if(avctx->sample_rate!=8000)
  173. {
  174. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  175. return -1;
  176. }
  177. if(avctx->channels!=1)
  178. {
  179. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  180. return -1;
  181. }
  182. avctx->frame_size=160;
  183. avctx->coded_frame= avcodec_alloc_frame();
  184. if(Speech_Encode_Frame_init(&s->enstate, 0, "encoder") || sid_sync_init (&s->sidstate))
  185. {
  186. av_log(avctx, AV_LOG_ERROR, "Speech_Encode_Frame_init error\n");
  187. return -1;
  188. }
  189. s->enc_bitrate=getBitrateMode(avctx->bit_rate);
  190. return 0;
  191. }
  192. static int amr_nb_encode_close(AVCodecContext * avctx)
  193. {
  194. AMRContext *s = avctx->priv_data;
  195. Speech_Encode_Frame_exit(&s->enstate);
  196. sid_sync_exit (&s->sidstate);
  197. av_freep(&avctx->coded_frame);
  198. return 0;
  199. }
  200. static int amr_nb_decode_close(AVCodecContext * avctx)
  201. {
  202. AMRContext *s = avctx->priv_data;
  203. Speech_Decode_Frame_exit(&s->speech_decoder_state);
  204. return 0;
  205. }
  206. static int amr_nb_decode_frame(AVCodecContext * avctx,
  207. void *data, int *data_size,
  208. uint8_t * buf, int buf_size)
  209. {
  210. AMRContext *s = avctx->priv_data;
  211. uint8_t*amrData=buf;
  212. int offset=0;
  213. UWord8 toc, q, ft;
  214. Word16 serial[SERIAL_FRAMESIZE]; /* coded bits */
  215. Word16 *synth;
  216. UWord8 *packed_bits;
  217. static Word16 packed_size[16] = {12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0};
  218. int i;
  219. //printf("amr_decode_frame data_size=%i buf=0x%X buf_size=%d frameCount=%d!!\n",*data_size,buf,buf_size,s->frameCount);
  220. synth=data;
  221. // while(offset<buf_size)
  222. {
  223. toc=amrData[offset];
  224. /* read rest of the frame based on ToC byte */
  225. q = (toc >> 2) & 0x01;
  226. ft = (toc >> 3) & 0x0F;
  227. //printf("offset=%d, packet_size=%d amrData= 0x%X %X %X %X\n",offset,packed_size[ft],amrData[offset],amrData[offset+1],amrData[offset+2],amrData[offset+3]);
  228. offset++;
  229. packed_bits=amrData+offset;
  230. offset+=packed_size[ft];
  231. //Unsort and unpack bits
  232. s->rx_type = UnpackBits(q, ft, packed_bits, &s->mode, &serial[1]);
  233. //We have a new frame
  234. s->frameCount++;
  235. if (s->rx_type == RX_NO_DATA)
  236. {
  237. s->mode = s->speech_decoder_state->prev_mode;
  238. }
  239. else {
  240. s->speech_decoder_state->prev_mode = s->mode;
  241. }
  242. /* if homed: check if this frame is another homing frame */
  243. if (s->reset_flag_old == 1)
  244. {
  245. /* only check until end of first subframe */
  246. s->reset_flag = decoder_homing_frame_test_first(&serial[1], s->mode);
  247. }
  248. /* produce encoder homing frame if homed & input=decoder homing frame */
  249. if ((s->reset_flag != 0) && (s->reset_flag_old != 0))
  250. {
  251. for (i = 0; i < L_FRAME; i++)
  252. {
  253. synth[i] = EHF_MASK;
  254. }
  255. }
  256. else
  257. {
  258. /* decode frame */
  259. Speech_Decode_Frame(s->speech_decoder_state, s->mode, &serial[1], s->rx_type, synth);
  260. }
  261. //Each AMR-frame results in 160 16-bit samples
  262. *data_size+=160*2;
  263. synth+=160;
  264. /* if not homed: check whether current frame is a homing frame */
  265. if (s->reset_flag_old == 0)
  266. {
  267. /* check whole frame */
  268. s->reset_flag = decoder_homing_frame_test(&serial[1], s->mode);
  269. }
  270. /* reset decoder if current frame is a homing frame */
  271. if (s->reset_flag != 0)
  272. {
  273. Speech_Decode_Frame_reset(s->speech_decoder_state);
  274. }
  275. s->reset_flag_old = s->reset_flag;
  276. }
  277. return offset;
  278. }
  279. static int amr_nb_encode_frame(AVCodecContext *avctx,
  280. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  281. {
  282. short serial_data[250] = {0};
  283. AMRContext *s = avctx->priv_data;
  284. int written;
  285. s->reset_flag = encoder_homing_frame_test(data);
  286. Speech_Encode_Frame(s->enstate, s->enc_bitrate, data, &serial_data[1], &s->mode);
  287. /* add frame type and mode */
  288. sid_sync (s->sidstate, s->mode, &s->tx_frametype);
  289. written = PackBits(s->mode, s->enc_bitrate, s->tx_frametype, &serial_data[1], frame);
  290. if (s->reset_flag != 0)
  291. {
  292. Speech_Encode_Frame_reset(s->enstate);
  293. sid_sync_reset(s->sidstate);
  294. }
  295. return written;
  296. }
  297. #elif defined(CONFIG_AMR_NB) /* Float point version*/
  298. typedef struct AMRContext {
  299. int frameCount;
  300. void * decState;
  301. int *enstate;
  302. enum Mode enc_bitrate;
  303. } AMRContext;
  304. static int amr_nb_decode_init(AVCodecContext * avctx)
  305. {
  306. AMRContext *s = avctx->priv_data;
  307. s->frameCount=0;
  308. s->decState=Decoder_Interface_init();
  309. if(!s->decState)
  310. {
  311. av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n");
  312. return -1;
  313. }
  314. amr_decode_fix_avctx(avctx);
  315. if(avctx->channels > 1)
  316. {
  317. av_log(avctx, AV_LOG_ERROR, "amr_nb: multichannel decoding not supported\n");
  318. return -1;
  319. }
  320. return 0;
  321. }
  322. static int amr_nb_encode_init(AVCodecContext * avctx)
  323. {
  324. AMRContext *s = avctx->priv_data;
  325. s->frameCount=0;
  326. if(avctx->sample_rate!=8000)
  327. {
  328. av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
  329. return -1;
  330. }
  331. if(avctx->channels!=1)
  332. {
  333. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  334. return -1;
  335. }
  336. avctx->frame_size=160;
  337. avctx->coded_frame= avcodec_alloc_frame();
  338. s->enstate=Encoder_Interface_init(0);
  339. if(!s->enstate)
  340. {
  341. av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
  342. return -1;
  343. }
  344. s->enc_bitrate=getBitrateMode(avctx->bit_rate);
  345. return 0;
  346. }
  347. static int amr_nb_decode_close(AVCodecContext * avctx)
  348. {
  349. AMRContext *s = avctx->priv_data;
  350. Decoder_Interface_exit(s->decState);
  351. return 0;
  352. }
  353. static int amr_nb_encode_close(AVCodecContext * avctx)
  354. {
  355. AMRContext *s = avctx->priv_data;
  356. Encoder_Interface_exit(s->enstate);
  357. av_freep(&avctx->coded_frame);
  358. return 0;
  359. }
  360. static int amr_nb_decode_frame(AVCodecContext * avctx,
  361. void *data, int *data_size,
  362. uint8_t * buf, int buf_size)
  363. {
  364. AMRContext *s = (AMRContext*)avctx->priv_data;
  365. uint8_t*amrData=buf;
  366. static short block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
  367. enum Mode dec_mode;
  368. int packet_size;
  369. /* av_log(NULL,AV_LOG_DEBUG,"amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",buf,buf_size,s->frameCount); */
  370. dec_mode = (buf[0] >> 3) & 0x000F;
  371. packet_size = block_size[dec_mode]+1;
  372. if(packet_size > buf_size) {
  373. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size);
  374. return -1;
  375. }
  376. s->frameCount++;
  377. /* av_log(NULL,AV_LOG_DEBUG,"packet_size=%d amrData= 0x%X %X %X %X\n",packet_size,amrData[0],amrData[1],amrData[2],amrData[3]); */
  378. /* call decoder */
  379. Decoder_Interface_Decode(s->decState, amrData, data, 0);
  380. *data_size=160*2;
  381. return packet_size;
  382. }
  383. static int amr_nb_encode_frame(AVCodecContext *avctx,
  384. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  385. {
  386. AMRContext *s = (AMRContext*)avctx->priv_data;
  387. int written;
  388. s->enc_bitrate=getBitrateMode(avctx->bit_rate);
  389. written = Encoder_Interface_Encode(s->enstate,
  390. s->enc_bitrate,
  391. data,
  392. frame,
  393. 0);
  394. /* av_log(NULL,AV_LOG_DEBUG,"amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",written, s->enc_bitrate, frame[0] ); */
  395. return written;
  396. }
  397. #endif
  398. #if defined(CONFIG_AMR_NB) || defined(CONFIG_AMR_NB_FIXED)
  399. AVCodec amr_nb_decoder =
  400. {
  401. "amr_nb",
  402. CODEC_TYPE_AUDIO,
  403. CODEC_ID_AMR_NB,
  404. sizeof(AMRContext),
  405. amr_nb_decode_init,
  406. NULL,
  407. amr_nb_decode_close,
  408. amr_nb_decode_frame,
  409. };
  410. AVCodec amr_nb_encoder =
  411. {
  412. "amr_nb",
  413. CODEC_TYPE_AUDIO,
  414. CODEC_ID_AMR_NB,
  415. sizeof(AMRContext),
  416. amr_nb_encode_init,
  417. amr_nb_encode_frame,
  418. amr_nb_encode_close,
  419. NULL,
  420. };
  421. #endif
  422. /* -----------AMR wideband ------------*/
  423. #ifdef CONFIG_AMR_WB
  424. #ifdef _TYPEDEF_H
  425. //To avoid duplicate typedefs from typedef.h in amr-nb
  426. #define typedef_h
  427. #endif
  428. #include "amrwb_float/enc_if.h"
  429. #include "amrwb_float/dec_if.h"
  430. /* Common code for fixed and float version*/
  /**
   * One row of the AMR-WB bitrate lookup table: an inclusive bitrate range
   * together with the mode index selected for it.
   */
  typedef struct AMRWB_bitrates {
      int startrate;  /* lowest bitrate (inclusive) that maps to this row */
      int stoprate;   /* highest bitrate (inclusive) that maps to this row */
      int mode;       /* mode index returned for bitrates inside the range */
  } AMRWB_bitrates;

  /**
   * Map a requested bitrate onto an AMR-WB mode index (0..8).
   *
   * The ranges are deliberately wider than the nominal mode bitrates so that
   * every mode stays reachable from the command line, where only multiples
   * of 1000 can be specified. The trailing comments in the table repeat the
   * nominal rate noted for each mode.
   *
   * @param bitrate requested bitrate
   * @return the mode index whose inclusive range contains bitrate, or 8 when
   *         no range matches
   */
  static int getWBBitrateMode(int bitrate)
  {
      static const AMRWB_bitrates table[] = {
          {     0,  7999, 0 }, /* 6.6 */
          {  8000,  9999, 1 }, /* 8.85 */
          { 10000, 13000, 2 }, /* 12.65 */
          { 13001, 14999, 3 }, /* 14.25 */
          { 15000, 17000, 4 }, /* 15.85 */
          { 17001, 18000, 5 }, /* 18.25 */
          { 18001, 22000, 6 }, /* 19.85 */
          { 22001, 23000, 7 }, /* 23.05 */
          { 23001, 24000, 8 }, /* 23.85 */
      };
      const AMRWB_bitrates *const end = table + sizeof(table) / sizeof(table[0]);
      const AMRWB_bitrates *row;

      for (row = table; row != end; ++row) {
          if (row->startrate <= bitrate && bitrate <= row->stoprate)
              return row->mode;
      }

      /* nothing matched: fall back to the highest mode */
      return 8;
  }
  462. typedef struct AMRWBContext {
  463. int frameCount;
  464. void *state;
  465. int mode;
  466. Word16 allow_dtx;
  467. } AMRWBContext;
  468. static int amr_wb_encode_init(AVCodecContext * avctx)
  469. {
  470. AMRWBContext *s = (AMRWBContext*)avctx->priv_data;
  471. s->frameCount=0;
  472. if(avctx->sample_rate!=16000)
  473. {
  474. av_log(avctx, AV_LOG_ERROR, "Only 16000Hz sample rate supported\n");
  475. return -1;
  476. }
  477. if(avctx->channels!=1)
  478. {
  479. av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
  480. return -1;
  481. }
  482. avctx->frame_size=320;
  483. avctx->coded_frame= avcodec_alloc_frame();
  484. s->state = E_IF_init();
  485. s->mode=getWBBitrateMode(avctx->bit_rate);
  486. s->allow_dtx=0;
  487. return 0;
  488. }
  489. static int amr_wb_encode_close(AVCodecContext * avctx)
  490. {
  491. AMRWBContext *s = (AMRWBContext*) avctx->priv_data;
  492. E_IF_exit(s->state);
  493. av_freep(&avctx->coded_frame);
  494. s->frameCount++;
  495. return 0;
  496. }
  497. static int amr_wb_encode_frame(AVCodecContext *avctx,
  498. unsigned char *frame/*out*/, int buf_size, void *data/*in*/)
  499. {
  500. AMRWBContext *s;
  501. int size;
  502. s = (AMRWBContext*) avctx->priv_data;
  503. s->mode=getWBBitrateMode(avctx->bit_rate);
  504. size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx);
  505. return size;
  506. }
  507. static int amr_wb_decode_init(AVCodecContext * avctx)
  508. {
  509. AMRWBContext *s = (AMRWBContext *)avctx->priv_data;
  510. s->frameCount=0;
  511. s->state = D_IF_init();
  512. amr_decode_fix_avctx(avctx);
  513. if(avctx->channels > 1)
  514. {
  515. av_log(avctx, AV_LOG_ERROR, "amr_wb: multichannel decoding not supported\n");
  516. return -1;
  517. }
  518. return 0;
  519. }
  520. extern const UWord8 block_size[];
  521. static int amr_wb_decode_frame(AVCodecContext * avctx,
  522. void *data, int *data_size,
  523. uint8_t * buf, int buf_size)
  524. {
  525. AMRWBContext *s = (AMRWBContext*)avctx->priv_data;
  526. uint8_t*amrData=buf;
  527. int mode;
  528. int packet_size;
  529. if(buf_size==0) {
  530. /* nothing to do */
  531. return 0;
  532. }
  533. mode = (amrData[0] >> 3) & 0x000F;
  534. packet_size = block_size[mode];
  535. if(packet_size > buf_size) {
  536. av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size+1);
  537. return -1;
  538. }
  539. s->frameCount++;
  540. D_IF_decode( s->state, amrData, data, _good_frame);
  541. *data_size=320*2;
  542. return packet_size;
  543. }
  544. static int amr_wb_decode_close(AVCodecContext * avctx)
  545. {
  546. AMRWBContext *s = (AMRWBContext *)avctx->priv_data;
  547. D_IF_exit(s->state);
  548. return 0;
  549. }
  550. AVCodec amr_wb_decoder =
  551. {
  552. "amr_wb",
  553. CODEC_TYPE_AUDIO,
  554. CODEC_ID_AMR_WB,
  555. sizeof(AMRWBContext),
  556. amr_wb_decode_init,
  557. NULL,
  558. amr_wb_decode_close,
  559. amr_wb_decode_frame,
  560. };
  561. AVCodec amr_wb_encoder =
  562. {
  563. "amr_wb",
  564. CODEC_TYPE_AUDIO,
  565. CODEC_ID_AMR_WB,
  566. sizeof(AMRWBContext),
  567. amr_wb_encode_init,
  568. amr_wb_encode_frame,
  569. amr_wb_encode_close,
  570. NULL,
  571. };
  572. #endif //CONFIG_AMR_WB