You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

561 lines
19KB

  1. /*
  2. * copyright (c) 2002 Mark Hills <mark@pogo.org.uk>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * Vorbis encoding support via libvorbisenc.
  23. * @author Mark Hills <mark@pogo.org.uk>
  24. */
  25. #include <vorbis/vorbisenc.h>
  26. #include "libavutil/fifo.h"
  27. #include "libavutil/opt.h"
  28. #include "avcodec.h"
  29. #include "audio_frame_queue.h"
  30. #include "bytestream.h"
  31. #include "internal.h"
  32. #include "vorbis.h"
  33. #include "vorbis_parser.h"
  34. #undef NDEBUG
  35. #include <assert.h>
/* Number of samples the user should send in each call.
 * This value is used because it is the lowest common denominator (LCD) of
 * all possible frame sizes, so an output packet will always start at the
 * same point as one of the input packets.
 */
#define OGGVORBIS_FRAME_SIZE 64
/* Size passed to av_fifo_alloc() for the output packet FIFO (pkt_fifo). */
#define BUFFER_SIZE (1024 * 64)
/**
 * Private codec context. Both the libvorbis encoder and decoder AVCodec
 * entries in this file use sizeof(OggVorbisContext) as priv_data_size.
 */
typedef struct OggVorbisContext {
    AVClass *av_class;                  /**< class for AVOptions */
    AVFrame frame;                      /**< frame passed to get_buffer() and returned by the decoder */
    vorbis_info vi;                     /**< vorbis_info used during init */
    vorbis_dsp_state vd;                /**< DSP state used for analysis */
    vorbis_block vb;                    /**< vorbis_block used for analysis */
    AVFifoBuffer *pkt_fifo;             /**< output packet buffer */
    int eof;                            /**< end-of-file flag */
    int dsp_initialized;                /**< vd has been initialized */
    vorbis_comment vc;                  /**< VorbisComment info */
    ogg_packet op;                      /**< ogg packet */
    double iblock;                      /**< impulse block bias option */
    VorbisParseContext vp;              /**< parse context to get durations */
    AudioFrameQueue afq;                /**< frame queue for timestamps */
} OggVorbisContext;
  58. static const AVOption options[] = {
  59. { "iblock", "Sets the impulse block bias", offsetof(OggVorbisContext, iblock), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, -15, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
  60. { NULL }
  61. };
  62. static const AVCodecDefault defaults[] = {
  63. { "b", "0" },
  64. { NULL },
  65. };
  66. static const AVClass class = { "libvorbis", av_default_item_name, options, LIBAVUTIL_VERSION_INT };
  67. static int vorbis_error_to_averror(int ov_err)
  68. {
  69. switch (ov_err) {
  70. case OV_EFAULT: return AVERROR_BUG;
  71. case OV_EINVAL: return AVERROR(EINVAL);
  72. case OV_EIMPL: return AVERROR(EINVAL);
  73. default: return AVERROR_UNKNOWN;
  74. }
  75. }
  76. static av_cold int oggvorbis_init_encoder(vorbis_info *vi,
  77. AVCodecContext *avctx)
  78. {
  79. OggVorbisContext *s = avctx->priv_data;
  80. double cfreq;
  81. int ret;
  82. if (avctx->flags & CODEC_FLAG_QSCALE || !avctx->bit_rate) {
  83. /* variable bitrate
  84. * NOTE: we use the oggenc range of -1 to 10 for global_quality for
  85. * user convenience, but libvorbis uses -0.1 to 1.0.
  86. */
  87. float q = avctx->global_quality / (float)FF_QP2LAMBDA;
  88. /* default to 3 if the user did not set quality or bitrate */
  89. if (!(avctx->flags & CODEC_FLAG_QSCALE))
  90. q = 3.0;
  91. if ((ret = vorbis_encode_setup_vbr(vi, avctx->channels,
  92. avctx->sample_rate,
  93. q / 10.0)))
  94. goto error;
  95. } else {
  96. int minrate = avctx->rc_min_rate > 0 ? avctx->rc_min_rate : -1;
  97. int maxrate = avctx->rc_max_rate > 0 ? avctx->rc_max_rate : -1;
  98. /* average bitrate */
  99. if ((ret = vorbis_encode_setup_managed(vi, avctx->channels,
  100. avctx->sample_rate, maxrate,
  101. avctx->bit_rate, minrate)))
  102. goto error;
  103. /* variable bitrate by estimate, disable slow rate management */
  104. if (minrate == -1 && maxrate == -1)
  105. if ((ret = vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE2_SET, NULL)))
  106. goto error; /* should not happen */
  107. }
  108. /* cutoff frequency */
  109. if (avctx->cutoff > 0) {
  110. cfreq = avctx->cutoff / 1000.0;
  111. if ((ret = vorbis_encode_ctl(vi, OV_ECTL_LOWPASS_SET, &cfreq)))
  112. goto error; /* should not happen */
  113. }
  114. /* impulse block bias */
  115. if (s->iblock) {
  116. if ((ret = vorbis_encode_ctl(vi, OV_ECTL_IBLOCK_SET, &s->iblock)))
  117. goto error;
  118. }
  119. if (avctx->channels == 3 &&
  120. avctx->channel_layout != (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER) ||
  121. avctx->channels == 4 &&
  122. avctx->channel_layout != AV_CH_LAYOUT_2_2 &&
  123. avctx->channel_layout != AV_CH_LAYOUT_QUAD ||
  124. avctx->channels == 5 &&
  125. avctx->channel_layout != AV_CH_LAYOUT_5POINT0 &&
  126. avctx->channel_layout != AV_CH_LAYOUT_5POINT0_BACK ||
  127. avctx->channels == 6 &&
  128. avctx->channel_layout != AV_CH_LAYOUT_5POINT1 &&
  129. avctx->channel_layout != AV_CH_LAYOUT_5POINT1_BACK ||
  130. avctx->channels == 7 &&
  131. avctx->channel_layout != (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_CENTER) ||
  132. avctx->channels == 8 &&
  133. avctx->channel_layout != AV_CH_LAYOUT_7POINT1) {
  134. if (avctx->channel_layout) {
  135. char name[32];
  136. av_get_channel_layout_string(name, sizeof(name), avctx->channels,
  137. avctx->channel_layout);
  138. av_log(avctx, AV_LOG_ERROR, "%s not supported by Vorbis: "
  139. "output stream will have incorrect "
  140. "channel layout.\n", name);
  141. } else {
  142. av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The encoder "
  143. "will use Vorbis channel layout for "
  144. "%d channels.\n", avctx->channels);
  145. }
  146. }
  147. if ((ret = vorbis_encode_setup_init(vi)))
  148. goto error;
  149. return 0;
  150. error:
  151. return vorbis_error_to_averror(ret);
  152. }
  153. /* How many bytes are needed for a buffer of length 'l' */
  154. static int xiph_len(int l)
  155. {
  156. return 1 + l / 255 + l;
  157. }
  158. static av_cold int oggvorbis_encode_close(AVCodecContext *avctx)
  159. {
  160. OggVorbisContext *s = avctx->priv_data;
  161. /* notify vorbisenc this is EOF */
  162. if (s->dsp_initialized)
  163. vorbis_analysis_wrote(&s->vd, 0);
  164. vorbis_block_clear(&s->vb);
  165. vorbis_dsp_clear(&s->vd);
  166. vorbis_info_clear(&s->vi);
  167. av_fifo_free(s->pkt_fifo);
  168. ff_af_queue_close(&s->afq);
  169. #if FF_API_OLD_ENCODE_AUDIO
  170. av_freep(&avctx->coded_frame);
  171. #endif
  172. av_freep(&avctx->extradata);
  173. return 0;
  174. }
  175. static av_cold int oggvorbis_encode_init(AVCodecContext *avctx)
  176. {
  177. OggVorbisContext *s = avctx->priv_data;
  178. ogg_packet header, header_comm, header_code;
  179. uint8_t *p;
  180. unsigned int offset;
  181. int ret;
  182. vorbis_info_init(&s->vi);
  183. if ((ret = oggvorbis_init_encoder(&s->vi, avctx))) {
  184. av_log(avctx, AV_LOG_ERROR, "encoder setup failed\n");
  185. goto error;
  186. }
  187. if ((ret = vorbis_analysis_init(&s->vd, &s->vi))) {
  188. av_log(avctx, AV_LOG_ERROR, "analysis init failed\n");
  189. ret = vorbis_error_to_averror(ret);
  190. goto error;
  191. }
  192. s->dsp_initialized = 1;
  193. if ((ret = vorbis_block_init(&s->vd, &s->vb))) {
  194. av_log(avctx, AV_LOG_ERROR, "dsp init failed\n");
  195. ret = vorbis_error_to_averror(ret);
  196. goto error;
  197. }
  198. vorbis_comment_init(&s->vc);
  199. vorbis_comment_add_tag(&s->vc, "encoder", LIBAVCODEC_IDENT);
  200. if ((ret = vorbis_analysis_headerout(&s->vd, &s->vc, &header, &header_comm,
  201. &header_code))) {
  202. ret = vorbis_error_to_averror(ret);
  203. goto error;
  204. }
  205. avctx->extradata_size = 1 + xiph_len(header.bytes) +
  206. xiph_len(header_comm.bytes) +
  207. header_code.bytes;
  208. p = avctx->extradata = av_malloc(avctx->extradata_size +
  209. FF_INPUT_BUFFER_PADDING_SIZE);
  210. if (!p) {
  211. ret = AVERROR(ENOMEM);
  212. goto error;
  213. }
  214. p[0] = 2;
  215. offset = 1;
  216. offset += av_xiphlacing(&p[offset], header.bytes);
  217. offset += av_xiphlacing(&p[offset], header_comm.bytes);
  218. memcpy(&p[offset], header.packet, header.bytes);
  219. offset += header.bytes;
  220. memcpy(&p[offset], header_comm.packet, header_comm.bytes);
  221. offset += header_comm.bytes;
  222. memcpy(&p[offset], header_code.packet, header_code.bytes);
  223. offset += header_code.bytes;
  224. assert(offset == avctx->extradata_size);
  225. if ((ret = avpriv_vorbis_parse_extradata(avctx, &s->vp)) < 0) {
  226. av_log(avctx, AV_LOG_ERROR, "invalid extradata\n");
  227. return ret;
  228. }
  229. vorbis_comment_clear(&s->vc);
  230. avctx->frame_size = OGGVORBIS_FRAME_SIZE;
  231. ff_af_queue_init(avctx, &s->afq);
  232. s->pkt_fifo = av_fifo_alloc(BUFFER_SIZE);
  233. if (!s->pkt_fifo) {
  234. ret = AVERROR(ENOMEM);
  235. goto error;
  236. }
  237. #if FF_API_OLD_ENCODE_AUDIO
  238. avctx->coded_frame = avcodec_alloc_frame();
  239. if (!avctx->coded_frame) {
  240. ret = AVERROR(ENOMEM);
  241. goto error;
  242. }
  243. #endif
  244. return 0;
  245. error:
  246. oggvorbis_encode_close(avctx);
  247. return ret;
  248. }
  249. static int oggvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  250. const AVFrame *frame, int *got_packet_ptr)
  251. {
  252. OggVorbisContext *s = avctx->priv_data;
  253. ogg_packet op;
  254. int ret, duration;
  255. /* send samples to libvorbis */
  256. if (frame) {
  257. const float *audio = (const float *)frame->data[0];
  258. const int samples = frame->nb_samples;
  259. float **buffer;
  260. int c, channels = s->vi.channels;
  261. buffer = vorbis_analysis_buffer(&s->vd, samples);
  262. for (c = 0; c < channels; c++) {
  263. int i;
  264. int co = (channels > 8) ? c :
  265. ff_vorbis_encoding_channel_layout_offsets[channels - 1][c];
  266. for (i = 0; i < samples; i++)
  267. buffer[c][i] = audio[i * channels + co];
  268. }
  269. if ((ret = vorbis_analysis_wrote(&s->vd, samples)) < 0) {
  270. av_log(avctx, AV_LOG_ERROR, "error in vorbis_analysis_wrote()\n");
  271. return vorbis_error_to_averror(ret);
  272. }
  273. if ((ret = ff_af_queue_add(&s->afq, frame) < 0))
  274. return ret;
  275. } else {
  276. if (!s->eof)
  277. if ((ret = vorbis_analysis_wrote(&s->vd, 0)) < 0) {
  278. av_log(avctx, AV_LOG_ERROR, "error in vorbis_analysis_wrote()\n");
  279. return vorbis_error_to_averror(ret);
  280. }
  281. s->eof = 1;
  282. }
  283. /* retrieve available packets from libvorbis */
  284. while ((ret = vorbis_analysis_blockout(&s->vd, &s->vb)) == 1) {
  285. if ((ret = vorbis_analysis(&s->vb, NULL)) < 0)
  286. break;
  287. if ((ret = vorbis_bitrate_addblock(&s->vb)) < 0)
  288. break;
  289. /* add any available packets to the output packet buffer */
  290. while ((ret = vorbis_bitrate_flushpacket(&s->vd, &op)) == 1) {
  291. if (av_fifo_space(s->pkt_fifo) < sizeof(ogg_packet) + op.bytes) {
  292. av_log(avctx, AV_LOG_ERROR, "packet buffer is too small\n");
  293. return AVERROR_BUG;
  294. }
  295. av_fifo_generic_write(s->pkt_fifo, &op, sizeof(ogg_packet), NULL);
  296. av_fifo_generic_write(s->pkt_fifo, op.packet, op.bytes, NULL);
  297. }
  298. if (ret < 0) {
  299. av_log(avctx, AV_LOG_ERROR, "error getting available packets\n");
  300. break;
  301. }
  302. }
  303. if (ret < 0) {
  304. av_log(avctx, AV_LOG_ERROR, "error getting available packets\n");
  305. return vorbis_error_to_averror(ret);
  306. }
  307. /* check for available packets */
  308. if (av_fifo_size(s->pkt_fifo) < sizeof(ogg_packet))
  309. return 0;
  310. av_fifo_generic_read(s->pkt_fifo, &op, sizeof(ogg_packet), NULL);
  311. if ((ret = ff_alloc_packet2(avctx, avpkt, op.bytes)))
  312. return ret;
  313. av_fifo_generic_read(s->pkt_fifo, avpkt->data, op.bytes, NULL);
  314. avpkt->pts = ff_samples_to_time_base(avctx, op.granulepos);
  315. duration = avpriv_vorbis_parse_frame(&s->vp, avpkt->data, avpkt->size);
  316. if (duration > 0) {
  317. /* we do not know encoder delay until we get the first packet from
  318. * libvorbis, so we have to update the AudioFrameQueue counts */
  319. if (!avctx->delay) {
  320. avctx->delay = duration;
  321. s->afq.remaining_delay += duration;
  322. s->afq.remaining_samples += duration;
  323. }
  324. ff_af_queue_remove(&s->afq, duration, &avpkt->pts, &avpkt->duration);
  325. }
  326. *got_packet_ptr = 1;
  327. return 0;
  328. }
  329. AVCodec ff_libvorbis_encoder = {
  330. .name = "libvorbis",
  331. .type = AVMEDIA_TYPE_AUDIO,
  332. .id = CODEC_ID_VORBIS,
  333. .priv_data_size = sizeof(OggVorbisContext),
  334. .init = oggvorbis_encode_init,
  335. .encode2 = oggvorbis_encode_frame,
  336. .close = oggvorbis_encode_close,
  337. .capabilities = CODEC_CAP_DELAY,
  338. .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT,
  339. AV_SAMPLE_FMT_NONE },
  340. .long_name = NULL_IF_CONFIG_SMALL("libvorbis Vorbis"),
  341. .priv_class = &class,
  342. .defaults = defaults,
  343. };
  344. static int oggvorbis_decode_init(AVCodecContext *avccontext) {
  345. OggVorbisContext *context = avccontext->priv_data ;
  346. uint8_t *p= avccontext->extradata;
  347. int i, hsizes[3];
  348. unsigned char *headers[3], *extradata = avccontext->extradata;
  349. vorbis_info_init(&context->vi) ;
  350. vorbis_comment_init(&context->vc) ;
  351. if(! avccontext->extradata_size || ! p) {
  352. av_log(avccontext, AV_LOG_ERROR, "vorbis extradata absent\n");
  353. return -1;
  354. }
  355. if(p[0] == 0 && p[1] == 30) {
  356. for(i = 0; i < 3; i++){
  357. hsizes[i] = bytestream_get_be16(&p);
  358. headers[i] = p;
  359. p += hsizes[i];
  360. }
  361. } else if(*p == 2) {
  362. unsigned int offset = 1;
  363. p++;
  364. for(i=0; i<2; i++) {
  365. hsizes[i] = 0;
  366. while((*p == 0xFF) && (offset < avccontext->extradata_size)) {
  367. hsizes[i] += 0xFF;
  368. offset++;
  369. p++;
  370. }
  371. if(offset >= avccontext->extradata_size - 1) {
  372. av_log(avccontext, AV_LOG_ERROR,
  373. "vorbis header sizes damaged\n");
  374. return -1;
  375. }
  376. hsizes[i] += *p;
  377. offset++;
  378. p++;
  379. }
  380. hsizes[2] = avccontext->extradata_size - hsizes[0]-hsizes[1]-offset;
  381. #if 0
  382. av_log(avccontext, AV_LOG_DEBUG,
  383. "vorbis header sizes: %d, %d, %d, / extradata_len is %d \n",
  384. hsizes[0], hsizes[1], hsizes[2], avccontext->extradata_size);
  385. #endif
  386. headers[0] = extradata + offset;
  387. headers[1] = extradata + offset + hsizes[0];
  388. headers[2] = extradata + offset + hsizes[0] + hsizes[1];
  389. } else {
  390. av_log(avccontext, AV_LOG_ERROR,
  391. "vorbis initial header len is wrong: %d\n", *p);
  392. return -1;
  393. }
  394. for(i=0; i<3; i++){
  395. context->op.b_o_s= i==0;
  396. context->op.bytes = hsizes[i];
  397. context->op.packet = headers[i];
  398. if(vorbis_synthesis_headerin(&context->vi, &context->vc, &context->op)<0){
  399. av_log(avccontext, AV_LOG_ERROR, "%d. vorbis header damaged\n", i+1);
  400. return -1;
  401. }
  402. }
  403. avccontext->channels = context->vi.channels;
  404. avccontext->sample_rate = context->vi.rate;
  405. avccontext->time_base= (AVRational){1, avccontext->sample_rate};
  406. vorbis_synthesis_init(&context->vd, &context->vi);
  407. vorbis_block_init(&context->vd, &context->vb);
  408. return 0 ;
  409. }
  410. static inline int conv(int samples, float **pcm, char *buf, int channels) {
  411. int i, j;
  412. ogg_int16_t *ptr, *data = (ogg_int16_t*)buf ;
  413. float *mono ;
  414. for(i = 0 ; i < channels ; i++){
  415. ptr = &data[i];
  416. mono = pcm[i] ;
  417. for(j = 0 ; j < samples ; j++) {
  418. *ptr = av_clip_int16(mono[j] * 32767.f);
  419. ptr += channels;
  420. }
  421. }
  422. return 0 ;
  423. }
  424. static int oggvorbis_decode_frame(AVCodecContext *avccontext, void *data,
  425. int *got_frame_ptr, AVPacket *avpkt)
  426. {
  427. OggVorbisContext *context = avccontext->priv_data ;
  428. float **pcm ;
  429. ogg_packet *op= &context->op;
  430. int samples, total_samples, total_bytes;
  431. int ret;
  432. int16_t *output;
  433. if(!avpkt->size){
  434. //FIXME flush
  435. return 0;
  436. }
  437. context->frame.nb_samples = 8192*4;
  438. if ((ret = avccontext->get_buffer(avccontext, &context->frame)) < 0) {
  439. av_log(avccontext, AV_LOG_ERROR, "get_buffer() failed\n");
  440. return ret;
  441. }
  442. output = (int16_t *)context->frame.data[0];
  443. op->packet = avpkt->data;
  444. op->bytes = avpkt->size;
  445. // av_log(avccontext, AV_LOG_DEBUG, "%d %d %d %"PRId64" %"PRId64" %d %d\n", op->bytes, op->b_o_s, op->e_o_s, op->granulepos, op->packetno, buf_size, context->vi.rate);
  446. /* for(i=0; i<op->bytes; i++)
  447. av_log(avccontext, AV_LOG_DEBUG, "%02X ", op->packet[i]);
  448. av_log(avccontext, AV_LOG_DEBUG, "\n");*/
  449. if(vorbis_synthesis(&context->vb, op) == 0)
  450. vorbis_synthesis_blockin(&context->vd, &context->vb) ;
  451. total_samples = 0 ;
  452. total_bytes = 0 ;
  453. while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) {
  454. conv(samples, pcm, (char*)output + total_bytes, context->vi.channels) ;
  455. total_bytes += samples * 2 * context->vi.channels ;
  456. total_samples += samples ;
  457. vorbis_synthesis_read(&context->vd, samples) ;
  458. }
  459. context->frame.nb_samples = total_samples;
  460. *got_frame_ptr = 1;
  461. *(AVFrame *)data = context->frame;
  462. return avpkt->size;
  463. }
  464. static int oggvorbis_decode_close(AVCodecContext *avccontext) {
  465. OggVorbisContext *context = avccontext->priv_data ;
  466. vorbis_info_clear(&context->vi) ;
  467. vorbis_comment_clear(&context->vc) ;
  468. return 0 ;
  469. }
  470. AVCodec ff_libvorbis_decoder = {
  471. .name = "libvorbis",
  472. .type = AVMEDIA_TYPE_AUDIO,
  473. .id = CODEC_ID_VORBIS,
  474. .priv_data_size = sizeof(OggVorbisContext),
  475. .init = oggvorbis_decode_init,
  476. .decode = oggvorbis_decode_frame,
  477. .close = oggvorbis_decode_close,
  478. .capabilities = CODEC_CAP_DELAY,
  479. } ;