You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

961 lines
30KB

  1. /*
  2. * Nvidia CUVID decoder
  3. * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/buffer.h"
  22. #include "libavutil/mathematics.h"
  23. #include "libavutil/hwcontext.h"
  24. #include "libavutil/hwcontext_cuda.h"
  25. #include "libavutil/fifo.h"
  26. #include "libavutil/log.h"
  27. #include "libavutil/opt.h"
  28. #include "avcodec.h"
  29. #include "internal.h"
  30. #include "compat/cuda/nvcuvid.h"
  31. #define MAX_FRAME_COUNT 25
  32. typedef struct CuvidContext
  33. {
  34. AVClass *avclass;
  35. CUvideodecoder cudecoder;
  36. CUvideoparser cuparser;
  37. AVBufferRef *hwdevice;
  38. AVBufferRef *hwframe;
  39. AVBSFContext *bsf;
  40. AVFifoBuffer *frame_queue;
  41. int deint_mode;
  42. int64_t prev_pts;
  43. int internal_error;
  44. int ever_flushed;
  45. int decoder_flushing;
  46. cudaVideoCodec codec_type;
  47. cudaVideoChromaFormat chroma_format;
  48. CUVIDPARSERPARAMS cuparseinfo;
  49. CUVIDEOFORMATEX cuparse_ext;
  50. } CuvidContext;
  51. typedef struct CuvidParsedFrame
  52. {
  53. CUVIDPARSERDISPINFO dispinfo;
  54. int second_field;
  55. int is_deinterlacing;
  56. } CuvidParsedFrame;
  57. static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
  58. {
  59. const char *err_name;
  60. const char *err_string;
  61. av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
  62. if (err == CUDA_SUCCESS)
  63. return 0;
  64. cuGetErrorName(err, &err_name);
  65. cuGetErrorString(err, &err_string);
  66. av_log(avctx, AV_LOG_ERROR, "%s failed", func);
  67. if (err_name && err_string)
  68. av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
  69. av_log(avctx, AV_LOG_ERROR, "\n");
  70. return AVERROR_EXTERNAL;
  71. }
  72. #define CHECK_CU(x) check_cu(avctx, (x), #x)
  73. static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
  74. {
  75. AVCodecContext *avctx = opaque;
  76. CuvidContext *ctx = avctx->priv_data;
  77. AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  78. CUVIDDECODECREATEINFO cuinfo;
  79. av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
  80. ctx->internal_error = 0;
  81. avctx->width = format->display_area.right;
  82. avctx->height = format->display_area.bottom;
  83. ff_set_sar(avctx, av_div_q(
  84. (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
  85. (AVRational){ avctx->width, avctx->height }));
  86. if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave)
  87. avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
  88. else
  89. avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
  90. if (format->video_signal_description.video_full_range_flag)
  91. avctx->color_range = AVCOL_RANGE_JPEG;
  92. else
  93. avctx->color_range = AVCOL_RANGE_MPEG;
  94. avctx->color_primaries = format->video_signal_description.color_primaries;
  95. avctx->color_trc = format->video_signal_description.transfer_characteristics;
  96. avctx->colorspace = format->video_signal_description.matrix_coefficients;
  97. if (format->bitrate)
  98. avctx->bit_rate = format->bitrate;
  99. if (format->frame_rate.numerator && format->frame_rate.denominator) {
  100. avctx->framerate.num = format->frame_rate.numerator;
  101. avctx->framerate.den = format->frame_rate.denominator;
  102. }
  103. if (ctx->cudecoder
  104. && avctx->coded_width == format->coded_width
  105. && avctx->coded_height == format->coded_height
  106. && ctx->chroma_format == format->chroma_format
  107. && ctx->codec_type == format->codec)
  108. return 1;
  109. if (ctx->cudecoder) {
  110. av_log(avctx, AV_LOG_ERROR, "re-initializing decoder is not supported\n");
  111. ctx->internal_error = AVERROR(EINVAL);
  112. return 0;
  113. }
  114. if (hwframe_ctx->pool && !ctx->ever_flushed) {
  115. av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized\n");
  116. ctx->internal_error = AVERROR(EINVAL);
  117. return 0;
  118. }
  119. if (format->chroma_format != cudaVideoChromaFormat_420) {
  120. av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
  121. ctx->internal_error = AVERROR(EINVAL);
  122. return 0;
  123. }
  124. avctx->coded_width = format->coded_width;
  125. avctx->coded_height = format->coded_height;
  126. ctx->chroma_format = format->chroma_format;
  127. memset(&cuinfo, 0, sizeof(cuinfo));
  128. cuinfo.CodecType = ctx->codec_type = format->codec;
  129. cuinfo.ChromaFormat = format->chroma_format;
  130. cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  131. cuinfo.ulWidth = avctx->coded_width;
  132. cuinfo.ulHeight = avctx->coded_height;
  133. cuinfo.ulTargetWidth = cuinfo.ulWidth;
  134. cuinfo.ulTargetHeight = cuinfo.ulHeight;
  135. cuinfo.target_rect.left = 0;
  136. cuinfo.target_rect.top = 0;
  137. cuinfo.target_rect.right = cuinfo.ulWidth;
  138. cuinfo.target_rect.bottom = cuinfo.ulHeight;
  139. cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
  140. cuinfo.ulNumOutputSurfaces = 1;
  141. cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
  142. cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
  143. if (format->progressive_sequence) {
  144. ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  145. } else {
  146. cuinfo.DeinterlaceMode = ctx->deint_mode;
  147. }
  148. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
  149. avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
  150. ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
  151. if (ctx->internal_error < 0)
  152. return 0;
  153. if (!hwframe_ctx->pool) {
  154. hwframe_ctx->format = AV_PIX_FMT_CUDA;
  155. hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
  156. hwframe_ctx->width = FFALIGN(avctx->width, 32);
  157. hwframe_ctx->height = FFALIGN(avctx->height, 32);
  158. if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
  159. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
  160. return 0;
  161. }
  162. }
  163. return 1;
  164. }
  165. static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
  166. {
  167. AVCodecContext *avctx = opaque;
  168. CuvidContext *ctx = avctx->priv_data;
  169. av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
  170. ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams));
  171. if (ctx->internal_error < 0)
  172. return 0;
  173. return 1;
  174. }
  175. static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
  176. {
  177. AVCodecContext *avctx = opaque;
  178. CuvidContext *ctx = avctx->priv_data;
  179. CuvidParsedFrame parsed_frame = { *dispinfo, 0, 0 };
  180. ctx->internal_error = 0;
  181. if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
  182. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  183. } else {
  184. parsed_frame.is_deinterlacing = 1;
  185. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  186. parsed_frame.second_field = 1;
  187. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  188. }
  189. return 1;
  190. }
  191. static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
  192. {
  193. CuvidContext *ctx = avctx->priv_data;
  194. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  195. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  196. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  197. CUVIDSOURCEDATAPACKET cupkt;
  198. AVPacket filter_packet = { 0 };
  199. AVPacket filtered_packet = { 0 };
  200. int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
  201. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
  202. if (is_flush && avpkt && avpkt->size)
  203. return AVERROR_EOF;
  204. if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size)
  205. return AVERROR(EAGAIN);
  206. if (ctx->bsf && avpkt && avpkt->size) {
  207. if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
  208. av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
  209. return ret;
  210. }
  211. if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
  212. av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
  213. av_packet_unref(&filter_packet);
  214. return ret;
  215. }
  216. if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
  217. av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
  218. return ret;
  219. }
  220. avpkt = &filtered_packet;
  221. }
  222. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  223. if (ret < 0) {
  224. av_packet_unref(&filtered_packet);
  225. return ret;
  226. }
  227. memset(&cupkt, 0, sizeof(cupkt));
  228. if (avpkt && avpkt->size) {
  229. cupkt.payload_size = avpkt->size;
  230. cupkt.payload = avpkt->data;
  231. if (avpkt->pts != AV_NOPTS_VALUE) {
  232. cupkt.flags = CUVID_PKT_TIMESTAMP;
  233. if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
  234. cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
  235. else
  236. cupkt.timestamp = avpkt->pts;
  237. }
  238. } else {
  239. cupkt.flags = CUVID_PKT_ENDOFSTREAM;
  240. ctx->decoder_flushing = 1;
  241. }
  242. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt));
  243. av_packet_unref(&filtered_packet);
  244. if (ret < 0)
  245. goto error;
  246. // cuvidParseVideoData doesn't return an error just because stuff failed...
  247. if (ctx->internal_error) {
  248. av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
  249. ret = ctx->internal_error;
  250. goto error;
  251. }
  252. error:
  253. eret = CHECK_CU(cuCtxPopCurrent(&dummy));
  254. if (eret < 0)
  255. return eret;
  256. else if (ret < 0)
  257. return ret;
  258. else if (is_flush)
  259. return AVERROR_EOF;
  260. else
  261. return 0;
  262. }
  263. static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
  264. {
  265. CuvidContext *ctx = avctx->priv_data;
  266. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  267. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  268. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  269. CUdeviceptr mapped_frame = 0;
  270. int ret = 0, eret = 0;
  271. av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");
  272. if (ctx->decoder_flushing) {
  273. ret = cuvid_decode_packet(avctx, NULL);
  274. if (ret < 0 && ret != AVERROR_EOF)
  275. return ret;
  276. }
  277. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  278. if (ret < 0)
  279. return ret;
  280. if (av_fifo_size(ctx->frame_queue)) {
  281. CuvidParsedFrame parsed_frame;
  282. CUVIDPROCPARAMS params;
  283. unsigned int pitch = 0;
  284. int offset = 0;
  285. int i;
  286. av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  287. memset(&params, 0, sizeof(params));
  288. params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
  289. params.second_field = parsed_frame.second_field;
  290. params.top_field_first = parsed_frame.dispinfo.top_field_first;
  291. ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
  292. if (ret < 0)
  293. goto error;
  294. if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
  295. ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
  296. if (ret < 0) {
  297. av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
  298. goto error;
  299. }
  300. ret = ff_decode_frame_props(avctx, frame);
  301. if (ret < 0) {
  302. av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
  303. goto error;
  304. }
  305. for (i = 0; i < 2; i++) {
  306. CUDA_MEMCPY2D cpy = {
  307. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  308. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  309. .srcDevice = mapped_frame,
  310. .dstDevice = (CUdeviceptr)frame->data[i],
  311. .srcPitch = pitch,
  312. .dstPitch = frame->linesize[i],
  313. .srcY = offset,
  314. .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
  315. .Height = avctx->height >> (i ? 1 : 0),
  316. };
  317. ret = CHECK_CU(cuMemcpy2D(&cpy));
  318. if (ret < 0)
  319. goto error;
  320. offset += avctx->coded_height;
  321. }
  322. } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
  323. AVFrame *tmp_frame = av_frame_alloc();
  324. if (!tmp_frame) {
  325. av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
  326. ret = AVERROR(ENOMEM);
  327. goto error;
  328. }
  329. tmp_frame->format = AV_PIX_FMT_CUDA;
  330. tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
  331. tmp_frame->data[0] = (uint8_t*)mapped_frame;
  332. tmp_frame->linesize[0] = pitch;
  333. tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->coded_height * pitch);
  334. tmp_frame->linesize[1] = pitch;
  335. tmp_frame->width = avctx->width;
  336. tmp_frame->height = avctx->height;
  337. ret = ff_get_buffer(avctx, frame, 0);
  338. if (ret < 0) {
  339. av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
  340. av_frame_free(&tmp_frame);
  341. goto error;
  342. }
  343. ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
  344. if (ret) {
  345. av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
  346. av_frame_free(&tmp_frame);
  347. goto error;
  348. }
  349. av_frame_free(&tmp_frame);
  350. } else {
  351. ret = AVERROR_BUG;
  352. goto error;
  353. }
  354. frame->width = avctx->width;
  355. frame->height = avctx->height;
  356. if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
  357. frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
  358. else
  359. frame->pts = parsed_frame.dispinfo.timestamp;
  360. if (parsed_frame.second_field) {
  361. if (ctx->prev_pts == INT64_MIN) {
  362. ctx->prev_pts = frame->pts;
  363. frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
  364. } else {
  365. int pts_diff = (frame->pts - ctx->prev_pts) / 2;
  366. ctx->prev_pts = frame->pts;
  367. frame->pts += pts_diff;
  368. }
  369. }
  370. /* CUVIDs opaque reordering breaks the internal pkt logic.
  371. * So set pkt_pts and clear all the other pkt_ fields.
  372. */
  373. frame->pkt_pts = frame->pts;
  374. av_frame_set_pkt_pos(frame, -1);
  375. av_frame_set_pkt_duration(frame, 0);
  376. av_frame_set_pkt_size(frame, -1);
  377. frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;
  378. if (frame->interlaced_frame)
  379. frame->top_field_first = parsed_frame.dispinfo.top_field_first;
  380. } else if (ctx->decoder_flushing) {
  381. ret = AVERROR_EOF;
  382. } else {
  383. ret = AVERROR(EAGAIN);
  384. }
  385. error:
  386. if (mapped_frame)
  387. eret = CHECK_CU(cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
  388. eret = CHECK_CU(cuCtxPopCurrent(&dummy));
  389. if (eret < 0)
  390. return eret;
  391. else
  392. return ret;
  393. }
  394. static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
  395. {
  396. CuvidContext *ctx = avctx->priv_data;
  397. AVFrame *frame = data;
  398. int ret = 0;
  399. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
  400. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) {
  401. av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
  402. return AVERROR(EINVAL);
  403. }
  404. if (!ctx->decoder_flushing) {
  405. ret = cuvid_decode_packet(avctx, avpkt);
  406. if (ret < 0)
  407. return ret;
  408. }
  409. ret = cuvid_output_frame(avctx, frame);
  410. if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
  411. *got_frame = 0;
  412. } else if (ret < 0) {
  413. return ret;
  414. } else {
  415. *got_frame = 1;
  416. }
  417. return 0;
  418. }
  419. static av_cold int cuvid_decode_end(AVCodecContext *avctx)
  420. {
  421. CuvidContext *ctx = avctx->priv_data;
  422. av_fifo_freep(&ctx->frame_queue);
  423. if (ctx->bsf)
  424. av_bsf_free(&ctx->bsf);
  425. if (ctx->cuparser)
  426. cuvidDestroyVideoParser(ctx->cuparser);
  427. if (ctx->cudecoder)
  428. cuvidDestroyDecoder(ctx->cudecoder);
  429. av_buffer_unref(&ctx->hwframe);
  430. av_buffer_unref(&ctx->hwdevice);
  431. return 0;
  432. }
  433. static void cuvid_ctx_free(AVHWDeviceContext *ctx)
  434. {
  435. AVCUDADeviceContext *hwctx = ctx->hwctx;
  436. cuCtxDestroy(hwctx->cuda_ctx);
  437. }
  438. static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
  439. {
  440. CUVIDDECODECREATEINFO cuinfo;
  441. CUvideodecoder cudec = 0;
  442. int ret = 0;
  443. memset(&cuinfo, 0, sizeof(cuinfo));
  444. cuinfo.CodecType = cuparseinfo->CodecType;
  445. cuinfo.ChromaFormat = cudaVideoChromaFormat_420;
  446. cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  447. cuinfo.ulWidth = 1280;
  448. cuinfo.ulHeight = 720;
  449. cuinfo.ulTargetWidth = cuinfo.ulWidth;
  450. cuinfo.ulTargetHeight = cuinfo.ulHeight;
  451. cuinfo.target_rect.left = 0;
  452. cuinfo.target_rect.top = 0;
  453. cuinfo.target_rect.right = cuinfo.ulWidth;
  454. cuinfo.target_rect.bottom = cuinfo.ulHeight;
  455. cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
  456. cuinfo.ulNumOutputSurfaces = 1;
  457. cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
  458. cuinfo.bitDepthMinus8 = 0;
  459. cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  460. ret = CHECK_CU(cuvidCreateDecoder(&cudec, &cuinfo));
  461. if (ret < 0)
  462. return ret;
  463. ret = CHECK_CU(cuvidDestroyDecoder(cudec));
  464. if (ret < 0)
  465. return ret;
  466. return 0;
  467. }
  468. static av_cold int cuvid_decode_init(AVCodecContext *avctx)
  469. {
  470. CuvidContext *ctx = avctx->priv_data;
  471. AVCUDADeviceContext *device_hwctx;
  472. AVHWDeviceContext *device_ctx;
  473. AVHWFramesContext *hwframe_ctx;
  474. CUVIDSOURCEDATAPACKET seq_pkt;
  475. CUdevice device;
  476. CUcontext cuda_ctx = NULL;
  477. CUcontext dummy;
  478. const AVBitStreamFilter *bsf;
  479. int ret = 0;
  480. enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
  481. AV_PIX_FMT_NV12,
  482. AV_PIX_FMT_NONE };
  483. ret = ff_get_format(avctx, pix_fmts);
  484. if (ret < 0) {
  485. av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
  486. return ret;
  487. }
  488. ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
  489. if (!ctx->frame_queue) {
  490. ret = AVERROR(ENOMEM);
  491. goto error;
  492. }
  493. avctx->pix_fmt = ret;
  494. if (avctx->hw_frames_ctx) {
  495. ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
  496. if (!ctx->hwframe) {
  497. ret = AVERROR(ENOMEM);
  498. goto error;
  499. }
  500. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  501. ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
  502. if (!ctx->hwdevice) {
  503. ret = AVERROR(ENOMEM);
  504. goto error;
  505. }
  506. device_ctx = hwframe_ctx->device_ctx;
  507. device_hwctx = device_ctx->hwctx;
  508. cuda_ctx = device_hwctx->cuda_ctx;
  509. } else {
  510. ctx->hwdevice = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
  511. if (!ctx->hwdevice) {
  512. av_log(avctx, AV_LOG_ERROR, "Error allocating hwdevice\n");
  513. ret = AVERROR(ENOMEM);
  514. goto error;
  515. }
  516. ret = CHECK_CU(cuInit(0));
  517. if (ret < 0)
  518. goto error;
  519. ret = CHECK_CU(cuDeviceGet(&device, 0));
  520. if (ret < 0)
  521. goto error;
  522. ret = CHECK_CU(cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, device));
  523. if (ret < 0)
  524. goto error;
  525. device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  526. device_ctx->free = cuvid_ctx_free;
  527. device_hwctx = device_ctx->hwctx;
  528. device_hwctx->cuda_ctx = cuda_ctx;
  529. ret = CHECK_CU(cuCtxPopCurrent(&dummy));
  530. if (ret < 0)
  531. goto error;
  532. ret = av_hwdevice_ctx_init(ctx->hwdevice);
  533. if (ret < 0) {
  534. av_log(avctx, AV_LOG_ERROR, "av_hwdevice_ctx_init failed\n");
  535. goto error;
  536. }
  537. ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
  538. if (!ctx->hwframe) {
  539. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
  540. ret = AVERROR(ENOMEM);
  541. goto error;
  542. }
  543. }
  544. memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
  545. memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
  546. memset(&seq_pkt, 0, sizeof(seq_pkt));
  547. ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;
  548. switch (avctx->codec->id) {
  549. #if CONFIG_H263_CUVID_DECODER
  550. case AV_CODEC_ID_H263:
  551. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
  552. break;
  553. #endif
  554. #if CONFIG_H264_CUVID_DECODER
  555. case AV_CODEC_ID_H264:
  556. ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
  557. break;
  558. #endif
  559. #if CONFIG_HEVC_CUVID_DECODER
  560. case AV_CODEC_ID_HEVC:
  561. ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
  562. break;
  563. #endif
  564. #if CONFIG_MJPEG_CUVID_DECODER
  565. case AV_CODEC_ID_MJPEG:
  566. ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
  567. break;
  568. #endif
  569. #if CONFIG_MPEG1_CUVID_DECODER
  570. case AV_CODEC_ID_MPEG1VIDEO:
  571. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
  572. break;
  573. #endif
  574. #if CONFIG_MPEG2_CUVID_DECODER
  575. case AV_CODEC_ID_MPEG2VIDEO:
  576. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
  577. break;
  578. #endif
  579. #if CONFIG_MPEG4_CUVID_DECODER
  580. case AV_CODEC_ID_MPEG4:
  581. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
  582. break;
  583. #endif
  584. #if CONFIG_VP8_CUVID_DECODER
  585. case AV_CODEC_ID_VP8:
  586. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
  587. break;
  588. #endif
  589. #if CONFIG_VP9_CUVID_DECODER
  590. case AV_CODEC_ID_VP9:
  591. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
  592. break;
  593. #endif
  594. #if CONFIG_VC1_CUVID_DECODER
  595. case AV_CODEC_ID_VC1:
  596. ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
  597. break;
  598. #endif
  599. default:
  600. av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
  601. return AVERROR_BUG;
  602. }
  603. if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
  604. if (avctx->codec->id == AV_CODEC_ID_H264)
  605. bsf = av_bsf_get_by_name("h264_mp4toannexb");
  606. else
  607. bsf = av_bsf_get_by_name("hevc_mp4toannexb");
  608. if (!bsf) {
  609. ret = AVERROR_BSF_NOT_FOUND;
  610. goto error;
  611. }
  612. if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
  613. goto error;
  614. }
  615. if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
  616. av_bsf_free(&ctx->bsf);
  617. goto error;
  618. }
  619. ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
  620. memcpy(ctx->cuparse_ext.raw_seqhdr_data,
  621. ctx->bsf->par_out->extradata,
  622. FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
  623. } else if (avctx->extradata_size > 0) {
  624. ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
  625. memcpy(ctx->cuparse_ext.raw_seqhdr_data,
  626. avctx->extradata,
  627. FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
  628. }
  629. ctx->cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT;
  630. ctx->cuparseinfo.ulMaxDisplayDelay = 4;
  631. ctx->cuparseinfo.pUserData = avctx;
  632. ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
  633. ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
  634. ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
  635. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  636. if (ret < 0)
  637. goto error;
  638. ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo);
  639. if (ret < 0)
  640. goto error;
  641. ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  642. if (ret < 0)
  643. goto error;
  644. seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
  645. seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  646. if (seq_pkt.payload && seq_pkt.payload_size) {
  647. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  648. if (ret < 0)
  649. goto error;
  650. }
  651. ret = CHECK_CU(cuCtxPopCurrent(&dummy));
  652. if (ret < 0)
  653. goto error;
  654. ctx->ever_flushed = 0;
  655. ctx->prev_pts = INT64_MIN;
  656. if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
  657. av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
  658. return 0;
  659. error:
  660. cuvid_decode_end(avctx);
  661. return ret;
  662. }
  663. static void cuvid_flush(AVCodecContext *avctx)
  664. {
  665. CuvidContext *ctx = avctx->priv_data;
  666. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  667. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  668. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  669. CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
  670. int ret;
  671. ctx->ever_flushed = 1;
  672. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  673. if (ret < 0)
  674. goto error;
  675. av_fifo_freep(&ctx->frame_queue);
  676. ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
  677. if (!ctx->frame_queue) {
  678. av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
  679. return;
  680. }
  681. if (ctx->cudecoder) {
  682. cuvidDestroyDecoder(ctx->cudecoder);
  683. ctx->cudecoder = NULL;
  684. }
  685. if (ctx->cuparser) {
  686. cuvidDestroyVideoParser(ctx->cuparser);
  687. ctx->cuparser = NULL;
  688. }
  689. ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  690. if (ret < 0)
  691. goto error;
  692. seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
  693. seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  694. if (seq_pkt.payload && seq_pkt.payload_size) {
  695. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  696. if (ret < 0)
  697. goto error;
  698. }
  699. ret = CHECK_CU(cuCtxPopCurrent(&dummy));
  700. if (ret < 0)
  701. goto error;
  702. ctx->prev_pts = INT64_MIN;
  703. ctx->decoder_flushing = 0;
  704. return;
  705. error:
  706. av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
  707. }
  708. #define OFFSET(x) offsetof(CuvidContext, x)
  709. #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
  710. static const AVOption options[] = {
  711. { "deint", "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT, { .i64 = cudaVideoDeinterlaceMode_Weave }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
  712. { "weave", "Weave deinterlacing (do nothing)", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave }, 0, 0, VD, "deint" },
  713. { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" },
  714. { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
  715. { NULL }
  716. };
  /*
   * Defines everything needed for one CUVID decoder:
   *   x##_cuvid_class        - AVClass exposing the shared options
   *   ff_##x##_cuvid_hwaccel - AVHWAccel entry for AV_PIX_FMT_CUDA
   *   ff_##x##_cuvid_decoder - the AVCodec itself
   * x is the lower-case name prefix, X the AV_CODEC_ID_ suffix.
   */
  #define DEFINE_CUVID_CODEC(x, X) \
      static const AVClass x##_cuvid_class = { \
          .class_name = #x "_cuvid", \
          .item_name  = av_default_item_name, \
          .option     = options, \
          .version    = LIBAVUTIL_VERSION_INT, \
      }; \
      AVHWAccel ff_##x##_cuvid_hwaccel = { \
          .name    = #x "_cuvid", \
          .type    = AVMEDIA_TYPE_VIDEO, \
          .id      = AV_CODEC_ID_##X, \
          .pix_fmt = AV_PIX_FMT_CUDA, \
      }; \
      AVCodec ff_##x##_cuvid_decoder = { \
          .name           = #x "_cuvid", \
          .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
          .type           = AVMEDIA_TYPE_VIDEO, \
          .id             = AV_CODEC_ID_##X, \
          .priv_data_size = sizeof(CuvidContext), \
          .priv_class     = &x##_cuvid_class, \
          .init           = cuvid_decode_init, \
          .close          = cuvid_decode_end, \
          .decode         = cuvid_decode_frame, \
          .send_packet    = cuvid_decode_packet, \
          .receive_frame  = cuvid_output_frame, \
          .flush          = cuvid_flush, \
          .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
          .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
                                                          AV_PIX_FMT_NV12, \
                                                          AV_PIX_FMT_NONE }, \
      };
  /* One decoder per codec enabled at configure time, listed in the same order
   * as the codec switch in cuvid_decode_init(). */
  #if CONFIG_H263_CUVID_DECODER
  DEFINE_CUVID_CODEC(h263, H263)
  #endif

  #if CONFIG_H264_CUVID_DECODER
  DEFINE_CUVID_CODEC(h264, H264)
  #endif

  #if CONFIG_HEVC_CUVID_DECODER
  DEFINE_CUVID_CODEC(hevc, HEVC)
  #endif

  #if CONFIG_MJPEG_CUVID_DECODER
  DEFINE_CUVID_CODEC(mjpeg, MJPEG)
  #endif

  #if CONFIG_MPEG1_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
  #endif

  #if CONFIG_MPEG2_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
  #endif

  #if CONFIG_MPEG4_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg4, MPEG4)
  #endif

  #if CONFIG_VP8_CUVID_DECODER
  DEFINE_CUVID_CODEC(vp8, VP8)
  #endif

  #if CONFIG_VP9_CUVID_DECODER
  DEFINE_CUVID_CODEC(vp9, VP9)
  #endif

  #if CONFIG_VC1_CUVID_DECODER
  DEFINE_CUVID_CODEC(vc1, VC1)
  #endif