You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1018 lines
33KB

  1. /*
  2. * Nvidia CUVID decoder
  3. * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "compat/cuda/dynlink_loader.h"
  22. #include "libavutil/buffer.h"
  23. #include "libavutil/mathematics.h"
  24. #include "libavutil/hwcontext.h"
  25. #include "libavutil/hwcontext_cuda_internal.h"
  26. #include "libavutil/fifo.h"
  27. #include "libavutil/log.h"
  28. #include "libavutil/opt.h"
  29. #include "libavutil/pixdesc.h"
  30. #include "avcodec.h"
  31. #include "internal.h"
  32. typedef struct CuvidContext
  33. {
  34. AVClass *avclass;
  35. CUvideodecoder cudecoder;
  36. CUvideoparser cuparser;
  37. char *cu_gpu;
  38. int nb_surfaces;
  39. AVBufferRef *hwdevice;
  40. AVBufferRef *hwframe;
  41. AVBSFContext *bsf;
  42. AVFifoBuffer *frame_queue;
  43. int deint_mode;
  44. int64_t prev_pts;
  45. int internal_error;
  46. int decoder_flushing;
  47. cudaVideoCodec codec_type;
  48. cudaVideoChromaFormat chroma_format;
  49. CUVIDPARSERPARAMS cuparseinfo;
  50. CUVIDEOFORMATEX cuparse_ext;
  51. CudaFunctions *cudl;
  52. CuvidFunctions *cvdl;
  53. } CuvidContext;
  54. typedef struct CuvidParsedFrame
  55. {
  56. CUVIDPARSERDISPINFO dispinfo;
  57. int second_field;
  58. int is_deinterlacing;
  59. } CuvidParsedFrame;
  60. static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
  61. {
  62. CuvidContext *ctx = avctx->priv_data;
  63. const char *err_name;
  64. const char *err_string;
  65. av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
  66. if (err == CUDA_SUCCESS)
  67. return 0;
  68. ctx->cudl->cuGetErrorName(err, &err_name);
  69. ctx->cudl->cuGetErrorString(err, &err_string);
  70. av_log(avctx, AV_LOG_ERROR, "%s failed", func);
  71. if (err_name && err_string)
  72. av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
  73. av_log(avctx, AV_LOG_ERROR, "\n");
  74. return AVERROR_EXTERNAL;
  75. }
  76. #define CHECK_CU(x) check_cu(avctx, (x), #x)
  77. static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
  78. {
  79. AVCodecContext *avctx = opaque;
  80. CuvidContext *ctx = avctx->priv_data;
  81. AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  82. CUVIDDECODECREATEINFO cuinfo;
  83. int surface_fmt;
  84. enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
  85. AV_PIX_FMT_NONE, // Will be updated below
  86. AV_PIX_FMT_NONE };
  87. av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
  88. ctx->internal_error = 0;
  89. switch (format->bit_depth_luma_minus8) {
  90. case 0: // 8-bit
  91. pix_fmts[1] = AV_PIX_FMT_NV12;
  92. break;
  93. case 2: // 10-bit
  94. pix_fmts[1] = AV_PIX_FMT_P010;
  95. break;
  96. case 4: // 12-bit
  97. pix_fmts[1] = AV_PIX_FMT_P016;
  98. break;
  99. default:
  100. av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n",
  101. format->bit_depth_luma_minus8 + 8);
  102. ctx->internal_error = AVERROR(EINVAL);
  103. return 0;
  104. }
  105. surface_fmt = ff_get_format(avctx, pix_fmts);
  106. if (surface_fmt < 0) {
  107. av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
  108. ctx->internal_error = AVERROR(EINVAL);
  109. return 0;
  110. }
  111. av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
  112. av_get_pix_fmt_name(avctx->pix_fmt),
  113. av_get_pix_fmt_name(surface_fmt),
  114. av_get_pix_fmt_name(avctx->sw_pix_fmt));
  115. avctx->pix_fmt = surface_fmt;
  116. // Update our hwframe ctx, as the get_format callback might have refreshed it!
  117. if (avctx->hw_frames_ctx) {
  118. av_buffer_unref(&ctx->hwframe);
  119. ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
  120. if (!ctx->hwframe) {
  121. ctx->internal_error = AVERROR(ENOMEM);
  122. return 0;
  123. }
  124. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  125. }
  126. avctx->width = format->display_area.right;
  127. avctx->height = format->display_area.bottom;
  128. ff_set_sar(avctx, av_div_q(
  129. (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
  130. (AVRational){ avctx->width, avctx->height }));
  131. if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave)
  132. avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
  133. else
  134. avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
  135. if (format->video_signal_description.video_full_range_flag)
  136. avctx->color_range = AVCOL_RANGE_JPEG;
  137. else
  138. avctx->color_range = AVCOL_RANGE_MPEG;
  139. avctx->color_primaries = format->video_signal_description.color_primaries;
  140. avctx->color_trc = format->video_signal_description.transfer_characteristics;
  141. avctx->colorspace = format->video_signal_description.matrix_coefficients;
  142. if (format->bitrate)
  143. avctx->bit_rate = format->bitrate;
  144. if (format->frame_rate.numerator && format->frame_rate.denominator) {
  145. avctx->framerate.num = format->frame_rate.numerator;
  146. avctx->framerate.den = format->frame_rate.denominator;
  147. }
  148. if (ctx->cudecoder
  149. && avctx->coded_width == format->coded_width
  150. && avctx->coded_height == format->coded_height
  151. && ctx->chroma_format == format->chroma_format
  152. && ctx->codec_type == format->codec)
  153. return 1;
  154. if (ctx->cudecoder) {
  155. av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
  156. ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder));
  157. if (ctx->internal_error < 0)
  158. return 0;
  159. ctx->cudecoder = NULL;
  160. }
  161. if (hwframe_ctx->pool && (
  162. hwframe_ctx->width < avctx->width ||
  163. hwframe_ctx->height < avctx->height ||
  164. hwframe_ctx->format != AV_PIX_FMT_CUDA ||
  165. hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
  166. av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
  167. ctx->internal_error = AVERROR(EINVAL);
  168. return 0;
  169. }
  170. if (format->chroma_format != cudaVideoChromaFormat_420) {
  171. av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
  172. ctx->internal_error = AVERROR(EINVAL);
  173. return 0;
  174. }
  175. avctx->coded_width = format->coded_width;
  176. avctx->coded_height = format->coded_height;
  177. ctx->chroma_format = format->chroma_format;
  178. memset(&cuinfo, 0, sizeof(cuinfo));
  179. cuinfo.CodecType = ctx->codec_type = format->codec;
  180. cuinfo.ChromaFormat = format->chroma_format;
  181. switch (avctx->sw_pix_fmt) {
  182. case AV_PIX_FMT_NV12:
  183. cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  184. break;
  185. case AV_PIX_FMT_P010:
  186. case AV_PIX_FMT_P016:
  187. cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
  188. break;
  189. default:
  190. av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
  191. ctx->internal_error = AVERROR(EINVAL);
  192. return 0;
  193. }
  194. cuinfo.ulWidth = avctx->coded_width;
  195. cuinfo.ulHeight = avctx->coded_height;
  196. cuinfo.ulTargetWidth = cuinfo.ulWidth;
  197. cuinfo.ulTargetHeight = cuinfo.ulHeight;
  198. cuinfo.target_rect.left = 0;
  199. cuinfo.target_rect.top = 0;
  200. cuinfo.target_rect.right = cuinfo.ulWidth;
  201. cuinfo.target_rect.bottom = cuinfo.ulHeight;
  202. cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
  203. cuinfo.ulNumOutputSurfaces = 1;
  204. cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
  205. cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
  206. if (format->progressive_sequence) {
  207. ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  208. } else {
  209. cuinfo.DeinterlaceMode = ctx->deint_mode;
  210. }
  211. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
  212. avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
  213. ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
  214. if (ctx->internal_error < 0)
  215. return 0;
  216. if (!hwframe_ctx->pool) {
  217. hwframe_ctx->format = AV_PIX_FMT_CUDA;
  218. hwframe_ctx->sw_format = avctx->sw_pix_fmt;
  219. hwframe_ctx->width = avctx->width;
  220. hwframe_ctx->height = avctx->height;
  221. if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
  222. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
  223. return 0;
  224. }
  225. }
  226. return 1;
  227. }
  228. static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
  229. {
  230. AVCodecContext *avctx = opaque;
  231. CuvidContext *ctx = avctx->priv_data;
  232. av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
  233. ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
  234. if (ctx->internal_error < 0)
  235. return 0;
  236. return 1;
  237. }
  238. static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
  239. {
  240. AVCodecContext *avctx = opaque;
  241. CuvidContext *ctx = avctx->priv_data;
  242. CuvidParsedFrame parsed_frame = { { 0 } };
  243. parsed_frame.dispinfo = *dispinfo;
  244. ctx->internal_error = 0;
  245. if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
  246. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  247. } else {
  248. parsed_frame.is_deinterlacing = 1;
  249. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  250. parsed_frame.second_field = 1;
  251. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  252. }
  253. return 1;
  254. }
  255. static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
  256. {
  257. CuvidContext *ctx = avctx->priv_data;
  258. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  259. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  260. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  261. CUVIDSOURCEDATAPACKET cupkt;
  262. AVPacket filter_packet = { 0 };
  263. AVPacket filtered_packet = { 0 };
  264. int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
  265. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
  266. if (is_flush && avpkt && avpkt->size)
  267. return AVERROR_EOF;
  268. if ((av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces && avpkt && avpkt->size)
  269. return AVERROR(EAGAIN);
  270. if (ctx->bsf && avpkt && avpkt->size) {
  271. if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
  272. av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
  273. return ret;
  274. }
  275. if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
  276. av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
  277. av_packet_unref(&filter_packet);
  278. return ret;
  279. }
  280. if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
  281. av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
  282. return ret;
  283. }
  284. avpkt = &filtered_packet;
  285. }
  286. ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
  287. if (ret < 0) {
  288. av_packet_unref(&filtered_packet);
  289. return ret;
  290. }
  291. memset(&cupkt, 0, sizeof(cupkt));
  292. if (avpkt && avpkt->size) {
  293. cupkt.payload_size = avpkt->size;
  294. cupkt.payload = avpkt->data;
  295. if (avpkt->pts != AV_NOPTS_VALUE) {
  296. cupkt.flags = CUVID_PKT_TIMESTAMP;
  297. if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
  298. cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
  299. else
  300. cupkt.timestamp = avpkt->pts;
  301. }
  302. } else {
  303. cupkt.flags = CUVID_PKT_ENDOFSTREAM;
  304. ctx->decoder_flushing = 1;
  305. }
  306. ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
  307. av_packet_unref(&filtered_packet);
  308. if (ret < 0)
  309. goto error;
  310. // cuvidParseVideoData doesn't return an error just because stuff failed...
  311. if (ctx->internal_error) {
  312. av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
  313. ret = ctx->internal_error;
  314. goto error;
  315. }
  316. error:
  317. eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
  318. if (eret < 0)
  319. return eret;
  320. else if (ret < 0)
  321. return ret;
  322. else if (is_flush)
  323. return AVERROR_EOF;
  324. else
  325. return 0;
  326. }
  327. static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
  328. {
  329. CuvidContext *ctx = avctx->priv_data;
  330. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  331. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  332. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  333. CUdeviceptr mapped_frame = 0;
  334. int ret = 0, eret = 0;
  335. av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");
  336. if (ctx->decoder_flushing) {
  337. ret = cuvid_decode_packet(avctx, NULL);
  338. if (ret < 0 && ret != AVERROR_EOF)
  339. return ret;
  340. }
  341. ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
  342. if (ret < 0)
  343. return ret;
  344. if (av_fifo_size(ctx->frame_queue)) {
  345. CuvidParsedFrame parsed_frame;
  346. CUVIDPROCPARAMS params;
  347. unsigned int pitch = 0;
  348. int offset = 0;
  349. int i;
  350. av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  351. memset(&params, 0, sizeof(params));
  352. params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
  353. params.second_field = parsed_frame.second_field;
  354. params.top_field_first = parsed_frame.dispinfo.top_field_first;
  355. ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
  356. if (ret < 0)
  357. goto error;
  358. if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
  359. ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
  360. if (ret < 0) {
  361. av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
  362. goto error;
  363. }
  364. ret = ff_decode_frame_props(avctx, frame);
  365. if (ret < 0) {
  366. av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
  367. goto error;
  368. }
  369. for (i = 0; i < 2; i++) {
  370. CUDA_MEMCPY2D cpy = {
  371. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  372. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  373. .srcDevice = mapped_frame,
  374. .dstDevice = (CUdeviceptr)frame->data[i],
  375. .srcPitch = pitch,
  376. .dstPitch = frame->linesize[i],
  377. .srcY = offset,
  378. .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
  379. .Height = avctx->height >> (i ? 1 : 0),
  380. };
  381. ret = CHECK_CU(ctx->cudl->cuMemcpy2D(&cpy));
  382. if (ret < 0)
  383. goto error;
  384. offset += avctx->coded_height;
  385. }
  386. } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
  387. avctx->pix_fmt == AV_PIX_FMT_P010 ||
  388. avctx->pix_fmt == AV_PIX_FMT_P016) {
  389. AVFrame *tmp_frame = av_frame_alloc();
  390. if (!tmp_frame) {
  391. av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
  392. ret = AVERROR(ENOMEM);
  393. goto error;
  394. }
  395. tmp_frame->format = AV_PIX_FMT_CUDA;
  396. tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
  397. tmp_frame->data[0] = (uint8_t*)mapped_frame;
  398. tmp_frame->linesize[0] = pitch;
  399. tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->coded_height * pitch);
  400. tmp_frame->linesize[1] = pitch;
  401. tmp_frame->width = avctx->width;
  402. tmp_frame->height = avctx->height;
  403. ret = ff_get_buffer(avctx, frame, 0);
  404. if (ret < 0) {
  405. av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
  406. av_frame_free(&tmp_frame);
  407. goto error;
  408. }
  409. ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
  410. if (ret) {
  411. av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
  412. av_frame_free(&tmp_frame);
  413. goto error;
  414. }
  415. av_frame_free(&tmp_frame);
  416. } else {
  417. ret = AVERROR_BUG;
  418. goto error;
  419. }
  420. frame->width = avctx->width;
  421. frame->height = avctx->height;
  422. if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
  423. frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
  424. else
  425. frame->pts = parsed_frame.dispinfo.timestamp;
  426. if (parsed_frame.second_field) {
  427. if (ctx->prev_pts == INT64_MIN) {
  428. ctx->prev_pts = frame->pts;
  429. frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
  430. } else {
  431. int pts_diff = (frame->pts - ctx->prev_pts) / 2;
  432. ctx->prev_pts = frame->pts;
  433. frame->pts += pts_diff;
  434. }
  435. }
  436. /* CUVIDs opaque reordering breaks the internal pkt logic.
  437. * So set pkt_pts and clear all the other pkt_ fields.
  438. */
  439. #if FF_API_PKT_PTS
  440. FF_DISABLE_DEPRECATION_WARNINGS
  441. frame->pkt_pts = frame->pts;
  442. FF_ENABLE_DEPRECATION_WARNINGS
  443. #endif
  444. av_frame_set_pkt_pos(frame, -1);
  445. av_frame_set_pkt_duration(frame, 0);
  446. av_frame_set_pkt_size(frame, -1);
  447. frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;
  448. if (frame->interlaced_frame)
  449. frame->top_field_first = parsed_frame.dispinfo.top_field_first;
  450. } else if (ctx->decoder_flushing) {
  451. ret = AVERROR_EOF;
  452. } else {
  453. ret = AVERROR(EAGAIN);
  454. }
  455. error:
  456. if (mapped_frame)
  457. eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
  458. eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
  459. if (eret < 0)
  460. return eret;
  461. else
  462. return ret;
  463. }
  464. static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
  465. {
  466. CuvidContext *ctx = avctx->priv_data;
  467. AVFrame *frame = data;
  468. int ret = 0;
  469. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
  470. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) {
  471. av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
  472. return AVERROR(EINVAL);
  473. }
  474. if (!ctx->decoder_flushing) {
  475. ret = cuvid_decode_packet(avctx, avpkt);
  476. if (ret < 0)
  477. return ret;
  478. }
  479. ret = cuvid_output_frame(avctx, frame);
  480. if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
  481. *got_frame = 0;
  482. } else if (ret < 0) {
  483. return ret;
  484. } else {
  485. *got_frame = 1;
  486. }
  487. return 0;
  488. }
  489. static av_cold int cuvid_decode_end(AVCodecContext *avctx)
  490. {
  491. CuvidContext *ctx = avctx->priv_data;
  492. av_fifo_freep(&ctx->frame_queue);
  493. if (ctx->bsf)
  494. av_bsf_free(&ctx->bsf);
  495. if (ctx->cuparser)
  496. ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
  497. if (ctx->cudecoder)
  498. ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
  499. ctx->cudl = NULL;
  500. av_buffer_unref(&ctx->hwframe);
  501. av_buffer_unref(&ctx->hwdevice);
  502. cuvid_free_functions(&ctx->cvdl);
  503. return 0;
  504. }
  505. static int cuvid_test_dummy_decoder(AVCodecContext *avctx,
  506. const CUVIDPARSERPARAMS *cuparseinfo,
  507. int probed_width,
  508. int probed_height)
  509. {
  510. CuvidContext *ctx = avctx->priv_data;
  511. CUVIDDECODECREATEINFO cuinfo;
  512. CUvideodecoder cudec = 0;
  513. int ret = 0;
  514. memset(&cuinfo, 0, sizeof(cuinfo));
  515. cuinfo.CodecType = cuparseinfo->CodecType;
  516. cuinfo.ChromaFormat = cudaVideoChromaFormat_420;
  517. cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  518. cuinfo.ulWidth = probed_width;
  519. cuinfo.ulHeight = probed_height;
  520. cuinfo.ulTargetWidth = cuinfo.ulWidth;
  521. cuinfo.ulTargetHeight = cuinfo.ulHeight;
  522. cuinfo.target_rect.left = 0;
  523. cuinfo.target_rect.top = 0;
  524. cuinfo.target_rect.right = cuinfo.ulWidth;
  525. cuinfo.target_rect.bottom = cuinfo.ulHeight;
  526. cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
  527. cuinfo.ulNumOutputSurfaces = 1;
  528. cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
  529. cuinfo.bitDepthMinus8 = 0;
  530. cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  531. ret = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&cudec, &cuinfo));
  532. if (ret < 0)
  533. return ret;
  534. ret = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(cudec));
  535. if (ret < 0)
  536. return ret;
  537. return 0;
  538. }
  539. static av_cold int cuvid_decode_init(AVCodecContext *avctx)
  540. {
  541. CuvidContext *ctx = avctx->priv_data;
  542. AVCUDADeviceContext *device_hwctx;
  543. AVHWDeviceContext *device_ctx;
  544. AVHWFramesContext *hwframe_ctx;
  545. CUVIDSOURCEDATAPACKET seq_pkt;
  546. CUcontext cuda_ctx = NULL;
  547. CUcontext dummy;
  548. const AVBitStreamFilter *bsf;
  549. int ret = 0;
  550. enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
  551. AV_PIX_FMT_NV12,
  552. AV_PIX_FMT_NONE };
  553. int probed_width = avctx->coded_width ? avctx->coded_width : 1280;
  554. int probed_height = avctx->coded_height ? avctx->coded_height : 720;
  555. // Accelerated transcoding scenarios with 'ffmpeg' require that the
  556. // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the
  557. // pix_fmt for non-accelerated transcoding, do not need to be correct
  558. // but need to be set to something. We arbitrarily pick NV12.
  559. ret = ff_get_format(avctx, pix_fmts);
  560. if (ret < 0) {
  561. av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
  562. return ret;
  563. }
  564. avctx->pix_fmt = ret;
  565. ret = cuvid_load_functions(&ctx->cvdl);
  566. if (ret < 0) {
  567. av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
  568. goto error;
  569. }
  570. ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
  571. if (!ctx->frame_queue) {
  572. ret = AVERROR(ENOMEM);
  573. goto error;
  574. }
  575. if (avctx->hw_frames_ctx) {
  576. ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
  577. if (!ctx->hwframe) {
  578. ret = AVERROR(ENOMEM);
  579. goto error;
  580. }
  581. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  582. ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
  583. if (!ctx->hwdevice) {
  584. ret = AVERROR(ENOMEM);
  585. goto error;
  586. }
  587. } else {
  588. ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
  589. if (ret < 0)
  590. goto error;
  591. ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
  592. if (!ctx->hwframe) {
  593. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
  594. ret = AVERROR(ENOMEM);
  595. goto error;
  596. }
  597. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  598. }
  599. device_ctx = hwframe_ctx->device_ctx;
  600. device_hwctx = device_ctx->hwctx;
  601. cuda_ctx = device_hwctx->cuda_ctx;
  602. ctx->cudl = device_hwctx->internal->cuda_dl;
  603. memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
  604. memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
  605. memset(&seq_pkt, 0, sizeof(seq_pkt));
  606. ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;
  607. switch (avctx->codec->id) {
  608. #if CONFIG_H264_CUVID_DECODER
  609. case AV_CODEC_ID_H264:
  610. ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
  611. break;
  612. #endif
  613. #if CONFIG_HEVC_CUVID_DECODER
  614. case AV_CODEC_ID_HEVC:
  615. ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
  616. break;
  617. #endif
  618. #if CONFIG_MJPEG_CUVID_DECODER
  619. case AV_CODEC_ID_MJPEG:
  620. ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
  621. break;
  622. #endif
  623. #if CONFIG_MPEG1_CUVID_DECODER
  624. case AV_CODEC_ID_MPEG1VIDEO:
  625. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
  626. break;
  627. #endif
  628. #if CONFIG_MPEG2_CUVID_DECODER
  629. case AV_CODEC_ID_MPEG2VIDEO:
  630. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
  631. break;
  632. #endif
  633. #if CONFIG_MPEG4_CUVID_DECODER
  634. case AV_CODEC_ID_MPEG4:
  635. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
  636. break;
  637. #endif
  638. #if CONFIG_VP8_CUVID_DECODER
  639. case AV_CODEC_ID_VP8:
  640. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
  641. break;
  642. #endif
  643. #if CONFIG_VP9_CUVID_DECODER
  644. case AV_CODEC_ID_VP9:
  645. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
  646. break;
  647. #endif
  648. #if CONFIG_VC1_CUVID_DECODER
  649. case AV_CODEC_ID_VC1:
  650. ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
  651. break;
  652. #endif
  653. default:
  654. av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
  655. return AVERROR_BUG;
  656. }
  657. if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
  658. if (avctx->codec->id == AV_CODEC_ID_H264)
  659. bsf = av_bsf_get_by_name("h264_mp4toannexb");
  660. else
  661. bsf = av_bsf_get_by_name("hevc_mp4toannexb");
  662. if (!bsf) {
  663. ret = AVERROR_BSF_NOT_FOUND;
  664. goto error;
  665. }
  666. if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
  667. goto error;
  668. }
  669. if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
  670. av_bsf_free(&ctx->bsf);
  671. goto error;
  672. }
  673. ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
  674. memcpy(ctx->cuparse_ext.raw_seqhdr_data,
  675. ctx->bsf->par_out->extradata,
  676. FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
  677. } else if (avctx->extradata_size > 0) {
  678. ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
  679. memcpy(ctx->cuparse_ext.raw_seqhdr_data,
  680. avctx->extradata,
  681. FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
  682. }
  683. ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
  684. ctx->cuparseinfo.ulMaxDisplayDelay = 4;
  685. ctx->cuparseinfo.pUserData = avctx;
  686. ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
  687. ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
  688. ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
  689. ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
  690. if (ret < 0)
  691. goto error;
  692. ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo,
  693. probed_width,
  694. probed_height);
  695. if (ret < 0)
  696. goto error;
  697. ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  698. if (ret < 0)
  699. goto error;
  700. seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
  701. seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  702. if (seq_pkt.payload && seq_pkt.payload_size) {
  703. ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  704. if (ret < 0)
  705. goto error;
  706. }
  707. ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
  708. if (ret < 0)
  709. goto error;
  710. ctx->prev_pts = INT64_MIN;
  711. if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
  712. av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
  713. return 0;
  714. error:
  715. cuvid_decode_end(avctx);
  716. return ret;
  717. }
  718. static void cuvid_flush(AVCodecContext *avctx)
  719. {
  720. CuvidContext *ctx = avctx->priv_data;
  721. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  722. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  723. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  724. CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
  725. int ret;
  726. ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
  727. if (ret < 0)
  728. goto error;
  729. av_fifo_freep(&ctx->frame_queue);
  730. ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
  731. if (!ctx->frame_queue) {
  732. av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
  733. return;
  734. }
  735. if (ctx->cudecoder) {
  736. ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
  737. ctx->cudecoder = NULL;
  738. }
  739. if (ctx->cuparser) {
  740. ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
  741. ctx->cuparser = NULL;
  742. }
  743. ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  744. if (ret < 0)
  745. goto error;
  746. seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
  747. seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  748. if (seq_pkt.payload && seq_pkt.payload_size) {
  749. ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  750. if (ret < 0)
  751. goto error;
  752. }
  753. ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
  754. if (ret < 0)
  755. goto error;
  756. ctx->prev_pts = INT64_MIN;
  757. ctx->decoder_flushing = 0;
  758. return;
  759. error:
  760. av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
  761. }
  762. #define OFFSET(x) offsetof(CuvidContext, x)
  763. #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
  764. static const AVOption options[] = {
  765. { "deint", "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT, { .i64 = cudaVideoDeinterlaceMode_Weave }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
  766. { "weave", "Weave deinterlacing (do nothing)", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave }, 0, 0, VD, "deint" },
  767. { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" },
  768. { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
  769. { "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
  770. { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
  771. { NULL }
  772. };
  /*
   * Emits everything needed to register one CUVID decoder:
   *   - <x>_cuvid_class       AVClass exposing the shared option table
   *   - ff_<x>_cuvid_hwaccel  hwaccel entry for AV_PIX_FMT_CUDA
   *   - ff_<x>_cuvid_decoder  the AVCodec itself
   *
   * x is the lower-case name prefix, X the suffix of the AV_CODEC_ID_ constant.
   */
  #define DEFINE_CUVID_CODEC(x, X)                                              \
      static const AVClass x##_cuvid_class = {                                  \
          .class_name = #x "_cuvid",                                            \
          .item_name  = av_default_item_name,                                   \
          .option     = options,                                                \
          .version    = LIBAVUTIL_VERSION_INT,                                  \
      };                                                                        \
      AVHWAccel ff_##x##_cuvid_hwaccel = {                                      \
          .name    = #x "_cuvid",                                               \
          .type    = AVMEDIA_TYPE_VIDEO,                                        \
          .id      = AV_CODEC_ID_##X,                                           \
          .pix_fmt = AV_PIX_FMT_CUDA,                                           \
      };                                                                        \
      AVCodec ff_##x##_cuvid_decoder = {                                        \
          .name           = #x "_cuvid",                                        \
          .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
          .type           = AVMEDIA_TYPE_VIDEO,                                 \
          .id             = AV_CODEC_ID_##X,                                    \
          .priv_data_size = sizeof(CuvidContext),                               \
          .priv_class     = &x##_cuvid_class,                                   \
          .init           = cuvid_decode_init,                                  \
          .close          = cuvid_decode_end,                                   \
          .decode         = cuvid_decode_frame,                                 \
          .send_packet    = cuvid_decode_packet,                                \
          .receive_frame  = cuvid_output_frame,                                 \
          .flush          = cuvid_flush,                                        \
          .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,    \
          .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA,      \
                                                          AV_PIX_FMT_NV12,      \
                                                          AV_PIX_FMT_P010,      \
                                                          AV_PIX_FMT_P016,      \
                                                          AV_PIX_FMT_NONE },    \
      };
  /*
   * One decoder per codec enabled at configure time, listed alphabetically.
   * The definitions do not depend on each other.
   */
  #if CONFIG_H264_CUVID_DECODER
  DEFINE_CUVID_CODEC(h264, H264)
  #endif

  #if CONFIG_HEVC_CUVID_DECODER
  DEFINE_CUVID_CODEC(hevc, HEVC)
  #endif

  #if CONFIG_MJPEG_CUVID_DECODER
  DEFINE_CUVID_CODEC(mjpeg, MJPEG)
  #endif

  #if CONFIG_MPEG1_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
  #endif

  #if CONFIG_MPEG2_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
  #endif

  #if CONFIG_MPEG4_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg4, MPEG4)
  #endif

  #if CONFIG_VC1_CUVID_DECODER
  DEFINE_CUVID_CODEC(vc1, VC1)
  #endif

  #if CONFIG_VP8_CUVID_DECODER
  DEFINE_CUVID_CODEC(vp8, VP8)
  #endif

  #if CONFIG_VP9_CUVID_DECODER
  DEFINE_CUVID_CODEC(vp9, VP9)
  #endif