You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1201 lines
40KB

  1. /*
  2. * Nvidia CUVID decoder
  3. * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "compat/cuda/dynlink_loader.h"
  22. #include "libavutil/buffer.h"
  23. #include "libavutil/mathematics.h"
  24. #include "libavutil/hwcontext.h"
  25. #include "libavutil/hwcontext_cuda_internal.h"
  26. #include "libavutil/cuda_check.h"
  27. #include "libavutil/fifo.h"
  28. #include "libavutil/log.h"
  29. #include "libavutil/opt.h"
  30. #include "libavutil/pixdesc.h"
  31. #include "avcodec.h"
  32. #include "decode.h"
  33. #include "hwconfig.h"
  34. #include "nvdec.h"
  35. #include "internal.h"
  36. #if !NVDECAPI_CHECK_VERSION(9, 0)
  37. #define cudaVideoSurfaceFormat_YUV444 2
  38. #define cudaVideoSurfaceFormat_YUV444_16Bit 3
  39. #endif
  40. #if NVDECAPI_CHECK_VERSION(11, 0)
  41. #define CUVID_HAS_AV1_SUPPORT
  42. #endif
/* Private decoder state for the cuvid (NVDEC via NVCUVID parser) decoders. */
typedef struct CuvidContext
{
    AVClass *avclass;

    CUvideodecoder cudecoder;  // NVDEC decoder handle, (re)created in the sequence callback
    CUvideoparser cuparser;    // bitstream parser that drives the decode/display callbacks

    /* user options */
    char *cu_gpu;              // GPU to use when we create the CUDA device ourselves
    int nb_surfaces;           // number of decode surfaces; bounds the frame queue
    int drop_second_field;     // when deinterlacing, emit only one field per picture
    char *crop_expr;           // crop option string, parsed as "top x bottom x left x right"
    char *resize_expr;         // resize option string, parsed as "WxH"

    struct {
        int left;
        int top;
        int right;
        int bottom;
    } crop;                    // parsed crop_expr

    struct {
        int width;
        int height;
    } resize;                  // parsed resize_expr

    AVBufferRef *hwdevice;     // CUDA device context reference
    AVBufferRef *hwframe;      // CUDA frames context reference

    AVFifoBuffer *frame_queue; // queue of CuvidParsedFrame entries ready for output

    int deint_mode;            // user-requested deinterlace mode
    int deint_mode_current;    // effective mode (forced to Weave for progressive input)
    int64_t prev_pts;          // previous output pts, used to interpolate second-field pts
    int progressive_sequence;  // copied from the current CUVIDEOFORMAT

    int internal_error;        // error raised inside a parser callback (callbacks only return 0/1)
    int decoder_flushing;      // set once an end-of-stream packet has been fed to the parser

    int *key_frame;            // per-surface intra flags, indexed by CurrPicIdx

    cudaVideoCodec codec_type;
    cudaVideoChromaFormat chroma_format;

    CUVIDDECODECAPS caps8, caps10, caps12; // capability query results per bit depth

    CUVIDPARSERPARAMS cuparseinfo;
    CUVIDEOFORMATEX *cuparse_ext;          // extradata wrapper handed to the parser

    CudaFunctions *cudl;       // dynamically loaded CUDA driver API
    CuvidFunctions *cvdl;      // dynamically loaded NVCUVID API
} CuvidContext;
/* One frame_queue entry: the parser's display callback result plus
 * bookkeeping for the deinterlacing two-fields-per-picture case. */
typedef struct CuvidParsedFrame
{
    CUVIDPARSERDISPINFO dispinfo;
    int second_field;     // entry is the second field of an interlaced pair
    int is_deinterlacing; // produced while deinterlacing (clears interlaced_frame on output)
} CuvidParsedFrame;
/* Check a CUDA/CUVID call and log failures against avctx; expects a local
 * `CuvidContext *ctx` in scope that provides the loaded function table. */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
/* Parser sequence callback: negotiates formats and (re)creates the NVDEC
 * decoder whenever a new sequence header is seen.
 *
 * cuvid callbacks must return 1 on success and 0 on failure; the real error
 * code is stashed in ctx->internal_error because the API discards it. */
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
{
    AVCodecContext *avctx = opaque;
    CuvidContext *ctx = avctx->priv_data;
    AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
    CUVIDDECODECAPS *caps = NULL;
    CUVIDDECODECREATEINFO cuinfo;
    int surface_fmt;
    int chroma_444;

    int old_width = avctx->width;
    int old_height = avctx->height;

    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
                                       AV_PIX_FMT_NONE,  // Will be updated below
                                       AV_PIX_FMT_NONE };

    av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);

    memset(&cuinfo, 0, sizeof(cuinfo));

    ctx->internal_error = 0;

    avctx->coded_width = cuinfo.ulWidth = format->coded_width;
    avctx->coded_height = cuinfo.ulHeight = format->coded_height;

    // apply cropping
    cuinfo.display_area.left = format->display_area.left + ctx->crop.left;
    cuinfo.display_area.top = format->display_area.top + ctx->crop.top;
    cuinfo.display_area.right = format->display_area.right - ctx->crop.right;
    cuinfo.display_area.bottom = format->display_area.bottom - ctx->crop.bottom;

    // width and height need to be set before calling ff_get_format
    if (ctx->resize_expr) {
        avctx->width = ctx->resize.width;
        avctx->height = ctx->resize.height;
    } else {
        avctx->width = cuinfo.display_area.right - cuinfo.display_area.left;
        avctx->height = cuinfo.display_area.bottom - cuinfo.display_area.top;
    }

    // target width/height need to be multiples of two
    cuinfo.ulTargetWidth = avctx->width = (avctx->width + 1) & ~1;
    cuinfo.ulTargetHeight = avctx->height = (avctx->height + 1) & ~1;

    // aspect ratio conversion, 1:1, depends on scaled resolution
    cuinfo.target_rect.left = 0;
    cuinfo.target_rect.top = 0;
    cuinfo.target_rect.right = cuinfo.ulTargetWidth;
    cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;

    chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;

    // pick sw pixel format + the matching capability record for this bit depth
    switch (format->bit_depth_luma_minus8) {
    case 0: // 8-bit
        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
        caps = &ctx->caps8;
        break;
    case 2: // 10-bit
        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
        caps = &ctx->caps10;
        break;
    case 4: // 12-bit
        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
        caps = &ctx->caps12;
        break;
    default:
        break;
    }

    if (!caps || !caps->bIsSupported) {
        av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n",
               format->bit_depth_luma_minus8 + 8);
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    surface_fmt = ff_get_format(avctx, pix_fmts);
    if (surface_fmt < 0) {
        av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
           av_get_pix_fmt_name(avctx->pix_fmt),
           av_get_pix_fmt_name(surface_fmt),
           av_get_pix_fmt_name(avctx->sw_pix_fmt));

    avctx->pix_fmt = surface_fmt;

    // Update our hwframe ctx, as the get_format callback might have refreshed it!
    if (avctx->hw_frames_ctx) {
        av_buffer_unref(&ctx->hwframe);

        ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
        if (!ctx->hwframe) {
            ctx->internal_error = AVERROR(ENOMEM);
            return 0;
        }

        hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
    }

    ff_set_sar(avctx, av_div_q(
        (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
        (AVRational){ avctx->width, avctx->height }));

    // progressive input always uses Weave, regardless of the user option
    ctx->deint_mode_current = format->progressive_sequence
                              ? cudaVideoDeinterlaceMode_Weave
                              : ctx->deint_mode;

    ctx->progressive_sequence = format->progressive_sequence;

    if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave)
        avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
    else
        avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;

    if (format->video_signal_description.video_full_range_flag)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    avctx->color_primaries = format->video_signal_description.color_primaries;
    avctx->color_trc = format->video_signal_description.transfer_characteristics;
    avctx->colorspace = format->video_signal_description.matrix_coefficients;

    if (format->bitrate)
        avctx->bit_rate = format->bitrate;

    if (format->frame_rate.numerator && format->frame_rate.denominator) {
        avctx->framerate.num = format->frame_rate.numerator;
        avctx->framerate.den = format->frame_rate.denominator;
    }

    // nothing to do if the existing decoder already matches the new sequence
    if (ctx->cudecoder
            && avctx->coded_width == format->coded_width
            && avctx->coded_height == format->coded_height
            && avctx->width == old_width
            && avctx->height == old_height
            && ctx->chroma_format == format->chroma_format
            && ctx->codec_type == format->codec)
        return 1;

    if (ctx->cudecoder) {
        av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
        ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder));
        if (ctx->internal_error < 0)
            return 0;
        ctx->cudecoder = NULL;
    }

    // an externally supplied, already-initialized frames ctx must be compatible
    if (hwframe_ctx->pool && (
            hwframe_ctx->width < avctx->width ||
            hwframe_ctx->height < avctx->height ||
            hwframe_ctx->format != AV_PIX_FMT_CUDA ||
            hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
        av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
        av_log(avctx, AV_LOG_DEBUG, "width: %d <-> %d\n", hwframe_ctx->width, avctx->width);
        av_log(avctx, AV_LOG_DEBUG, "height: %d <-> %d\n", hwframe_ctx->height, avctx->height);
        av_log(avctx, AV_LOG_DEBUG, "format: %s <-> cuda\n", av_get_pix_fmt_name(hwframe_ctx->format));
        av_log(avctx, AV_LOG_DEBUG, "sw_format: %s <-> %s\n",
               av_get_pix_fmt_name(hwframe_ctx->sw_format), av_get_pix_fmt_name(avctx->sw_pix_fmt));
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    ctx->chroma_format = format->chroma_format;

    cuinfo.CodecType = ctx->codec_type = format->codec;
    cuinfo.ChromaFormat = format->chroma_format;

    switch (avctx->sw_pix_fmt) {
    case AV_PIX_FMT_NV12:
        cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
        break;
    case AV_PIX_FMT_P010:
    case AV_PIX_FMT_P016:
        cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
        break;
    case AV_PIX_FMT_YUV444P:
        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
        break;
    case AV_PIX_FMT_YUV444P16:
        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
               av_get_pix_fmt_name(avctx->sw_pix_fmt));
        ctx->internal_error = AVERROR(EINVAL);
        return 0;
    }

    cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
    cuinfo.ulNumOutputSurfaces = 1;
    cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
    cuinfo.DeinterlaceMode = ctx->deint_mode_current;

    // field-rate deinterlacing outputs one frame per field -> double the rate
    if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
        avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});

    ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
    if (ctx->internal_error < 0)
        return 0;

    if (!hwframe_ctx->pool) {
        hwframe_ctx->format = AV_PIX_FMT_CUDA;
        hwframe_ctx->sw_format = avctx->sw_pix_fmt;
        hwframe_ctx->width = avctx->width;
        hwframe_ctx->height = avctx->height;

        if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
            av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
            return 0;
        }
    }

    return 1;
}
  270. static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
  271. {
  272. AVCodecContext *avctx = opaque;
  273. CuvidContext *ctx = avctx->priv_data;
  274. av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
  275. ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
  276. ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
  277. if (ctx->internal_error < 0)
  278. return 0;
  279. return 1;
  280. }
  281. static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
  282. {
  283. AVCodecContext *avctx = opaque;
  284. CuvidContext *ctx = avctx->priv_data;
  285. CuvidParsedFrame parsed_frame = { { 0 } };
  286. parsed_frame.dispinfo = *dispinfo;
  287. ctx->internal_error = 0;
  288. // For some reason, dispinfo->progressive_frame is sometimes wrong.
  289. parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
  290. if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
  291. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  292. } else {
  293. parsed_frame.is_deinterlacing = 1;
  294. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  295. if (!ctx->drop_second_field) {
  296. parsed_frame.second_field = 1;
  297. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  298. }
  299. }
  300. return 1;
  301. }
  302. static int cuvid_is_buffer_full(AVCodecContext *avctx)
  303. {
  304. CuvidContext *ctx = avctx->priv_data;
  305. int delay = ctx->cuparseinfo.ulMaxDisplayDelay;
  306. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
  307. delay *= 2;
  308. return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
  309. }
/* Feed one packet to the cuvid parser; a NULL/empty packet starts flushing.
 * Timestamps are rescaled to the 10 MHz clock cuvid uses. Returns 0 on
 * success, AVERROR(EAGAIN) when the frame queue is full, AVERROR_EOF once
 * flushing, or a negative error code. */
static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
{
    CuvidContext *ctx = avctx->priv_data;
    AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    CUVIDSOURCEDATAPACKET cupkt;
    int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;

    av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");

    // no new data is accepted once flushing has started
    if (is_flush && avpkt && avpkt->size)
        return AVERROR_EOF;

    // back-pressure: don't queue more than the decoder has surfaces for
    if (cuvid_is_buffer_full(avctx) && avpkt && avpkt->size)
        return AVERROR(EAGAIN);

    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0) {
        return ret;
    }

    memset(&cupkt, 0, sizeof(cupkt));

    if (avpkt && avpkt->size) {
        cupkt.payload_size = avpkt->size;
        cupkt.payload = avpkt->data;

        if (avpkt->pts != AV_NOPTS_VALUE) {
            cupkt.flags = CUVID_PKT_TIMESTAMP;
            // cuvid timestamps run on a 10 MHz clock
            if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
                cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
            else
                cupkt.timestamp = avpkt->pts;
        }
    } else {
        // empty/NULL packet signals end of stream and starts the flush
        cupkt.flags = CUVID_PKT_ENDOFSTREAM;
        ctx->decoder_flushing = 1;
    }

    ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));

    if (ret < 0)
        goto error;

    // cuvidParseVideoData doesn't return an error just because stuff failed...
    if (ctx->internal_error) {
        av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
        ret = ctx->internal_error;
        goto error;
    }

error:
    // fall-through on success is intentional: the context is always popped
    eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));

    if (eret < 0)
        return eret;
    else if (ret < 0)
        return ret;
    else if (is_flush)
        return AVERROR_EOF;
    else
        return 0;
}
/* Produce one decoded frame from the frame queue, pumping packets from
 * ff_decode_get_packet() into the parser as needed. The mapped NVDEC
 * surface is either copied plane-by-plane into a CUDA hwframe or wrapped
 * in a temporary frame and downloaded to system memory.
 * Returns 0, AVERROR(EAGAIN) when no frame is available yet, AVERROR_EOF
 * at end of stream, or a negative error code. */
static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
{
    CuvidContext *ctx = avctx->priv_data;
    AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    CUdeviceptr mapped_frame = 0;
    int ret = 0, eret = 0;

    av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");

    if (ctx->decoder_flushing) {
        // keep draining the parser while flushing
        ret = cuvid_decode_packet(avctx, NULL);
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
    }

    if (!cuvid_is_buffer_full(avctx)) {
        AVPacket pkt = {0};
        ret = ff_decode_get_packet(avctx, &pkt);
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
        ret = cuvid_decode_packet(avctx, &pkt);
        av_packet_unref(&pkt);
        // cuvid_is_buffer_full() should avoid this.
        if (ret == AVERROR(EAGAIN))
            ret = AVERROR_EXTERNAL;
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
    }

    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        return ret;

    if (av_fifo_size(ctx->frame_queue)) {
        const AVPixFmtDescriptor *pixdesc;
        CuvidParsedFrame parsed_frame;
        CUVIDPROCPARAMS params;
        unsigned int pitch = 0;
        int offset = 0;
        int i;

        av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);

        memset(&params, 0, sizeof(params));
        params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
        params.second_field = parsed_frame.second_field;
        params.top_field_first = parsed_frame.dispinfo.top_field_first;

        // map the decoded surface; it must be unmapped again before returning
        ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
        if (ret < 0)
            goto error;

        if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
            // hw output: copy each plane into a frame from our frames context
            ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
                goto error;
            }

            ret = ff_decode_frame_props(avctx, frame);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
                goto error;
            }

            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

            // planes are stacked vertically in the mapped surface (srcY walks
            // down by the height of each previous plane)
            for (i = 0; i < pixdesc->nb_components; i++) {
                int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
                CUDA_MEMCPY2D cpy = {
                    .srcMemoryType = CU_MEMORYTYPE_DEVICE,
                    .dstMemoryType = CU_MEMORYTYPE_DEVICE,
                    .srcDevice     = mapped_frame,
                    .dstDevice     = (CUdeviceptr)frame->data[i],
                    .srcPitch      = pitch,
                    .dstPitch      = frame->linesize[i],
                    .srcY          = offset,
                    .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
                    .Height        = height,
                };

                ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
                if (ret < 0)
                    goto error;

                offset += height;
            }
        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
                   avctx->pix_fmt == AV_PIX_FMT_P010 ||
                   avctx->pix_fmt == AV_PIX_FMT_P016 ||
                   avctx->pix_fmt == AV_PIX_FMT_YUV444P ||
                   avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
            // sw output: wrap the mapped surface in a temporary CUDA frame
            // and download it into a normal system-memory frame
            unsigned int offset = 0;
            AVFrame *tmp_frame = av_frame_alloc();
            if (!tmp_frame) {
                av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
                ret = AVERROR(ENOMEM);
                goto error;
            }

            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

            tmp_frame->format = AV_PIX_FMT_CUDA;
            tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
            if (!tmp_frame->hw_frames_ctx) {
                ret = AVERROR(ENOMEM);
                av_frame_free(&tmp_frame);
                goto error;
            }

            tmp_frame->width = avctx->width;
            tmp_frame->height = avctx->height;

            /*
             * Note that the following logic would not work for three plane
             * YUV420 because the pitch value is different for the chroma
             * planes.
             */
            for (i = 0; i < pixdesc->nb_components; i++) {
                tmp_frame->data[i] = (uint8_t*)mapped_frame + offset;
                tmp_frame->linesize[i] = pitch;
                offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
            }

            ret = ff_get_buffer(avctx, frame, 0);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
                av_frame_free(&tmp_frame);
                goto error;
            }

            ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
            if (ret) {
                av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
                av_frame_free(&tmp_frame);
                goto error;
            }
            av_frame_free(&tmp_frame);
        } else {
            ret = AVERROR_BUG;
            goto error;
        }

        frame->key_frame = ctx->key_frame[parsed_frame.dispinfo.picture_index];
        frame->width = avctx->width;
        frame->height = avctx->height;
        if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
            frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
        else
            frame->pts = parsed_frame.dispinfo.timestamp;

        // both fields share the decoder timestamp; offset the second field
        // by half a frame duration, derived from the previous frame's pts
        if (parsed_frame.second_field) {
            if (ctx->prev_pts == INT64_MIN) {
                ctx->prev_pts = frame->pts;
                frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
            } else {
                // NOTE(review): pts_diff is an int while pts is int64_t —
                // presumably per-frame deltas always fit; TODO confirm.
                int pts_diff = (frame->pts - ctx->prev_pts) / 2;
                ctx->prev_pts = frame->pts;
                frame->pts += pts_diff;
            }
        }

        /* CUVIDs opaque reordering breaks the internal pkt logic.
         * So set pkt_pts and clear all the other pkt_ fields.
         */
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        frame->pkt_pts = frame->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        frame->pkt_pos = -1;
        frame->pkt_duration = 0;
        frame->pkt_size = -1;

        frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;

        if (frame->interlaced_frame)
            frame->top_field_first = parsed_frame.dispinfo.top_field_first;
    } else if (ctx->decoder_flushing) {
        ret = AVERROR_EOF;
    } else {
        ret = AVERROR(EAGAIN);
    }

error:
    if (ret < 0)
        av_frame_unref(frame);

    // always unmap the surface and pop the context, even on error
    if (mapped_frame)
        eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));

    eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
    if (eret < 0)
        return eret;
    else
        return ret;
}
  533. static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
  534. {
  535. CuvidContext *ctx = avctx->priv_data;
  536. AVFrame *frame = data;
  537. int ret = 0;
  538. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
  539. if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave) {
  540. av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
  541. return AVERROR(EINVAL);
  542. }
  543. if (!ctx->decoder_flushing) {
  544. ret = cuvid_decode_packet(avctx, avpkt);
  545. if (ret < 0)
  546. return ret;
  547. }
  548. ret = cuvid_output_frame(avctx, frame);
  549. if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
  550. *got_frame = 0;
  551. } else if (ret < 0) {
  552. return ret;
  553. } else {
  554. *got_frame = 1;
  555. }
  556. return 0;
  557. }
  558. static av_cold int cuvid_decode_end(AVCodecContext *avctx)
  559. {
  560. CuvidContext *ctx = avctx->priv_data;
  561. AVHWDeviceContext *device_ctx = (AVHWDeviceContext *)ctx->hwdevice->data;
  562. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  563. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  564. av_fifo_freep(&ctx->frame_queue);
  565. ctx->cudl->cuCtxPushCurrent(cuda_ctx);
  566. if (ctx->cuparser)
  567. ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
  568. if (ctx->cudecoder)
  569. ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
  570. ctx->cudl->cuCtxPopCurrent(&dummy);
  571. ctx->cudl = NULL;
  572. av_buffer_unref(&ctx->hwframe);
  573. av_buffer_unref(&ctx->hwdevice);
  574. av_freep(&ctx->key_frame);
  575. av_freep(&ctx->cuparse_ext);
  576. cuvid_free_functions(&ctx->cvdl);
  577. return 0;
  578. }
/* Query decoder capabilities for 8/10/12 bit, log them, and validate the
 * probed stream dimensions and bit depth against the matching record.
 * Returns 0 if the stream looks decodable, a negative AVERROR otherwise. */
static int cuvid_test_capabilities(AVCodecContext *avctx,
                                   const CUVIDPARSERPARAMS *cuparseinfo,
                                   int probed_width,
                                   int probed_height,
                                   int bit_depth)
{
    CuvidContext *ctx = avctx->priv_data;
    CUVIDDECODECAPS *caps;
    int res8 = 0, res10 = 0, res12 = 0;

    if (!ctx->cvdl->cuvidGetDecoderCaps) {
        // driver predates the caps API: assume 8/10 bit works, as it did
        // before the capability check existed
        av_log(avctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
        av_log(avctx, AV_LOG_WARNING, "The minimum required version is "
#if defined(_WIN32) || defined(__CYGWIN__)
            "378.66"
#else
            "378.13"
#endif
            ". Continuing blind.\n");
        ctx->caps8.bIsSupported = ctx->caps10.bIsSupported = 1;
        // 12 bit was not supported before the capability check was introduced, so disable it.
        ctx->caps12.bIsSupported = 0;
        return 0;
    }

    ctx->caps8.eCodecType = ctx->caps10.eCodecType = ctx->caps12.eCodecType
        = cuparseinfo->CodecType;
    ctx->caps8.eChromaFormat = ctx->caps10.eChromaFormat = ctx->caps12.eChromaFormat
        = cudaVideoChromaFormat_420;

    ctx->caps8.nBitDepthMinus8 = 0;
    ctx->caps10.nBitDepthMinus8 = 2;
    ctx->caps12.nBitDepthMinus8 = 4;

    res8 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps8));
    res10 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps10));
    res12 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps12));

    av_log(avctx, AV_LOG_VERBOSE, "CUVID capabilities for %s:\n", avctx->codec->name);
    av_log(avctx, AV_LOG_VERBOSE, "8 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
           ctx->caps8.bIsSupported, ctx->caps8.nMinWidth, ctx->caps8.nMaxWidth, ctx->caps8.nMinHeight, ctx->caps8.nMaxHeight);
    av_log(avctx, AV_LOG_VERBOSE, "10 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
           ctx->caps10.bIsSupported, ctx->caps10.nMinWidth, ctx->caps10.nMaxWidth, ctx->caps10.nMinHeight, ctx->caps10.nMaxHeight);
    av_log(avctx, AV_LOG_VERBOSE, "12 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
           ctx->caps12.bIsSupported, ctx->caps12.nMinWidth, ctx->caps12.nMaxWidth, ctx->caps12.nMinHeight, ctx->caps12.nMaxHeight);

    // only the caps query matching the probed bit depth is treated as fatal
    switch (bit_depth) {
    case 10:
        caps = &ctx->caps10;
        if (res10 < 0)
            return res10;
        break;
    case 12:
        caps = &ctx->caps12;
        if (res12 < 0)
            return res12;
        break;
    default:
        caps = &ctx->caps8;
        if (res8 < 0)
            return res8;
    }

    if (!ctx->caps8.bIsSupported) {
        av_log(avctx, AV_LOG_ERROR, "Codec %s is not supported.\n", avctx->codec->name);
        return AVERROR(EINVAL);
    }

    if (!caps->bIsSupported) {
        av_log(avctx, AV_LOG_ERROR, "Bit depth %d is not supported.\n", bit_depth);
        return AVERROR(EINVAL);
    }

    if (probed_width > caps->nMaxWidth || probed_width < caps->nMinWidth) {
        av_log(avctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
               probed_width, caps->nMinWidth, caps->nMaxWidth);
        return AVERROR(EINVAL);
    }

    if (probed_height > caps->nMaxHeight || probed_height < caps->nMinHeight) {
        av_log(avctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
               probed_height, caps->nMinHeight, caps->nMaxHeight);
        return AVERROR(EINVAL);
    }

    // nMaxMBCount is in 16x16 macroblocks, i.e. width*height/256
    if ((probed_width * probed_height) / 256 > caps->nMaxMBCount) {
        av_log(avctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
               (int)(probed_width * probed_height) / 256, caps->nMaxMBCount);
        return AVERROR(EINVAL);
    }

    return 0;
}
  660. static av_cold int cuvid_decode_init(AVCodecContext *avctx)
  661. {
  662. CuvidContext *ctx = avctx->priv_data;
  663. AVCUDADeviceContext *device_hwctx;
  664. AVHWDeviceContext *device_ctx;
  665. AVHWFramesContext *hwframe_ctx;
  666. CUVIDSOURCEDATAPACKET seq_pkt;
  667. CUcontext cuda_ctx = NULL;
  668. CUcontext dummy;
  669. uint8_t *extradata;
  670. int extradata_size;
  671. int ret = 0;
  672. enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
  673. AV_PIX_FMT_NV12,
  674. AV_PIX_FMT_NONE };
  675. int probed_width = avctx->coded_width ? avctx->coded_width : 1280;
  676. int probed_height = avctx->coded_height ? avctx->coded_height : 720;
  677. int probed_bit_depth = 8;
  678. const AVPixFmtDescriptor *probe_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
  679. if (probe_desc && probe_desc->nb_components)
  680. probed_bit_depth = probe_desc->comp[0].depth;
  681. // Accelerated transcoding scenarios with 'ffmpeg' require that the
  682. // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the
  683. // pix_fmt for non-accelerated transcoding, do not need to be correct
  684. // but need to be set to something. We arbitrarily pick NV12.
  685. ret = ff_get_format(avctx, pix_fmts);
  686. if (ret < 0) {
  687. av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
  688. return ret;
  689. }
  690. avctx->pix_fmt = ret;
  691. if (ctx->resize_expr && sscanf(ctx->resize_expr, "%dx%d",
  692. &ctx->resize.width, &ctx->resize.height) != 2) {
  693. av_log(avctx, AV_LOG_ERROR, "Invalid resize expressions\n");
  694. ret = AVERROR(EINVAL);
  695. goto error;
  696. }
  697. if (ctx->crop_expr && sscanf(ctx->crop_expr, "%dx%dx%dx%d",
  698. &ctx->crop.top, &ctx->crop.bottom,
  699. &ctx->crop.left, &ctx->crop.right) != 4) {
  700. av_log(avctx, AV_LOG_ERROR, "Invalid cropping expressions\n");
  701. ret = AVERROR(EINVAL);
  702. goto error;
  703. }
  704. ret = cuvid_load_functions(&ctx->cvdl, avctx);
  705. if (ret < 0) {
  706. av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
  707. goto error;
  708. }
  709. ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
  710. if (!ctx->frame_queue) {
  711. ret = AVERROR(ENOMEM);
  712. goto error;
  713. }
  714. if (avctx->hw_frames_ctx) {
  715. ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
  716. if (!ctx->hwframe) {
  717. ret = AVERROR(ENOMEM);
  718. goto error;
  719. }
  720. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  721. ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
  722. if (!ctx->hwdevice) {
  723. ret = AVERROR(ENOMEM);
  724. goto error;
  725. }
  726. } else {
  727. if (avctx->hw_device_ctx) {
  728. ctx->hwdevice = av_buffer_ref(avctx->hw_device_ctx);
  729. if (!ctx->hwdevice) {
  730. ret = AVERROR(ENOMEM);
  731. goto error;
  732. }
  733. } else {
  734. ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
  735. if (ret < 0)
  736. goto error;
  737. }
  738. ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
  739. if (!ctx->hwframe) {
  740. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
  741. ret = AVERROR(ENOMEM);
  742. goto error;
  743. }
  744. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  745. }
  746. device_ctx = hwframe_ctx->device_ctx;
  747. device_hwctx = device_ctx->hwctx;
  748. cuda_ctx = device_hwctx->cuda_ctx;
  749. ctx->cudl = device_hwctx->internal->cuda_dl;
  750. memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
  751. memset(&seq_pkt, 0, sizeof(seq_pkt));
  752. switch (avctx->codec->id) {
  753. #if CONFIG_H264_CUVID_DECODER
  754. case AV_CODEC_ID_H264:
  755. ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
  756. break;
  757. #endif
  758. #if CONFIG_HEVC_CUVID_DECODER
  759. case AV_CODEC_ID_HEVC:
  760. ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
  761. break;
  762. #endif
  763. #if CONFIG_MJPEG_CUVID_DECODER
  764. case AV_CODEC_ID_MJPEG:
  765. ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
  766. break;
  767. #endif
  768. #if CONFIG_MPEG1_CUVID_DECODER
  769. case AV_CODEC_ID_MPEG1VIDEO:
  770. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
  771. break;
  772. #endif
  773. #if CONFIG_MPEG2_CUVID_DECODER
  774. case AV_CODEC_ID_MPEG2VIDEO:
  775. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
  776. break;
  777. #endif
  778. #if CONFIG_MPEG4_CUVID_DECODER
  779. case AV_CODEC_ID_MPEG4:
  780. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
  781. break;
  782. #endif
  783. #if CONFIG_VP8_CUVID_DECODER
  784. case AV_CODEC_ID_VP8:
  785. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
  786. break;
  787. #endif
  788. #if CONFIG_VP9_CUVID_DECODER
  789. case AV_CODEC_ID_VP9:
  790. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
  791. break;
  792. #endif
  793. #if CONFIG_VC1_CUVID_DECODER
  794. case AV_CODEC_ID_VC1:
  795. ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
  796. break;
  797. #endif
  798. #if CONFIG_AV1_CUVID_DECODER && defined(CUVID_HAS_AV1_SUPPORT)
  799. case AV_CODEC_ID_AV1:
  800. ctx->cuparseinfo.CodecType = cudaVideoCodec_AV1;
  801. break;
  802. #endif
  803. default:
  804. av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
  805. return AVERROR_BUG;
  806. }
  807. if (avctx->codec->bsfs) {
  808. const AVCodecParameters *par = avctx->internal->bsf->par_out;
  809. extradata = par->extradata;
  810. extradata_size = par->extradata_size;
  811. } else {
  812. extradata = avctx->extradata;
  813. extradata_size = avctx->extradata_size;
  814. }
  815. ctx->cuparse_ext = av_mallocz(sizeof(*ctx->cuparse_ext)
  816. + FFMAX(extradata_size - (int)sizeof(ctx->cuparse_ext->raw_seqhdr_data), 0));
  817. if (!ctx->cuparse_ext) {
  818. ret = AVERROR(ENOMEM);
  819. goto error;
  820. }
  821. if (extradata_size > 0)
  822. memcpy(ctx->cuparse_ext->raw_seqhdr_data, extradata, extradata_size);
  823. ctx->cuparse_ext->format.seqhdr_data_length = extradata_size;
  824. ctx->cuparseinfo.pExtVideoInfo = ctx->cuparse_ext;
  825. ctx->key_frame = av_mallocz(ctx->nb_surfaces * sizeof(int));
  826. if (!ctx->key_frame) {
  827. ret = AVERROR(ENOMEM);
  828. goto error;
  829. }
  830. ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
  831. ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : 4;
  832. ctx->cuparseinfo.pUserData = avctx;
  833. ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
  834. ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
  835. ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
  836. ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
  837. if (ret < 0)
  838. goto error;
  839. ret = cuvid_test_capabilities(avctx, &ctx->cuparseinfo,
  840. probed_width,
  841. probed_height,
  842. probed_bit_depth);
  843. if (ret < 0)
  844. goto error;
  845. ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  846. if (ret < 0)
  847. goto error;
  848. seq_pkt.payload = ctx->cuparse_ext->raw_seqhdr_data;
  849. seq_pkt.payload_size = ctx->cuparse_ext->format.seqhdr_data_length;
  850. if (seq_pkt.payload && seq_pkt.payload_size) {
  851. ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  852. if (ret < 0)
  853. goto error;
  854. }
  855. ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
  856. if (ret < 0)
  857. goto error;
  858. ctx->prev_pts = INT64_MIN;
  859. if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
  860. av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
  861. return 0;
  862. error:
  863. cuvid_decode_end(avctx);
  864. return ret;
  865. }
  866. static void cuvid_flush(AVCodecContext *avctx)
  867. {
  868. CuvidContext *ctx = avctx->priv_data;
  869. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  870. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  871. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  872. CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
  873. int ret;
  874. ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
  875. if (ret < 0)
  876. goto error;
  877. av_fifo_freep(&ctx->frame_queue);
  878. ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
  879. if (!ctx->frame_queue) {
  880. av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
  881. return;
  882. }
  883. if (ctx->cudecoder) {
  884. ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
  885. ctx->cudecoder = NULL;
  886. }
  887. if (ctx->cuparser) {
  888. ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
  889. ctx->cuparser = NULL;
  890. }
  891. ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  892. if (ret < 0)
  893. goto error;
  894. seq_pkt.payload = ctx->cuparse_ext->raw_seqhdr_data;
  895. seq_pkt.payload_size = ctx->cuparse_ext->format.seqhdr_data_length;
  896. if (seq_pkt.payload && seq_pkt.payload_size) {
  897. ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  898. if (ret < 0)
  899. goto error;
  900. }
  901. ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
  902. if (ret < 0)
  903. goto error;
  904. ctx->prev_pts = INT64_MIN;
  905. ctx->decoder_flushing = 0;
  906. return;
  907. error:
  908. av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
  909. }
/* Shorthand for AVOption offsets into the private context. */
#define OFFSET(x) offsetof(CuvidContext, x)
/* All options apply to video decoding. */
#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM

/* Private options shared by all *_cuvid decoders. The "weave"/"bob"/
 * "adaptive" constants belong to the "deint" option's unit. */
static const AVOption options[] = {
    { "deint", "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT, { .i64 = cudaVideoDeinterlaceMode_Weave }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
    { "weave", "Weave deinterlacing (do nothing)", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave }, 0, 0, VD, "deint" },
    { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" },
    { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
    { "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
    /* Default of 25 surfaces; also used to size the output frame queue. */
    { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
    { "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
    /* Both strings are parsed elsewhere; format documented in the help text. */
    { "crop", "Crop (top)x(bottom)x(left)x(right)", OFFSET(crop_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
    { "resize", "Resize (width)x(height)", OFFSET(resize_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
    { NULL }
};
/* Hardware configuration advertised to the generic decode layer: the
 * decoder outputs AV_PIX_FMT_CUDA frames and can either import a
 * user-supplied CUDA device context (HW_DEVICE_CTX) or create one
 * internally (INTERNAL). .hwaccel is NULL because decoding happens
 * entirely inside this wrapper, not via the hwaccel mechanism. */
static const AVCodecHWConfigInternal *const cuvid_hw_configs[] = {
    &(const AVCodecHWConfigInternal) {
        .public = {
            .pix_fmt     = AV_PIX_FMT_CUDA,
            .methods     = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX |
                           AV_CODEC_HW_CONFIG_METHOD_INTERNAL,
            .device_type = AV_HWDEVICE_TYPE_CUDA
        },
        .hwaccel = NULL,
    },
    NULL
};
/* Stamp out one complete AVCodec (plus its AVClass) per wrapped codec.
 *   x        — lowercase short name used in identifiers ("h264", "vp9", ...)
 *   X        — AV_CODEC_ID_ suffix ("H264", "VP9", ...)
 *   bsf_name — bitstream filter chain to apply before decoding, or NULL
 *              (e.g. "h264_mp4toannexb" for MP4-packaged H.264).
 * All decoders share the same option table, hw configs and callbacks. */
#define DEFINE_CUVID_CODEC(x, X, bsf_name) \
    static const AVClass x##_cuvid_class = { \
        .class_name = #x "_cuvid", \
        .item_name = av_default_item_name, \
        .option = options, \
        .version = LIBAVUTIL_VERSION_INT, \
    }; \
    AVCodec ff_##x##_cuvid_decoder = { \
        .name           = #x "_cuvid", \
        .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
        .type           = AVMEDIA_TYPE_VIDEO, \
        .id             = AV_CODEC_ID_##X, \
        .priv_data_size = sizeof(CuvidContext), \
        .priv_class     = &x##_cuvid_class, \
        .init           = cuvid_decode_init, \
        .close          = cuvid_decode_end, \
        .decode         = cuvid_decode_frame, \
        .receive_frame  = cuvid_output_frame, \
        .flush          = cuvid_flush, \
        .bsfs           = bsf_name, \
        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
        .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
                                                        AV_PIX_FMT_NV12, \
                                                        AV_PIX_FMT_P010, \
                                                        AV_PIX_FMT_P016, \
                                                        AV_PIX_FMT_NONE }, \
        .hw_configs     = cuvid_hw_configs, \
        .wrapper_name   = "cuvid", \
    };
/* Instantiate one wrapper decoder per codec enabled at configure time.
 * H.264 and HEVC need the mp4toannexb bitstream filter because the CUVID
 * parser only accepts Annex-B elementary streams; AV1 additionally
 * requires an SDK with AV1 support (CUVID_HAS_AV1_SUPPORT). */
#if CONFIG_AV1_CUVID_DECODER && defined(CUVID_HAS_AV1_SUPPORT)
DEFINE_CUVID_CODEC(av1, AV1, NULL)
#endif

#if CONFIG_HEVC_CUVID_DECODER
DEFINE_CUVID_CODEC(hevc, HEVC, "hevc_mp4toannexb")
#endif

#if CONFIG_H264_CUVID_DECODER
DEFINE_CUVID_CODEC(h264, H264, "h264_mp4toannexb")
#endif

#if CONFIG_MJPEG_CUVID_DECODER
DEFINE_CUVID_CODEC(mjpeg, MJPEG, NULL)
#endif

#if CONFIG_MPEG1_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO, NULL)
#endif

#if CONFIG_MPEG2_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO, NULL)
#endif

#if CONFIG_MPEG4_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg4, MPEG4, NULL)
#endif

#if CONFIG_VP8_CUVID_DECODER
DEFINE_CUVID_CODEC(vp8, VP8, NULL)
#endif

#if CONFIG_VP9_CUVID_DECODER
DEFINE_CUVID_CODEC(vp9, VP9, NULL)
#endif

#if CONFIG_VC1_CUVID_DECODER
DEFINE_CUVID_CODEC(vc1, VC1, NULL)
#endif