You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

924 lines
29KB

  1. /*
  2. * Nvidia CUVID decoder
  3. * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/buffer.h"
  22. #include "libavutil/mathematics.h"
  23. #include "libavutil/hwcontext.h"
  24. #include "libavutil/hwcontext_cuda.h"
  25. #include "libavutil/fifo.h"
  26. #include "libavutil/log.h"
  27. #include "libavutil/opt.h"
  28. #include "avcodec.h"
  29. #include "internal.h"
  30. #include "compat/cuda/nvcuvid.h"
  31. #define MAX_FRAME_COUNT 25
  32. typedef struct CuvidContext
  33. {
  34. AVClass *avclass;
  35. CUvideodecoder cudecoder;
  36. CUvideoparser cuparser;
  37. char *cu_gpu;
  38. AVBufferRef *hwdevice;
  39. AVBufferRef *hwframe;
  40. AVBSFContext *bsf;
  41. AVFifoBuffer *frame_queue;
  42. int deint_mode;
  43. int64_t prev_pts;
  44. int internal_error;
  45. int decoder_flushing;
  46. cudaVideoCodec codec_type;
  47. cudaVideoChromaFormat chroma_format;
  48. CUVIDPARSERPARAMS cuparseinfo;
  49. CUVIDEOFORMATEX cuparse_ext;
  50. } CuvidContext;
  51. typedef struct CuvidParsedFrame
  52. {
  53. CUVIDPARSERDISPINFO dispinfo;
  54. int second_field;
  55. int is_deinterlacing;
  56. } CuvidParsedFrame;
  57. static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
  58. {
  59. const char *err_name;
  60. const char *err_string;
  61. av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
  62. if (err == CUDA_SUCCESS)
  63. return 0;
  64. cuGetErrorName(err, &err_name);
  65. cuGetErrorString(err, &err_string);
  66. av_log(avctx, AV_LOG_ERROR, "%s failed", func);
  67. if (err_name && err_string)
  68. av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
  69. av_log(avctx, AV_LOG_ERROR, "\n");
  70. return AVERROR_EXTERNAL;
  71. }
  72. #define CHECK_CU(x) check_cu(avctx, (x), #x)
  73. static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
  74. {
  75. AVCodecContext *avctx = opaque;
  76. CuvidContext *ctx = avctx->priv_data;
  77. AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  78. CUVIDDECODECREATEINFO cuinfo;
  79. av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
  80. ctx->internal_error = 0;
  81. avctx->width = format->display_area.right;
  82. avctx->height = format->display_area.bottom;
  83. ff_set_sar(avctx, av_div_q(
  84. (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
  85. (AVRational){ avctx->width, avctx->height }));
  86. if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave)
  87. avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
  88. else
  89. avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
  90. if (format->video_signal_description.video_full_range_flag)
  91. avctx->color_range = AVCOL_RANGE_JPEG;
  92. else
  93. avctx->color_range = AVCOL_RANGE_MPEG;
  94. avctx->color_primaries = format->video_signal_description.color_primaries;
  95. avctx->color_trc = format->video_signal_description.transfer_characteristics;
  96. avctx->colorspace = format->video_signal_description.matrix_coefficients;
  97. if (format->bitrate)
  98. avctx->bit_rate = format->bitrate;
  99. if (format->frame_rate.numerator && format->frame_rate.denominator) {
  100. avctx->framerate.num = format->frame_rate.numerator;
  101. avctx->framerate.den = format->frame_rate.denominator;
  102. }
  103. if (ctx->cudecoder
  104. && avctx->coded_width == format->coded_width
  105. && avctx->coded_height == format->coded_height
  106. && ctx->chroma_format == format->chroma_format
  107. && ctx->codec_type == format->codec)
  108. return 1;
  109. if (ctx->cudecoder) {
  110. av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
  111. ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder));
  112. if (ctx->internal_error < 0)
  113. return 0;
  114. ctx->cudecoder = NULL;
  115. }
  116. if (hwframe_ctx->pool && (
  117. hwframe_ctx->width < avctx->width ||
  118. hwframe_ctx->height < avctx->height ||
  119. hwframe_ctx->format != AV_PIX_FMT_CUDA ||
  120. hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
  121. av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
  122. ctx->internal_error = AVERROR(EINVAL);
  123. return 0;
  124. }
  125. if (format->chroma_format != cudaVideoChromaFormat_420) {
  126. av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
  127. ctx->internal_error = AVERROR(EINVAL);
  128. return 0;
  129. }
  130. avctx->coded_width = format->coded_width;
  131. avctx->coded_height = format->coded_height;
  132. ctx->chroma_format = format->chroma_format;
  133. memset(&cuinfo, 0, sizeof(cuinfo));
  134. cuinfo.CodecType = ctx->codec_type = format->codec;
  135. cuinfo.ChromaFormat = format->chroma_format;
  136. cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  137. cuinfo.ulWidth = avctx->coded_width;
  138. cuinfo.ulHeight = avctx->coded_height;
  139. cuinfo.ulTargetWidth = cuinfo.ulWidth;
  140. cuinfo.ulTargetHeight = cuinfo.ulHeight;
  141. cuinfo.target_rect.left = 0;
  142. cuinfo.target_rect.top = 0;
  143. cuinfo.target_rect.right = cuinfo.ulWidth;
  144. cuinfo.target_rect.bottom = cuinfo.ulHeight;
  145. cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
  146. cuinfo.ulNumOutputSurfaces = 1;
  147. cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
  148. cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
  149. if (format->progressive_sequence) {
  150. ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  151. } else {
  152. cuinfo.DeinterlaceMode = ctx->deint_mode;
  153. }
  154. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
  155. avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
  156. ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
  157. if (ctx->internal_error < 0)
  158. return 0;
  159. if (!hwframe_ctx->pool) {
  160. hwframe_ctx->format = AV_PIX_FMT_CUDA;
  161. hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
  162. hwframe_ctx->width = avctx->width;
  163. hwframe_ctx->height = avctx->height;
  164. if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
  165. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
  166. return 0;
  167. }
  168. }
  169. return 1;
  170. }
  171. static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
  172. {
  173. AVCodecContext *avctx = opaque;
  174. CuvidContext *ctx = avctx->priv_data;
  175. av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
  176. ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams));
  177. if (ctx->internal_error < 0)
  178. return 0;
  179. return 1;
  180. }
  181. static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
  182. {
  183. AVCodecContext *avctx = opaque;
  184. CuvidContext *ctx = avctx->priv_data;
  185. CuvidParsedFrame parsed_frame = { *dispinfo, 0, 0 };
  186. ctx->internal_error = 0;
  187. if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
  188. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  189. } else {
  190. parsed_frame.is_deinterlacing = 1;
  191. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  192. parsed_frame.second_field = 1;
  193. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  194. }
  195. return 1;
  196. }
  197. static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
  198. {
  199. CuvidContext *ctx = avctx->priv_data;
  200. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  201. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  202. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  203. CUVIDSOURCEDATAPACKET cupkt;
  204. AVPacket filter_packet = { 0 };
  205. AVPacket filtered_packet = { 0 };
  206. int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
  207. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
  208. if (is_flush && avpkt && avpkt->size)
  209. return AVERROR_EOF;
  210. if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size)
  211. return AVERROR(EAGAIN);
  212. if (ctx->bsf && avpkt && avpkt->size) {
  213. if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
  214. av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
  215. return ret;
  216. }
  217. if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
  218. av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
  219. av_packet_unref(&filter_packet);
  220. return ret;
  221. }
  222. if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
  223. av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
  224. return ret;
  225. }
  226. avpkt = &filtered_packet;
  227. }
  228. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  229. if (ret < 0) {
  230. av_packet_unref(&filtered_packet);
  231. return ret;
  232. }
  233. memset(&cupkt, 0, sizeof(cupkt));
  234. if (avpkt && avpkt->size) {
  235. cupkt.payload_size = avpkt->size;
  236. cupkt.payload = avpkt->data;
  237. if (avpkt->pts != AV_NOPTS_VALUE) {
  238. cupkt.flags = CUVID_PKT_TIMESTAMP;
  239. if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
  240. cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
  241. else
  242. cupkt.timestamp = avpkt->pts;
  243. }
  244. } else {
  245. cupkt.flags = CUVID_PKT_ENDOFSTREAM;
  246. ctx->decoder_flushing = 1;
  247. }
  248. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt));
  249. av_packet_unref(&filtered_packet);
  250. if (ret < 0)
  251. goto error;
  252. // cuvidParseVideoData doesn't return an error just because stuff failed...
  253. if (ctx->internal_error) {
  254. av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
  255. ret = ctx->internal_error;
  256. goto error;
  257. }
  258. error:
  259. eret = CHECK_CU(cuCtxPopCurrent(&dummy));
  260. if (eret < 0)
  261. return eret;
  262. else if (ret < 0)
  263. return ret;
  264. else if (is_flush)
  265. return AVERROR_EOF;
  266. else
  267. return 0;
  268. }
  269. static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
  270. {
  271. CuvidContext *ctx = avctx->priv_data;
  272. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  273. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  274. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  275. CUdeviceptr mapped_frame = 0;
  276. int ret = 0, eret = 0;
  277. av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");
  278. if (ctx->decoder_flushing) {
  279. ret = cuvid_decode_packet(avctx, NULL);
  280. if (ret < 0 && ret != AVERROR_EOF)
  281. return ret;
  282. }
  283. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  284. if (ret < 0)
  285. return ret;
  286. if (av_fifo_size(ctx->frame_queue)) {
  287. CuvidParsedFrame parsed_frame;
  288. CUVIDPROCPARAMS params;
  289. unsigned int pitch = 0;
  290. int offset = 0;
  291. int i;
  292. av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  293. memset(&params, 0, sizeof(params));
  294. params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
  295. params.second_field = parsed_frame.second_field;
  296. params.top_field_first = parsed_frame.dispinfo.top_field_first;
  297. ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
  298. if (ret < 0)
  299. goto error;
  300. if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
  301. ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
  302. if (ret < 0) {
  303. av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
  304. goto error;
  305. }
  306. ret = ff_decode_frame_props(avctx, frame);
  307. if (ret < 0) {
  308. av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
  309. goto error;
  310. }
  311. for (i = 0; i < 2; i++) {
  312. CUDA_MEMCPY2D cpy = {
  313. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  314. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  315. .srcDevice = mapped_frame,
  316. .dstDevice = (CUdeviceptr)frame->data[i],
  317. .srcPitch = pitch,
  318. .dstPitch = frame->linesize[i],
  319. .srcY = offset,
  320. .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
  321. .Height = avctx->height >> (i ? 1 : 0),
  322. };
  323. ret = CHECK_CU(cuMemcpy2D(&cpy));
  324. if (ret < 0)
  325. goto error;
  326. offset += avctx->coded_height;
  327. }
  328. } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
  329. AVFrame *tmp_frame = av_frame_alloc();
  330. if (!tmp_frame) {
  331. av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
  332. ret = AVERROR(ENOMEM);
  333. goto error;
  334. }
  335. tmp_frame->format = AV_PIX_FMT_CUDA;
  336. tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
  337. tmp_frame->data[0] = (uint8_t*)mapped_frame;
  338. tmp_frame->linesize[0] = pitch;
  339. tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->coded_height * pitch);
  340. tmp_frame->linesize[1] = pitch;
  341. tmp_frame->width = avctx->width;
  342. tmp_frame->height = avctx->height;
  343. ret = ff_get_buffer(avctx, frame, 0);
  344. if (ret < 0) {
  345. av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
  346. av_frame_free(&tmp_frame);
  347. goto error;
  348. }
  349. ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
  350. if (ret) {
  351. av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
  352. av_frame_free(&tmp_frame);
  353. goto error;
  354. }
  355. av_frame_free(&tmp_frame);
  356. } else {
  357. ret = AVERROR_BUG;
  358. goto error;
  359. }
  360. frame->width = avctx->width;
  361. frame->height = avctx->height;
  362. if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
  363. frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
  364. else
  365. frame->pts = parsed_frame.dispinfo.timestamp;
  366. if (parsed_frame.second_field) {
  367. if (ctx->prev_pts == INT64_MIN) {
  368. ctx->prev_pts = frame->pts;
  369. frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
  370. } else {
  371. int pts_diff = (frame->pts - ctx->prev_pts) / 2;
  372. ctx->prev_pts = frame->pts;
  373. frame->pts += pts_diff;
  374. }
  375. }
  376. /* CUVIDs opaque reordering breaks the internal pkt logic.
  377. * So set pkt_pts and clear all the other pkt_ fields.
  378. */
  379. #if FF_API_PKT_PTS
  380. FF_DISABLE_DEPRECATION_WARNINGS
  381. frame->pkt_pts = frame->pts;
  382. FF_ENABLE_DEPRECATION_WARNINGS
  383. #endif
  384. av_frame_set_pkt_pos(frame, -1);
  385. av_frame_set_pkt_duration(frame, 0);
  386. av_frame_set_pkt_size(frame, -1);
  387. frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;
  388. if (frame->interlaced_frame)
  389. frame->top_field_first = parsed_frame.dispinfo.top_field_first;
  390. } else if (ctx->decoder_flushing) {
  391. ret = AVERROR_EOF;
  392. } else {
  393. ret = AVERROR(EAGAIN);
  394. }
  395. error:
  396. if (mapped_frame)
  397. eret = CHECK_CU(cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
  398. eret = CHECK_CU(cuCtxPopCurrent(&dummy));
  399. if (eret < 0)
  400. return eret;
  401. else
  402. return ret;
  403. }
  404. static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
  405. {
  406. CuvidContext *ctx = avctx->priv_data;
  407. AVFrame *frame = data;
  408. int ret = 0;
  409. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
  410. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) {
  411. av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
  412. return AVERROR(EINVAL);
  413. }
  414. if (!ctx->decoder_flushing) {
  415. ret = cuvid_decode_packet(avctx, avpkt);
  416. if (ret < 0)
  417. return ret;
  418. }
  419. ret = cuvid_output_frame(avctx, frame);
  420. if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
  421. *got_frame = 0;
  422. } else if (ret < 0) {
  423. return ret;
  424. } else {
  425. *got_frame = 1;
  426. }
  427. return 0;
  428. }
  429. static av_cold int cuvid_decode_end(AVCodecContext *avctx)
  430. {
  431. CuvidContext *ctx = avctx->priv_data;
  432. av_fifo_freep(&ctx->frame_queue);
  433. if (ctx->bsf)
  434. av_bsf_free(&ctx->bsf);
  435. if (ctx->cuparser)
  436. cuvidDestroyVideoParser(ctx->cuparser);
  437. if (ctx->cudecoder)
  438. cuvidDestroyDecoder(ctx->cudecoder);
  439. av_buffer_unref(&ctx->hwframe);
  440. av_buffer_unref(&ctx->hwdevice);
  441. return 0;
  442. }
  443. static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
  444. {
  445. CUVIDDECODECREATEINFO cuinfo;
  446. CUvideodecoder cudec = 0;
  447. int ret = 0;
  448. memset(&cuinfo, 0, sizeof(cuinfo));
  449. cuinfo.CodecType = cuparseinfo->CodecType;
  450. cuinfo.ChromaFormat = cudaVideoChromaFormat_420;
  451. cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  452. cuinfo.ulWidth = 1280;
  453. cuinfo.ulHeight = 720;
  454. cuinfo.ulTargetWidth = cuinfo.ulWidth;
  455. cuinfo.ulTargetHeight = cuinfo.ulHeight;
  456. cuinfo.target_rect.left = 0;
  457. cuinfo.target_rect.top = 0;
  458. cuinfo.target_rect.right = cuinfo.ulWidth;
  459. cuinfo.target_rect.bottom = cuinfo.ulHeight;
  460. cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
  461. cuinfo.ulNumOutputSurfaces = 1;
  462. cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
  463. cuinfo.bitDepthMinus8 = 0;
  464. cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  465. ret = CHECK_CU(cuvidCreateDecoder(&cudec, &cuinfo));
  466. if (ret < 0)
  467. return ret;
  468. ret = CHECK_CU(cuvidDestroyDecoder(cudec));
  469. if (ret < 0)
  470. return ret;
  471. return 0;
  472. }
  473. static av_cold int cuvid_decode_init(AVCodecContext *avctx)
  474. {
  475. CuvidContext *ctx = avctx->priv_data;
  476. AVCUDADeviceContext *device_hwctx;
  477. AVHWDeviceContext *device_ctx;
  478. AVHWFramesContext *hwframe_ctx;
  479. CUVIDSOURCEDATAPACKET seq_pkt;
  480. CUcontext cuda_ctx = NULL;
  481. CUcontext dummy;
  482. const AVBitStreamFilter *bsf;
  483. int ret = 0;
  484. enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
  485. AV_PIX_FMT_NV12,
  486. AV_PIX_FMT_NONE };
  487. ret = ff_get_format(avctx, pix_fmts);
  488. if (ret < 0) {
  489. av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
  490. return ret;
  491. }
  492. ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
  493. if (!ctx->frame_queue) {
  494. ret = AVERROR(ENOMEM);
  495. goto error;
  496. }
  497. avctx->pix_fmt = ret;
  498. if (avctx->hw_frames_ctx) {
  499. ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
  500. if (!ctx->hwframe) {
  501. ret = AVERROR(ENOMEM);
  502. goto error;
  503. }
  504. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  505. ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
  506. if (!ctx->hwdevice) {
  507. ret = AVERROR(ENOMEM);
  508. goto error;
  509. }
  510. } else {
  511. ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
  512. if (ret < 0)
  513. goto error;
  514. ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
  515. if (!ctx->hwframe) {
  516. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
  517. ret = AVERROR(ENOMEM);
  518. goto error;
  519. }
  520. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  521. }
  522. device_ctx = hwframe_ctx->device_ctx;
  523. device_hwctx = device_ctx->hwctx;
  524. cuda_ctx = device_hwctx->cuda_ctx;
  525. memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
  526. memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
  527. memset(&seq_pkt, 0, sizeof(seq_pkt));
  528. ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;
  529. switch (avctx->codec->id) {
  530. #if CONFIG_H264_CUVID_DECODER
  531. case AV_CODEC_ID_H264:
  532. ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
  533. break;
  534. #endif
  535. #if CONFIG_HEVC_CUVID_DECODER
  536. case AV_CODEC_ID_HEVC:
  537. ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
  538. break;
  539. #endif
  540. #if CONFIG_MJPEG_CUVID_DECODER
  541. case AV_CODEC_ID_MJPEG:
  542. ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
  543. break;
  544. #endif
  545. #if CONFIG_MPEG1_CUVID_DECODER
  546. case AV_CODEC_ID_MPEG1VIDEO:
  547. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
  548. break;
  549. #endif
  550. #if CONFIG_MPEG2_CUVID_DECODER
  551. case AV_CODEC_ID_MPEG2VIDEO:
  552. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
  553. break;
  554. #endif
  555. #if CONFIG_MPEG4_CUVID_DECODER
  556. case AV_CODEC_ID_MPEG4:
  557. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
  558. break;
  559. #endif
  560. #if CONFIG_VP8_CUVID_DECODER
  561. case AV_CODEC_ID_VP8:
  562. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
  563. break;
  564. #endif
  565. #if CONFIG_VP9_CUVID_DECODER
  566. case AV_CODEC_ID_VP9:
  567. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
  568. break;
  569. #endif
  570. #if CONFIG_VC1_CUVID_DECODER
  571. case AV_CODEC_ID_VC1:
  572. ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
  573. break;
  574. #endif
  575. default:
  576. av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
  577. return AVERROR_BUG;
  578. }
  579. if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
  580. if (avctx->codec->id == AV_CODEC_ID_H264)
  581. bsf = av_bsf_get_by_name("h264_mp4toannexb");
  582. else
  583. bsf = av_bsf_get_by_name("hevc_mp4toannexb");
  584. if (!bsf) {
  585. ret = AVERROR_BSF_NOT_FOUND;
  586. goto error;
  587. }
  588. if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
  589. goto error;
  590. }
  591. if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
  592. av_bsf_free(&ctx->bsf);
  593. goto error;
  594. }
  595. ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
  596. memcpy(ctx->cuparse_ext.raw_seqhdr_data,
  597. ctx->bsf->par_out->extradata,
  598. FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
  599. } else if (avctx->extradata_size > 0) {
  600. ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
  601. memcpy(ctx->cuparse_ext.raw_seqhdr_data,
  602. avctx->extradata,
  603. FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
  604. }
  605. ctx->cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT;
  606. ctx->cuparseinfo.ulMaxDisplayDelay = 4;
  607. ctx->cuparseinfo.pUserData = avctx;
  608. ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
  609. ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
  610. ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
  611. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  612. if (ret < 0)
  613. goto error;
  614. ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo);
  615. if (ret < 0)
  616. goto error;
  617. ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  618. if (ret < 0)
  619. goto error;
  620. seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
  621. seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  622. if (seq_pkt.payload && seq_pkt.payload_size) {
  623. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  624. if (ret < 0)
  625. goto error;
  626. }
  627. ret = CHECK_CU(cuCtxPopCurrent(&dummy));
  628. if (ret < 0)
  629. goto error;
  630. ctx->prev_pts = INT64_MIN;
  631. if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
  632. av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
  633. return 0;
  634. error:
  635. cuvid_decode_end(avctx);
  636. return ret;
  637. }
  638. static void cuvid_flush(AVCodecContext *avctx)
  639. {
  640. CuvidContext *ctx = avctx->priv_data;
  641. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  642. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  643. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  644. CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
  645. int ret;
  646. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  647. if (ret < 0)
  648. goto error;
  649. av_fifo_freep(&ctx->frame_queue);
  650. ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
  651. if (!ctx->frame_queue) {
  652. av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
  653. return;
  654. }
  655. if (ctx->cudecoder) {
  656. cuvidDestroyDecoder(ctx->cudecoder);
  657. ctx->cudecoder = NULL;
  658. }
  659. if (ctx->cuparser) {
  660. cuvidDestroyVideoParser(ctx->cuparser);
  661. ctx->cuparser = NULL;
  662. }
  663. ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  664. if (ret < 0)
  665. goto error;
  666. seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
  667. seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  668. if (seq_pkt.payload && seq_pkt.payload_size) {
  669. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  670. if (ret < 0)
  671. goto error;
  672. }
  673. ret = CHECK_CU(cuCtxPopCurrent(&dummy));
  674. if (ret < 0)
  675. goto error;
  676. ctx->prev_pts = INT64_MIN;
  677. ctx->decoder_flushing = 0;
  678. return;
  679. error:
  680. av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
  681. }
  682. #define OFFSET(x) offsetof(CuvidContext, x)
  683. #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
  684. static const AVOption options[] = {
  685. { "deint", "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT, { .i64 = cudaVideoDeinterlaceMode_Weave }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
  686. { "weave", "Weave deinterlacing (do nothing)", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave }, 0, 0, VD, "deint" },
  687. { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" },
  688. { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
  689. { "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
  690. { NULL }
  691. };
  /*
   * Define everything needed for one CUVID decoder:
   *   - the AVClass carrying the private option table,
   *   - an AVHWAccel entry for AV_PIX_FMT_CUDA,
   *   - the AVCodec itself, wired to the functions in this file.
   *
   * x is the lower-case name used in identifiers and in the codec name,
   * X is the suffix of the matching AV_CODEC_ID_ constant.
   */
  #define DEFINE_CUVID_CODEC(x, X)                                            \
      static const AVClass x##_cuvid_class = {                                \
          .class_name     = #x "_cuvid",                                      \
          .item_name      = av_default_item_name,                             \
          .option         = options,                                          \
          .version        = LIBAVUTIL_VERSION_INT,                            \
      };                                                                      \
      AVHWAccel ff_##x##_cuvid_hwaccel = {                                    \
          .name           = #x "_cuvid",                                      \
          .type           = AVMEDIA_TYPE_VIDEO,                               \
          .id             = AV_CODEC_ID_##X,                                  \
          .pix_fmt        = AV_PIX_FMT_CUDA,                                  \
      };                                                                      \
      AVCodec ff_##x##_cuvid_decoder = {                                      \
          .name           = #x "_cuvid",                                      \
          .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
          .type           = AVMEDIA_TYPE_VIDEO,                               \
          .id             = AV_CODEC_ID_##X,                                  \
          .priv_data_size = sizeof(CuvidContext),                             \
          .priv_class     = &x##_cuvid_class,                                 \
          .init           = cuvid_decode_init,                                \
          .close          = cuvid_decode_end,                                 \
          .decode         = cuvid_decode_frame,                               \
          .send_packet    = cuvid_decode_packet,                              \
          .receive_frame  = cuvid_output_frame,                               \
          .flush          = cuvid_flush,                                      \
          .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,  \
          .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA,    \
                                                          AV_PIX_FMT_NV12,    \
                                                          AV_PIX_FMT_NONE },  \
      };
  /*
   * One decoder definition per codec enabled at configure time. The list
   * matches the codec switch in cuvid_decode_init().
   */
  #if CONFIG_H264_CUVID_DECODER
  DEFINE_CUVID_CODEC(h264, H264)
  #endif

  #if CONFIG_HEVC_CUVID_DECODER
  DEFINE_CUVID_CODEC(hevc, HEVC)
  #endif

  #if CONFIG_MJPEG_CUVID_DECODER
  DEFINE_CUVID_CODEC(mjpeg, MJPEG)
  #endif

  #if CONFIG_MPEG1_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
  #endif

  #if CONFIG_MPEG2_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
  #endif

  #if CONFIG_MPEG4_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg4, MPEG4)
  #endif

  #if CONFIG_VC1_CUVID_DECODER
  DEFINE_CUVID_CODEC(vc1, VC1)
  #endif

  #if CONFIG_VP8_CUVID_DECODER
  DEFINE_CUVID_CODEC(vp8, VP8)
  #endif

  #if CONFIG_VP9_CUVID_DECODER
  DEFINE_CUVID_CODEC(vp9, VP9)
  #endif