You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

933 lines
29KB

  1. /*
  2. * Nvidia CUVID decoder
  3. * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/buffer.h"
  22. #include "libavutil/mathematics.h"
  23. #include "libavutil/hwcontext.h"
  24. #include "libavutil/hwcontext_cuda.h"
  25. #include "libavutil/fifo.h"
  26. #include "libavutil/log.h"
  27. #include "libavutil/opt.h"
  28. #include "avcodec.h"
  29. #include "internal.h"
  30. #include "compat/cuda/nvcuvid.h"
  31. #define MAX_FRAME_COUNT 25
  32. typedef struct CuvidContext
  33. {
  34. AVClass *avclass;
  35. CUvideodecoder cudecoder;
  36. CUvideoparser cuparser;
  37. char *cu_gpu;
  38. AVBufferRef *hwdevice;
  39. AVBufferRef *hwframe;
  40. AVBSFContext *bsf;
  41. AVFifoBuffer *frame_queue;
  42. int deint_mode;
  43. int64_t prev_pts;
  44. int internal_error;
  45. int decoder_flushing;
  46. cudaVideoCodec codec_type;
  47. cudaVideoChromaFormat chroma_format;
  48. CUVIDPARSERPARAMS cuparseinfo;
  49. CUVIDEOFORMATEX cuparse_ext;
  50. } CuvidContext;
  51. typedef struct CuvidParsedFrame
  52. {
  53. CUVIDPARSERDISPINFO dispinfo;
  54. int second_field;
  55. int is_deinterlacing;
  56. } CuvidParsedFrame;
  57. static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
  58. {
  59. const char *err_name;
  60. const char *err_string;
  61. av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
  62. if (err == CUDA_SUCCESS)
  63. return 0;
  64. cuGetErrorName(err, &err_name);
  65. cuGetErrorString(err, &err_string);
  66. av_log(avctx, AV_LOG_ERROR, "%s failed", func);
  67. if (err_name && err_string)
  68. av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
  69. av_log(avctx, AV_LOG_ERROR, "\n");
  70. return AVERROR_EXTERNAL;
  71. }
  72. #define CHECK_CU(x) check_cu(avctx, (x), #x)
  73. static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
  74. {
  75. AVCodecContext *avctx = opaque;
  76. CuvidContext *ctx = avctx->priv_data;
  77. AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  78. CUVIDDECODECREATEINFO cuinfo;
  79. av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
  80. ctx->internal_error = 0;
  81. avctx->width = format->display_area.right;
  82. avctx->height = format->display_area.bottom;
  83. ff_set_sar(avctx, av_div_q(
  84. (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
  85. (AVRational){ avctx->width, avctx->height }));
  86. if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave)
  87. avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
  88. else
  89. avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
  90. if (format->video_signal_description.video_full_range_flag)
  91. avctx->color_range = AVCOL_RANGE_JPEG;
  92. else
  93. avctx->color_range = AVCOL_RANGE_MPEG;
  94. avctx->color_primaries = format->video_signal_description.color_primaries;
  95. avctx->color_trc = format->video_signal_description.transfer_characteristics;
  96. avctx->colorspace = format->video_signal_description.matrix_coefficients;
  97. if (format->bitrate)
  98. avctx->bit_rate = format->bitrate;
  99. if (format->frame_rate.numerator && format->frame_rate.denominator) {
  100. avctx->framerate.num = format->frame_rate.numerator;
  101. avctx->framerate.den = format->frame_rate.denominator;
  102. }
  103. if (ctx->cudecoder
  104. && avctx->coded_width == format->coded_width
  105. && avctx->coded_height == format->coded_height
  106. && ctx->chroma_format == format->chroma_format
  107. && ctx->codec_type == format->codec)
  108. return 1;
  109. if (ctx->cudecoder) {
  110. av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
  111. ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder));
  112. if (ctx->internal_error < 0)
  113. return 0;
  114. ctx->cudecoder = NULL;
  115. }
  116. if (hwframe_ctx->pool && (
  117. hwframe_ctx->width < avctx->width ||
  118. hwframe_ctx->height < avctx->height ||
  119. hwframe_ctx->format != AV_PIX_FMT_CUDA ||
  120. hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
  121. av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
  122. ctx->internal_error = AVERROR(EINVAL);
  123. return 0;
  124. }
  125. if (format->chroma_format != cudaVideoChromaFormat_420) {
  126. av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
  127. ctx->internal_error = AVERROR(EINVAL);
  128. return 0;
  129. }
  130. avctx->coded_width = format->coded_width;
  131. avctx->coded_height = format->coded_height;
  132. ctx->chroma_format = format->chroma_format;
  133. memset(&cuinfo, 0, sizeof(cuinfo));
  134. cuinfo.CodecType = ctx->codec_type = format->codec;
  135. cuinfo.ChromaFormat = format->chroma_format;
  136. cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  137. cuinfo.ulWidth = avctx->coded_width;
  138. cuinfo.ulHeight = avctx->coded_height;
  139. cuinfo.ulTargetWidth = cuinfo.ulWidth;
  140. cuinfo.ulTargetHeight = cuinfo.ulHeight;
  141. cuinfo.target_rect.left = 0;
  142. cuinfo.target_rect.top = 0;
  143. cuinfo.target_rect.right = cuinfo.ulWidth;
  144. cuinfo.target_rect.bottom = cuinfo.ulHeight;
  145. cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
  146. cuinfo.ulNumOutputSurfaces = 1;
  147. cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
  148. cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
  149. if (format->progressive_sequence) {
  150. ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  151. } else {
  152. cuinfo.DeinterlaceMode = ctx->deint_mode;
  153. }
  154. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
  155. avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
  156. ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
  157. if (ctx->internal_error < 0)
  158. return 0;
  159. if (!hwframe_ctx->pool) {
  160. hwframe_ctx->format = AV_PIX_FMT_CUDA;
  161. hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
  162. hwframe_ctx->width = avctx->width;
  163. hwframe_ctx->height = avctx->height;
  164. if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
  165. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
  166. return 0;
  167. }
  168. }
  169. return 1;
  170. }
  171. static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
  172. {
  173. AVCodecContext *avctx = opaque;
  174. CuvidContext *ctx = avctx->priv_data;
  175. av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
  176. ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams));
  177. if (ctx->internal_error < 0)
  178. return 0;
  179. return 1;
  180. }
  181. static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
  182. {
  183. AVCodecContext *avctx = opaque;
  184. CuvidContext *ctx = avctx->priv_data;
  185. CuvidParsedFrame parsed_frame = { *dispinfo, 0, 0 };
  186. ctx->internal_error = 0;
  187. if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
  188. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  189. } else {
  190. parsed_frame.is_deinterlacing = 1;
  191. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  192. parsed_frame.second_field = 1;
  193. av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  194. }
  195. return 1;
  196. }
  197. static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
  198. {
  199. CuvidContext *ctx = avctx->priv_data;
  200. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  201. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  202. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  203. CUVIDSOURCEDATAPACKET cupkt;
  204. AVPacket filter_packet = { 0 };
  205. AVPacket filtered_packet = { 0 };
  206. int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
  207. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
  208. if (is_flush && avpkt && avpkt->size)
  209. return AVERROR_EOF;
  210. if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size)
  211. return AVERROR(EAGAIN);
  212. if (ctx->bsf && avpkt && avpkt->size) {
  213. if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
  214. av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
  215. return ret;
  216. }
  217. if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
  218. av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
  219. av_packet_unref(&filter_packet);
  220. return ret;
  221. }
  222. if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
  223. av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
  224. return ret;
  225. }
  226. avpkt = &filtered_packet;
  227. }
  228. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  229. if (ret < 0) {
  230. av_packet_unref(&filtered_packet);
  231. return ret;
  232. }
  233. memset(&cupkt, 0, sizeof(cupkt));
  234. if (avpkt && avpkt->size) {
  235. cupkt.payload_size = avpkt->size;
  236. cupkt.payload = avpkt->data;
  237. if (avpkt->pts != AV_NOPTS_VALUE) {
  238. cupkt.flags = CUVID_PKT_TIMESTAMP;
  239. if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
  240. cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
  241. else
  242. cupkt.timestamp = avpkt->pts;
  243. }
  244. } else {
  245. cupkt.flags = CUVID_PKT_ENDOFSTREAM;
  246. ctx->decoder_flushing = 1;
  247. }
  248. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt));
  249. av_packet_unref(&filtered_packet);
  250. if (ret < 0)
  251. goto error;
  252. // cuvidParseVideoData doesn't return an error just because stuff failed...
  253. if (ctx->internal_error) {
  254. av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
  255. ret = ctx->internal_error;
  256. goto error;
  257. }
  258. error:
  259. eret = CHECK_CU(cuCtxPopCurrent(&dummy));
  260. if (eret < 0)
  261. return eret;
  262. else if (ret < 0)
  263. return ret;
  264. else if (is_flush)
  265. return AVERROR_EOF;
  266. else
  267. return 0;
  268. }
  269. static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
  270. {
  271. CuvidContext *ctx = avctx->priv_data;
  272. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  273. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  274. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  275. CUdeviceptr mapped_frame = 0;
  276. int ret = 0, eret = 0;
  277. av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");
  278. if (ctx->decoder_flushing) {
  279. ret = cuvid_decode_packet(avctx, NULL);
  280. if (ret < 0 && ret != AVERROR_EOF)
  281. return ret;
  282. }
  283. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  284. if (ret < 0)
  285. return ret;
  286. if (av_fifo_size(ctx->frame_queue)) {
  287. CuvidParsedFrame parsed_frame;
  288. CUVIDPROCPARAMS params;
  289. unsigned int pitch = 0;
  290. int offset = 0;
  291. int i;
  292. av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
  293. memset(&params, 0, sizeof(params));
  294. params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
  295. params.second_field = parsed_frame.second_field;
  296. params.top_field_first = parsed_frame.dispinfo.top_field_first;
  297. ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
  298. if (ret < 0)
  299. goto error;
  300. if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
  301. ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
  302. if (ret < 0) {
  303. av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
  304. goto error;
  305. }
  306. ret = ff_decode_frame_props(avctx, frame);
  307. if (ret < 0) {
  308. av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
  309. goto error;
  310. }
  311. for (i = 0; i < 2; i++) {
  312. CUDA_MEMCPY2D cpy = {
  313. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  314. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  315. .srcDevice = mapped_frame,
  316. .dstDevice = (CUdeviceptr)frame->data[i],
  317. .srcPitch = pitch,
  318. .dstPitch = frame->linesize[i],
  319. .srcY = offset,
  320. .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
  321. .Height = avctx->height >> (i ? 1 : 0),
  322. };
  323. ret = CHECK_CU(cuMemcpy2D(&cpy));
  324. if (ret < 0)
  325. goto error;
  326. offset += avctx->coded_height;
  327. }
  328. } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
  329. AVFrame *tmp_frame = av_frame_alloc();
  330. if (!tmp_frame) {
  331. av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
  332. ret = AVERROR(ENOMEM);
  333. goto error;
  334. }
  335. tmp_frame->format = AV_PIX_FMT_CUDA;
  336. tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
  337. tmp_frame->data[0] = (uint8_t*)mapped_frame;
  338. tmp_frame->linesize[0] = pitch;
  339. tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->coded_height * pitch);
  340. tmp_frame->linesize[1] = pitch;
  341. tmp_frame->width = avctx->width;
  342. tmp_frame->height = avctx->height;
  343. ret = ff_get_buffer(avctx, frame, 0);
  344. if (ret < 0) {
  345. av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
  346. av_frame_free(&tmp_frame);
  347. goto error;
  348. }
  349. ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
  350. if (ret) {
  351. av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
  352. av_frame_free(&tmp_frame);
  353. goto error;
  354. }
  355. av_frame_free(&tmp_frame);
  356. } else {
  357. ret = AVERROR_BUG;
  358. goto error;
  359. }
  360. frame->width = avctx->width;
  361. frame->height = avctx->height;
  362. if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
  363. frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
  364. else
  365. frame->pts = parsed_frame.dispinfo.timestamp;
  366. if (parsed_frame.second_field) {
  367. if (ctx->prev_pts == INT64_MIN) {
  368. ctx->prev_pts = frame->pts;
  369. frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
  370. } else {
  371. int pts_diff = (frame->pts - ctx->prev_pts) / 2;
  372. ctx->prev_pts = frame->pts;
  373. frame->pts += pts_diff;
  374. }
  375. }
  376. /* CUVIDs opaque reordering breaks the internal pkt logic.
  377. * So set pkt_pts and clear all the other pkt_ fields.
  378. */
  379. #if FF_API_PKT_PTS
  380. FF_DISABLE_DEPRECATION_WARNINGS
  381. frame->pkt_pts = frame->pts;
  382. FF_ENABLE_DEPRECATION_WARNINGS
  383. #endif
  384. av_frame_set_pkt_pos(frame, -1);
  385. av_frame_set_pkt_duration(frame, 0);
  386. av_frame_set_pkt_size(frame, -1);
  387. frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;
  388. if (frame->interlaced_frame)
  389. frame->top_field_first = parsed_frame.dispinfo.top_field_first;
  390. } else if (ctx->decoder_flushing) {
  391. ret = AVERROR_EOF;
  392. } else {
  393. ret = AVERROR(EAGAIN);
  394. }
  395. error:
  396. if (mapped_frame)
  397. eret = CHECK_CU(cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
  398. eret = CHECK_CU(cuCtxPopCurrent(&dummy));
  399. if (eret < 0)
  400. return eret;
  401. else
  402. return ret;
  403. }
  404. static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
  405. {
  406. CuvidContext *ctx = avctx->priv_data;
  407. AVFrame *frame = data;
  408. int ret = 0;
  409. av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
  410. if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) {
  411. av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
  412. return AVERROR(EINVAL);
  413. }
  414. if (!ctx->decoder_flushing) {
  415. ret = cuvid_decode_packet(avctx, avpkt);
  416. if (ret < 0)
  417. return ret;
  418. }
  419. ret = cuvid_output_frame(avctx, frame);
  420. if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
  421. *got_frame = 0;
  422. } else if (ret < 0) {
  423. return ret;
  424. } else {
  425. *got_frame = 1;
  426. }
  427. return 0;
  428. }
  429. static av_cold int cuvid_decode_end(AVCodecContext *avctx)
  430. {
  431. CuvidContext *ctx = avctx->priv_data;
  432. av_fifo_freep(&ctx->frame_queue);
  433. if (ctx->bsf)
  434. av_bsf_free(&ctx->bsf);
  435. if (ctx->cuparser)
  436. cuvidDestroyVideoParser(ctx->cuparser);
  437. if (ctx->cudecoder)
  438. cuvidDestroyDecoder(ctx->cudecoder);
  439. av_buffer_unref(&ctx->hwframe);
  440. av_buffer_unref(&ctx->hwdevice);
  441. return 0;
  442. }
  443. static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
  444. {
  445. CUVIDDECODECREATEINFO cuinfo;
  446. CUvideodecoder cudec = 0;
  447. int ret = 0;
  448. memset(&cuinfo, 0, sizeof(cuinfo));
  449. cuinfo.CodecType = cuparseinfo->CodecType;
  450. cuinfo.ChromaFormat = cudaVideoChromaFormat_420;
  451. cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  452. cuinfo.ulWidth = 1280;
  453. cuinfo.ulHeight = 720;
  454. cuinfo.ulTargetWidth = cuinfo.ulWidth;
  455. cuinfo.ulTargetHeight = cuinfo.ulHeight;
  456. cuinfo.target_rect.left = 0;
  457. cuinfo.target_rect.top = 0;
  458. cuinfo.target_rect.right = cuinfo.ulWidth;
  459. cuinfo.target_rect.bottom = cuinfo.ulHeight;
  460. cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
  461. cuinfo.ulNumOutputSurfaces = 1;
  462. cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
  463. cuinfo.bitDepthMinus8 = 0;
  464. cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  465. ret = CHECK_CU(cuvidCreateDecoder(&cudec, &cuinfo));
  466. if (ret < 0)
  467. return ret;
  468. ret = CHECK_CU(cuvidDestroyDecoder(cudec));
  469. if (ret < 0)
  470. return ret;
  471. return 0;
  472. }
  473. static av_cold int cuvid_decode_init(AVCodecContext *avctx)
  474. {
  475. CuvidContext *ctx = avctx->priv_data;
  476. AVCUDADeviceContext *device_hwctx;
  477. AVHWDeviceContext *device_ctx;
  478. AVHWFramesContext *hwframe_ctx;
  479. CUVIDSOURCEDATAPACKET seq_pkt;
  480. CUcontext cuda_ctx = NULL;
  481. CUcontext dummy;
  482. const AVBitStreamFilter *bsf;
  483. int ret = 0;
  484. enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
  485. AV_PIX_FMT_NV12,
  486. AV_PIX_FMT_NONE };
  487. ret = ff_get_format(avctx, pix_fmts);
  488. if (ret < 0) {
  489. av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
  490. return ret;
  491. }
  492. ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
  493. if (!ctx->frame_queue) {
  494. ret = AVERROR(ENOMEM);
  495. goto error;
  496. }
  497. avctx->pix_fmt = ret;
  498. if (avctx->hw_frames_ctx) {
  499. ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
  500. if (!ctx->hwframe) {
  501. ret = AVERROR(ENOMEM);
  502. goto error;
  503. }
  504. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  505. ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
  506. if (!ctx->hwdevice) {
  507. ret = AVERROR(ENOMEM);
  508. goto error;
  509. }
  510. } else {
  511. ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
  512. if (ret < 0)
  513. goto error;
  514. ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
  515. if (!ctx->hwframe) {
  516. av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
  517. ret = AVERROR(ENOMEM);
  518. goto error;
  519. }
  520. hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
  521. }
  522. device_ctx = hwframe_ctx->device_ctx;
  523. device_hwctx = device_ctx->hwctx;
  524. cuda_ctx = device_hwctx->cuda_ctx;
  525. memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
  526. memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
  527. memset(&seq_pkt, 0, sizeof(seq_pkt));
  528. ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;
  529. switch (avctx->codec->id) {
  530. #if CONFIG_H263_CUVID_DECODER
  531. case AV_CODEC_ID_H263:
  532. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
  533. break;
  534. #endif
  535. #if CONFIG_H264_CUVID_DECODER
  536. case AV_CODEC_ID_H264:
  537. ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
  538. break;
  539. #endif
  540. #if CONFIG_HEVC_CUVID_DECODER
  541. case AV_CODEC_ID_HEVC:
  542. ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
  543. break;
  544. #endif
  545. #if CONFIG_MJPEG_CUVID_DECODER
  546. case AV_CODEC_ID_MJPEG:
  547. ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
  548. break;
  549. #endif
  550. #if CONFIG_MPEG1_CUVID_DECODER
  551. case AV_CODEC_ID_MPEG1VIDEO:
  552. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
  553. break;
  554. #endif
  555. #if CONFIG_MPEG2_CUVID_DECODER
  556. case AV_CODEC_ID_MPEG2VIDEO:
  557. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
  558. break;
  559. #endif
  560. #if CONFIG_MPEG4_CUVID_DECODER
  561. case AV_CODEC_ID_MPEG4:
  562. ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
  563. break;
  564. #endif
  565. #if CONFIG_VP8_CUVID_DECODER
  566. case AV_CODEC_ID_VP8:
  567. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
  568. break;
  569. #endif
  570. #if CONFIG_VP9_CUVID_DECODER
  571. case AV_CODEC_ID_VP9:
  572. ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
  573. break;
  574. #endif
  575. #if CONFIG_VC1_CUVID_DECODER
  576. case AV_CODEC_ID_VC1:
  577. ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
  578. break;
  579. #endif
  580. default:
  581. av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
  582. return AVERROR_BUG;
  583. }
  584. if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
  585. if (avctx->codec->id == AV_CODEC_ID_H264)
  586. bsf = av_bsf_get_by_name("h264_mp4toannexb");
  587. else
  588. bsf = av_bsf_get_by_name("hevc_mp4toannexb");
  589. if (!bsf) {
  590. ret = AVERROR_BSF_NOT_FOUND;
  591. goto error;
  592. }
  593. if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
  594. goto error;
  595. }
  596. if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
  597. av_bsf_free(&ctx->bsf);
  598. goto error;
  599. }
  600. ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
  601. memcpy(ctx->cuparse_ext.raw_seqhdr_data,
  602. ctx->bsf->par_out->extradata,
  603. FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
  604. } else if (avctx->extradata_size > 0) {
  605. ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
  606. memcpy(ctx->cuparse_ext.raw_seqhdr_data,
  607. avctx->extradata,
  608. FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
  609. }
  610. ctx->cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT;
  611. ctx->cuparseinfo.ulMaxDisplayDelay = 4;
  612. ctx->cuparseinfo.pUserData = avctx;
  613. ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
  614. ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
  615. ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
  616. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  617. if (ret < 0)
  618. goto error;
  619. ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo);
  620. if (ret < 0)
  621. goto error;
  622. ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  623. if (ret < 0)
  624. goto error;
  625. seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
  626. seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  627. if (seq_pkt.payload && seq_pkt.payload_size) {
  628. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  629. if (ret < 0)
  630. goto error;
  631. }
  632. ret = CHECK_CU(cuCtxPopCurrent(&dummy));
  633. if (ret < 0)
  634. goto error;
  635. ctx->prev_pts = INT64_MIN;
  636. if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
  637. av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
  638. return 0;
  639. error:
  640. cuvid_decode_end(avctx);
  641. return ret;
  642. }
  643. static void cuvid_flush(AVCodecContext *avctx)
  644. {
  645. CuvidContext *ctx = avctx->priv_data;
  646. AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
  647. AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
  648. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  649. CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
  650. int ret;
  651. ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
  652. if (ret < 0)
  653. goto error;
  654. av_fifo_freep(&ctx->frame_queue);
  655. ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
  656. if (!ctx->frame_queue) {
  657. av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
  658. return;
  659. }
  660. if (ctx->cudecoder) {
  661. cuvidDestroyDecoder(ctx->cudecoder);
  662. ctx->cudecoder = NULL;
  663. }
  664. if (ctx->cuparser) {
  665. cuvidDestroyVideoParser(ctx->cuparser);
  666. ctx->cuparser = NULL;
  667. }
  668. ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
  669. if (ret < 0)
  670. goto error;
  671. seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
  672. seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  673. if (seq_pkt.payload && seq_pkt.payload_size) {
  674. ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
  675. if (ret < 0)
  676. goto error;
  677. }
  678. ret = CHECK_CU(cuCtxPopCurrent(&dummy));
  679. if (ret < 0)
  680. goto error;
  681. ctx->prev_pts = INT64_MIN;
  682. ctx->decoder_flushing = 0;
  683. return;
  684. error:
  685. av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
  686. }
  687. #define OFFSET(x) offsetof(CuvidContext, x)
  688. #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
  689. static const AVOption options[] = {
  690. { "deint", "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT, { .i64 = cudaVideoDeinterlaceMode_Weave }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
  691. { "weave", "Weave deinterlacing (do nothing)", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave }, 0, 0, VD, "deint" },
  692. { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" },
  693. { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
  694. { "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
  695. { NULL }
  696. };
  /*
   * Define the AVClass, AVHWAccel and AVCodec for one CUVID decoder.
   *   x: lower-case prefix used in symbol and codec names (e.g. h264)
   *   X: AV_CODEC_ID_ suffix of the codec (e.g. H264)
   */
  #define DEFINE_CUVID_CODEC(x, X)                                              \
      static const AVClass x##_cuvid_class = {                                  \
          .class_name = #x "_cuvid",                                            \
          .item_name  = av_default_item_name,                                   \
          .option     = options,                                                \
          .version    = LIBAVUTIL_VERSION_INT,                                  \
      };                                                                        \
      AVHWAccel ff_##x##_cuvid_hwaccel = {                                      \
          .name    = #x "_cuvid",                                               \
          .type    = AVMEDIA_TYPE_VIDEO,                                        \
          .id      = AV_CODEC_ID_##X,                                           \
          .pix_fmt = AV_PIX_FMT_CUDA,                                           \
      };                                                                        \
      AVCodec ff_##x##_cuvid_decoder = {                                        \
          .name           = #x "_cuvid",                                        \
          .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
          .type           = AVMEDIA_TYPE_VIDEO,                                 \
          .id             = AV_CODEC_ID_##X,                                    \
          .priv_data_size = sizeof(CuvidContext),                               \
          .priv_class     = &x##_cuvid_class,                                   \
          .init           = cuvid_decode_init,                                  \
          .close          = cuvid_decode_end,                                   \
          .decode         = cuvid_decode_frame,                                 \
          .send_packet    = cuvid_decode_packet,                                \
          .receive_frame  = cuvid_output_frame,                                 \
          .flush          = cuvid_flush,                                        \
          .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING,    \
          .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA,      \
                                                          AV_PIX_FMT_NV12,      \
                                                          AV_PIX_FMT_NONE },    \
      };
  /* One decoder definition per codec enabled at configure time. */
  #if CONFIG_H263_CUVID_DECODER
  DEFINE_CUVID_CODEC(h263, H263)
  #endif

  #if CONFIG_H264_CUVID_DECODER
  DEFINE_CUVID_CODEC(h264, H264)
  #endif

  #if CONFIG_HEVC_CUVID_DECODER
  DEFINE_CUVID_CODEC(hevc, HEVC)
  #endif

  #if CONFIG_MJPEG_CUVID_DECODER
  DEFINE_CUVID_CODEC(mjpeg, MJPEG)
  #endif

  #if CONFIG_MPEG1_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
  #endif

  #if CONFIG_MPEG2_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
  #endif

  #if CONFIG_MPEG4_CUVID_DECODER
  DEFINE_CUVID_CODEC(mpeg4, MPEG4)
  #endif

  #if CONFIG_VC1_CUVID_DECODER
  DEFINE_CUVID_CODEC(vc1, VC1)
  #endif

  #if CONFIG_VP8_CUVID_DECODER
  DEFINE_CUVID_CODEC(vp8, VP8)
  #endif

  #if CONFIG_VP9_CUVID_DECODER
  DEFINE_CUVID_CODEC(vp9, VP9)
  #endif