You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

571 lines
16KB

  1. /*
  2. * HW decode acceleration through NVDEC
  3. *
  4. * Copyright (c) 2016 Anton Khirnov
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "config.h"
  23. #include "libavutil/common.h"
  24. #include "libavutil/error.h"
  25. #include "libavutil/hwcontext.h"
  26. #include "libavutil/hwcontext_cuda_internal.h"
  27. #include "libavutil/pixdesc.h"
  28. #include "libavutil/pixfmt.h"
  29. #include "avcodec.h"
  30. #include "decode.h"
  31. #include "nvdec.h"
  32. #include "internal.h"
  33. typedef struct NVDECDecoder {
  34. CUvideodecoder decoder;
  35. AVBufferRef *hw_device_ref;
  36. CUcontext cuda_ctx;
  37. CudaFunctions *cudl;
  38. CuvidFunctions *cvdl;
  39. } NVDECDecoder;
  /**
   * Bookkeeping for the pool that hands out decode surface indices.
   */
  typedef struct NVDECFramePool {
      /* Upper bound on the number of indices the pool may allocate. */
      unsigned int dpb_size;
      /* Number of indices allocated so far; also the next index to hand out. */
      unsigned int nb_allocated;
  } NVDECFramePool;
  44. static int map_avcodec_id(enum AVCodecID id)
  45. {
  46. switch (id) {
  47. case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
  48. case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
  49. case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1;
  50. case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
  51. case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4;
  52. case AV_CODEC_ID_VC1: return cudaVideoCodec_VC1;
  53. case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9;
  54. case AV_CODEC_ID_WMV3: return cudaVideoCodec_VC1;
  55. }
  56. return -1;
  57. }
  58. static int map_chroma_format(enum AVPixelFormat pix_fmt)
  59. {
  60. int shift_h = 0, shift_v = 0;
  61. av_pix_fmt_get_chroma_sub_sample(pix_fmt, &shift_h, &shift_v);
  62. if (shift_h == 1 && shift_v == 1)
  63. return cudaVideoChromaFormat_420;
  64. else if (shift_h == 1 && shift_v == 0)
  65. return cudaVideoChromaFormat_422;
  66. else if (shift_h == 0 && shift_v == 0)
  67. return cudaVideoChromaFormat_444;
  68. return -1;
  69. }
  70. static int nvdec_test_capabilities(NVDECDecoder *decoder,
  71. CUVIDDECODECREATEINFO *params, void *logctx)
  72. {
  73. CUresult err;
  74. CUVIDDECODECAPS caps = { 0 };
  75. caps.eCodecType = params->CodecType;
  76. caps.eChromaFormat = params->ChromaFormat;
  77. caps.nBitDepthMinus8 = params->bitDepthMinus8;
  78. err = decoder->cvdl->cuvidGetDecoderCaps(&caps);
  79. if (err != CUDA_SUCCESS) {
  80. av_log(logctx, AV_LOG_ERROR, "Failed querying decoder capabilities\n");
  81. return AVERROR_UNKNOWN;
  82. }
  83. av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
  84. av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
  85. caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
  86. av_log(logctx, AV_LOG_VERBOSE, "min_width: %d, max_width: %d\n",
  87. caps.nMinWidth, caps.nMaxWidth);
  88. av_log(logctx, AV_LOG_VERBOSE, "min_height: %d, max_height: %d\n",
  89. caps.nMinHeight, caps.nMaxHeight);
  90. if (!caps.bIsSupported) {
  91. av_log(logctx, AV_LOG_ERROR, "Hardware is lacking required capabilities\n");
  92. return AVERROR(EINVAL);
  93. }
  94. if (params->ulWidth > caps.nMaxWidth || params->ulWidth < caps.nMinWidth) {
  95. av_log(logctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
  96. (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
  97. return AVERROR(EINVAL);
  98. }
  99. if (params->ulHeight > caps.nMaxHeight || params->ulHeight < caps.nMinHeight) {
  100. av_log(logctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
  101. (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
  102. return AVERROR(EINVAL);
  103. }
  104. if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
  105. av_log(logctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
  106. (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
  107. return AVERROR(EINVAL);
  108. }
  109. return 0;
  110. }
  111. static void nvdec_decoder_free(void *opaque, uint8_t *data)
  112. {
  113. NVDECDecoder *decoder = (NVDECDecoder*)data;
  114. if (decoder->decoder)
  115. decoder->cvdl->cuvidDestroyDecoder(decoder->decoder);
  116. av_buffer_unref(&decoder->hw_device_ref);
  117. cuvid_free_functions(&decoder->cvdl);
  118. av_freep(&decoder);
  119. }
  120. static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
  121. CUVIDDECODECREATEINFO *params, void *logctx)
  122. {
  123. AVHWDeviceContext *hw_device_ctx = (AVHWDeviceContext*)hw_device_ref->data;
  124. AVCUDADeviceContext *device_hwctx = hw_device_ctx->hwctx;
  125. AVBufferRef *decoder_ref;
  126. NVDECDecoder *decoder;
  127. CUcontext dummy;
  128. CUresult err;
  129. int ret;
  130. decoder = av_mallocz(sizeof(*decoder));
  131. if (!decoder)
  132. return AVERROR(ENOMEM);
  133. decoder_ref = av_buffer_create((uint8_t*)decoder, sizeof(*decoder),
  134. nvdec_decoder_free, NULL, AV_BUFFER_FLAG_READONLY);
  135. if (!decoder_ref) {
  136. av_freep(&decoder);
  137. return AVERROR(ENOMEM);
  138. }
  139. decoder->hw_device_ref = av_buffer_ref(hw_device_ref);
  140. if (!decoder->hw_device_ref) {
  141. ret = AVERROR(ENOMEM);
  142. goto fail;
  143. }
  144. decoder->cuda_ctx = device_hwctx->cuda_ctx;
  145. decoder->cudl = device_hwctx->internal->cuda_dl;
  146. ret = cuvid_load_functions(&decoder->cvdl);
  147. if (ret < 0) {
  148. av_log(logctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
  149. goto fail;
  150. }
  151. err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
  152. if (err != CUDA_SUCCESS) {
  153. ret = AVERROR_UNKNOWN;
  154. goto fail;
  155. }
  156. ret = nvdec_test_capabilities(decoder, params, logctx);
  157. if (ret < 0) {
  158. decoder->cudl->cuCtxPopCurrent(&dummy);
  159. goto fail;
  160. }
  161. err = decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params);
  162. decoder->cudl->cuCtxPopCurrent(&dummy);
  163. if (err != CUDA_SUCCESS) {
  164. av_log(logctx, AV_LOG_ERROR, "Error creating a NVDEC decoder: %d\n", err);
  165. ret = AVERROR_UNKNOWN;
  166. goto fail;
  167. }
  168. *out = decoder_ref;
  169. return 0;
  170. fail:
  171. av_buffer_unref(&decoder_ref);
  172. return ret;
  173. }
  174. static AVBufferRef *nvdec_decoder_frame_alloc(void *opaque, int size)
  175. {
  176. NVDECFramePool *pool = opaque;
  177. AVBufferRef *ret;
  178. if (pool->nb_allocated >= pool->dpb_size)
  179. return NULL;
  180. ret = av_buffer_alloc(sizeof(unsigned int));
  181. if (!ret)
  182. return NULL;
  183. *(unsigned int*)ret->data = pool->nb_allocated++;
  184. return ret;
  185. }
  186. int ff_nvdec_decode_uninit(AVCodecContext *avctx)
  187. {
  188. NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
  189. av_freep(&ctx->bitstream);
  190. ctx->bitstream_len = 0;
  191. ctx->bitstream_allocated = 0;
  192. av_freep(&ctx->slice_offsets);
  193. ctx->nb_slices = 0;
  194. ctx->slice_offsets_allocated = 0;
  195. av_buffer_unref(&ctx->decoder_ref);
  196. av_buffer_pool_uninit(&ctx->decoder_pool);
  197. return 0;
  198. }
  199. int ff_nvdec_decode_init(AVCodecContext *avctx)
  200. {
  201. NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
  202. NVDECFramePool *pool;
  203. AVHWFramesContext *frames_ctx;
  204. const AVPixFmtDescriptor *sw_desc;
  205. CUVIDDECODECREATEINFO params = { 0 };
  206. int cuvid_codec_type, cuvid_chroma_format;
  207. int ret = 0;
  208. sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
  209. if (!sw_desc)
  210. return AVERROR_BUG;
  211. cuvid_codec_type = map_avcodec_id(avctx->codec_id);
  212. if (cuvid_codec_type < 0) {
  213. av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
  214. return AVERROR_BUG;
  215. }
  216. cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
  217. if (cuvid_chroma_format < 0) {
  218. av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
  219. return AVERROR(ENOSYS);
  220. }
  221. if (!avctx->hw_frames_ctx) {
  222. ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
  223. if (ret < 0)
  224. return ret;
  225. }
  226. frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
  227. params.ulWidth = avctx->coded_width;
  228. params.ulHeight = avctx->coded_height;
  229. params.ulTargetWidth = avctx->coded_width;
  230. params.ulTargetHeight = avctx->coded_height;
  231. params.bitDepthMinus8 = sw_desc->comp[0].depth - 8;
  232. params.OutputFormat = params.bitDepthMinus8 ?
  233. cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
  234. params.CodecType = cuvid_codec_type;
  235. params.ChromaFormat = cuvid_chroma_format;
  236. params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
  237. params.ulNumOutputSurfaces = 1;
  238. ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
  239. if (ret < 0) {
  240. if (params.ulNumDecodeSurfaces > 32) {
  241. av_log(avctx, AV_LOG_WARNING, "Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
  242. (int)params.ulNumDecodeSurfaces);
  243. av_log(avctx, AV_LOG_WARNING, "Try lowering the amount of threads. Using %d right now.\n",
  244. avctx->thread_count);
  245. }
  246. return ret;
  247. }
  248. pool = av_mallocz(sizeof(*pool));
  249. if (!pool) {
  250. ret = AVERROR(ENOMEM);
  251. goto fail;
  252. }
  253. pool->dpb_size = frames_ctx->initial_pool_size;
  254. ctx->decoder_pool = av_buffer_pool_init2(sizeof(int), pool,
  255. nvdec_decoder_frame_alloc, av_free);
  256. if (!ctx->decoder_pool) {
  257. ret = AVERROR(ENOMEM);
  258. goto fail;
  259. }
  260. return 0;
  261. fail:
  262. ff_nvdec_decode_uninit(avctx);
  263. return ret;
  264. }
  265. static void nvdec_fdd_priv_free(void *priv)
  266. {
  267. NVDECFrame *cf = priv;
  268. if (!cf)
  269. return;
  270. av_buffer_unref(&cf->idx_ref);
  271. av_buffer_unref(&cf->decoder_ref);
  272. av_freep(&priv);
  273. }
  274. static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
  275. {
  276. FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
  277. NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
  278. NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
  279. CUVIDPROCPARAMS vpp = { .progressive_frame = 1 };
  280. CUresult err;
  281. CUcontext dummy;
  282. CUdeviceptr devptr;
  283. unsigned int pitch, i;
  284. unsigned int offset = 0;
  285. int ret = 0;
  286. err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
  287. if (err != CUDA_SUCCESS)
  288. return AVERROR_UNKNOWN;
  289. err = decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, cf->idx, &devptr,
  290. &pitch, &vpp);
  291. if (err != CUDA_SUCCESS) {
  292. av_log(logctx, AV_LOG_ERROR, "Error mapping a picture with CUVID: %d\n",
  293. err);
  294. ret = AVERROR_UNKNOWN;
  295. goto finish;
  296. }
  297. for (i = 0; frame->data[i]; i++) {
  298. CUDA_MEMCPY2D cpy = {
  299. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  300. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  301. .srcDevice = devptr,
  302. .dstDevice = (CUdeviceptr)frame->data[i],
  303. .srcPitch = pitch,
  304. .dstPitch = frame->linesize[i],
  305. .srcY = offset,
  306. .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
  307. .Height = frame->height >> (i ? 1 : 0),
  308. };
  309. err = decoder->cudl->cuMemcpy2D(&cpy);
  310. if (err != CUDA_SUCCESS) {
  311. av_log(logctx, AV_LOG_ERROR, "Error copying decoded frame: %d\n",
  312. err);
  313. ret = AVERROR_UNKNOWN;
  314. goto copy_fail;
  315. }
  316. offset += cpy.Height;
  317. }
  318. copy_fail:
  319. decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
  320. finish:
  321. decoder->cudl->cuCtxPopCurrent(&dummy);
  322. return ret;
  323. }
  324. int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame)
  325. {
  326. NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
  327. FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
  328. NVDECFrame *cf = NULL;
  329. int ret;
  330. ctx->bitstream_len = 0;
  331. ctx->nb_slices = 0;
  332. if (fdd->hwaccel_priv)
  333. return 0;
  334. cf = av_mallocz(sizeof(*cf));
  335. if (!cf)
  336. return AVERROR(ENOMEM);
  337. cf->decoder_ref = av_buffer_ref(ctx->decoder_ref);
  338. if (!cf->decoder_ref) {
  339. ret = AVERROR(ENOMEM);
  340. goto fail;
  341. }
  342. cf->idx_ref = av_buffer_pool_get(ctx->decoder_pool);
  343. if (!cf->idx_ref) {
  344. av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
  345. ret = AVERROR(ENOMEM);
  346. goto fail;
  347. }
  348. cf->idx = *(unsigned int*)cf->idx_ref->data;
  349. fdd->hwaccel_priv = cf;
  350. fdd->hwaccel_priv_free = nvdec_fdd_priv_free;
  351. fdd->post_process = nvdec_retrieve_data;
  352. return 0;
  353. fail:
  354. nvdec_fdd_priv_free(cf);
  355. return ret;
  356. }
  357. int ff_nvdec_end_frame(AVCodecContext *avctx)
  358. {
  359. NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
  360. NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
  361. CUVIDPICPARAMS *pp = &ctx->pic_params;
  362. CUresult err;
  363. CUcontext dummy;
  364. int ret = 0;
  365. pp->nBitstreamDataLen = ctx->bitstream_len;
  366. pp->pBitstreamData = ctx->bitstream;
  367. pp->nNumSlices = ctx->nb_slices;
  368. pp->pSliceDataOffsets = ctx->slice_offsets;
  369. err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
  370. if (err != CUDA_SUCCESS)
  371. return AVERROR_UNKNOWN;
  372. err = decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params);
  373. if (err != CUDA_SUCCESS) {
  374. av_log(avctx, AV_LOG_ERROR, "Error decoding a picture with NVDEC: %d\n",
  375. err);
  376. ret = AVERROR_UNKNOWN;
  377. goto finish;
  378. }
  379. finish:
  380. decoder->cudl->cuCtxPopCurrent(&dummy);
  381. return ret;
  382. }
  383. int ff_nvdec_simple_end_frame(AVCodecContext *avctx)
  384. {
  385. NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
  386. int ret = ff_nvdec_end_frame(avctx);
  387. ctx->bitstream = NULL;
  388. return ret;
  389. }
  390. int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
  391. uint32_t size)
  392. {
  393. NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
  394. void *tmp;
  395. tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
  396. (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
  397. if (!tmp)
  398. return AVERROR(ENOMEM);
  399. ctx->slice_offsets = tmp;
  400. if (!ctx->bitstream)
  401. ctx->bitstream = (uint8_t*)buffer;
  402. ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream;
  403. ctx->bitstream_len += size;
  404. ctx->nb_slices++;
  405. return 0;
  406. }
  407. int ff_nvdec_frame_params(AVCodecContext *avctx,
  408. AVBufferRef *hw_frames_ctx,
  409. int dpb_size)
  410. {
  411. AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
  412. const AVPixFmtDescriptor *sw_desc;
  413. int cuvid_codec_type, cuvid_chroma_format;
  414. sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
  415. if (!sw_desc)
  416. return AVERROR_BUG;
  417. cuvid_codec_type = map_avcodec_id(avctx->codec_id);
  418. if (cuvid_codec_type < 0) {
  419. av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
  420. return AVERROR_BUG;
  421. }
  422. cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
  423. if (cuvid_chroma_format < 0) {
  424. av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
  425. return AVERROR(EINVAL);
  426. }
  427. frames_ctx->format = AV_PIX_FMT_CUDA;
  428. frames_ctx->width = avctx->coded_width;
  429. frames_ctx->height = avctx->coded_height;
  430. frames_ctx->initial_pool_size = dpb_size;
  431. switch (sw_desc->comp[0].depth) {
  432. case 8:
  433. frames_ctx->sw_format = AV_PIX_FMT_NV12;
  434. break;
  435. case 10:
  436. frames_ctx->sw_format = AV_PIX_FMT_P010;
  437. break;
  438. case 12:
  439. frames_ctx->sw_format = AV_PIX_FMT_P016;
  440. break;
  441. default:
  442. return AVERROR(EINVAL);
  443. }
  444. return 0;
  445. }
  446. int ff_nvdec_get_ref_idx(AVFrame *frame)
  447. {
  448. FrameDecodeData *fdd;
  449. NVDECFrame *cf;
  450. if (!frame || !frame->private_ref)
  451. return -1;
  452. fdd = (FrameDecodeData*)frame->private_ref->data;
  453. cf = (NVDECFrame*)fdd->hwaccel_priv;
  454. if (!cf)
  455. return -1;
  456. return cf->idx;
  457. }