You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1148 lines
33KB

  1. /*
  2. * NVIDIA NVENC Support
  3. * Copyright (C) 2015 Luca Barbato
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "config.h"
  22. #include <cuda.h>
  23. #include <nvEncodeAPI.h>
  24. #include <string.h>
  25. #define CUDA_LIBNAME "libcuda.so"
  26. #if HAVE_DLFCN_H
  27. #include <dlfcn.h>
  28. #define NVENC_LIBNAME "libnvidia-encode.so"
  29. #elif HAVE_WINDOWS_H
  30. #include <windows.h>
  31. #if ARCH_X86_64
  32. #define NVENC_LIBNAME "nvEncodeAPI64.dll"
  33. #else
  34. #define NVENC_LIBNAME "nvEncodeAPI.dll"
  35. #endif
  36. #define dlopen(filename, flags) LoadLibrary((filename))
  37. #define dlsym(handle, symbol) GetProcAddress(handle, symbol)
  38. #define dlclose(handle) FreeLibrary(handle)
  39. #endif
  40. #include "libavutil/common.h"
  41. #include "libavutil/imgutils.h"
  42. #include "libavutil/mem.h"
  43. #include "avcodec.h"
  44. #include "internal.h"
  45. #include "nvenc.h"
  /* Lowest CUDA compute capability accepted by nvenc_check_device(), encoded
   * as (major << 4) | minor; 0x30 therefore stands for 3.0. */
  #define NVENC_CAP 0x30

  /* Size, in bytes, requested for every output bitstream buffer.
   * Parenthesized so that the macro expands safely inside larger expressions
   * such as x / BITSTREAM_BUFFER_SIZE. */
  #define BITSTREAM_BUFFER_SIZE (1024 * 1024)

  /* Open the shared library `path` and store its handle in `l`.
   * Relies on a variable named `avctx` being in scope for logging and
   * RETURNS AVERROR_UNKNOWN from the enclosing function on failure. */
  #define LOAD_LIBRARY(l, path)                                   \
      do {                                                        \
          if (!((l) = dlopen(path, RTLD_LAZY))) {                 \
              av_log(avctx, AV_LOG_ERROR,                         \
                     "Cannot load %s\n",                          \
                     path);                                       \
              return AVERROR_UNKNOWN;                             \
          }                                                       \
      } while (0)

  /* Resolve `symbol` in the already opened library `lib` and store the
   * address in `fun`.  Same contract as LOAD_LIBRARY: needs `avctx` in scope
   * and RETURNS AVERROR_UNKNOWN from the enclosing function on failure. */
  #define LOAD_SYMBOL(fun, lib, symbol)                           \
      do {                                                        \
          if (!((fun) = dlsym(lib, symbol))) {                    \
              av_log(avctx, AV_LOG_ERROR,                         \
                     "Cannot load %s\n",                          \
                     symbol);                                     \
              return AVERROR_UNKNOWN;                             \
          }                                                       \
      } while (0)
  66. static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
  67. {
  68. NVENCContext *ctx = avctx->priv_data;
  69. NVENCLibraryContext *nvel = &ctx->nvel;
  70. PNVENCODEAPICREATEINSTANCE nvenc_create_instance;
  71. LOAD_LIBRARY(nvel->cuda, CUDA_LIBNAME);
  72. LOAD_SYMBOL(nvel->cu_init, nvel->cuda, "cuInit");
  73. LOAD_SYMBOL(nvel->cu_device_get_count, nvel->cuda, "cuDeviceGetCount");
  74. LOAD_SYMBOL(nvel->cu_device_get, nvel->cuda, "cuDeviceGet");
  75. LOAD_SYMBOL(nvel->cu_device_get_name, nvel->cuda, "cuDeviceGetName");
  76. LOAD_SYMBOL(nvel->cu_device_compute_capability, nvel->cuda,
  77. "cuDeviceComputeCapability");
  78. LOAD_SYMBOL(nvel->cu_ctx_create, nvel->cuda, "cuCtxCreate_v2");
  79. LOAD_SYMBOL(nvel->cu_ctx_pop_current, nvel->cuda, "cuCtxPopCurrent_v2");
  80. LOAD_SYMBOL(nvel->cu_ctx_destroy, nvel->cuda, "cuCtxDestroy_v2");
  81. LOAD_LIBRARY(nvel->nvenc, NVENC_LIBNAME);
  82. LOAD_SYMBOL(nvenc_create_instance, nvel->nvenc,
  83. "NvEncodeAPICreateInstance");
  84. nvel->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
  85. if ((nvenc_create_instance(&nvel->nvenc_funcs)) != NV_ENC_SUCCESS) {
  86. av_log(avctx, AV_LOG_ERROR, "Cannot create the NVENC instance");
  87. return AVERROR_UNKNOWN;
  88. }
  89. return 0;
  90. }
  91. static int nvenc_open_session(AVCodecContext *avctx)
  92. {
  93. NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
  94. NVENCContext *ctx = avctx->priv_data;
  95. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  96. int ret;
  97. params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
  98. params.apiVersion = NVENCAPI_VERSION;
  99. params.device = ctx->cu_context;
  100. params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  101. ret = nv->nvEncOpenEncodeSessionEx(&params, &ctx->nvenc_ctx);
  102. if (ret != NV_ENC_SUCCESS) {
  103. ctx->nvenc_ctx = NULL;
  104. av_log(avctx, AV_LOG_ERROR,
  105. "Cannot open the NVENC Session\n");
  106. return AVERROR_UNKNOWN;
  107. }
  108. return 0;
  109. }
  110. static int nvenc_check_codec_support(AVCodecContext *avctx)
  111. {
  112. NVENCContext *ctx = avctx->priv_data;
  113. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  114. int i, ret, count = 0;
  115. GUID *guids = NULL;
  116. ret = nv->nvEncGetEncodeGUIDCount(ctx->nvenc_ctx, &count);
  117. if (ret != NV_ENC_SUCCESS || !count)
  118. return AVERROR(ENOSYS);
  119. guids = av_malloc(count * sizeof(GUID));
  120. if (!guids)
  121. return AVERROR(ENOMEM);
  122. ret = nv->nvEncGetEncodeGUIDs(ctx->nvenc_ctx, guids, count, &count);
  123. if (ret != NV_ENC_SUCCESS) {
  124. ret = AVERROR(ENOSYS);
  125. goto fail;
  126. }
  127. ret = AVERROR(ENOSYS);
  128. for (i = 0; i < count; i++) {
  129. if (!memcmp(&guids[i], &ctx->params.encodeGUID, sizeof(*guids))) {
  130. ret = 0;
  131. break;
  132. }
  133. }
  134. fail:
  135. av_free(guids);
  136. return ret;
  137. }
  138. static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
  139. {
  140. NVENCContext *ctx = avctx->priv_data;
  141. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  142. NV_ENC_CAPS_PARAM params = { 0 };
  143. int ret, val = 0;
  144. params.version = NV_ENC_CAPS_PARAM_VER;
  145. params.capsToQuery = cap;
  146. ret = nv->nvEncGetEncodeCaps(ctx->nvenc_ctx, ctx->params.encodeGUID, &params, &val);
  147. if (ret == NV_ENC_SUCCESS)
  148. return val;
  149. return 0;
  150. }
  151. static int nvenc_check_capabilities(AVCodecContext *avctx)
  152. {
  153. int ret;
  154. ret = nvenc_check_codec_support(avctx);
  155. if (ret < 0) {
  156. av_log(avctx, AV_LOG_VERBOSE, "Codec not supported\n");
  157. return ret;
  158. }
  159. ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
  160. if (avctx->pix_fmt == AV_PIX_FMT_YUV444P && ret <= 0) {
  161. av_log(avctx, AV_LOG_VERBOSE, "YUV444P not supported\n");
  162. return AVERROR(ENOSYS);
  163. }
  164. ret = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
  165. if (ret < avctx->width) {
  166. av_log(avctx, AV_LOG_VERBOSE, "Width %d exceeds %d\n",
  167. avctx->width, ret);
  168. return AVERROR(ENOSYS);
  169. }
  170. ret = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
  171. if (ret < avctx->height) {
  172. av_log(avctx, AV_LOG_VERBOSE, "Height %d exceeds %d\n",
  173. avctx->height, ret);
  174. return AVERROR(ENOSYS);
  175. }
  176. ret = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
  177. if (ret < avctx->max_b_frames) {
  178. av_log(avctx, AV_LOG_VERBOSE, "Max b-frames %d exceed %d\n",
  179. avctx->max_b_frames, ret);
  180. return AVERROR(ENOSYS);
  181. }
  182. return 0;
  183. }
  184. static int nvenc_check_device(AVCodecContext *avctx, int idx)
  185. {
  186. NVENCContext *ctx = avctx->priv_data;
  187. NVENCLibraryContext *nvel = &ctx->nvel;
  188. char name[128] = { 0 };
  189. int major, minor, ret;
  190. CUdevice cu_device;
  191. CUcontext dummy;
  192. int loglevel = AV_LOG_VERBOSE;
  193. if (ctx->device == LIST_DEVICES)
  194. loglevel = AV_LOG_INFO;
  195. ret = nvel->cu_device_get(&cu_device, idx);
  196. if (ret != CUDA_SUCCESS) {
  197. av_log(avctx, AV_LOG_ERROR,
  198. "Cannot access the CUDA device %d\n",
  199. idx);
  200. return -1;
  201. }
  202. ret = nvel->cu_device_get_name(name, sizeof(name), cu_device);
  203. if (ret != CUDA_SUCCESS)
  204. return -1;
  205. ret = nvel->cu_device_compute_capability(&major, &minor, cu_device);
  206. if (ret != CUDA_SUCCESS)
  207. return -1;
  208. av_log(avctx, loglevel, "Device %d [%s] ", cu_device, name);
  209. if (((major << 4) | minor) < NVENC_CAP)
  210. goto fail;
  211. ret = nvel->cu_ctx_create(&ctx->cu_context, 0, cu_device);
  212. if (ret != CUDA_SUCCESS)
  213. goto fail;
  214. ret = nvel->cu_ctx_pop_current(&dummy);
  215. if (ret != CUDA_SUCCESS)
  216. goto fail2;
  217. if ((ret = nvenc_open_session(avctx)) < 0)
  218. goto fail2;
  219. if ((ret = nvenc_check_capabilities(avctx)) < 0)
  220. goto fail3;
  221. av_log(avctx, loglevel, "supports NVENC\n");
  222. if (ctx->device == cu_device || ctx->device == ANY_DEVICE)
  223. return 0;
  224. fail3:
  225. nvel->nvenc_funcs.nvEncDestroyEncoder(ctx->nvenc_ctx);
  226. ctx->nvenc_ctx = NULL;
  227. fail2:
  228. nvel->cu_ctx_destroy(ctx->cu_context);
  229. ctx->cu_context = NULL;
  230. fail:
  231. if (ret != 0)
  232. av_log(avctx, loglevel, "does not support NVENC (major %d minor %d)\n",
  233. major, minor);
  234. return AVERROR(ENOSYS);
  235. }
  236. static int nvenc_setup_device(AVCodecContext *avctx)
  237. {
  238. NVENCContext *ctx = avctx->priv_data;
  239. NVENCLibraryContext *nvel = &ctx->nvel;
  240. int i, nb_devices = 0;
  241. if ((nvel->cu_init(0)) != CUDA_SUCCESS) {
  242. av_log(avctx, AV_LOG_ERROR,
  243. "Cannot init CUDA\n");
  244. return AVERROR_UNKNOWN;
  245. }
  246. if ((nvel->cu_device_get_count(&nb_devices)) != CUDA_SUCCESS) {
  247. av_log(avctx, AV_LOG_ERROR,
  248. "Cannot enumerate the CUDA devices\n");
  249. return AVERROR_UNKNOWN;
  250. }
  251. switch (avctx->codec->id) {
  252. case AV_CODEC_ID_H264:
  253. ctx->params.encodeGUID = NV_ENC_CODEC_H264_GUID;
  254. break;
  255. case AV_CODEC_ID_HEVC:
  256. ctx->params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
  257. break;
  258. default:
  259. return AVERROR_BUG;
  260. }
  261. for (i = 0; i < nb_devices; ++i) {
  262. if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
  263. return 0;
  264. }
  265. if (ctx->device == LIST_DEVICES)
  266. return AVERROR_EXIT;
  267. return AVERROR(ENOSYS);
  268. }
  269. typedef struct GUIDTuple {
  270. const GUID guid;
  271. int flags;
  272. } GUIDTuple;
  273. static int nvec_map_preset(NVENCContext *ctx)
  274. {
  275. GUIDTuple presets[] = {
  276. { NV_ENC_PRESET_DEFAULT_GUID },
  277. { NV_ENC_PRESET_HP_GUID },
  278. { NV_ENC_PRESET_HQ_GUID },
  279. { NV_ENC_PRESET_BD_GUID },
  280. { NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID, NVENC_LOWLATENCY },
  281. { NV_ENC_PRESET_LOW_LATENCY_HP_GUID, NVENC_LOWLATENCY },
  282. { NV_ENC_PRESET_LOW_LATENCY_HQ_GUID, NVENC_LOWLATENCY },
  283. { NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID, NVENC_LOSSLESS },
  284. { NV_ENC_PRESET_LOSSLESS_HP_GUID, NVENC_LOSSLESS },
  285. { { 0 } }
  286. };
  287. GUIDTuple *t = &presets[ctx->preset];
  288. ctx->params.presetGUID = t->guid;
  289. ctx->flags = t->flags;
  290. return AVERROR(EINVAL);
  291. }
  292. static void set_constqp(AVCodecContext *avctx, NV_ENC_RC_PARAMS *rc)
  293. {
  294. rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
  295. rc->constQP.qpInterB = avctx->global_quality;
  296. rc->constQP.qpInterP = avctx->global_quality;
  297. rc->constQP.qpIntra = avctx->global_quality;
  298. }
  299. static void set_vbr(AVCodecContext *avctx, NV_ENC_RC_PARAMS *rc)
  300. {
  301. if (avctx->qmin >= 0) {
  302. rc->enableMinQP = 1;
  303. rc->minQP.qpInterB = avctx->qmin;
  304. rc->minQP.qpInterP = avctx->qmin;
  305. rc->minQP.qpIntra = avctx->qmin;
  306. }
  307. if (avctx->qmax >= 0) {
  308. rc->enableMaxQP = 1;
  309. rc->maxQP.qpInterB = avctx->qmax;
  310. rc->maxQP.qpInterP = avctx->qmax;
  311. rc->maxQP.qpIntra = avctx->qmax;
  312. }
  313. }
  314. static void nvenc_override_rate_control(AVCodecContext *avctx,
  315. NV_ENC_RC_PARAMS *rc)
  316. {
  317. NVENCContext *ctx = avctx->priv_data;
  318. switch (ctx->rc) {
  319. case NV_ENC_PARAMS_RC_CONSTQP:
  320. if (avctx->global_quality < 0) {
  321. av_log(avctx, AV_LOG_WARNING,
  322. "The constant quality rate-control requires "
  323. "the 'global_quality' option set.\n");
  324. return;
  325. }
  326. set_constqp(avctx, rc);
  327. return;
  328. case NV_ENC_PARAMS_RC_2_PASS_VBR:
  329. case NV_ENC_PARAMS_RC_VBR:
  330. if (avctx->qmin < 0 && avctx->qmax < 0) {
  331. av_log(avctx, AV_LOG_WARNING,
  332. "The variable bitrate rate-control requires "
  333. "the 'qmin' and/or 'qmax' option set.\n");
  334. return;
  335. }
  336. case NV_ENC_PARAMS_RC_VBR_MINQP:
  337. if (avctx->qmin < 0) {
  338. av_log(avctx, AV_LOG_WARNING,
  339. "The variable bitrate rate-control requires "
  340. "the 'qmin' option set.\n");
  341. return;
  342. }
  343. set_vbr(avctx, rc);
  344. break;
  345. case NV_ENC_PARAMS_RC_CBR:
  346. break;
  347. case NV_ENC_PARAMS_RC_2_PASS_QUALITY:
  348. case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP:
  349. if (!(ctx->flags & NVENC_LOWLATENCY)) {
  350. av_log(avctx, AV_LOG_WARNING,
  351. "The multipass rate-control requires "
  352. "a low-latency preset.\n");
  353. return;
  354. }
  355. }
  356. rc->rateControlMode = ctx->rc;
  357. }
  358. static void nvenc_setup_rate_control(AVCodecContext *avctx)
  359. {
  360. NVENCContext *ctx = avctx->priv_data;
  361. NV_ENC_RC_PARAMS *rc = &ctx->config.rcParams;
  362. if (avctx->bit_rate > 0)
  363. rc->averageBitRate = avctx->bit_rate;
  364. if (avctx->rc_max_rate > 0)
  365. rc->maxBitRate = avctx->rc_max_rate;
  366. if (ctx->rc > 0) {
  367. nvenc_override_rate_control(avctx, rc);
  368. } else if (avctx->global_quality > 0) {
  369. set_constqp(avctx, rc);
  370. } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
  371. rc->rateControlMode = NV_ENC_PARAMS_RC_VBR;
  372. set_vbr(avctx, rc);
  373. }
  374. if (avctx->rc_buffer_size > 0)
  375. rc->vbvBufferSize = avctx->rc_buffer_size;
  376. if (rc->averageBitRate > 0)
  377. avctx->bit_rate = rc->averageBitRate;
  378. }
  379. static int nvenc_setup_h264_config(AVCodecContext *avctx)
  380. {
  381. NVENCContext *ctx = avctx->priv_data;
  382. NV_ENC_CONFIG *cc = &ctx->config;
  383. NV_ENC_CONFIG_H264 *h264 = &cc->encodeCodecConfig.h264Config;
  384. NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
  385. vui->colourDescriptionPresentFlag = 1;
  386. vui->videoSignalTypePresentFlag = 1;
  387. vui->colourMatrix = avctx->colorspace;
  388. vui->colourPrimaries = avctx->color_primaries;
  389. vui->transferCharacteristics = avctx->color_trc;
  390. vui->videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
  391. h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
  392. h264->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
  393. h264->maxNumRefFrames = avctx->refs;
  394. h264->idrPeriod = cc->gopLength;
  395. if (ctx->profile)
  396. avctx->profile = ctx->profile;
  397. if (avctx->pix_fmt == AV_PIX_FMT_YUV444P)
  398. h264->chromaFormatIDC = 3;
  399. else
  400. h264->chromaFormatIDC = 1;
  401. switch (ctx->profile) {
  402. case NV_ENC_H264_PROFILE_BASELINE:
  403. cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
  404. break;
  405. case NV_ENC_H264_PROFILE_MAIN:
  406. cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
  407. break;
  408. case NV_ENC_H264_PROFILE_HIGH:
  409. cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
  410. break;
  411. case NV_ENC_H264_PROFILE_HIGH_444:
  412. cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
  413. break;
  414. case NV_ENC_H264_PROFILE_CONSTRAINED_HIGH:
  415. cc->profileGUID = NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID;
  416. break;
  417. }
  418. h264->level = ctx->level;
  419. return 0;
  420. }
  421. static int nvenc_setup_hevc_config(AVCodecContext *avctx)
  422. {
  423. NVENCContext *ctx = avctx->priv_data;
  424. NV_ENC_CONFIG *cc = &ctx->config;
  425. NV_ENC_CONFIG_HEVC *hevc = &cc->encodeCodecConfig.hevcConfig;
  426. hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
  427. hevc->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
  428. hevc->maxNumRefFramesInDPB = avctx->refs;
  429. hevc->idrPeriod = cc->gopLength;
  430. /* No other profile is supported in the current SDK version 5 */
  431. cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
  432. avctx->profile = FF_PROFILE_HEVC_MAIN;
  433. if (ctx->level) {
  434. hevc->level = ctx->level;
  435. } else {
  436. hevc->level = NV_ENC_LEVEL_AUTOSELECT;
  437. }
  438. if (ctx->tier) {
  439. hevc->tier = ctx->tier;
  440. }
  441. return 0;
  442. }
  443. static int nvenc_setup_codec_config(AVCodecContext *avctx)
  444. {
  445. switch (avctx->codec->id) {
  446. case AV_CODEC_ID_H264:
  447. return nvenc_setup_h264_config(avctx);
  448. case AV_CODEC_ID_HEVC:
  449. return nvenc_setup_hevc_config(avctx);
  450. }
  451. return 0;
  452. }
  453. static int nvenc_setup_encoder(AVCodecContext *avctx)
  454. {
  455. NVENCContext *ctx = avctx->priv_data;
  456. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  457. NV_ENC_PRESET_CONFIG preset_cfg = { 0 };
  458. AVCPBProperties *cpb_props;
  459. int ret;
  460. ctx->params.version = NV_ENC_INITIALIZE_PARAMS_VER;
  461. ctx->params.encodeHeight = avctx->height;
  462. ctx->params.encodeWidth = avctx->width;
  463. if (avctx->sample_aspect_ratio.num &&
  464. avctx->sample_aspect_ratio.den &&
  465. (avctx->sample_aspect_ratio.num != 1 ||
  466. avctx->sample_aspect_ratio.den != 1)) {
  467. av_reduce(&ctx->params.darWidth,
  468. &ctx->params.darHeight,
  469. avctx->width * avctx->sample_aspect_ratio.num,
  470. avctx->height * avctx->sample_aspect_ratio.den,
  471. INT_MAX / 8);
  472. } else {
  473. ctx->params.darHeight = avctx->height;
  474. ctx->params.darWidth = avctx->width;
  475. }
  476. ctx->params.frameRateNum = avctx->time_base.den;
  477. ctx->params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
  478. ctx->params.enableEncodeAsync = 0;
  479. ctx->params.enablePTD = 1;
  480. ctx->params.encodeConfig = &ctx->config;
  481. nvec_map_preset(ctx);
  482. preset_cfg.version = NV_ENC_PRESET_CONFIG_VER;
  483. preset_cfg.presetCfg.version = NV_ENC_CONFIG_VER;
  484. ret = nv->nvEncGetEncodePresetConfig(ctx->nvenc_ctx,
  485. ctx->params.encodeGUID,
  486. ctx->params.presetGUID,
  487. &preset_cfg);
  488. if (ret != NV_ENC_SUCCESS) {
  489. av_log(avctx, AV_LOG_ERROR,
  490. "Cannot get the preset configuration\n");
  491. return AVERROR_UNKNOWN;
  492. }
  493. memcpy(&ctx->config, &preset_cfg.presetCfg, sizeof(ctx->config));
  494. ctx->config.version = NV_ENC_CONFIG_VER;
  495. if (avctx->gop_size > 0) {
  496. if (avctx->max_b_frames > 0) {
  497. ctx->last_dts = -2;
  498. /* 0 is intra-only,
  499. * 1 is I/P only,
  500. * 2 is one B Frame,
  501. * 3 two B frames, and so on. */
  502. ctx->config.frameIntervalP = avctx->max_b_frames + 1;
  503. } else if (avctx->max_b_frames == 0) {
  504. ctx->config.frameIntervalP = 1;
  505. }
  506. ctx->config.gopLength = avctx->gop_size;
  507. } else if (avctx->gop_size == 0) {
  508. ctx->config.frameIntervalP = 0;
  509. ctx->config.gopLength = 1;
  510. }
  511. if (ctx->config.frameIntervalP > 1)
  512. avctx->max_b_frames = ctx->config.frameIntervalP - 1;
  513. nvenc_setup_rate_control(avctx);
  514. if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
  515. ctx->config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
  516. } else {
  517. ctx->config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
  518. }
  519. if ((ret = nvenc_setup_codec_config(avctx)) < 0)
  520. return ret;
  521. ret = nv->nvEncInitializeEncoder(ctx->nvenc_ctx, &ctx->params);
  522. if (ret != NV_ENC_SUCCESS) {
  523. av_log(avctx, AV_LOG_ERROR, "Cannot initialize the decoder");
  524. return AVERROR_UNKNOWN;
  525. }
  526. cpb_props = ff_add_cpb_side_data(avctx);
  527. if (!cpb_props)
  528. return AVERROR(ENOMEM);
  529. cpb_props->max_bitrate = avctx->rc_max_rate;
  530. cpb_props->min_bitrate = avctx->rc_min_rate;
  531. cpb_props->avg_bitrate = avctx->bit_rate;
  532. cpb_props->buffer_size = avctx->rc_buffer_size;
  533. return 0;
  534. }
  535. static int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
  536. {
  537. NVENCContext *ctx = avctx->priv_data;
  538. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  539. int ret;
  540. NV_ENC_CREATE_INPUT_BUFFER in_buffer = { 0 };
  541. NV_ENC_CREATE_BITSTREAM_BUFFER out_buffer = { 0 };
  542. in_buffer.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
  543. out_buffer.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
  544. in_buffer.width = avctx->width;
  545. in_buffer.height = avctx->height;
  546. in_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED;
  547. switch (avctx->pix_fmt) {
  548. case AV_PIX_FMT_YUV420P:
  549. in_buffer.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
  550. break;
  551. case AV_PIX_FMT_NV12:
  552. in_buffer.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
  553. break;
  554. case AV_PIX_FMT_YUV444P:
  555. in_buffer.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
  556. break;
  557. default:
  558. return AVERROR_BUG;
  559. }
  560. ret = nv->nvEncCreateInputBuffer(ctx->nvenc_ctx, &in_buffer);
  561. if (ret != NV_ENC_SUCCESS) {
  562. av_log(avctx, AV_LOG_ERROR, "CreateInputBuffer failed\n");
  563. return AVERROR_UNKNOWN;
  564. }
  565. ctx->in[idx].in = in_buffer.inputBuffer;
  566. ctx->in[idx].format = in_buffer.bufferFmt;
  567. /* 1MB is large enough to hold most output frames.
  568. * NVENC increases this automaticaly if it's not enough. */
  569. out_buffer.size = BITSTREAM_BUFFER_SIZE;
  570. out_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED;
  571. ret = nv->nvEncCreateBitstreamBuffer(ctx->nvenc_ctx, &out_buffer);
  572. if (ret != NV_ENC_SUCCESS) {
  573. av_log(avctx, AV_LOG_ERROR, "CreateBitstreamBuffer failed\n");
  574. return AVERROR_UNKNOWN;
  575. }
  576. ctx->out[idx].out = out_buffer.bitstreamBuffer;
  577. ctx->out[idx].busy = 0;
  578. return 0;
  579. }
  580. static int nvenc_setup_surfaces(AVCodecContext *avctx)
  581. {
  582. NVENCContext *ctx = avctx->priv_data;
  583. int i, ret;
  584. ctx->nb_surfaces = FFMAX(4 + avctx->max_b_frames,
  585. ctx->nb_surfaces);
  586. ctx->in = av_mallocz(ctx->nb_surfaces * sizeof(*ctx->in));
  587. if (!ctx->in)
  588. return AVERROR(ENOMEM);
  589. ctx->out = av_mallocz(ctx->nb_surfaces * sizeof(*ctx->out));
  590. if (!ctx->out)
  591. return AVERROR(ENOMEM);
  592. ctx->timestamps = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
  593. if (!ctx->timestamps)
  594. return AVERROR(ENOMEM);
  595. ctx->pending = av_fifo_alloc(ctx->nb_surfaces * sizeof(ctx->out));
  596. if (!ctx->pending)
  597. return AVERROR(ENOMEM);
  598. ctx->ready = av_fifo_alloc(ctx->nb_surfaces * sizeof(ctx->out));
  599. if (!ctx->ready)
  600. return AVERROR(ENOMEM);
  601. for (i = 0; i < ctx->nb_surfaces; i++) {
  602. if ((ret = nvenc_alloc_surface(avctx, i)) < 0)
  603. return ret;
  604. }
  605. return 0;
  606. }
  607. #define EXTRADATA_SIZE 512
  608. static int nvenc_setup_extradata(AVCodecContext *avctx)
  609. {
  610. NVENCContext *ctx = avctx->priv_data;
  611. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  612. NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
  613. int ret;
  614. avctx->extradata = av_mallocz(EXTRADATA_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
  615. if (!avctx->extradata)
  616. return AVERROR(ENOMEM);
  617. payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
  618. payload.spsppsBuffer = avctx->extradata;
  619. payload.inBufferSize = EXTRADATA_SIZE;
  620. payload.outSPSPPSPayloadSize = &avctx->extradata_size;
  621. ret = nv->nvEncGetSequenceParams(ctx->nvenc_ctx, &payload);
  622. if (ret != NV_ENC_SUCCESS) {
  623. av_log(avctx, AV_LOG_ERROR, "Cannot get the extradata\n");
  624. return AVERROR_UNKNOWN;
  625. }
  626. return 0;
  627. }
  628. av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
  629. {
  630. NVENCContext *ctx = avctx->priv_data;
  631. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  632. int i;
  633. av_fifo_free(ctx->timestamps);
  634. av_fifo_free(ctx->pending);
  635. av_fifo_free(ctx->ready);
  636. if (ctx->in) {
  637. for (i = 0; i < ctx->nb_surfaces; ++i) {
  638. nv->nvEncDestroyInputBuffer(ctx->nvenc_ctx, ctx->in[i].in);
  639. nv->nvEncDestroyBitstreamBuffer(ctx->nvenc_ctx, ctx->out[i].out);
  640. }
  641. }
  642. av_freep(&ctx->in);
  643. av_freep(&ctx->out);
  644. if (ctx->nvenc_ctx)
  645. nv->nvEncDestroyEncoder(ctx->nvenc_ctx);
  646. if (ctx->cu_context)
  647. ctx->nvel.cu_ctx_destroy(ctx->cu_context);
  648. if (ctx->nvel.nvenc)
  649. dlclose(ctx->nvel.nvenc);
  650. if (ctx->nvel.cuda)
  651. dlclose(ctx->nvel.cuda);
  652. return 0;
  653. }
  654. av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
  655. {
  656. int ret;
  657. if ((ret = nvenc_load_libraries(avctx)) < 0)
  658. return ret;
  659. if ((ret = nvenc_setup_device(avctx)) < 0)
  660. return ret;
  661. if ((ret = nvenc_setup_encoder(avctx)) < 0)
  662. return ret;
  663. if ((ret = nvenc_setup_surfaces(avctx)) < 0)
  664. return ret;
  665. if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
  666. if ((ret = nvenc_setup_extradata(avctx)) < 0)
  667. return ret;
  668. }
  669. return 0;
  670. }
  671. static NVENCInputSurface *get_input_surface(NVENCContext *ctx)
  672. {
  673. int i;
  674. for (i = 0; i < ctx->nb_surfaces; i++) {
  675. if (!ctx->in[i].locked) {
  676. ctx->in[i].locked = 1;
  677. return &ctx->in[i];
  678. }
  679. }
  680. return NULL;
  681. }
  682. static NVENCOutputSurface *get_output_surface(NVENCContext *ctx)
  683. {
  684. int i;
  685. for (i = 0; i < ctx->nb_surfaces; i++) {
  686. if (!ctx->out[i].busy) {
  687. return &ctx->out[i];
  688. }
  689. }
  690. return NULL;
  691. }
  692. static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER *in, const AVFrame *frame)
  693. {
  694. uint8_t *buf = in->bufferDataPtr;
  695. int off = frame->height * in->pitch;
  696. switch (frame->format) {
  697. case AV_PIX_FMT_YUV420P:
  698. av_image_copy_plane(buf, in->pitch,
  699. frame->data[0], frame->linesize[0],
  700. frame->width, frame->height);
  701. buf += off;
  702. av_image_copy_plane(buf, in->pitch >> 1,
  703. frame->data[2], frame->linesize[2],
  704. frame->width >> 1, frame->height >> 1);
  705. buf += off >> 2;
  706. av_image_copy_plane(buf, in->pitch >> 1,
  707. frame->data[1], frame->linesize[1],
  708. frame->width >> 1, frame->height >> 1);
  709. break;
  710. case AV_PIX_FMT_NV12:
  711. av_image_copy_plane(buf, in->pitch,
  712. frame->data[0], frame->linesize[0],
  713. frame->width, frame->height);
  714. buf += off;
  715. av_image_copy_plane(buf, in->pitch,
  716. frame->data[1], frame->linesize[1],
  717. frame->width, frame->height >> 1);
  718. break;
  719. case AV_PIX_FMT_YUV444P:
  720. av_image_copy_plane(buf, in->pitch,
  721. frame->data[0], frame->linesize[0],
  722. frame->width, frame->height);
  723. buf += off;
  724. av_image_copy_plane(buf, in->pitch,
  725. frame->data[1], frame->linesize[1],
  726. frame->width, frame->height);
  727. buf += off;
  728. av_image_copy_plane(buf, in->pitch,
  729. frame->data[2], frame->linesize[2],
  730. frame->width, frame->height);
  731. break;
  732. default:
  733. return AVERROR_BUG;
  734. }
  735. return 0;
  736. }
  737. static int nvenc_enqueue_frame(AVCodecContext *avctx, const AVFrame *frame,
  738. NVENCInputSurface **in_surf)
  739. {
  740. NVENCContext *ctx = avctx->priv_data;
  741. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  742. NV_ENC_LOCK_INPUT_BUFFER params = { 0 };
  743. NVENCInputSurface *in = get_input_surface(ctx);
  744. int ret;
  745. if (!in)
  746. return AVERROR_BUG;
  747. params.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
  748. params.inputBuffer = in->in;
  749. ret = nv->nvEncLockInputBuffer(ctx->nvenc_ctx, &params);
  750. if (ret != NV_ENC_SUCCESS) {
  751. av_log(avctx, AV_LOG_ERROR, "Cannot lock the buffer %p.\n",
  752. in);
  753. return AVERROR_UNKNOWN;
  754. }
  755. ret = nvenc_copy_frame(&params, frame);
  756. if (ret < 0)
  757. goto fail;
  758. ret = nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, in->in);
  759. if (ret != NV_ENC_SUCCESS) {
  760. av_log(avctx, AV_LOG_ERROR, "Cannot unlock the buffer %p.\n",
  761. in);
  762. return AVERROR_UNKNOWN;
  763. }
  764. *in_surf = in;
  765. return 0;
  766. fail:
  767. nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, in->in);
  768. return ret;
  769. }
  770. static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
  771. NV_ENC_PIC_PARAMS *params)
  772. {
  773. NVENCContext *ctx = avctx->priv_data;
  774. switch (avctx->codec->id) {
  775. case AV_CODEC_ID_H264:
  776. params->codecPicParams.h264PicParams.sliceMode =
  777. ctx->config.encodeCodecConfig.h264Config.sliceMode;
  778. params->codecPicParams.h264PicParams.sliceModeData =
  779. ctx->config.encodeCodecConfig.h264Config.sliceModeData;
  780. break;
  781. case AV_CODEC_ID_HEVC:
  782. params->codecPicParams.hevcPicParams.sliceMode =
  783. ctx->config.encodeCodecConfig.hevcConfig.sliceMode;
  784. params->codecPicParams.hevcPicParams.sliceModeData =
  785. ctx->config.encodeCodecConfig.hevcConfig.sliceModeData;
  786. break;
  787. }
  788. }
  789. static inline int nvenc_enqueue_timestamp(AVFifoBuffer *f, int64_t pts)
  790. {
  791. return av_fifo_generic_write(f, &pts, sizeof(pts), NULL);
  792. }
  793. static inline int nvenc_dequeue_timestamp(AVFifoBuffer *f, int64_t *pts)
  794. {
  795. return av_fifo_generic_read(f, pts, sizeof(*pts), NULL);
  796. }
  797. static inline int nvenc_enqueue_surface(AVFifoBuffer *f,
  798. NVENCOutputSurface *surf)
  799. {
  800. surf->busy = 1;
  801. return av_fifo_generic_write(f, &surf, sizeof(surf), NULL);
  802. }
  803. static inline int nvenc_dequeue_surface(AVFifoBuffer *f,
  804. NVENCOutputSurface **surf)
  805. {
  806. return av_fifo_generic_read(f, surf, sizeof(*surf), NULL);
  807. }
  808. static int nvenc_set_timestamp(NVENCContext *ctx,
  809. NV_ENC_LOCK_BITSTREAM *params,
  810. AVPacket *pkt)
  811. {
  812. pkt->pts = params->outputTimeStamp;
  813. pkt->duration = params->outputDuration;
  814. return nvenc_dequeue_timestamp(ctx->timestamps, &pkt->dts);
  815. }
  816. static int nvenc_get_frame(AVCodecContext *avctx, AVPacket *pkt)
  817. {
  818. NVENCContext *ctx = avctx->priv_data;
  819. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  820. NV_ENC_LOCK_BITSTREAM params = { 0 };
  821. NVENCOutputSurface *out = NULL;
  822. int ret;
  823. ret = nvenc_dequeue_surface(ctx->pending, &out);
  824. if (ret)
  825. return ret;
  826. params.version = NV_ENC_LOCK_BITSTREAM_VER;
  827. params.outputBitstream = out->out;
  828. ret = nv->nvEncLockBitstream(ctx->nvenc_ctx, &params);
  829. if (ret < 0)
  830. return AVERROR_UNKNOWN;
  831. ret = ff_alloc_packet(pkt, params.bitstreamSizeInBytes);
  832. if (ret < 0)
  833. return ret;
  834. memcpy(pkt->data, params.bitstreamBufferPtr, pkt->size);
  835. ret = nv->nvEncUnlockBitstream(ctx->nvenc_ctx, out->out);
  836. if (ret < 0)
  837. return AVERROR_UNKNOWN;
  838. out->busy = out->in->locked = 0;
  839. ret = nvenc_set_timestamp(ctx, &params, pkt);
  840. if (ret < 0)
  841. return ret;
  842. switch (params.pictureType) {
  843. case NV_ENC_PIC_TYPE_IDR:
  844. pkt->flags |= AV_PKT_FLAG_KEY;
  845. #if FF_API_CODED_FRAME
  846. FF_DISABLE_DEPRECATION_WARNINGS
  847. case NV_ENC_PIC_TYPE_INTRA_REFRESH:
  848. case NV_ENC_PIC_TYPE_I:
  849. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
  850. break;
  851. case NV_ENC_PIC_TYPE_P:
  852. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
  853. break;
  854. case NV_ENC_PIC_TYPE_B:
  855. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
  856. break;
  857. case NV_ENC_PIC_TYPE_BI:
  858. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI;
  859. break;
  860. FF_ENABLE_DEPRECATION_WARNINGS
  861. #endif
  862. }
  863. return 0;
  864. }
  865. int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
  866. const AVFrame *frame, int *got_packet)
  867. {
  868. NVENCContext *ctx = avctx->priv_data;
  869. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  870. NV_ENC_PIC_PARAMS params = { 0 };
  871. NVENCInputSurface *in = NULL;
  872. NVENCOutputSurface *out = NULL;
  873. int ret;
  874. params.version = NV_ENC_PIC_PARAMS_VER;
  875. if (frame) {
  876. ret = nvenc_enqueue_frame(avctx, frame, &in);
  877. if (ret < 0)
  878. return ret;
  879. out = get_output_surface(ctx);
  880. if (!out)
  881. return AVERROR_BUG;
  882. out->in = in;
  883. params.inputBuffer = in->in;
  884. params.bufferFmt = in->format;
  885. params.inputWidth = frame->width;
  886. params.inputHeight = frame->height;
  887. params.outputBitstream = out->out;
  888. params.inputTimeStamp = frame->pts;
  889. if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
  890. if (frame->top_field_first)
  891. params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
  892. else
  893. params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
  894. } else {
  895. params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
  896. }
  897. nvenc_codec_specific_pic_params(avctx, &params);
  898. ret = nvenc_enqueue_timestamp(ctx->timestamps, frame->pts);
  899. if (ret < 0)
  900. return ret;
  901. } else {
  902. params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
  903. }
  904. ret = nv->nvEncEncodePicture(ctx->nvenc_ctx, &params);
  905. if (ret != NV_ENC_SUCCESS &&
  906. ret != NV_ENC_ERR_NEED_MORE_INPUT) {
  907. return AVERROR_UNKNOWN;
  908. }
  909. if (out) {
  910. ret = nvenc_enqueue_surface(ctx->pending, out);
  911. if (ret < 0)
  912. return ret;
  913. }
  914. if (ret != NV_ENC_ERR_NEED_MORE_INPUT &&
  915. av_fifo_size(ctx->pending)) {
  916. ret = nvenc_get_frame(avctx, pkt);
  917. if (ret < 0)
  918. return ret;
  919. *got_packet = 1;
  920. } else {
  921. *got_packet = 0;
  922. }
  923. return 0;
  924. }