You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1137 lines
33KB

  1. /*
  2. * NVIDIA NVENC Support
  3. * Copyright (C) 2015 Luca Barbato
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "config.h"
  22. #include <cuda.h>
  23. #include <nvEncodeAPI.h>
  24. #include <string.h>
  25. #define CUDA_LIBNAME "libcuda.so"
  26. #if HAVE_DLFCN_H
  27. #include <dlfcn.h>
  28. #define NVENC_LIBNAME "libnvidia-encode.so"
  29. #elif HAVE_WINDOWS_H
  30. #include <windows.h>
  31. #if ARCH_X86_64
  32. #define NVENC_LIBNAME "nvEncodeAPI64.dll"
  33. #else
  34. #define NVENC_LIBNAME "nvEncodeAPI.dll"
  35. #endif
  36. #define dlopen(filename, flags) LoadLibrary((filename))
  37. #define dlsym(handle, symbol) GetProcAddress(handle, symbol)
  38. #define dlclose(handle) FreeLibrary(handle)
  39. #endif
  40. #include "libavutil/common.h"
  41. #include "libavutil/imgutils.h"
  42. #include "libavutil/mem.h"
  43. #include "avcodec.h"
  44. #include "internal.h"
  45. #include "nvenc.h"
  46. #define NVENC_CAP 0x30
  47. #define BITSTREAM_BUFFER_SIZE 1024 * 1024
  48. #define LOAD_LIBRARY(l, path) \
  49. do { \
  50. if (!((l) = dlopen(path, RTLD_LAZY))) { \
  51. av_log(avctx, AV_LOG_ERROR, \
  52. "Cannot load %s\n", \
  53. path); \
  54. return AVERROR_UNKNOWN; \
  55. } \
  56. } while (0)
  57. #define LOAD_SYMBOL(fun, lib, symbol) \
  58. do { \
  59. if (!((fun) = dlsym(lib, symbol))) { \
  60. av_log(avctx, AV_LOG_ERROR, \
  61. "Cannot load %s\n", \
  62. symbol); \
  63. return AVERROR_UNKNOWN; \
  64. } \
  65. } while (0)
  66. static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
  67. {
  68. NVENCContext *ctx = avctx->priv_data;
  69. NVENCLibraryContext *nvel = &ctx->nvel;
  70. PNVENCODEAPICREATEINSTANCE nvenc_create_instance;
  71. LOAD_LIBRARY(nvel->cuda, CUDA_LIBNAME);
  72. LOAD_SYMBOL(nvel->cu_init, nvel->cuda, "cuInit");
  73. LOAD_SYMBOL(nvel->cu_device_get_count, nvel->cuda, "cuDeviceGetCount");
  74. LOAD_SYMBOL(nvel->cu_device_get, nvel->cuda, "cuDeviceGet");
  75. LOAD_SYMBOL(nvel->cu_device_get_name, nvel->cuda, "cuDeviceGetName");
  76. LOAD_SYMBOL(nvel->cu_device_compute_capability, nvel->cuda,
  77. "cuDeviceComputeCapability");
  78. LOAD_SYMBOL(nvel->cu_ctx_create, nvel->cuda, "cuCtxCreate_v2");
  79. LOAD_SYMBOL(nvel->cu_ctx_pop_current, nvel->cuda, "cuCtxPopCurrent_v2");
  80. LOAD_SYMBOL(nvel->cu_ctx_destroy, nvel->cuda, "cuCtxDestroy_v2");
  81. LOAD_LIBRARY(nvel->nvenc, NVENC_LIBNAME);
  82. LOAD_SYMBOL(nvenc_create_instance, nvel->nvenc,
  83. "NvEncodeAPICreateInstance");
  84. nvel->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
  85. if ((nvenc_create_instance(&nvel->nvenc_funcs)) != NV_ENC_SUCCESS) {
  86. av_log(avctx, AV_LOG_ERROR, "Cannot create the NVENC instance");
  87. return AVERROR_UNKNOWN;
  88. }
  89. return 0;
  90. }
  91. static int nvenc_open_session(AVCodecContext *avctx)
  92. {
  93. NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
  94. NVENCContext *ctx = avctx->priv_data;
  95. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  96. int ret;
  97. params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
  98. params.apiVersion = NVENCAPI_VERSION;
  99. params.device = ctx->cu_context;
  100. params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  101. ret = nv->nvEncOpenEncodeSessionEx(&params, &ctx->nvenc_ctx);
  102. if (ret != NV_ENC_SUCCESS) {
  103. ctx->nvenc_ctx = NULL;
  104. av_log(avctx, AV_LOG_ERROR,
  105. "Cannot open the NVENC Session\n");
  106. return AVERROR_UNKNOWN;
  107. }
  108. return 0;
  109. }
  110. static int nvenc_check_codec_support(AVCodecContext *avctx)
  111. {
  112. NVENCContext *ctx = avctx->priv_data;
  113. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  114. int i, ret, count = 0;
  115. GUID *guids = NULL;
  116. ret = nv->nvEncGetEncodeGUIDCount(ctx->nvenc_ctx, &count);
  117. if (ret != NV_ENC_SUCCESS || !count)
  118. return AVERROR(ENOSYS);
  119. guids = av_malloc(count * sizeof(GUID));
  120. if (!guids)
  121. return AVERROR(ENOMEM);
  122. ret = nv->nvEncGetEncodeGUIDs(ctx->nvenc_ctx, guids, count, &count);
  123. if (ret != NV_ENC_SUCCESS) {
  124. ret = AVERROR(ENOSYS);
  125. goto fail;
  126. }
  127. ret = AVERROR(ENOSYS);
  128. for (i = 0; i < count; i++) {
  129. if (!memcmp(&guids[i], &ctx->params.encodeGUID, sizeof(*guids))) {
  130. ret = 0;
  131. break;
  132. }
  133. }
  134. fail:
  135. av_free(guids);
  136. return ret;
  137. }
  138. static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
  139. {
  140. NVENCContext *ctx = avctx->priv_data;
  141. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  142. NV_ENC_CAPS_PARAM params = { 0 };
  143. int ret, val = 0;
  144. params.version = NV_ENC_CAPS_PARAM_VER;
  145. params.capsToQuery = cap;
  146. ret = nv->nvEncGetEncodeCaps(ctx->nvenc_ctx, ctx->params.encodeGUID, &params, &val);
  147. if (ret == NV_ENC_SUCCESS)
  148. return val;
  149. return 0;
  150. }
  151. static int nvenc_check_capabilities(AVCodecContext *avctx)
  152. {
  153. int ret;
  154. ret = nvenc_check_codec_support(avctx);
  155. if (ret < 0) {
  156. av_log(avctx, AV_LOG_VERBOSE, "Codec not supported\n");
  157. return ret;
  158. }
  159. ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
  160. if (avctx->pix_fmt == AV_PIX_FMT_YUV444P && ret <= 0) {
  161. av_log(avctx, AV_LOG_VERBOSE, "YUV444P not supported\n");
  162. return AVERROR(ENOSYS);
  163. }
  164. ret = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
  165. if (ret < avctx->width) {
  166. av_log(avctx, AV_LOG_VERBOSE, "Width %d exceeds %d\n",
  167. avctx->width, ret);
  168. return AVERROR(ENOSYS);
  169. }
  170. ret = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
  171. if (ret < avctx->height) {
  172. av_log(avctx, AV_LOG_VERBOSE, "Height %d exceeds %d\n",
  173. avctx->height, ret);
  174. return AVERROR(ENOSYS);
  175. }
  176. ret = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
  177. if (ret < avctx->max_b_frames) {
  178. av_log(avctx, AV_LOG_VERBOSE, "Max b-frames %d exceed %d\n",
  179. avctx->max_b_frames, ret);
  180. return AVERROR(ENOSYS);
  181. }
  182. return 0;
  183. }
  184. static int nvenc_check_device(AVCodecContext *avctx, int idx)
  185. {
  186. NVENCContext *ctx = avctx->priv_data;
  187. NVENCLibraryContext *nvel = &ctx->nvel;
  188. char name[128] = { 0 };
  189. int major, minor, ret;
  190. CUdevice cu_device;
  191. CUcontext dummy;
  192. int loglevel = AV_LOG_VERBOSE;
  193. if (ctx->device == LIST_DEVICES)
  194. loglevel = AV_LOG_INFO;
  195. ret = nvel->cu_device_get(&cu_device, idx);
  196. if (ret != CUDA_SUCCESS) {
  197. av_log(avctx, AV_LOG_ERROR,
  198. "Cannot access the CUDA device %d\n",
  199. idx);
  200. return -1;
  201. }
  202. ret = nvel->cu_device_get_name(name, sizeof(name), cu_device);
  203. if (ret != CUDA_SUCCESS)
  204. return -1;
  205. ret = nvel->cu_device_compute_capability(&major, &minor, cu_device);
  206. if (ret != CUDA_SUCCESS)
  207. return -1;
  208. av_log(avctx, loglevel, "Device %d [%s] ", cu_device, name);
  209. if (((major << 4) | minor) < NVENC_CAP)
  210. goto fail;
  211. ret = nvel->cu_ctx_create(&ctx->cu_context, 0, cu_device);
  212. if (ret != CUDA_SUCCESS)
  213. goto fail;
  214. ret = nvel->cu_ctx_pop_current(&dummy);
  215. if (ret != CUDA_SUCCESS)
  216. goto fail2;
  217. if ((ret = nvenc_open_session(avctx)) < 0)
  218. goto fail2;
  219. if ((ret = nvenc_check_capabilities(avctx)) < 0)
  220. goto fail3;
  221. av_log(avctx, loglevel, "supports NVENC\n");
  222. if (ctx->device == cu_device || ctx->device == ANY_DEVICE)
  223. return 0;
  224. fail3:
  225. nvel->nvenc_funcs.nvEncDestroyEncoder(ctx->nvenc_ctx);
  226. ctx->nvenc_ctx = NULL;
  227. fail2:
  228. nvel->cu_ctx_destroy(ctx->cu_context);
  229. ctx->cu_context = NULL;
  230. fail:
  231. if (ret != 0)
  232. av_log(avctx, loglevel, "does not support NVENC (major %d minor %d)\n",
  233. major, minor);
  234. return AVERROR(ENOSYS);
  235. }
  236. static int nvenc_setup_device(AVCodecContext *avctx)
  237. {
  238. NVENCContext *ctx = avctx->priv_data;
  239. NVENCLibraryContext *nvel = &ctx->nvel;
  240. int i, nb_devices = 0;
  241. if ((nvel->cu_init(0)) != CUDA_SUCCESS) {
  242. av_log(avctx, AV_LOG_ERROR,
  243. "Cannot init CUDA\n");
  244. return AVERROR_UNKNOWN;
  245. }
  246. if ((nvel->cu_device_get_count(&nb_devices)) != CUDA_SUCCESS) {
  247. av_log(avctx, AV_LOG_ERROR,
  248. "Cannot enumerate the CUDA devices\n");
  249. return AVERROR_UNKNOWN;
  250. }
  251. switch (avctx->codec->id) {
  252. case AV_CODEC_ID_H264:
  253. ctx->params.encodeGUID = NV_ENC_CODEC_H264_GUID;
  254. break;
  255. case AV_CODEC_ID_HEVC:
  256. ctx->params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
  257. break;
  258. default:
  259. return AVERROR_BUG;
  260. }
  261. for (i = 0; i < nb_devices; ++i) {
  262. if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
  263. return 0;
  264. }
  265. if (ctx->device == LIST_DEVICES)
  266. return AVERROR_EXIT;
  267. return AVERROR(ENOSYS);
  268. }
/* Pairs an NVENC preset GUID with the NVENC_* flags associated with it;
 * used as the element type of the preset table in nvec_map_preset(). */
typedef struct GUIDTuple {
    const GUID guid; /* preset GUID */
    int flags;       /* flag bits copied into the context's flags field */
} GUIDTuple;
  273. static int nvec_map_preset(NVENCContext *ctx)
  274. {
  275. GUIDTuple presets[] = {
  276. { NV_ENC_PRESET_DEFAULT_GUID },
  277. { NV_ENC_PRESET_HP_GUID },
  278. { NV_ENC_PRESET_HQ_GUID },
  279. { NV_ENC_PRESET_BD_GUID },
  280. { NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID, NVENC_LOWLATENCY },
  281. { NV_ENC_PRESET_LOW_LATENCY_HP_GUID, NVENC_LOWLATENCY },
  282. { NV_ENC_PRESET_LOW_LATENCY_HQ_GUID, NVENC_LOWLATENCY },
  283. { NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID, NVENC_LOSSLESS },
  284. { NV_ENC_PRESET_LOSSLESS_HP_GUID, NVENC_LOSSLESS },
  285. { { 0 } }
  286. };
  287. GUIDTuple *t = &presets[ctx->preset];
  288. ctx->params.presetGUID = t->guid;
  289. ctx->flags = t->flags;
  290. return AVERROR(EINVAL);
  291. }
  292. static void set_constqp(AVCodecContext *avctx, NV_ENC_RC_PARAMS *rc)
  293. {
  294. rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
  295. rc->constQP.qpInterB = avctx->global_quality;
  296. rc->constQP.qpInterP = avctx->global_quality;
  297. rc->constQP.qpIntra = avctx->global_quality;
  298. }
  299. static void set_vbr(AVCodecContext *avctx, NV_ENC_RC_PARAMS *rc)
  300. {
  301. if (avctx->qmin >= 0) {
  302. rc->enableMinQP = 1;
  303. rc->minQP.qpInterB = avctx->qmin;
  304. rc->minQP.qpInterP = avctx->qmin;
  305. rc->minQP.qpIntra = avctx->qmin;
  306. }
  307. if (avctx->qmax >= 0) {
  308. rc->enableMaxQP = 1;
  309. rc->maxQP.qpInterB = avctx->qmax;
  310. rc->maxQP.qpInterP = avctx->qmax;
  311. rc->maxQP.qpIntra = avctx->qmax;
  312. }
  313. }
  314. static void nvenc_override_rate_control(AVCodecContext *avctx,
  315. NV_ENC_RC_PARAMS *rc)
  316. {
  317. NVENCContext *ctx = avctx->priv_data;
  318. switch (ctx->rc) {
  319. case NV_ENC_PARAMS_RC_CONSTQP:
  320. if (avctx->global_quality < 0) {
  321. av_log(avctx, AV_LOG_WARNING,
  322. "The constant quality rate-control requires "
  323. "the 'global_quality' option set.\n");
  324. return;
  325. }
  326. set_constqp(avctx, rc);
  327. return;
  328. case NV_ENC_PARAMS_RC_2_PASS_VBR:
  329. case NV_ENC_PARAMS_RC_VBR:
  330. if (avctx->qmin < 0 && avctx->qmax < 0) {
  331. av_log(avctx, AV_LOG_WARNING,
  332. "The variable bitrate rate-control requires "
  333. "the 'qmin' and/or 'qmax' option set.\n");
  334. return;
  335. }
  336. case NV_ENC_PARAMS_RC_VBR_MINQP:
  337. if (avctx->qmin < 0) {
  338. av_log(avctx, AV_LOG_WARNING,
  339. "The variable bitrate rate-control requires "
  340. "the 'qmin' option set.\n");
  341. return;
  342. }
  343. set_vbr(avctx, rc);
  344. break;
  345. case NV_ENC_PARAMS_RC_CBR:
  346. break;
  347. case NV_ENC_PARAMS_RC_2_PASS_QUALITY:
  348. case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP:
  349. if (!(ctx->flags & NVENC_LOWLATENCY)) {
  350. av_log(avctx, AV_LOG_WARNING,
  351. "The multipass rate-control requires "
  352. "a low-latency preset.\n");
  353. return;
  354. }
  355. }
  356. rc->rateControlMode = ctx->rc;
  357. }
  358. static void nvenc_setup_rate_control(AVCodecContext *avctx)
  359. {
  360. NVENCContext *ctx = avctx->priv_data;
  361. NV_ENC_RC_PARAMS *rc = &ctx->config.rcParams;
  362. if (avctx->bit_rate > 0)
  363. rc->averageBitRate = avctx->bit_rate;
  364. if (avctx->rc_max_rate > 0)
  365. rc->maxBitRate = avctx->rc_max_rate;
  366. if (ctx->rc > 0) {
  367. nvenc_override_rate_control(avctx, rc);
  368. } else if (avctx->global_quality > 0) {
  369. set_constqp(avctx, rc);
  370. } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
  371. rc->rateControlMode = NV_ENC_PARAMS_RC_VBR;
  372. set_vbr(avctx, rc);
  373. }
  374. if (avctx->rc_buffer_size > 0)
  375. rc->vbvBufferSize = avctx->rc_buffer_size;
  376. if (rc->averageBitRate > 0)
  377. avctx->bit_rate = rc->averageBitRate;
  378. }
  379. static int nvenc_setup_h264_config(AVCodecContext *avctx)
  380. {
  381. NVENCContext *ctx = avctx->priv_data;
  382. NV_ENC_CONFIG *cc = &ctx->config;
  383. NV_ENC_CONFIG_H264 *h264 = &cc->encodeCodecConfig.h264Config;
  384. NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
  385. vui->colourDescriptionPresentFlag = 1;
  386. vui->videoSignalTypePresentFlag = 1;
  387. vui->colourMatrix = avctx->colorspace;
  388. vui->colourPrimaries = avctx->color_primaries;
  389. vui->transferCharacteristics = avctx->color_trc;
  390. vui->videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
  391. h264->disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
  392. h264->repeatSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
  393. h264->maxNumRefFrames = avctx->refs;
  394. h264->idrPeriod = cc->gopLength;
  395. if (ctx->profile)
  396. avctx->profile = ctx->profile;
  397. if (avctx->pix_fmt == AV_PIX_FMT_YUV444P)
  398. h264->chromaFormatIDC = 3;
  399. else
  400. h264->chromaFormatIDC = 1;
  401. switch (ctx->profile) {
  402. case NV_ENC_H264_PROFILE_BASELINE:
  403. cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
  404. break;
  405. case NV_ENC_H264_PROFILE_MAIN:
  406. cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
  407. break;
  408. case NV_ENC_H264_PROFILE_HIGH:
  409. cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
  410. break;
  411. case NV_ENC_H264_PROFILE_HIGH_444:
  412. cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
  413. break;
  414. case NV_ENC_H264_PROFILE_CONSTRAINED_HIGH:
  415. cc->profileGUID = NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID;
  416. break;
  417. }
  418. h264->level = ctx->level;
  419. return 0;
  420. }
  421. static int nvenc_setup_hevc_config(AVCodecContext *avctx)
  422. {
  423. NVENCContext *ctx = avctx->priv_data;
  424. NV_ENC_CONFIG *cc = &ctx->config;
  425. NV_ENC_CONFIG_HEVC *hevc = &cc->encodeCodecConfig.hevcConfig;
  426. hevc->disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
  427. hevc->repeatSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
  428. hevc->maxNumRefFramesInDPB = avctx->refs;
  429. hevc->idrPeriod = cc->gopLength;
  430. /* No other profile is supported in the current SDK version 5 */
  431. cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
  432. avctx->profile = FF_PROFILE_HEVC_MAIN;
  433. if (ctx->level) {
  434. hevc->level = ctx->level;
  435. } else {
  436. hevc->level = NV_ENC_LEVEL_AUTOSELECT;
  437. }
  438. if (ctx->tier) {
  439. hevc->tier = ctx->tier;
  440. }
  441. return 0;
  442. }
  443. static int nvenc_setup_codec_config(AVCodecContext *avctx)
  444. {
  445. switch (avctx->codec->id) {
  446. case AV_CODEC_ID_H264:
  447. return nvenc_setup_h264_config(avctx);
  448. case AV_CODEC_ID_HEVC:
  449. return nvenc_setup_hevc_config(avctx);
  450. }
  451. return 0;
  452. }
  453. static int nvenc_setup_encoder(AVCodecContext *avctx)
  454. {
  455. NVENCContext *ctx = avctx->priv_data;
  456. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  457. NV_ENC_PRESET_CONFIG preset_cfg = { 0 };
  458. int ret;
  459. ctx->params.version = NV_ENC_INITIALIZE_PARAMS_VER;
  460. ctx->params.encodeHeight = avctx->height;
  461. ctx->params.encodeWidth = avctx->width;
  462. if (avctx->sample_aspect_ratio.num &&
  463. avctx->sample_aspect_ratio.den &&
  464. (avctx->sample_aspect_ratio.num != 1 ||
  465. avctx->sample_aspect_ratio.den != 1)) {
  466. av_reduce(&ctx->params.darWidth,
  467. &ctx->params.darHeight,
  468. avctx->width * avctx->sample_aspect_ratio.num,
  469. avctx->height * avctx->sample_aspect_ratio.den,
  470. INT_MAX / 8);
  471. } else {
  472. ctx->params.darHeight = avctx->height;
  473. ctx->params.darWidth = avctx->width;
  474. }
  475. ctx->params.frameRateNum = avctx->time_base.den;
  476. ctx->params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
  477. ctx->params.enableEncodeAsync = 0;
  478. ctx->params.enablePTD = 1;
  479. ctx->params.encodeConfig = &ctx->config;
  480. nvec_map_preset(ctx);
  481. preset_cfg.version = NV_ENC_PRESET_CONFIG_VER;
  482. preset_cfg.presetCfg.version = NV_ENC_CONFIG_VER;
  483. ret = nv->nvEncGetEncodePresetConfig(ctx->nvenc_ctx,
  484. ctx->params.encodeGUID,
  485. ctx->params.presetGUID,
  486. &preset_cfg);
  487. if (ret != NV_ENC_SUCCESS) {
  488. av_log(avctx, AV_LOG_ERROR,
  489. "Cannot get the preset configuration\n");
  490. return AVERROR_UNKNOWN;
  491. }
  492. memcpy(&ctx->config, &preset_cfg.presetCfg, sizeof(ctx->config));
  493. ctx->config.version = NV_ENC_CONFIG_VER;
  494. if (avctx->gop_size > 0) {
  495. if (avctx->max_b_frames > 0) {
  496. ctx->last_dts = -2;
  497. /* 0 is intra-only,
  498. * 1 is I/P only,
  499. * 2 is one B Frame,
  500. * 3 two B frames, and so on. */
  501. ctx->config.frameIntervalP = avctx->max_b_frames + 1;
  502. } else if (avctx->max_b_frames == 0) {
  503. ctx->config.frameIntervalP = 1;
  504. }
  505. ctx->config.gopLength = avctx->gop_size;
  506. } else if (avctx->gop_size == 0) {
  507. ctx->config.frameIntervalP = 0;
  508. ctx->config.gopLength = 1;
  509. }
  510. if (ctx->config.frameIntervalP > 1)
  511. avctx->max_b_frames = ctx->config.frameIntervalP - 1;
  512. nvenc_setup_rate_control(avctx);
  513. if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
  514. ctx->config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
  515. } else {
  516. ctx->config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
  517. }
  518. if ((ret = nvenc_setup_codec_config(avctx)) < 0)
  519. return ret;
  520. ret = nv->nvEncInitializeEncoder(ctx->nvenc_ctx, &ctx->params);
  521. if (ret != NV_ENC_SUCCESS) {
  522. av_log(avctx, AV_LOG_ERROR, "Cannot initialize the decoder");
  523. return AVERROR_UNKNOWN;
  524. }
  525. return 0;
  526. }
  527. static int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
  528. {
  529. NVENCContext *ctx = avctx->priv_data;
  530. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  531. int ret;
  532. NV_ENC_CREATE_INPUT_BUFFER in_buffer = { 0 };
  533. NV_ENC_CREATE_BITSTREAM_BUFFER out_buffer = { 0 };
  534. in_buffer.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
  535. out_buffer.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
  536. in_buffer.width = avctx->width;
  537. in_buffer.height = avctx->height;
  538. in_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED;
  539. switch (avctx->pix_fmt) {
  540. case AV_PIX_FMT_YUV420P:
  541. in_buffer.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
  542. break;
  543. case AV_PIX_FMT_NV12:
  544. in_buffer.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
  545. break;
  546. case AV_PIX_FMT_YUV444P:
  547. in_buffer.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
  548. break;
  549. default:
  550. return AVERROR_BUG;
  551. }
  552. ret = nv->nvEncCreateInputBuffer(ctx->nvenc_ctx, &in_buffer);
  553. if (ret != NV_ENC_SUCCESS) {
  554. av_log(avctx, AV_LOG_ERROR, "CreateInputBuffer failed\n");
  555. return AVERROR_UNKNOWN;
  556. }
  557. ctx->in[idx].in = in_buffer.inputBuffer;
  558. ctx->in[idx].format = in_buffer.bufferFmt;
  559. /* 1MB is large enough to hold most output frames.
  560. * NVENC increases this automaticaly if it's not enough. */
  561. out_buffer.size = BITSTREAM_BUFFER_SIZE;
  562. out_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED;
  563. ret = nv->nvEncCreateBitstreamBuffer(ctx->nvenc_ctx, &out_buffer);
  564. if (ret != NV_ENC_SUCCESS) {
  565. av_log(avctx, AV_LOG_ERROR, "CreateBitstreamBuffer failed\n");
  566. return AVERROR_UNKNOWN;
  567. }
  568. ctx->out[idx].out = out_buffer.bitstreamBuffer;
  569. ctx->out[idx].busy = 0;
  570. return 0;
  571. }
  572. static int nvenc_setup_surfaces(AVCodecContext *avctx)
  573. {
  574. NVENCContext *ctx = avctx->priv_data;
  575. int i, ret;
  576. ctx->nb_surfaces = FFMAX(4 + avctx->max_b_frames,
  577. ctx->nb_surfaces);
  578. ctx->in = av_mallocz(ctx->nb_surfaces * sizeof(*ctx->in));
  579. if (!ctx->in)
  580. return AVERROR(ENOMEM);
  581. ctx->out = av_mallocz(ctx->nb_surfaces * sizeof(*ctx->out));
  582. if (!ctx->out)
  583. return AVERROR(ENOMEM);
  584. ctx->timestamps = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
  585. if (!ctx->timestamps)
  586. return AVERROR(ENOMEM);
  587. ctx->pending = av_fifo_alloc(ctx->nb_surfaces * sizeof(ctx->out));
  588. if (!ctx->pending)
  589. return AVERROR(ENOMEM);
  590. ctx->ready = av_fifo_alloc(ctx->nb_surfaces * sizeof(ctx->out));
  591. if (!ctx->ready)
  592. return AVERROR(ENOMEM);
  593. for (i = 0; i < ctx->nb_surfaces; i++) {
  594. if ((ret = nvenc_alloc_surface(avctx, i)) < 0)
  595. return ret;
  596. }
  597. return 0;
  598. }
  599. #define EXTRADATA_SIZE 512
  600. static int nvenc_setup_extradata(AVCodecContext *avctx)
  601. {
  602. NVENCContext *ctx = avctx->priv_data;
  603. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  604. NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
  605. int ret;
  606. avctx->extradata = av_mallocz(EXTRADATA_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
  607. if (!avctx->extradata)
  608. return AVERROR(ENOMEM);
  609. payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
  610. payload.spsppsBuffer = avctx->extradata;
  611. payload.inBufferSize = EXTRADATA_SIZE;
  612. payload.outSPSPPSPayloadSize = &avctx->extradata_size;
  613. ret = nv->nvEncGetSequenceParams(ctx->nvenc_ctx, &payload);
  614. if (ret != NV_ENC_SUCCESS) {
  615. av_log(avctx, AV_LOG_ERROR, "Cannot get the extradata\n");
  616. return AVERROR_UNKNOWN;
  617. }
  618. return 0;
  619. }
  620. av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
  621. {
  622. NVENCContext *ctx = avctx->priv_data;
  623. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  624. int i;
  625. av_frame_free(&avctx->coded_frame);
  626. if (ctx->in) {
  627. for (i = 0; i < ctx->nb_surfaces; ++i) {
  628. nv->nvEncDestroyInputBuffer(ctx->nvenc_ctx, ctx->in[i].in);
  629. nv->nvEncDestroyBitstreamBuffer(ctx->nvenc_ctx, ctx->out[i].out);
  630. }
  631. }
  632. av_freep(&ctx->in);
  633. av_freep(&ctx->out);
  634. if (ctx->nvenc_ctx)
  635. nv->nvEncDestroyEncoder(ctx->nvenc_ctx);
  636. if (ctx->cu_context)
  637. ctx->nvel.cu_ctx_destroy(ctx->cu_context);
  638. if (ctx->nvel.nvenc)
  639. dlclose(ctx->nvel.nvenc);
  640. if (ctx->nvel.cuda)
  641. dlclose(ctx->nvel.cuda);
  642. return 0;
  643. }
  644. av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
  645. {
  646. int ret;
  647. if ((ret = nvenc_load_libraries(avctx)) < 0)
  648. return ret;
  649. if ((ret = nvenc_setup_device(avctx)) < 0)
  650. return ret;
  651. if ((ret = nvenc_setup_encoder(avctx)) < 0)
  652. return ret;
  653. if ((ret = nvenc_setup_surfaces(avctx)) < 0)
  654. return ret;
  655. if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
  656. if ((ret = nvenc_setup_extradata(avctx)) < 0)
  657. return ret;
  658. }
  659. avctx->coded_frame = av_frame_alloc();
  660. if (!avctx->coded_frame)
  661. return AVERROR(ENOMEM);
  662. return 0;
  663. }
  664. static NVENCInputSurface *get_input_surface(NVENCContext *ctx)
  665. {
  666. int i;
  667. for (i = 0; i < ctx->nb_surfaces; i++) {
  668. if (!ctx->in[i].locked) {
  669. ctx->in[i].locked = 1;
  670. return &ctx->in[i];
  671. }
  672. }
  673. return NULL;
  674. }
  675. static NVENCOutputSurface *get_output_surface(NVENCContext *ctx)
  676. {
  677. int i;
  678. for (i = 0; i < ctx->nb_surfaces; i++) {
  679. if (!ctx->out[i].busy) {
  680. return &ctx->out[i];
  681. }
  682. }
  683. return NULL;
  684. }
  685. static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER *in, const AVFrame *frame)
  686. {
  687. uint8_t *buf = in->bufferDataPtr;
  688. int off = frame->height * in->pitch;
  689. switch (frame->format) {
  690. case AV_PIX_FMT_YUV420P:
  691. av_image_copy_plane(buf, in->pitch,
  692. frame->data[0], frame->linesize[0],
  693. frame->width, frame->height);
  694. buf += off;
  695. av_image_copy_plane(buf, in->pitch >> 1,
  696. frame->data[2], frame->linesize[2],
  697. frame->width >> 1, frame->height >> 1);
  698. buf += off >> 2;
  699. av_image_copy_plane(buf, in->pitch >> 1,
  700. frame->data[1], frame->linesize[1],
  701. frame->width >> 1, frame->height >> 1);
  702. break;
  703. case AV_PIX_FMT_NV12:
  704. av_image_copy_plane(buf, in->pitch,
  705. frame->data[0], frame->linesize[0],
  706. frame->width, frame->height);
  707. buf += off;
  708. av_image_copy_plane(buf, in->pitch,
  709. frame->data[1], frame->linesize[1],
  710. frame->width, frame->height >> 1);
  711. break;
  712. case AV_PIX_FMT_YUV444P:
  713. av_image_copy_plane(buf, in->pitch,
  714. frame->data[0], frame->linesize[0],
  715. frame->width, frame->height);
  716. buf += off;
  717. av_image_copy_plane(buf, in->pitch,
  718. frame->data[1], frame->linesize[1],
  719. frame->width, frame->height);
  720. buf += off;
  721. av_image_copy_plane(buf, in->pitch,
  722. frame->data[2], frame->linesize[2],
  723. frame->width, frame->height);
  724. break;
  725. default:
  726. return AVERROR_BUG;
  727. }
  728. return 0;
  729. }
  730. static int nvenc_enqueue_frame(AVCodecContext *avctx, const AVFrame *frame,
  731. NVENCInputSurface **in_surf)
  732. {
  733. NVENCContext *ctx = avctx->priv_data;
  734. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  735. NV_ENC_LOCK_INPUT_BUFFER params = { 0 };
  736. NVENCInputSurface *in = get_input_surface(ctx);
  737. int ret;
  738. if (!in)
  739. return AVERROR_BUG;
  740. params.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
  741. params.inputBuffer = in->in;
  742. ret = nv->nvEncLockInputBuffer(ctx->nvenc_ctx, &params);
  743. if (ret != NV_ENC_SUCCESS) {
  744. av_log(avctx, AV_LOG_ERROR, "Cannot lock the buffer %p.\n",
  745. in);
  746. return AVERROR_UNKNOWN;
  747. }
  748. ret = nvenc_copy_frame(&params, frame);
  749. if (ret < 0)
  750. goto fail;
  751. ret = nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, in->in);
  752. if (ret != NV_ENC_SUCCESS) {
  753. av_log(avctx, AV_LOG_ERROR, "Cannot unlock the buffer %p.\n",
  754. in);
  755. return AVERROR_UNKNOWN;
  756. }
  757. *in_surf = in;
  758. return 0;
  759. fail:
  760. nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, in->in);
  761. return ret;
  762. }
  763. static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
  764. NV_ENC_PIC_PARAMS *params)
  765. {
  766. NVENCContext *ctx = avctx->priv_data;
  767. switch (avctx->codec->id) {
  768. case AV_CODEC_ID_H264:
  769. params->codecPicParams.h264PicParams.sliceMode =
  770. ctx->config.encodeCodecConfig.h264Config.sliceMode;
  771. params->codecPicParams.h264PicParams.sliceModeData =
  772. ctx->config.encodeCodecConfig.h264Config.sliceModeData;
  773. break;
  774. case AV_CODEC_ID_HEVC:
  775. params->codecPicParams.hevcPicParams.sliceMode =
  776. ctx->config.encodeCodecConfig.hevcConfig.sliceMode;
  777. params->codecPicParams.hevcPicParams.sliceModeData =
  778. ctx->config.encodeCodecConfig.hevcConfig.sliceModeData;
  779. break;
  780. }
  781. }
  782. static inline int nvenc_enqueue_timestamp(AVFifoBuffer *f, int64_t pts)
  783. {
  784. return av_fifo_generic_write(f, &pts, sizeof(pts), NULL);
  785. }
  786. static inline int nvenc_dequeue_timestamp(AVFifoBuffer *f, int64_t *pts)
  787. {
  788. return av_fifo_generic_read(f, pts, sizeof(*pts), NULL);
  789. }
  790. static inline int nvenc_enqueue_surface(AVFifoBuffer *f,
  791. NVENCOutputSurface *surf)
  792. {
  793. surf->busy = 1;
  794. return av_fifo_generic_write(f, &surf, sizeof(surf), NULL);
  795. }
  796. static inline int nvenc_dequeue_surface(AVFifoBuffer *f,
  797. NVENCOutputSurface **surf)
  798. {
  799. return av_fifo_generic_read(f, surf, sizeof(*surf), NULL);
  800. }
  801. static int nvenc_set_timestamp(NVENCContext *ctx,
  802. NV_ENC_LOCK_BITSTREAM *params,
  803. AVPacket *pkt)
  804. {
  805. pkt->pts = params->outputTimeStamp;
  806. pkt->duration = params->outputDuration;
  807. return nvenc_dequeue_timestamp(ctx->timestamps, &pkt->dts);
  808. }
  809. static int nvenc_get_frame(AVCodecContext *avctx, AVPacket *pkt)
  810. {
  811. NVENCContext *ctx = avctx->priv_data;
  812. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  813. NV_ENC_LOCK_BITSTREAM params = { 0 };
  814. NVENCOutputSurface *out = NULL;
  815. int ret;
  816. ret = nvenc_dequeue_surface(ctx->pending, &out);
  817. if (ret)
  818. return ret;
  819. params.version = NV_ENC_LOCK_BITSTREAM_VER;
  820. params.outputBitstream = out->out;
  821. ret = nv->nvEncLockBitstream(ctx->nvenc_ctx, &params);
  822. if (ret < 0)
  823. return AVERROR_UNKNOWN;
  824. ret = ff_alloc_packet(pkt, params.bitstreamSizeInBytes);
  825. if (ret < 0)
  826. return ret;
  827. memcpy(pkt->data, params.bitstreamBufferPtr, pkt->size);
  828. ret = nv->nvEncUnlockBitstream(ctx->nvenc_ctx, out->out);
  829. if (ret < 0)
  830. return AVERROR_UNKNOWN;
  831. out->busy = out->in->locked = 0;
  832. ret = nvenc_set_timestamp(ctx, &params, pkt);
  833. if (ret < 0)
  834. return ret;
  835. switch (params.pictureType) {
  836. case NV_ENC_PIC_TYPE_IDR:
  837. pkt->flags |= AV_PKT_FLAG_KEY;
  838. case NV_ENC_PIC_TYPE_INTRA_REFRESH:
  839. case NV_ENC_PIC_TYPE_I:
  840. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
  841. break;
  842. case NV_ENC_PIC_TYPE_P:
  843. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
  844. break;
  845. case NV_ENC_PIC_TYPE_B:
  846. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
  847. break;
  848. case NV_ENC_PIC_TYPE_BI:
  849. avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI;
  850. break;
  851. }
  852. return 0;
  853. }
  854. int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
  855. const AVFrame *frame, int *got_packet)
  856. {
  857. NVENCContext *ctx = avctx->priv_data;
  858. NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
  859. NV_ENC_PIC_PARAMS params = { 0 };
  860. NVENCInputSurface *in = NULL;
  861. NVENCOutputSurface *out = NULL;
  862. int ret;
  863. params.version = NV_ENC_PIC_PARAMS_VER;
  864. if (frame) {
  865. ret = nvenc_enqueue_frame(avctx, frame, &in);
  866. if (ret < 0)
  867. return ret;
  868. out = get_output_surface(ctx);
  869. if (!out)
  870. return AVERROR_BUG;
  871. out->in = in;
  872. params.inputBuffer = in->in;
  873. params.bufferFmt = in->format;
  874. params.inputWidth = frame->width;
  875. params.inputHeight = frame->height;
  876. params.outputBitstream = out->out;
  877. params.inputTimeStamp = frame->pts;
  878. if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
  879. if (frame->top_field_first)
  880. params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
  881. else
  882. params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
  883. } else {
  884. params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
  885. }
  886. nvenc_codec_specific_pic_params(avctx, &params);
  887. ret = nvenc_enqueue_timestamp(ctx->timestamps, frame->pts);
  888. if (ret < 0)
  889. return ret;
  890. } else {
  891. params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
  892. }
  893. ret = nv->nvEncEncodePicture(ctx->nvenc_ctx, &params);
  894. if (ret != NV_ENC_SUCCESS &&
  895. ret != NV_ENC_ERR_NEED_MORE_INPUT) {
  896. return AVERROR_UNKNOWN;
  897. }
  898. if (out) {
  899. ret = nvenc_enqueue_surface(ctx->pending, out);
  900. if (ret < 0)
  901. return ret;
  902. }
  903. if (ret != NV_ENC_ERR_NEED_MORE_INPUT &&
  904. av_fifo_size(ctx->pending)) {
  905. ret = nvenc_get_frame(avctx, pkt);
  906. if (ret < 0)
  907. return ret;
  908. *got_packet = 1;
  909. } else {
  910. *got_packet = 0;
  911. }
  912. return 0;
  913. }