  1. /*
  2. * H.264 hardware encoding using nvidia nvenc
  3. * Copyright (c) 2014 Timo Rothenpieler <timo@rothenpieler.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #if defined(_WIN32)
  22. #include <windows.h>
  23. #else
  24. #include <dlfcn.h>
  25. #endif
  26. #include <nvEncodeAPI.h>
  27. #include "libavutil/fifo.h"
  28. #include "libavutil/internal.h"
  29. #include "libavutil/imgutils.h"
  30. #include "libavutil/avassert.h"
  31. #include "libavutil/opt.h"
  32. #include "libavutil/mem.h"
  33. #include "libavutil/hwcontext.h"
  34. #include "avcodec.h"
  35. #include "internal.h"
  36. #include "thread.h"
  37. #if CONFIG_CUDA
  38. #include <cuda.h>
  39. #include "libavutil/hwcontext_cuda.h"
  40. #else
  41. #if defined(_WIN32)
  42. #define CUDAAPI __stdcall
  43. #else
  44. #define CUDAAPI
  45. #endif
  46. typedef enum cudaError_enum {
  47. CUDA_SUCCESS = 0
  48. } CUresult;
  49. typedef int CUdevice;
  50. typedef void* CUcontext;
  51. typedef void* CUdeviceptr;
  52. #endif
  53. #if defined(_WIN32)
  54. #define LOAD_FUNC(l, s) GetProcAddress(l, s)
  55. #define DL_CLOSE_FUNC(l) FreeLibrary(l)
  56. #else
  57. #define LOAD_FUNC(l, s) dlsym(l, s)
  58. #define DL_CLOSE_FUNC(l) dlclose(l)
  59. #endif
  60. typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
  61. typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
  62. typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
  63. typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
  64. typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
  65. typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
  66. typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
  67. typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
  68. typedef NVENCSTATUS (NVENCAPI* PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
  69. #define MAX_REGISTERED_FRAMES 64
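/* upper bound on input frames that can be registered with the encoder at once; sizes the registered_frames[] array in NvencContext and is enforced in nvenc_find_free_reg_resource() */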
  70. typedef struct NvencSurface
  71. {
  72. NV_ENC_INPUT_PTR input_surface;
  73. AVFrame *in_ref;
  74. NV_ENC_MAP_INPUT_RESOURCE in_map;
  75. int reg_idx;
  76. int width;
  77. int height;
  78. int lockCount;
  79. NV_ENC_BUFFER_FORMAT format;
  80. NV_ENC_OUTPUT_PTR output_surface;
  81. int size;
  82. } NvencSurface;
  83. typedef struct NvencData
  84. {
  85. union {
  86. int64_t timestamp;
  87. NvencSurface *surface;
  88. } u;
  89. } NvencData;
  90. typedef struct NvencDynLoadFunctions
  91. {
  92. PCUINIT cu_init;
  93. PCUDEVICEGETCOUNT cu_device_get_count;
  94. PCUDEVICEGET cu_device_get;
  95. PCUDEVICEGETNAME cu_device_get_name;
  96. PCUDEVICECOMPUTECAPABILITY cu_device_compute_capability;
  97. PCUCTXCREATE cu_ctx_create;
  98. PCUCTXPOPCURRENT cu_ctx_pop_current;
  99. PCUCTXDESTROY cu_ctx_destroy;
  100. NV_ENCODE_API_FUNCTION_LIST nvenc_funcs;
  101. int nvenc_device_count;
  102. CUdevice nvenc_devices[16];
  103. #if !CONFIG_CUDA
  104. #if defined(_WIN32)
  105. HMODULE cuda_lib;
  106. #else
  107. void* cuda_lib;
  108. #endif
  109. #endif
  110. #if defined(_WIN32)
  111. HMODULE nvenc_lib;
  112. #else
  113. void* nvenc_lib;
  114. #endif
  115. } NvencDynLoadFunctions;
  116. typedef struct NvencValuePair
  117. {
  118. const char *str;
  119. uint32_t num;
  120. } NvencValuePair;
  121. typedef struct NvencContext
  122. {
  123. AVClass *avclass;
  124. NvencDynLoadFunctions nvenc_dload_funcs;
  125. NV_ENC_INITIALIZE_PARAMS init_encode_params;
  126. NV_ENC_CONFIG encode_config;
  127. CUcontext cu_context;
  128. CUcontext cu_context_internal;
  129. int max_surface_count;
  130. NvencSurface *surfaces;
  131. AVFifoBuffer *output_surface_queue;
  132. AVFifoBuffer *output_surface_ready_queue;
  133. AVFifoBuffer *timestamp_list;
  134. int64_t last_dts;
  135. struct {
  136. CUdeviceptr ptr;
  137. NV_ENC_REGISTERED_PTR regptr;
  138. int mapped;
  139. } registered_frames[MAX_REGISTERED_FRAMES];
  140. int nb_registered_frames;
  141. /* the actual data pixel format, different from
  142. * AVCodecContext.pix_fmt when using hwaccel frames on input */
  143. enum AVPixelFormat data_pix_fmt;
  144. void *nvencoder;
  145. char *preset;
  146. char *profile;
  147. char *level;
  148. char *tier;
  149. int cbr;
  150. int twopass;
  151. int gpu;
  152. int buffer_delay;
  153. } NvencContext;
  154. static const NvencValuePair nvenc_h264_level_pairs[] = {
  155. { "auto", NV_ENC_LEVEL_AUTOSELECT },
  156. { "1" , NV_ENC_LEVEL_H264_1 },
  157. { "1.0" , NV_ENC_LEVEL_H264_1 },
  158. { "1b" , NV_ENC_LEVEL_H264_1b },
  159. { "1.0b", NV_ENC_LEVEL_H264_1b },
  160. { "1.1" , NV_ENC_LEVEL_H264_11 },
  161. { "1.2" , NV_ENC_LEVEL_H264_12 },
  162. { "1.3" , NV_ENC_LEVEL_H264_13 },
  163. { "2" , NV_ENC_LEVEL_H264_2 },
  164. { "2.0" , NV_ENC_LEVEL_H264_2 },
  165. { "2.1" , NV_ENC_LEVEL_H264_21 },
  166. { "2.2" , NV_ENC_LEVEL_H264_22 },
  167. { "3" , NV_ENC_LEVEL_H264_3 },
  168. { "3.0" , NV_ENC_LEVEL_H264_3 },
  169. { "3.1" , NV_ENC_LEVEL_H264_31 },
  170. { "3.2" , NV_ENC_LEVEL_H264_32 },
  171. { "4" , NV_ENC_LEVEL_H264_4 },
  172. { "4.0" , NV_ENC_LEVEL_H264_4 },
  173. { "4.1" , NV_ENC_LEVEL_H264_41 },
  174. { "4.2" , NV_ENC_LEVEL_H264_42 },
  175. { "5" , NV_ENC_LEVEL_H264_5 },
  176. { "5.0" , NV_ENC_LEVEL_H264_5 },
  177. { "5.1" , NV_ENC_LEVEL_H264_51 },
  178. { NULL }
  179. };
  180. static const NvencValuePair nvenc_hevc_level_pairs[] = {
  181. { "auto", NV_ENC_LEVEL_AUTOSELECT },
  182. { "1" , NV_ENC_LEVEL_HEVC_1 },
  183. { "1.0" , NV_ENC_LEVEL_HEVC_1 },
  184. { "2" , NV_ENC_LEVEL_HEVC_2 },
  185. { "2.0" , NV_ENC_LEVEL_HEVC_2 },
  186. { "2.1" , NV_ENC_LEVEL_HEVC_21 },
  187. { "3" , NV_ENC_LEVEL_HEVC_3 },
  188. { "3.0" , NV_ENC_LEVEL_HEVC_3 },
  189. { "3.1" , NV_ENC_LEVEL_HEVC_31 },
  190. { "4" , NV_ENC_LEVEL_HEVC_4 },
  191. { "4.0" , NV_ENC_LEVEL_HEVC_4 },
  192. { "4.1" , NV_ENC_LEVEL_HEVC_41 },
  193. { "5" , NV_ENC_LEVEL_HEVC_5 },
  194. { "5.0" , NV_ENC_LEVEL_HEVC_5 },
  195. { "5.1" , NV_ENC_LEVEL_HEVC_51 },
  196. { "5.2" , NV_ENC_LEVEL_HEVC_52 },
  197. { "6" , NV_ENC_LEVEL_HEVC_6 },
  198. { "6.0" , NV_ENC_LEVEL_HEVC_6 },
  199. { "6.1" , NV_ENC_LEVEL_HEVC_61 },
  200. { "6.2" , NV_ENC_LEVEL_HEVC_62 },
  201. { NULL }
  202. };
  203. static const struct {
  204. NVENCSTATUS nverr;
  205. int averr;
  206. const char *desc;
  207. } nvenc_errors[] = {
  208. { NV_ENC_SUCCESS, 0, "success" },
  209. { NV_ENC_ERR_NO_ENCODE_DEVICE, AVERROR(ENOENT), "no encode device" },
  210. { NV_ENC_ERR_UNSUPPORTED_DEVICE, AVERROR(ENOSYS), "unsupported device" },
  211. { NV_ENC_ERR_INVALID_ENCODERDEVICE, AVERROR(EINVAL), "invalid encoder device" },
  212. { NV_ENC_ERR_INVALID_DEVICE, AVERROR(EINVAL), "invalid device" },
  213. { NV_ENC_ERR_DEVICE_NOT_EXIST, AVERROR(EIO), "device does not exist" },
  214. { NV_ENC_ERR_INVALID_PTR, AVERROR(EFAULT), "invalid ptr" },
  215. { NV_ENC_ERR_INVALID_EVENT, AVERROR(EINVAL), "invalid event" },
  216. { NV_ENC_ERR_INVALID_PARAM, AVERROR(EINVAL), "invalid param" },
  217. { NV_ENC_ERR_INVALID_CALL, AVERROR(EINVAL), "invalid call" },
  218. { NV_ENC_ERR_OUT_OF_MEMORY, AVERROR(ENOMEM), "out of memory" },
  219. { NV_ENC_ERR_ENCODER_NOT_INITIALIZED, AVERROR(EINVAL), "encoder not initialized" },
  220. { NV_ENC_ERR_UNSUPPORTED_PARAM, AVERROR(ENOSYS), "unsupported param" },
  221. { NV_ENC_ERR_LOCK_BUSY, AVERROR(EAGAIN), "lock busy" },
  222. { NV_ENC_ERR_NOT_ENOUGH_BUFFER, AVERROR(ENOBUFS), "not enough buffer" },
  223. { NV_ENC_ERR_INVALID_VERSION, AVERROR(EINVAL), "invalid version" },
  224. { NV_ENC_ERR_MAP_FAILED, AVERROR(EIO), "map failed" },
  225. { NV_ENC_ERR_NEED_MORE_INPUT, AVERROR(EAGAIN), "need more input" },
  226. { NV_ENC_ERR_ENCODER_BUSY, AVERROR(EAGAIN), "encoder busy" },
  227. { NV_ENC_ERR_EVENT_NOT_REGISTERD, AVERROR(EBADF), "event not registered" },
  228. { NV_ENC_ERR_GENERIC, AVERROR_UNKNOWN, "generic error" },
  229. { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY, AVERROR(EINVAL), "incompatible client key" },
  230. { NV_ENC_ERR_UNIMPLEMENTED, AVERROR(ENOSYS), "unimplemented" },
  231. { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO), "resource register failed" },
  232. { NV_ENC_ERR_RESOURCE_NOT_REGISTERED, AVERROR(EBADF), "resource not registered" },
  233. { NV_ENC_ERR_RESOURCE_NOT_MAPPED, AVERROR(EBADF), "resource not mapped" },
  234. };
  235. static int nvenc_map_error(NVENCSTATUS err, const char **desc)
  236. {
  237. int i;
  238. for (i = 0; i < FF_ARRAY_ELEMS(nvenc_errors); i++) {
  239. if (nvenc_errors[i].nverr == err) {
  240. if (desc)
  241. *desc = nvenc_errors[i].desc;
  242. return nvenc_errors[i].averr;
  243. }
  244. }
  245. if (desc)
  246. *desc = "unknown error";
  247. return AVERROR_UNKNOWN;
  248. }
  249. static int nvenc_print_error(void *log_ctx, NVENCSTATUS err,
  250. const char *error_string)
  251. {
  252. const char *desc;
  253. int ret;
  254. ret = nvenc_map_error(err, &desc);
  255. av_log(log_ctx, AV_LOG_ERROR, "%s: %s (%d)\n", error_string, desc, err);
  256. return ret;
  257. }
  258. static int input_string_to_uint32(AVCodecContext *avctx, const NvencValuePair *pair, const char *input, uint32_t *output)
  259. {
  260. for (; pair->str; ++pair) {
  261. if (!strcmp(input, pair->str)) {
  262. *output = pair->num;
  263. return 0;
  264. }
  265. }
  266. return AVERROR(EINVAL);
  267. }
  268. static void timestamp_queue_enqueue(AVFifoBuffer* queue, int64_t timestamp)
  269. {
  270. av_fifo_generic_write(queue, &timestamp, sizeof(timestamp), NULL);
  271. }
  272. static int64_t timestamp_queue_dequeue(AVFifoBuffer* queue)
  273. {
  274. int64_t timestamp = AV_NOPTS_VALUE;
  275. if (av_fifo_size(queue) > 0)
  276. av_fifo_generic_read(queue, &timestamp, sizeof(timestamp), NULL);
  277. return timestamp;
  278. }
  279. #define CHECK_LOAD_FUNC(t, f, s) \
  280. do { \
  281. (f) = (t)LOAD_FUNC(dl_fn->cuda_lib, s); \
  282. if (!(f)) { \
  283. av_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
  284. goto error; \
  285. } \
  286. } while (0)
  287. static av_cold int nvenc_dyload_cuda(AVCodecContext *avctx)
  288. {
  289. NvencContext *ctx = avctx->priv_data;
  290. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  291. #if CONFIG_CUDA
  292. dl_fn->cu_init = cuInit;
  293. dl_fn->cu_device_get_count = cuDeviceGetCount;
  294. dl_fn->cu_device_get = cuDeviceGet;
  295. dl_fn->cu_device_get_name = cuDeviceGetName;
  296. dl_fn->cu_device_compute_capability = cuDeviceComputeCapability;
  297. dl_fn->cu_ctx_create = cuCtxCreate_v2;
  298. dl_fn->cu_ctx_pop_current = cuCtxPopCurrent_v2;
  299. dl_fn->cu_ctx_destroy = cuCtxDestroy_v2;
  300. return 1;
  301. #else
  302. if (dl_fn->cuda_lib)
  303. return 1;
  304. #if defined(_WIN32)
  305. dl_fn->cuda_lib = LoadLibrary(TEXT("nvcuda.dll"));
  306. #else
  307. dl_fn->cuda_lib = dlopen("libcuda.so", RTLD_LAZY);
  308. #endif
  309. if (!dl_fn->cuda_lib) {
  310. av_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
  311. goto error;
  312. }
  313. CHECK_LOAD_FUNC(PCUINIT, dl_fn->cu_init, "cuInit");
  314. CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, dl_fn->cu_device_get_count, "cuDeviceGetCount");
  315. CHECK_LOAD_FUNC(PCUDEVICEGET, dl_fn->cu_device_get, "cuDeviceGet");
  316. CHECK_LOAD_FUNC(PCUDEVICEGETNAME, dl_fn->cu_device_get_name, "cuDeviceGetName");
  317. CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, dl_fn->cu_device_compute_capability, "cuDeviceComputeCapability");
  318. CHECK_LOAD_FUNC(PCUCTXCREATE, dl_fn->cu_ctx_create, "cuCtxCreate_v2");
  319. CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, dl_fn->cu_ctx_pop_current, "cuCtxPopCurrent_v2");
  320. CHECK_LOAD_FUNC(PCUCTXDESTROY, dl_fn->cu_ctx_destroy, "cuCtxDestroy_v2");
  321. return 1;
  322. error:
  323. if (dl_fn->cuda_lib)
  324. DL_CLOSE_FUNC(dl_fn->cuda_lib);
  325. dl_fn->cuda_lib = NULL;
  326. return 0;
  327. #endif
  328. }
  329. static av_cold int check_cuda_errors(AVCodecContext *avctx, CUresult err, const char *func)
  330. {
  331. if (err != CUDA_SUCCESS) {
  332. av_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, err);
  333. return 0;
  334. }
  335. return 1;
  336. }
  337. #define check_cuda_errors(f) if (!check_cuda_errors(avctx, f, #f)) goto error
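/* this macro deliberately shadows the function above: the nested call is not re-expanded, so it invokes the function, logging on failure and jumping to the calling function's local "error" label */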
  338. static av_cold int nvenc_check_cuda(AVCodecContext *avctx)
  339. {
  340. int device_count = 0;
  341. CUdevice cu_device = 0;
  342. char gpu_name[128];
  343. int smminor = 0, smmajor = 0;
  344. int i, smver, target_smver;
  345. NvencContext *ctx = avctx->priv_data;
  346. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  347. switch (avctx->codec->id) {
  348. case AV_CODEC_ID_H264:
  349. target_smver = ctx->data_pix_fmt == AV_PIX_FMT_YUV444P ? 0x52 : 0x30;
  350. break;
  351. case AV_CODEC_ID_H265:
  352. target_smver = 0x52;
  353. break;
  354. default:
  355. av_log(avctx, AV_LOG_FATAL, "Unknown codec name\n");
  356. goto error;
  357. }
  358. if (!nvenc_dyload_cuda(avctx))
  359. return 0;
  360. if (dl_fn->nvenc_device_count > 0)
  361. return 1;
  362. check_cuda_errors(dl_fn->cu_init(0));
  363. check_cuda_errors(dl_fn->cu_device_get_count(&device_count));
  364. if (!device_count) {
  365. av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
  366. goto error;
  367. }
  368. av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", device_count);
  369. dl_fn->nvenc_device_count = 0;
  370. for (i = 0; i < device_count; ++i) {
  371. check_cuda_errors(dl_fn->cu_device_get(&cu_device, i));
  372. check_cuda_errors(dl_fn->cu_device_get_name(gpu_name, sizeof(gpu_name), cu_device));
  373. check_cuda_errors(dl_fn->cu_device_compute_capability(&smmajor, &smminor, cu_device));
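/* pack the compute capability as 0xMm (e.g. 5.2 -> 0x52) so it can be compared numerically against target_smver */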
  374. smver = (smmajor << 4) | smminor;
  375. av_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, smmajor, smminor, (smver >= target_smver) ? "Available" : "Not Available");
  376. if (smver >= target_smver)
  377. dl_fn->nvenc_devices[dl_fn->nvenc_device_count++] = cu_device;
  378. }
  379. if (!dl_fn->nvenc_device_count) {
  380. av_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
  381. goto error;
  382. }
  383. return 1;
  384. error:
  385. dl_fn->nvenc_device_count = 0;
  386. return 0;
  387. }
  388. static av_cold int nvenc_dyload_nvenc(AVCodecContext *avctx)
  389. {
  390. PNVENCODEAPICREATEINSTANCE nvEncodeAPICreateInstance = 0;
  391. NVENCSTATUS nvstatus;
  392. NvencContext *ctx = avctx->priv_data;
  393. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  394. if (!nvenc_check_cuda(avctx))
  395. return 0;
  396. if (dl_fn->nvenc_lib)
  397. return 1;
  398. #if defined(_WIN32)
  399. if (sizeof(void*) == 8) {
  400. dl_fn->nvenc_lib = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
  401. } else {
  402. dl_fn->nvenc_lib = LoadLibrary(TEXT("nvEncodeAPI.dll"));
  403. }
  404. #else
  405. dl_fn->nvenc_lib = dlopen("libnvidia-encode.so.1", RTLD_LAZY);
  406. #endif
  407. if (!dl_fn->nvenc_lib) {
  408. av_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
  409. goto error;
  410. }
  411. nvEncodeAPICreateInstance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(dl_fn->nvenc_lib, "NvEncodeAPICreateInstance");
  412. if (!nvEncodeAPICreateInstance) {
  413. av_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
  414. goto error;
  415. }
  416. dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
  417. nvstatus = nvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);
  418. if (nvstatus != NV_ENC_SUCCESS) {
  419. nvenc_print_error(avctx, nvstatus, "Failed to create nvenc instance");
  420. goto error;
  421. }
  422. av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
  423. return 1;
  424. error:
  425. if (dl_fn->nvenc_lib)
  426. DL_CLOSE_FUNC(dl_fn->nvenc_lib);
  427. dl_fn->nvenc_lib = NULL;
  428. return 0;
  429. }
  430. static av_cold void nvenc_unload_nvenc(AVCodecContext *avctx)
  431. {
  432. NvencContext *ctx = avctx->priv_data;
  433. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  434. DL_CLOSE_FUNC(dl_fn->nvenc_lib);
  435. dl_fn->nvenc_lib = NULL;
  436. dl_fn->nvenc_device_count = 0;
  437. #if !CONFIG_CUDA
  438. DL_CLOSE_FUNC(dl_fn->cuda_lib);
  439. dl_fn->cuda_lib = NULL;
  440. #endif
  441. dl_fn->cu_init = NULL;
  442. dl_fn->cu_device_get_count = NULL;
  443. dl_fn->cu_device_get = NULL;
  444. dl_fn->cu_device_get_name = NULL;
  445. dl_fn->cu_device_compute_capability = NULL;
  446. dl_fn->cu_ctx_create = NULL;
  447. dl_fn->cu_ctx_pop_current = NULL;
  448. dl_fn->cu_ctx_destroy = NULL;
  449. av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
  450. }
  451. static av_cold int nvenc_setup_device(AVCodecContext *avctx)
  452. {
  453. NvencContext *ctx = avctx->priv_data;
  454. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  455. CUresult cu_res;
  456. CUcontext cu_context_curr;
  457. ctx->data_pix_fmt = avctx->pix_fmt;
  458. #if CONFIG_CUDA
  459. if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
  460. AVHWFramesContext *frames_ctx;
  461. AVCUDADeviceContext *device_hwctx;
  462. if (!avctx->hw_frames_ctx) {
  463. av_log(avctx, AV_LOG_ERROR, "hw_frames_ctx must be set when using GPU frames as input\n");
  464. return AVERROR(EINVAL);
  465. }
  466. frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
  467. device_hwctx = frames_ctx->device_ctx->hwctx;
  468. ctx->cu_context = device_hwctx->cuda_ctx;
  469. ctx->data_pix_fmt = frames_ctx->sw_format;
  470. return 0;
  471. }
  472. #endif
  473. if (ctx->gpu >= dl_fn->nvenc_device_count) {
  474. av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->gpu, dl_fn->nvenc_device_count);
  475. return AVERROR(EINVAL);
  476. }
  477. ctx->cu_context = NULL;
  478. cu_res = dl_fn->cu_ctx_create(&ctx->cu_context_internal, 4, dl_fn->nvenc_devices[ctx->gpu]); // CU_CTX_SCHED_BLOCKING_SYNC=4, avoid CPU spins
  479. if (cu_res != CUDA_SUCCESS) {
  480. av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
  481. return AVERROR_EXTERNAL;
  482. }
  483. cu_res = dl_fn->cu_ctx_pop_current(&cu_context_curr);
  484. if (cu_res != CUDA_SUCCESS) {
  485. av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
  486. return AVERROR_EXTERNAL;
  487. }
  488. ctx->cu_context = ctx->cu_context_internal;
  489. return 0;
  490. }
  491. static av_cold int nvenc_open_session(AVCodecContext *avctx)
  492. {
  493. NvencContext *ctx = avctx->priv_data;
  494. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  495. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  496. NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encode_session_params = { 0 };
  497. NVENCSTATUS nv_status;
  498. encode_session_params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
  499. encode_session_params.apiVersion = NVENCAPI_VERSION;
  500. encode_session_params.device = ctx->cu_context;
  501. encode_session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  502. nv_status = p_nvenc->nvEncOpenEncodeSessionEx(&encode_session_params, &ctx->nvencoder);
  503. if (nv_status != NV_ENC_SUCCESS) {
  504. ctx->nvencoder = NULL;
  505. return nvenc_print_error(avctx, nv_status, "OpenEncodeSessionEx failed");
  506. }
  507. return 0;
  508. }
  509. static av_cold void set_constqp(AVCodecContext *avctx)
  510. {
  511. NvencContext *ctx = avctx->priv_data;
  512. ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
  513. ctx->encode_config.rcParams.constQP.qpInterB = avctx->global_quality;
  514. ctx->encode_config.rcParams.constQP.qpInterP = avctx->global_quality;
  515. ctx->encode_config.rcParams.constQP.qpIntra = avctx->global_quality;
  516. }
  517. static av_cold void set_vbr(AVCodecContext *avctx)
  518. {
  519. NvencContext *ctx = avctx->priv_data;
  520. ctx->encode_config.rcParams.enableMinQP = 1;
  521. ctx->encode_config.rcParams.enableMaxQP = 1;
  522. ctx->encode_config.rcParams.minQP.qpInterB = avctx->qmin;
  523. ctx->encode_config.rcParams.minQP.qpInterP = avctx->qmin;
  524. ctx->encode_config.rcParams.minQP.qpIntra = avctx->qmin;
  525. ctx->encode_config.rcParams.maxQP.qpInterB = avctx->qmax;
  526. ctx->encode_config.rcParams.maxQP.qpInterP = avctx->qmax;
  527. ctx->encode_config.rcParams.maxQP.qpIntra = avctx->qmax;
  528. }
  529. static av_cold void set_lossless(AVCodecContext *avctx)
  530. {
  531. NvencContext *ctx = avctx->priv_data;
  532. ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
  533. ctx->encode_config.rcParams.constQP.qpInterB = 0;
  534. ctx->encode_config.rcParams.constQP.qpInterP = 0;
  535. ctx->encode_config.rcParams.constQP.qpIntra = 0;
  536. }
  537. static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx, int lossless)
  538. {
  539. NvencContext *ctx = avctx->priv_data;
  540. int qp_inter_p;
  541. if (avctx->bit_rate > 0) {
  542. ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
  543. } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
  544. ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
  545. }
  546. if (avctx->rc_max_rate > 0)
  547. ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
  548. if (lossless) {
  549. if (avctx->codec->id == AV_CODEC_ID_H264)
  550. ctx->encode_config.encodeCodecConfig.h264Config.qpPrimeYZeroTransformBypassFlag = 1;
  551. set_lossless(avctx);
  552. avctx->qmin = -1;
  553. avctx->qmax = -1;
  554. } else if (ctx->cbr) {
  555. if (!ctx->twopass) {
  556. ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
  557. } else {
  558. ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
  559. if (avctx->codec->id == AV_CODEC_ID_H264) {
  560. ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
  561. ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
  562. }
  563. }
  564. if (avctx->codec->id == AV_CODEC_ID_H264) {
  565. ctx->encode_config.encodeCodecConfig.h264Config.outputBufferingPeriodSEI = 1;
  566. ctx->encode_config.encodeCodecConfig.h264Config.outputPictureTimingSEI = 1;
  567. } else if (avctx->codec->id == AV_CODEC_ID_H265) {
  568. ctx->encode_config.encodeCodecConfig.hevcConfig.outputBufferingPeriodSEI = 1;
  569. ctx->encode_config.encodeCodecConfig.hevcConfig.outputPictureTimingSEI = 1;
  570. }
  571. } else if (avctx->global_quality > 0) {
  572. set_constqp(avctx);
  573. avctx->qmin = -1;
  574. avctx->qmax = -1;
  575. } else {
  576. if (avctx->qmin >= 0 && avctx->qmax >= 0) {
  577. set_vbr(avctx);
  578. qp_inter_p = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
  579. if (ctx->twopass) {
  580. ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_VBR;
  581. if (avctx->codec->id == AV_CODEC_ID_H264) {
  582. ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
  583. ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
  584. }
  585. } else {
  586. ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR_MINQP;
  587. }
  588. } else {
  589. qp_inter_p = 26; // default to 26
  590. if (ctx->twopass) {
  591. ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_VBR;
  592. } else {
  593. ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
  594. }
  595. }
  596. ctx->encode_config.rcParams.enableInitialRCQP = 1;
  597. ctx->encode_config.rcParams.initialRCQP.qpInterP = qp_inter_p;
  598. if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
  599. ctx->encode_config.rcParams.initialRCQP.qpIntra = av_clip(
  600. qp_inter_p * fabs(avctx->i_quant_factor) + avctx->i_quant_offset, 0, 51);
  601. ctx->encode_config.rcParams.initialRCQP.qpInterB = av_clip(
  602. qp_inter_p * fabs(avctx->b_quant_factor) + avctx->b_quant_offset, 0, 51);
  603. } else {
  604. ctx->encode_config.rcParams.initialRCQP.qpIntra = qp_inter_p;
  605. ctx->encode_config.rcParams.initialRCQP.qpInterB = qp_inter_p;
  606. }
  607. }
  608. if (avctx->rc_buffer_size > 0) {
  609. ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
  610. } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
  611. ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
  612. }
  613. }
  614. static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx, int lossless)
  615. {
  616. NvencContext *ctx = avctx->priv_data;
  617. int res;
  618. ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = avctx->colorspace;
  619. ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = avctx->color_primaries;
  620. ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->color_trc;
  621. ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
  622. || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
  623. ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag =
  624. (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);
  625. ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag =
  626. (ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag
  627. || ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFormat != 5
  628. || ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag != 0);
  629. ctx->encode_config.encodeCodecConfig.h264Config.sliceMode = 3;
  630. ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData = 1;
  631. ctx->encode_config.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
  632. ctx->encode_config.encodeCodecConfig.h264Config.repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
  633. ctx->encode_config.encodeCodecConfig.h264Config.outputAUD = 1;
  634. if (!ctx->profile && !lossless) {
  635. switch (avctx->profile) {
  636. case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
  637. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
  638. break;
  639. case FF_PROFILE_H264_BASELINE:
  640. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
  641. break;
  642. case FF_PROFILE_H264_MAIN:
  643. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
  644. break;
  645. case FF_PROFILE_H264_HIGH:
  646. case FF_PROFILE_UNKNOWN:
  647. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
  648. break;
  649. default:
  650. av_log(avctx, AV_LOG_WARNING, "Unsupported profile requested, falling back to high\n");
  651. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
  652. break;
  653. }
  654. } else if (!lossless) {
  655. if (!strcmp(ctx->profile, "high")) {
  656. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
  657. avctx->profile = FF_PROFILE_H264_HIGH;
  658. } else if (!strcmp(ctx->profile, "main")) {
  659. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
  660. avctx->profile = FF_PROFILE_H264_MAIN;
  661. } else if (!strcmp(ctx->profile, "baseline")) {
  662. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
  663. avctx->profile = FF_PROFILE_H264_BASELINE;
  664. } else if (!strcmp(ctx->profile, "high444p")) {
  665. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
  666. avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
  667. } else {
668. av_log(avctx, AV_LOG_FATAL, "Profile \"%s\" is unknown! Supported profiles: baseline, main, high, high444p\n", ctx->profile);
  669. return AVERROR(EINVAL);
  670. }
  671. }
672. // force the profile to high444p if the input is AV_PIX_FMT_YUV444P
  673. if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
  674. ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
  675. avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
  676. }
  677. ctx->encode_config.encodeCodecConfig.h264Config.chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;
  678. if (ctx->level) {
  679. res = input_string_to_uint32(avctx, nvenc_h264_level_pairs, ctx->level, &ctx->encode_config.encodeCodecConfig.h264Config.level);
  680. if (res) {
  681. av_log(avctx, AV_LOG_FATAL, "Level \"%s\" is unknown! Supported levels: auto, 1, 1b, 1.1, 1.2, 1.3, 2, 2.1, 2.2, 3, 3.1, 3.2, 4, 4.1, 4.2, 5, 5.1\n", ctx->level);
  682. return res;
  683. }
  684. } else {
  685. ctx->encode_config.encodeCodecConfig.h264Config.level = NV_ENC_LEVEL_AUTOSELECT;
  686. }
  687. return 0;
  688. }
  689. static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
  690. {
  691. NvencContext *ctx = avctx->priv_data;
  692. int res;
  693. ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourMatrix = avctx->colorspace;
  694. ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourPrimaries = avctx->color_primaries;
  695. ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.transferCharacteristics = avctx->color_trc;
  696. ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
  697. || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
  698. ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourDescriptionPresentFlag =
  699. (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);
  700. ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoSignalTypePresentFlag =
  701. (ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourDescriptionPresentFlag
  702. || ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFormat != 5
  703. || ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFullRangeFlag != 0);
  704. ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode = 3;
  705. ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData = 1;
  706. ctx->encode_config.encodeCodecConfig.hevcConfig.disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
  707. ctx->encode_config.encodeCodecConfig.hevcConfig.repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
  708. ctx->encode_config.encodeCodecConfig.hevcConfig.outputAUD = 1;
  709. /* No other profile is supported in the current SDK version 5 */
  710. ctx->encode_config.profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
  711. avctx->profile = FF_PROFILE_HEVC_MAIN;
  712. if (ctx->level) {
  713. res = input_string_to_uint32(avctx, nvenc_hevc_level_pairs, ctx->level, &ctx->encode_config.encodeCodecConfig.hevcConfig.level);
  714. if (res) {
  715. av_log(avctx, AV_LOG_FATAL, "Level \"%s\" is unknown! Supported levels: auto, 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, 6.1, 6.2\n", ctx->level);
  716. return res;
  717. }
  718. } else {
  719. ctx->encode_config.encodeCodecConfig.hevcConfig.level = NV_ENC_LEVEL_AUTOSELECT;
  720. }
  721. if (ctx->tier) {
  722. if (!strcmp(ctx->tier, "main")) {
  723. ctx->encode_config.encodeCodecConfig.hevcConfig.tier = NV_ENC_TIER_HEVC_MAIN;
  724. } else if (!strcmp(ctx->tier, "high")) {
  725. ctx->encode_config.encodeCodecConfig.hevcConfig.tier = NV_ENC_TIER_HEVC_HIGH;
  726. } else {
  727. av_log(avctx, AV_LOG_FATAL, "Tier \"%s\" is unknown! Supported tiers: main, high\n", ctx->tier);
  728. return AVERROR(EINVAL);
  729. }
  730. }
  731. return 0;
  732. }
  733. static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx, int lossless)
  734. {
  735. switch (avctx->codec->id) {
  736. case AV_CODEC_ID_H264:
  737. return nvenc_setup_h264_config(avctx, lossless);
  738. case AV_CODEC_ID_H265:
  739. return nvenc_setup_hevc_config(avctx);
740. /* Earlier switch/case will return if an unknown codec is passed. */
  741. }
  742. return 0;
  743. }
  744. static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
  745. {
  746. NvencContext *ctx = avctx->priv_data;
  747. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  748. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  749. NV_ENC_PRESET_CONFIG preset_config = { 0 };
  750. GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
  751. GUID codec;
  752. NVENCSTATUS nv_status = NV_ENC_SUCCESS;
  753. AVCPBProperties *cpb_props;
  754. int num_mbs;
  755. int isLL = 0;
  756. int lossless = 0;
  757. int res = 0;
  758. int dw, dh;
  759. ctx->last_dts = AV_NOPTS_VALUE;
  760. ctx->encode_config.version = NV_ENC_CONFIG_VER;
  761. ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
  762. preset_config.version = NV_ENC_PRESET_CONFIG_VER;
  763. preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
  764. if (ctx->preset) {
  765. if (!strcmp(ctx->preset, "slow")) {
  766. encoder_preset = NV_ENC_PRESET_HQ_GUID;
  767. ctx->twopass = 1;
  768. } else if (!strcmp(ctx->preset, "medium")) {
  769. encoder_preset = NV_ENC_PRESET_HQ_GUID;
  770. ctx->twopass = 0;
  771. } else if (!strcmp(ctx->preset, "fast")) {
  772. encoder_preset = NV_ENC_PRESET_HP_GUID;
  773. ctx->twopass = 0;
  774. } else if (!strcmp(ctx->preset, "hq")) {
  775. encoder_preset = NV_ENC_PRESET_HQ_GUID;
  776. } else if (!strcmp(ctx->preset, "hp")) {
  777. encoder_preset = NV_ENC_PRESET_HP_GUID;
  778. } else if (!strcmp(ctx->preset, "bd")) {
  779. encoder_preset = NV_ENC_PRESET_BD_GUID;
  780. } else if (!strcmp(ctx->preset, "ll")) {
  781. encoder_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
  782. isLL = 1;
  783. } else if (!strcmp(ctx->preset, "llhp")) {
  784. encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
  785. isLL = 1;
  786. } else if (!strcmp(ctx->preset, "llhq")) {
  787. encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
  788. isLL = 1;
  789. } else if (!strcmp(ctx->preset, "lossless")) {
  790. encoder_preset = NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID;
  791. lossless = 1;
  792. } else if (!strcmp(ctx->preset, "losslesshp")) {
  793. encoder_preset = NV_ENC_PRESET_LOSSLESS_HP_GUID;
  794. lossless = 1;
  795. } else if (!strcmp(ctx->preset, "default")) {
  796. encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
  797. } else {
  798. av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown! Supported presets: slow, medium, fast, hp, hq, bd, ll, llhp, llhq, lossless, losslesshp, default\n", ctx->preset);
  799. return AVERROR(EINVAL);
  800. }
  801. }
  802. if (ctx->twopass < 0) {
  803. ctx->twopass = isLL;
  804. }
  805. switch (avctx->codec->id) {
  806. case AV_CODEC_ID_H264:
  807. codec = NV_ENC_CODEC_H264_GUID;
  808. break;
  809. case AV_CODEC_ID_H265:
  810. codec = NV_ENC_CODEC_HEVC_GUID;
  811. break;
  812. default:
  813. av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
  814. return AVERROR(EINVAL);
  815. }
  816. nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder, codec, encoder_preset, &preset_config);
  817. if (nv_status != NV_ENC_SUCCESS) {
  818. return nvenc_print_error(avctx, nv_status, "GetEncodePresetConfig failed");
  819. }
  820. ctx->init_encode_params.encodeGUID = codec;
  821. ctx->init_encode_params.encodeHeight = avctx->height;
  822. ctx->init_encode_params.encodeWidth = avctx->width;
  823. if (avctx->sample_aspect_ratio.num && avctx->sample_aspect_ratio.den &&
824. (avctx->sample_aspect_ratio.num != 1 || avctx->sample_aspect_ratio.den != 1)) {
  825. av_reduce(&dw, &dh,
  826. avctx->width * avctx->sample_aspect_ratio.num,
  827. avctx->height * avctx->sample_aspect_ratio.den,
  828. 1024 * 1024);
  829. ctx->init_encode_params.darHeight = dh;
  830. ctx->init_encode_params.darWidth = dw;
  831. } else {
  832. ctx->init_encode_params.darHeight = avctx->height;
  833. ctx->init_encode_params.darWidth = avctx->width;
  834. }
  835. // De-compensate for hardware, dubiously, trying to compensate for
  836. // playback at 704 pixel width.
  837. if (avctx->width == 720 &&
  838. (avctx->height == 480 || avctx->height == 576)) {
  839. av_reduce(&dw, &dh,
  840. ctx->init_encode_params.darWidth * 44,
  841. ctx->init_encode_params.darHeight * 45,
  842. 1024 * 1024);
  843. ctx->init_encode_params.darHeight = dh;
  844. ctx->init_encode_params.darWidth = dw;
  845. }
  846. ctx->init_encode_params.frameRateNum = avctx->time_base.den;
  847. ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
  848. num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
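/* 8160 macroblocks corresponds to 1920x1088; allocate fewer surfaces at or above that size */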
  849. ctx->max_surface_count = (num_mbs >= 8160) ? 32 : 48;
  850. if (ctx->buffer_delay >= ctx->max_surface_count)
  851. ctx->buffer_delay = ctx->max_surface_count - 1;
  852. ctx->init_encode_params.enableEncodeAsync = 0;
  853. ctx->init_encode_params.enablePTD = 1;
  854. ctx->init_encode_params.presetGUID = encoder_preset;
  855. ctx->init_encode_params.encodeConfig = &ctx->encode_config;
  856. memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));
  857. ctx->encode_config.version = NV_ENC_CONFIG_VER;
  858. if (avctx->refs >= 0) {
  859. /* 0 means "let the hardware decide" */
  860. switch (avctx->codec->id) {
  861. case AV_CODEC_ID_H264:
  862. ctx->encode_config.encodeCodecConfig.h264Config.maxNumRefFrames = avctx->refs;
  863. break;
  864. case AV_CODEC_ID_H265:
  865. ctx->encode_config.encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB = avctx->refs;
  866. break;
867. /* Earlier switch/case will return if an unknown codec is passed. */
  868. }
  869. }
  870. if (avctx->gop_size > 0) {
  871. if (avctx->max_b_frames >= 0) {
872. /* 0 is intra-only, 1 is I/P only, 2 is one B-frame, 3 is two B-frames, and so on. */
  873. ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
  874. }
  875. ctx->encode_config.gopLength = avctx->gop_size;
  876. switch (avctx->codec->id) {
  877. case AV_CODEC_ID_H264:
  878. ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size;
  879. break;
  880. case AV_CODEC_ID_H265:
  881. ctx->encode_config.encodeCodecConfig.hevcConfig.idrPeriod = avctx->gop_size;
  882. break;
883. /* Earlier switch/case will return if an unknown codec is passed. */
  884. }
  885. } else if (avctx->gop_size == 0) {
  886. ctx->encode_config.frameIntervalP = 0;
  887. ctx->encode_config.gopLength = 1;
  888. switch (avctx->codec->id) {
  889. case AV_CODEC_ID_H264:
  890. ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = 1;
  891. break;
  892. case AV_CODEC_ID_H265:
  893. ctx->encode_config.encodeCodecConfig.hevcConfig.idrPeriod = 1;
  894. break;
895. /* Earlier switch/case will return if an unknown codec is passed. */
  896. }
  897. }
898. /* when there are B-frames, set the dts offset */
  899. if (ctx->encode_config.frameIntervalP >= 2)
  900. ctx->last_dts = -2;
  901. nvenc_setup_rate_control(avctx, lossless);
  902. if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
  903. ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
  904. } else {
  905. ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
  906. }
  907. res = nvenc_setup_codec_config(avctx, lossless);
  908. if (res)
  909. return res;
  910. nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
  911. if (nv_status != NV_ENC_SUCCESS) {
  912. return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
  913. }
  914. if (ctx->encode_config.frameIntervalP > 1)
  915. avctx->has_b_frames = 2;
  916. if (ctx->encode_config.rcParams.averageBitRate > 0)
  917. avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;
  918. cpb_props = ff_add_cpb_side_data(avctx);
  919. if (!cpb_props)
  920. return AVERROR(ENOMEM);
  921. cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
  922. cpb_props->avg_bitrate = avctx->bit_rate;
  923. cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;
  924. return 0;
  925. }
  926. static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
  927. {
  928. NvencContext *ctx = avctx->priv_data;
  929. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  930. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  931. NVENCSTATUS nv_status;
  932. NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
  933. allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
  934. switch (ctx->data_pix_fmt) {
  935. case AV_PIX_FMT_YUV420P:
  936. ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YV12_PL;
  937. break;
  938. case AV_PIX_FMT_NV12:
  939. ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_NV12_PL;
  940. break;
  941. case AV_PIX_FMT_YUV444P:
  942. ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_PL;
  943. break;
  944. default:
  945. av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
  946. return AVERROR(EINVAL);
  947. }
  948. if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
  949. ctx->surfaces[idx].in_ref = av_frame_alloc();
  950. if (!ctx->surfaces[idx].in_ref)
  951. return AVERROR(ENOMEM);
  952. } else {
  953. NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
  954. allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
  955. allocSurf.width = (avctx->width + 31) & ~31;
  956. allocSurf.height = (avctx->height + 31) & ~31;
  957. allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
  958. allocSurf.bufferFmt = ctx->surfaces[idx].format;
  959. nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
  960. if (nv_status != NV_ENC_SUCCESS) {
  961. return nvenc_print_error(avctx, nv_status, "CreateInputBuffer failed");
  962. }
  963. ctx->surfaces[idx].input_surface = allocSurf.inputBuffer;
  964. ctx->surfaces[idx].width = allocSurf.width;
  965. ctx->surfaces[idx].height = allocSurf.height;
  966. }
  967. ctx->surfaces[idx].lockCount = 0;
968. /* 1MB is large enough to hold most output frames. NVENC increases this automatically if it is not enough. */
  969. allocOut.size = 1024 * 1024;
  970. allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
  971. nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
  972. if (nv_status != NV_ENC_SUCCESS) {
  973. int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
  974. if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
  975. p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);
  976. av_frame_free(&ctx->surfaces[idx].in_ref);
  977. return err;
  978. }
  979. ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
  980. ctx->surfaces[idx].size = allocOut.size;
  981. return 0;
  982. }
  983. static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx, int* surfaceCount)
  984. {
  985. int res;
  986. NvencContext *ctx = avctx->priv_data;
  987. ctx->surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->surfaces));
  988. if (!ctx->surfaces) {
  989. return AVERROR(ENOMEM);
  990. }
  991. ctx->timestamp_list = av_fifo_alloc(ctx->max_surface_count * sizeof(int64_t));
  992. if (!ctx->timestamp_list)
  993. return AVERROR(ENOMEM);
  994. ctx->output_surface_queue = av_fifo_alloc(ctx->max_surface_count * sizeof(NvencSurface*));
  995. if (!ctx->output_surface_queue)
  996. return AVERROR(ENOMEM);
  997. ctx->output_surface_ready_queue = av_fifo_alloc(ctx->max_surface_count * sizeof(NvencSurface*));
  998. if (!ctx->output_surface_ready_queue)
  999. return AVERROR(ENOMEM);
  1000. for (*surfaceCount = 0; *surfaceCount < ctx->max_surface_count; ++*surfaceCount) {
  1001. res = nvenc_alloc_surface(avctx, *surfaceCount);
  1002. if (res)
  1003. return res;
  1004. }
  1005. return 0;
  1006. }
  1007. static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
  1008. {
  1009. NvencContext *ctx = avctx->priv_data;
  1010. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  1011. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  1012. NVENCSTATUS nv_status;
  1013. uint32_t outSize = 0;
  1014. char tmpHeader[256];
  1015. NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
  1016. payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
  1017. payload.spsppsBuffer = tmpHeader;
  1018. payload.inBufferSize = sizeof(tmpHeader);
  1019. payload.outSPSPPSPayloadSize = &outSize;
  1020. nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
  1021. if (nv_status != NV_ENC_SUCCESS) {
  1022. return nvenc_print_error(avctx, nv_status, "GetSequenceParams failed");
  1023. }
  1024. avctx->extradata_size = outSize;
  1025. avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
  1026. if (!avctx->extradata) {
  1027. return AVERROR(ENOMEM);
  1028. }
  1029. memcpy(avctx->extradata, tmpHeader, outSize);
  1030. return 0;
  1031. }
  1032. static av_cold int nvenc_encode_init(AVCodecContext *avctx)
  1033. {
  1034. NvencContext *ctx = avctx->priv_data;
  1035. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  1036. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  1037. int res;
  1038. int i;
  1039. int surfaceCount = 0;
  1040. if (!nvenc_dyload_nvenc(avctx))
  1041. return AVERROR_EXTERNAL;
  1042. res = nvenc_setup_device(avctx);
  1043. if (res)
  1044. goto error;
  1045. res = nvenc_open_session(avctx);
  1046. if (res)
  1047. goto error;
  1048. res = nvenc_setup_encoder(avctx);
  1049. if (res)
  1050. goto error;
  1051. res = nvenc_setup_surfaces(avctx, &surfaceCount);
  1052. if (res)
  1053. goto error;
  1054. if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
  1055. res = nvenc_setup_extradata(avctx);
  1056. if (res)
  1057. goto error;
  1058. }
  1059. return 0;
  1060. error:
  1061. av_fifo_freep(&ctx->timestamp_list);
  1062. av_fifo_freep(&ctx->output_surface_ready_queue);
  1063. av_fifo_freep(&ctx->output_surface_queue);
  1064. for (i = 0; i < surfaceCount; ++i) {
  1065. if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
  1066. p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
  1067. av_frame_free(&ctx->surfaces[i].in_ref);
  1068. p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
  1069. }
  1070. av_freep(&ctx->surfaces);
  1071. if (ctx->nvencoder)
  1072. p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
  1073. ctx->nvencoder = NULL;
  1074. if (ctx->cu_context_internal)
  1075. dl_fn->cu_ctx_destroy(ctx->cu_context_internal);
  1076. ctx->cu_context = ctx->cu_context_internal = NULL;
  1077. nvenc_unload_nvenc(avctx);
  1078. return res;
  1079. }
  1080. static av_cold int nvenc_encode_close(AVCodecContext *avctx)
  1081. {
  1082. NvencContext *ctx = avctx->priv_data;
  1083. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  1084. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  1085. int i;
  1086. av_fifo_freep(&ctx->timestamp_list);
  1087. av_fifo_freep(&ctx->output_surface_ready_queue);
  1088. av_fifo_freep(&ctx->output_surface_queue);
  1089. if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
  1090. for (i = 0; i < ctx->max_surface_count; ++i) {
  1091. if (ctx->surfaces[i].input_surface) {
  1092. p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->surfaces[i].in_map.mappedResource);
  1093. }
  1094. }
  1095. for (i = 0; i < ctx->nb_registered_frames; i++) {
  1096. if (ctx->registered_frames[i].regptr)
  1097. p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
  1098. }
  1099. ctx->nb_registered_frames = 0;
  1100. }
  1101. for (i = 0; i < ctx->max_surface_count; ++i) {
  1102. if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
  1103. p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
  1104. av_frame_free(&ctx->surfaces[i].in_ref);
  1105. p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
  1106. }
  1107. av_freep(&ctx->surfaces);
  1108. ctx->max_surface_count = 0;
  1109. p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
  1110. ctx->nvencoder = NULL;
  1111. if (ctx->cu_context_internal)
  1112. dl_fn->cu_ctx_destroy(ctx->cu_context_internal);
  1113. ctx->cu_context = ctx->cu_context_internal = NULL;
  1114. nvenc_unload_nvenc(avctx);
  1115. return 0;
  1116. }
  1117. static NvencSurface *get_free_frame(NvencContext *ctx)
  1118. {
  1119. int i;
  1120. for (i = 0; i < ctx->max_surface_count; ++i) {
  1121. if (!ctx->surfaces[i].lockCount) {
  1122. ctx->surfaces[i].lockCount = 1;
  1123. return &ctx->surfaces[i];
  1124. }
  1125. }
  1126. return NULL;
  1127. }
  1128. static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
  1129. NV_ENC_LOCK_INPUT_BUFFER *lockBufferParams, const AVFrame *frame)
  1130. {
  1131. uint8_t *buf = lockBufferParams->bufferDataPtr;
  1132. int off = inSurf->height * lockBufferParams->pitch;
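/* off is the byte size of one full-resolution plane in the locked input buffer (height * pitch) */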
  1133. if (frame->format == AV_PIX_FMT_YUV420P) {
  1134. av_image_copy_plane(buf, lockBufferParams->pitch,
  1135. frame->data[0], frame->linesize[0],
  1136. avctx->width, avctx->height);
  1137. buf += off;
  1138. av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
  1139. frame->data[2], frame->linesize[2],
  1140. avctx->width >> 1, avctx->height >> 1);
  1141. buf += off >> 2;
  1142. av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
  1143. frame->data[1], frame->linesize[1],
  1144. avctx->width >> 1, avctx->height >> 1);
  1145. } else if (frame->format == AV_PIX_FMT_NV12) {
  1146. av_image_copy_plane(buf, lockBufferParams->pitch,
  1147. frame->data[0], frame->linesize[0],
  1148. avctx->width, avctx->height);
  1149. buf += off;
  1150. av_image_copy_plane(buf, lockBufferParams->pitch,
  1151. frame->data[1], frame->linesize[1],
  1152. avctx->width, avctx->height >> 1);
  1153. } else if (frame->format == AV_PIX_FMT_YUV444P) {
  1154. av_image_copy_plane(buf, lockBufferParams->pitch,
  1155. frame->data[0], frame->linesize[0],
  1156. avctx->width, avctx->height);
  1157. buf += off;
  1158. av_image_copy_plane(buf, lockBufferParams->pitch,
  1159. frame->data[1], frame->linesize[1],
  1160. avctx->width, avctx->height);
  1161. buf += off;
  1162. av_image_copy_plane(buf, lockBufferParams->pitch,
  1163. frame->data[2], frame->linesize[2],
  1164. avctx->width, avctx->height);
  1165. } else {
  1166. av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
  1167. return AVERROR(EINVAL);
  1168. }
  1169. return 0;
  1170. }
  1171. static int nvenc_find_free_reg_resource(AVCodecContext *avctx)
  1172. {
  1173. NvencContext *ctx = avctx->priv_data;
  1174. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  1175. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  1176. int i;
  1177. if (ctx->nb_registered_frames == FF_ARRAY_ELEMS(ctx->registered_frames)) {
  1178. for (i = 0; i < ctx->nb_registered_frames; i++) {
  1179. if (!ctx->registered_frames[i].mapped) {
  1180. if (ctx->registered_frames[i].regptr) {
  1181. p_nvenc->nvEncUnregisterResource(ctx->nvencoder,
  1182. ctx->registered_frames[i].regptr);
  1183. ctx->registered_frames[i].regptr = NULL;
  1184. }
  1185. return i;
  1186. }
  1187. }
  1188. } else {
  1189. return ctx->nb_registered_frames++;
  1190. }
  1191. av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n");
  1192. return AVERROR(ENOMEM);
  1193. }
  1194. static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
  1195. {
  1196. NvencContext *ctx = avctx->priv_data;
  1197. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  1198. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  1199. AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
  1200. NV_ENC_REGISTER_RESOURCE reg;
  1201. int i, idx, ret;
  1202. for (i = 0; i < ctx->nb_registered_frames; i++) {
  1203. if (ctx->registered_frames[i].ptr == (CUdeviceptr)frame->data[0])
  1204. return i;
  1205. }
  1206. idx = nvenc_find_free_reg_resource(avctx);
  1207. if (idx < 0)
  1208. return idx;
  1209. reg.version = NV_ENC_REGISTER_RESOURCE_VER;
  1210. reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
  1211. reg.width = frames_ctx->width;
  1212. reg.height = frames_ctx->height;
  1213. reg.bufferFormat = ctx->surfaces[0].format;
  1214. reg.pitch = frame->linesize[0];
  1215. reg.resourceToRegister = frame->data[0];
  1216. ret = p_nvenc->nvEncRegisterResource(ctx->nvencoder, &reg);
  1217. if (ret != NV_ENC_SUCCESS) {
  1218. nvenc_print_error(avctx, ret, "Error registering an input resource");
  1219. return AVERROR_UNKNOWN;
  1220. }
  1221. ctx->registered_frames[idx].ptr = (CUdeviceptr)frame->data[0];
  1222. ctx->registered_frames[idx].regptr = reg.registeredResource;
  1223. return idx;
  1224. }
  1225. static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
  1226. NvencSurface *nvenc_frame)
  1227. {
  1228. NvencContext *ctx = avctx->priv_data;
  1229. NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
  1230. NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  1231. int res;
  1232. NVENCSTATUS nv_status;
  1233. if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
  1234. int reg_idx = nvenc_register_frame(avctx, frame);
  1235. if (reg_idx < 0) {
  1236. av_log(avctx, AV_LOG_ERROR, "Could not register an input CUDA frame\n");
  1237. return reg_idx;
  1238. }
  1239. res = av_frame_ref(nvenc_frame->in_ref, frame);
  1240. if (res < 0)
  1241. return res;
  1242. nvenc_frame->in_map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
  1243. nvenc_frame->in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;
  1244. nv_status = p_nvenc->nvEncMapInputResource(ctx->nvencoder, &nvenc_frame->in_map);
  1245. if (nv_status != NV_ENC_SUCCESS) {
  1246. av_frame_unref(nvenc_frame->in_ref);
  1247. return nvenc_print_error(avctx, nv_status, "Error mapping an input resource");
  1248. }
  1249. ctx->registered_frames[reg_idx].mapped = 1;
  1250. nvenc_frame->reg_idx = reg_idx;
  1251. nvenc_frame->input_surface = nvenc_frame->in_map.mappedResource;
  1252. return 0;
  1253. } else {
  1254. NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
  1255. lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
  1256. lockBufferParams.inputBuffer = nvenc_frame->input_surface;
  1257. nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
  1258. if (nv_status != NV_ENC_SUCCESS) {
  1259. return nvenc_print_error(avctx, nv_status, "Failed locking nvenc input buffer");
  1260. }
  1261. res = nvenc_copy_frame(avctx, nvenc_frame, &lockBufferParams, frame);
  1262. nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
  1263. if (nv_status != NV_ENC_SUCCESS) {
  1264. return nvenc_print_error(avctx, nv_status, "Failed unlocking input buffer!");
  1265. }
  1266. return res;
  1267. }
  1268. }
static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
                                            NV_ENC_PIC_PARAMS *params)
{
    NvencContext *ctx = avctx->priv_data;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        params->codecPicParams.h264PicParams.sliceMode =
            ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
        params->codecPicParams.h264PicParams.sliceModeData =
            ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
        break;
    case AV_CODEC_ID_H265:
        params->codecPicParams.hevcPicParams.sliceMode =
            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
        params->codecPicParams.hevcPicParams.sliceModeData =
            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
        break;
    }
}
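
/*
 * Drain one finished surface: lock the output bitstream, copy it into the
 * packet, attach picture type and QP stats, and derive pts/dts from the
 * timestamp queue.
 */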
static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSurface *tmpoutsurf)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    uint32_t slice_mode_data;
    uint32_t *slice_offsets;
    NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
    NVENCSTATUS nv_status;
    int res = 0;

    enum AVPictureType pict_type;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
        break;
    case AV_CODEC_ID_H265:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
        res = AVERROR(EINVAL);
        goto error;
    }

    slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
    if (!slice_offsets)
        return AVERROR(ENOMEM);

    lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;

    lock_params.doNotWait       = 0;
    lock_params.outputBitstream = tmpoutsurf->output_surface;
    lock_params.sliceOffsets    = slice_offsets;

    nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
    if (nv_status != NV_ENC_SUCCESS) {
        res = nvenc_print_error(avctx, nv_status, "Failed locking bitstream buffer");
        goto error;
    }

    res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes, 0);
    if (res) {
        p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
        goto error;
    }

    memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);

    nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
    if (nv_status != NV_ENC_SUCCESS)
        nvenc_print_error(avctx, nv_status, "Failed unlocking bitstream buffer, expect the gates of mordor to open");

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, tmpoutsurf->in_map.mappedResource);
        av_frame_unref(tmpoutsurf->in_ref);
        ctx->registered_frames[tmpoutsurf->reg_idx].mapped = 0;
        tmpoutsurf->input_surface = NULL;
    }

    switch (lock_params.pictureType) {
    case NV_ENC_PIC_TYPE_IDR:
        pkt->flags |= AV_PKT_FLAG_KEY;
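        /* An IDR picture is also an intra picture; fall through to set the type. */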
    case NV_ENC_PIC_TYPE_I:
        pict_type = AV_PICTURE_TYPE_I;
        break;
    case NV_ENC_PIC_TYPE_P:
        pict_type = AV_PICTURE_TYPE_P;
        break;
    case NV_ENC_PIC_TYPE_B:
        pict_type = AV_PICTURE_TYPE_B;
        break;
    case NV_ENC_PIC_TYPE_BI:
        pict_type = AV_PICTURE_TYPE_BI;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
        av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
        res = AVERROR_EXTERNAL;
        goto error;
    }

#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    avctx->coded_frame->pict_type = pict_type;
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    ff_side_data_set_encoder_stats(pkt,
        (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);

    pkt->pts = lock_params.outputTimeStamp;
    pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list);

    /* when there are B-frames, shift the DTS so it stays behind the PTS */
    if (ctx->encode_config.frameIntervalP >= 2)
        pkt->dts -= 1;

    if (pkt->dts > pkt->pts)
        pkt->dts = pkt->pts;

    if (ctx->last_dts != AV_NOPTS_VALUE && pkt->dts <= ctx->last_dts)
        pkt->dts = ctx->last_dts + 1;

    ctx->last_dts = pkt->dts;

    av_free(slice_offsets);

    return 0;

error:
    av_free(slice_offsets);
    timestamp_queue_dequeue(ctx->timestamp_list);

    return res;
}
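
/*
 * A packet may be returned once at least one surface is ready and either we
 * are flushing or enough surfaces are queued to satisfy the configured delay.
 */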
static int output_ready(NvencContext *ctx, int flush)
{
    int nb_ready, nb_pending;

    nb_ready   = av_fifo_size(ctx->output_surface_ready_queue) / sizeof(NvencSurface*);
    nb_pending = av_fifo_size(ctx->output_surface_queue)       / sizeof(NvencSurface*);
    return nb_ready > 0 && (flush || nb_ready + nb_pending >= ctx->buffer_delay);
}
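
/*
 * encode2 callback: upload or map the incoming frame, submit it to NVENC
 * (or signal EOS when frame is NULL) and emit a packet once one is ready.
 */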
static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                              const AVFrame *frame, int *got_packet)
{
    NVENCSTATUS nv_status;
    NvencSurface *tmpoutsurf, *inSurf;
    int res;

    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_PIC_PARAMS pic_params = { 0 };
    pic_params.version = NV_ENC_PIC_PARAMS_VER;

    if (frame) {
        inSurf = get_free_frame(ctx);
        av_assert0(inSurf);

        res = nvenc_upload_frame(avctx, frame, inSurf);
        if (res) {
            inSurf->lockCount = 0;
            return res;
        }

        pic_params.inputBuffer     = inSurf->input_surface;
        pic_params.bufferFmt       = inSurf->format;
        pic_params.inputWidth      = avctx->width;
        pic_params.inputHeight     = avctx->height;
        pic_params.outputBitstream = inSurf->output_surface;
        pic_params.completionEvent = 0;

        if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
            if (frame->top_field_first) {
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
            } else {
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
            }
        } else {
            pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
        }

        pic_params.encodePicFlags = 0;
        pic_params.inputTimeStamp = frame->pts;
        pic_params.inputDuration  = 0;

        nvenc_codec_specific_pic_params(avctx, &pic_params);

        timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
    } else {
        pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
    }

    nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
    if (frame && nv_status == NV_ENC_ERR_NEED_MORE_INPUT)
        av_fifo_generic_write(ctx->output_surface_queue, &inSurf, sizeof(inSurf), NULL);

    if (nv_status != NV_ENC_SUCCESS && nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
        return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");
    }

    if (nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
        while (av_fifo_size(ctx->output_surface_queue) > 0) {
            av_fifo_generic_read(ctx->output_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
            av_fifo_generic_write(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
        }

        if (frame)
            av_fifo_generic_write(ctx->output_surface_ready_queue, &inSurf, sizeof(inSurf), NULL);
    }

    if (output_ready(ctx, !frame)) {
        av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
        res = process_output_surface(avctx, pkt, tmpoutsurf);

        if (res)
            return res;

        av_assert0(tmpoutsurf->lockCount);
        tmpoutsurf->lockCount--;

        *got_packet = 1;
    } else {
        *got_packet = 0;
    }

    return 0;
}
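
/* Input pixel formats; CUDA device frames are only available when built with CUDA support. */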
static const enum AVPixelFormat pix_fmts_nvenc[] = {
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV444P,
#if CONFIG_CUDA
    AV_PIX_FMT_CUDA,
#endif
    AV_PIX_FMT_NONE
};
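
/* Encoder private options, exposed to the user as AVOptions. */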
#define OFFSET(x) offsetof(NvencContext, x)
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
static const AVOption options[] = {
    { "preset", "Set the encoding preset (one of slow = hq 2pass, medium = hq, fast = hp, hq, hp, bd, ll, llhq, llhp, lossless, losslesshp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "medium" }, 0, 0, VE },
    { "profile", "Set the encoding profile (high, main, baseline or high444p)", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = "main" }, 0, 0, VE },
    { "level", "Set the encoding level restriction (auto, 1.0, 1.0b, 1.1, 1.2, ..., 4.2, 5.0, 5.1)", OFFSET(level), AV_OPT_TYPE_STRING, { .str = "auto" }, 0, 0, VE },
    { "tier", "Set the encoding tier (main or high)", OFFSET(tier), AV_OPT_TYPE_STRING, { .str = "main" }, 0, 0, VE },
    { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
    { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
    { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
    { "delay", "Delays frame output by the given number of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
    { NULL }
};
static const AVCodecDefault nvenc_defaults[] = {
    { "b", "2M" },
    { "qmin", "-1" },
    { "qmax", "-1" },
    { "qdiff", "-1" },
    { "qblur", "-1" },
    { "qcomp", "-1" },
    { "g", "250" },
    { "bf", "0" },
    { NULL },
};
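
/* The encoder is registered under several names (nvenc, nvenc_h264, nvenc_hevc); they share the option table, defaults and callbacks. */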
#if CONFIG_NVENC_ENCODER
static const AVClass nvenc_class = {
    .class_name = "nvenc",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

AVCodec ff_nvenc_encoder = {
    .name           = "nvenc",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC h264 encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .priv_data_size = sizeof(NvencContext),
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .priv_class     = &nvenc_class,
    .defaults       = nvenc_defaults,
    .pix_fmts       = pix_fmts_nvenc,
};
#endif
/* Add an alias for nvenc_h264 */
#if CONFIG_NVENC_H264_ENCODER
static const AVClass nvenc_h264_class = {
    .class_name = "nvenc_h264",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

AVCodec ff_nvenc_h264_encoder = {
    .name           = "nvenc_h264",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC h264 encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .priv_data_size = sizeof(NvencContext),
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .priv_class     = &nvenc_h264_class,
    .defaults       = nvenc_defaults,
    .pix_fmts       = pix_fmts_nvenc,
};
#endif
#if CONFIG_NVENC_HEVC_ENCODER
static const AVClass nvenc_hevc_class = {
    .class_name = "nvenc_hevc",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

AVCodec ff_nvenc_hevc_encoder = {
    .name           = "nvenc_hevc",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC hevc encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H265,
    .priv_data_size = sizeof(NvencContext),
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .priv_class     = &nvenc_hevc_class,
    .defaults       = nvenc_defaults,
    .pix_fmts       = pix_fmts_nvenc,
};
#endif