/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#if CONFIG_LIBDRM
#include <unistd.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif
typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer buf;
    VkQueue queue;
    VkFence fence;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Properties */
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Image uploading */
    VulkanExecCtx cmd;

    /* Extensions */
    uint64_t extensions;

    /* Settings */
    int use_linear_images;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    VulkanExecCtx cmd;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#endif
} AVVkFrameInternal;
#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name)       \
                                    vkGetInstanceProcAddr(inst, #name)

#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      |             \
                             VK_IMAGE_USAGE_STORAGE_BIT      |             \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT |             \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)

#define ADD_VAL_TO_LIST(list, count, val)                                  \
    do {                                                                   \
        list = av_realloc_array(list, sizeof(*list), ++count);            \
        if (!list) {                                                       \
            err = AVERROR(ENOMEM);                                         \
            goto end;                                                      \
        }                                                                  \
        list[count - 1] = val;                                             \
    } while(0)
static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[3];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },

    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_ABGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },

    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};
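
/* Returns the per-plane VkFormat mapping for a software pixel format from the
 * table above, or NULL if the format has no Vulkan representation */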
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}
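
/* Checks whether every plane of a pixel format supports the default usage
 * flags for the requested tiling (linear or optimal) on this physical device */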
static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
                               int linear)
{
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}
enum VulkanExtensions {
    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL <<  0, /* VK_EXT_external_memory_dma_buf */
    EXT_DRM_MODIFIER_FLAGS     = 1ULL <<  1, /* VK_EXT_image_drm_format_modifier */
    EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  2, /* VK_KHR_external_memory_fd */
    EXT_EXTERNAL_FD_SEM        = 1ULL <<  3, /* VK_KHR_external_semaphore_fd */

    EXT_OPTIONAL               = 1ULL << 62,
    EXT_REQUIRED               = 1ULL << 63,
};

typedef struct VulkanOptExtension {
    const char *name;
    uint64_t flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    /* For future use */
};

static const VulkanOptExtension optional_device_exts[] = {
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,        EXT_EXTERNAL_FD_MEMORY,     },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,   EXT_EXTERNAL_DMABUF_MEMORY, },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS,     },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,     EXT_EXTERNAL_FD_SEM,        },
};
/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}
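
/* Debug messenger callback: maps Vulkan message severities onto libav log
 * levels and prints any command buffer labels attached to the message */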
static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                VkDebugUtilsMessageTypeFlagsEXT messageType,
                                const VkDebugUtilsMessengerCallbackDataEXT *data,
                                void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}
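
/* Checks which required/optional instance or device extensions are supported
 * and builds the list of extension names to enable; required extensions that
 * are missing cause an error, optional ones are recorded in priv->extensions */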
static int check_extensions(AVHWDeviceContext *ctx, int dev,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        int req = optional_exts[i].flag & EXT_REQUIRED;
        tstr = optional_exts[i].name;

        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found) {
            int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE;
            av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr);
            if (req) {
                err = AVERROR(EINVAL);
                goto end;
            }
            continue;
        }
        if (!req)
            p->extensions |= optional_exts[i].flag;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);

        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto end;
        }
    }

    *dst = extension_names;
    *num = extensions_found;

end:
    av_free(sup_ext);
    return err;
}
  309. /* Creates a VkInstance */
  310. static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
  311. {
  312. int err = 0;
  313. VkResult ret;
  314. VulkanDevicePriv *p = ctx->internal->priv;
  315. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  316. AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
  317. const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
  318. VkApplicationInfo application_info = {
  319. .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
  320. .pEngineName = "libavutil",
  321. .apiVersion = VK_API_VERSION_1_1,
  322. .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
  323. LIBAVUTIL_VERSION_MINOR,
  324. LIBAVUTIL_VERSION_MICRO),
  325. };
  326. VkInstanceCreateInfo inst_props = {
  327. .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
  328. .pApplicationInfo = &application_info,
  329. };
  330. /* Check for present/missing extensions */
  331. err = check_extensions(ctx, 0, &inst_props.ppEnabledExtensionNames,
  332. &inst_props.enabledExtensionCount, debug_mode);
  333. if (err < 0)
  334. return err;
  335. if (debug_mode) {
  336. static const char *layers[] = { "VK_LAYER_LUNARG_standard_validation" };
  337. inst_props.ppEnabledLayerNames = layers;
  338. inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
  339. }
  340. /* Try to create the instance */
  341. ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);
  342. /* Free used memory */
  343. av_free((void *)inst_props.ppEnabledExtensionNames);
  344. /* Check for errors */
  345. if (ret != VK_SUCCESS) {
  346. av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
  347. vk_ret2str(ret));
  348. return AVERROR_EXTERNAL;
  349. }
  350. if (debug_mode) {
  351. VkDebugUtilsMessengerCreateInfoEXT dbg = {
  352. .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
  353. .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
  354. VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
  355. VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
  356. VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
  357. .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
  358. VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
  359. VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
  360. .pfnUserCallback = vk_dbg_callback,
  361. .pUserData = ctx,
  362. };
  363. VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);
  364. pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
  365. hwctx->alloc, &p->debug_ctx);
  366. }
  367. return 0;
  368. }
  369. typedef struct VulkanDeviceSelection {
  370. uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
  371. int has_uuid;
  372. const char *name; /* Will use this second unless NULL */
  373. uint32_t pci_device; /* Will use this third unless 0x0 */
  374. uint32_t vendor_id; /* Last resort to find something deterministic */
  375. int index; /* Finally fall back to index */
  376. } VulkanDeviceSelection;
  377. static const char *vk_dev_type(enum VkPhysicalDeviceType type)
  378. {
  379. switch (type) {
  380. case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
  381. case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete";
  382. case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual";
  383. case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software";
  384. default: return "unknown";
  385. }
  386. }
  387. /* Finds a device */
  388. static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
  389. {
  390. int err = 0, choice = -1;
  391. uint32_t num;
  392. VkResult ret;
  393. VkPhysicalDevice *devices = NULL;
  394. VkPhysicalDeviceIDProperties *idp = NULL;
  395. VkPhysicalDeviceProperties2 *prop = NULL;
  396. VulkanDevicePriv *p = ctx->internal->priv;
  397. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  398. ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
  399. if (ret != VK_SUCCESS || !num) {
  400. av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
  401. return AVERROR(ENODEV);
  402. }
  403. devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
  404. if (!devices)
  405. return AVERROR(ENOMEM);
  406. ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
  407. if (ret != VK_SUCCESS) {
  408. av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
  409. vk_ret2str(ret));
  410. err = AVERROR(ENODEV);
  411. goto end;
  412. }
  413. prop = av_mallocz_array(num, sizeof(*prop));
  414. if (!prop) {
  415. err = AVERROR(ENOMEM);
  416. goto end;
  417. }
  418. idp = av_mallocz_array(num, sizeof(*idp));
  419. if (!idp) {
  420. err = AVERROR(ENOMEM);
  421. goto end;
  422. }
  423. av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
  424. for (int i = 0; i < num; i++) {
  425. idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
  426. prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
  427. prop[i].pNext = &idp[i];
  428. vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
  429. av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i,
  430. prop[i].properties.deviceName,
  431. vk_dev_type(prop[i].properties.deviceType),
  432. prop[i].properties.deviceID);
  433. }
  434. if (select->has_uuid) {
  435. for (int i = 0; i < num; i++) {
  436. if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
  437. choice = i;
  438. goto end;
  439. }
  440. }
  441. av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
  442. err = AVERROR(ENODEV);
  443. goto end;
  444. } else if (select->name) {
  445. av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
  446. for (int i = 0; i < num; i++) {
  447. if (strstr(prop[i].properties.deviceName, select->name)) {
  448. choice = i;
  449. goto end;
  450. }
  451. }
  452. av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
  453. select->name);
  454. err = AVERROR(ENODEV);
  455. goto end;
  456. } else if (select->pci_device) {
  457. av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
  458. for (int i = 0; i < num; i++) {
  459. if (select->pci_device == prop[i].properties.deviceID) {
  460. choice = i;
  461. goto end;
  462. }
  463. }
  464. av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
  465. select->pci_device);
  466. err = AVERROR(EINVAL);
  467. goto end;
  468. } else if (select->vendor_id) {
  469. av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
  470. for (int i = 0; i < num; i++) {
  471. if (select->vendor_id == prop[i].properties.vendorID) {
  472. choice = i;
  473. goto end;
  474. }
  475. }
  476. av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
  477. select->vendor_id);
  478. err = AVERROR(ENODEV);
  479. goto end;
  480. } else {
  481. if (select->index < num) {
  482. choice = select->index;
  483. goto end;
  484. }
  485. av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
  486. select->index);
  487. err = AVERROR(ENODEV);
  488. goto end;
  489. }
  490. end:
  491. if (choice > -1) {
  492. p->dev_is_nvidia = (prop[choice].properties.vendorID == 0x10de);
  493. hwctx->phys_dev = devices[choice];
  494. }
  495. av_free(devices);
  496. av_free(prop);
  497. av_free(idp);
  498. return err;
  499. }
  500. static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
  501. {
  502. uint32_t num;
  503. VkQueueFamilyProperties *qs = NULL;
  504. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  505. int graph_index = -1, comp_index = -1, tx_index = -1;
  506. VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
  507. /* First get the number of queue families */
  508. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
  509. if (!num) {
  510. av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
  511. return AVERROR_EXTERNAL;
  512. }
  513. /* Then allocate memory */
  514. qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
  515. if (!qs)
  516. return AVERROR(ENOMEM);
  517. /* Finally retrieve the queue families */
  518. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);
  519. #define SEARCH_FLAGS(expr, out) \
  520. for (int i = 0; i < num; i++) { \
  521. const VkQueueFlagBits flags = qs[i].queueFlags; \
  522. if (expr) { \
  523. out = i; \
  524. break; \
  525. } \
  526. }
  527. SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)
  528. SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
  529. comp_index)
  530. SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
  531. (i != comp_index), tx_index)
  532. #undef SEARCH_FLAGS
  533. #define QF_FLAGS(flags) \
  534. ((flags) & VK_QUEUE_GRAPHICS_BIT ) ? "(graphics) " : "", \
  535. ((flags) & VK_QUEUE_COMPUTE_BIT ) ? "(compute) " : "", \
  536. ((flags) & VK_QUEUE_TRANSFER_BIT ) ? "(transfer) " : "", \
  537. ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : ""
  538. av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, "
  539. "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags));
  540. hwctx->queue_family_index = graph_index;
  541. hwctx->queue_family_tx_index = graph_index;
  542. hwctx->queue_family_comp_index = graph_index;
  543. pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index;
  544. if (comp_index != -1) {
  545. av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, "
  546. "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags));
  547. hwctx->queue_family_tx_index = comp_index;
  548. hwctx->queue_family_comp_index = comp_index;
  549. pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index;
  550. }
  551. if (tx_index != -1) {
  552. av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, "
  553. "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags));
  554. hwctx->queue_family_tx_index = tx_index;
  555. pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index;
  556. }
  557. #undef QF_FLAGS
  558. av_free(qs);
  559. return 0;
  560. }
  561. static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
  562. int queue_family_index)
  563. {
  564. VkResult ret;
  565. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  566. VkCommandPoolCreateInfo cqueue_create = {
  567. .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
  568. .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
  569. .queueFamilyIndex = queue_family_index,
  570. };
  571. VkCommandBufferAllocateInfo cbuf_create = {
  572. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
  573. .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
  574. .commandBufferCount = 1,
  575. };
  576. VkFenceCreateInfo fence_spawn = {
  577. .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
  578. };
  579. ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
  580. hwctx->alloc, &cmd->fence);
  581. if (ret != VK_SUCCESS) {
  582. av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
  583. vk_ret2str(ret));
  584. return AVERROR_EXTERNAL;
  585. }
  586. ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
  587. hwctx->alloc, &cmd->pool);
  588. if (ret != VK_SUCCESS) {
  589. av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
  590. vk_ret2str(ret));
  591. return AVERROR_EXTERNAL;
  592. }
  593. cbuf_create.commandPool = cmd->pool;
  594. ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
  595. if (ret != VK_SUCCESS) {
  596. av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
  597. vk_ret2str(ret));
  598. return AVERROR_EXTERNAL;
  599. }
  600. vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
  601. &cmd->queue);
  602. return 0;
  603. }
  604. static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
  605. {
  606. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  607. if (cmd->fence)
  608. vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
  609. if (cmd->buf)
  610. vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
  611. if (cmd->pool)
  612. vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
  613. }
  614. static void vulkan_device_free(AVHWDeviceContext *ctx)
  615. {
  616. VulkanDevicePriv *p = ctx->internal->priv;
  617. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  618. free_exec_ctx(ctx, &p->cmd);
  619. vkDestroyDevice(hwctx->act_dev, hwctx->alloc);
  620. if (p->debug_ctx) {
  621. VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
  622. pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
  623. hwctx->alloc);
  624. }
  625. vkDestroyInstance(hwctx->inst, hwctx->alloc);
  626. }
  627. static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
  628. VulkanDeviceSelection *dev_select,
  629. AVDictionary *opts, int flags)
  630. {
  631. int err = 0;
  632. VkResult ret;
  633. AVDictionaryEntry *opt_d;
  634. VulkanDevicePriv *p = ctx->internal->priv;
  635. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  636. VkDeviceQueueCreateInfo queue_create_info[3] = {
  637. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
  638. .pQueuePriorities = (float []){ 1.0f },
  639. .queueCount = 1, },
  640. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
  641. .pQueuePriorities = (float []){ 1.0f },
  642. .queueCount = 1, },
  643. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
  644. .pQueuePriorities = (float []){ 1.0f },
  645. .queueCount = 1, },
  646. };
  647. VkDeviceCreateInfo dev_info = {
  648. .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
  649. .pQueueCreateInfos = queue_create_info,
  650. .queueCreateInfoCount = 0,
  651. };
  652. ctx->free = vulkan_device_free;
  653. /* Create an instance if not given one */
  654. if ((err = create_instance(ctx, opts)))
  655. goto end;
  656. /* Find a device (if not given one) */
  657. if ((err = find_device(ctx, dev_select)))
  658. goto end;
  659. vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
  660. av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
  661. av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
  662. av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyOffsetAlignment: %li\n",
  663. p->props.limits.optimalBufferCopyOffsetAlignment);
  664. av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %li\n",
  665. p->props.limits.optimalBufferCopyRowPitchAlignment);
  666. av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %li\n",
  667. p->props.limits.minMemoryMapAlignment);
  668. /* Search queue family */
  669. if ((err = search_queue_families(ctx, &dev_info)))
  670. goto end;
  671. if ((err = check_extensions(ctx, 1, &dev_info.ppEnabledExtensionNames,
  672. &dev_info.enabledExtensionCount, 0)))
  673. goto end;
  674. ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
  675. &hwctx->act_dev);
  676. av_free((void *)dev_info.ppEnabledExtensionNames);
  677. if (ret != VK_SUCCESS) {
  678. av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
  679. vk_ret2str(ret));
  680. err = AVERROR_EXTERNAL;
  681. goto end;
  682. }
  683. /* Tiled images setting, use them by default */
  684. opt_d = av_dict_get(opts, "linear_images", NULL, 0);
  685. if (opt_d)
  686. p->use_linear_images = strtol(opt_d->value, NULL, 10);
  687. end:
  688. return err;
  689. }
  690. static int vulkan_device_init(AVHWDeviceContext *ctx)
  691. {
  692. int err;
  693. uint32_t queue_num;
  694. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  695. VulkanDevicePriv *p = ctx->internal->priv;
  696. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
  697. if (!queue_num) {
  698. av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
  699. return AVERROR_EXTERNAL;
  700. }
  701. #define CHECK_QUEUE(type, n) \
  702. if (n >= queue_num) { \
  703. av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
  704. type, n, queue_num); \
  705. return AVERROR(EINVAL); \
  706. }
  707. CHECK_QUEUE("graphics", hwctx->queue_family_index)
  708. CHECK_QUEUE("upload", hwctx->queue_family_tx_index)
  709. CHECK_QUEUE("compute", hwctx->queue_family_comp_index)
  710. #undef CHECK_QUEUE
  711. /* Create exec context - if there's something invalid this will error out */
  712. err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
  713. if (err)
  714. return err;
  715. /* Get device capabilities */
  716. vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
  717. return 0;
  718. }
  719. static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
  720. AVDictionary *opts, int flags)
  721. {
  722. VulkanDeviceSelection dev_select = { 0 };
  723. if (device && device[0]) {
  724. char *end = NULL;
  725. dev_select.index = strtol(device, &end, 10);
  726. if (end == device) {
  727. dev_select.index = 0;
  728. dev_select.name = device;
  729. }
  730. }
  731. return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
  732. }
  733. static int vulkan_device_derive(AVHWDeviceContext *ctx,
  734. AVHWDeviceContext *src_ctx, int flags)
  735. {
  736. av_unused VulkanDeviceSelection dev_select = { 0 };
  737. /* If there's only one device on the system, then even if its not covered
  738. * by the following checks (e.g. non-PCIe ARM GPU), having an empty
  739. * dev_select will mean it'll get picked. */
  740. switch(src_ctx->type) {
  741. #if CONFIG_LIBDRM
  742. #if CONFIG_VAAPI
  743. case AV_HWDEVICE_TYPE_VAAPI: {
  744. AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
  745. const char *vendor = vaQueryVendorString(src_hwctx->display);
  746. if (!vendor) {
  747. av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
  748. return AVERROR_EXTERNAL;
  749. }
  750. if (strstr(vendor, "Intel"))
  751. dev_select.vendor_id = 0x8086;
  752. if (strstr(vendor, "AMD"))
  753. dev_select.vendor_id = 0x1002;
  754. return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
  755. }
  756. #endif
  757. case AV_HWDEVICE_TYPE_DRM: {
  758. AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
  759. drmDevice *drm_dev_info;
  760. int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
  761. if (err) {
  762. av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
  763. return AVERROR_EXTERNAL;
  764. }
  765. if (drm_dev_info->bustype == DRM_BUS_PCI)
  766. dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
  767. drmFreeDevice(&drm_dev_info);
  768. return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
  769. }
  770. #endif
  771. #if CONFIG_CUDA
  772. case AV_HWDEVICE_TYPE_CUDA: {
  773. AVHWDeviceContext *cuda_cu = src_ctx;
  774. AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
  775. AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
  776. CudaFunctions *cu = cu_internal->cuda_dl;
  777. int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
  778. cu_internal->cuda_device));
  779. if (ret < 0) {
  780. av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
  781. return AVERROR_EXTERNAL;
  782. }
  783. dev_select.has_uuid = 1;
  784. return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
  785. }
  786. #endif
  787. default:
  788. return AVERROR(ENOSYS);
  789. }
  790. }
  791. static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
  792. const void *hwconfig,
  793. AVHWFramesConstraints *constraints)
  794. {
  795. int count = 0;
  796. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  797. VulkanDevicePriv *p = ctx->internal->priv;
  798. for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
  799. count += pixfmt_is_supported(hwctx, i, p->use_linear_images);
  800. #if CONFIG_CUDA
  801. if (p->dev_is_nvidia)
  802. count++;
  803. #endif
  804. constraints->valid_sw_formats = av_malloc_array(count + 1,
  805. sizeof(enum AVPixelFormat));
  806. if (!constraints->valid_sw_formats)
  807. return AVERROR(ENOMEM);
  808. count = 0;
  809. for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
  810. if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
  811. constraints->valid_sw_formats[count++] = i;
  812. #if CONFIG_CUDA
  813. if (p->dev_is_nvidia)
  814. constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
  815. #endif
  816. constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
  817. constraints->min_width = 0;
  818. constraints->min_height = 0;
  819. constraints->max_width = p->props.limits.maxImageDimension2D;
  820. constraints->max_height = p->props.limits.maxImageDimension2D;
  821. constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
  822. if (!constraints->valid_hw_formats)
  823. return AVERROR(ENOMEM);
  824. constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
  825. constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
  826. return 0;
  827. }
  828. static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
  829. VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
  830. VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
  831. {
  832. VkResult ret;
  833. int index = -1;
  834. VulkanDevicePriv *p = ctx->internal->priv;
  835. AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
  836. VkMemoryAllocateInfo alloc_info = {
  837. .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
  838. .pNext = alloc_extension,
  839. };
  840. /* Align if we need to */
  841. if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
  842. req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);
  843. alloc_info.allocationSize = req->size;
  844. /* The vulkan spec requires memory types to be sorted in the "optimal"
  845. * order, so the first matching type we find will be the best/fastest one */
  846. for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
  847. /* The memory type must be supported by the requirements (bitfield) */
  848. if (!(req->memoryTypeBits & (1 << i)))
  849. continue;
  850. /* The memory type flags must include our properties */
  851. if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
  852. continue;
  853. /* Found a suitable memory type */
  854. index = i;
  855. break;
  856. }
  857. if (index < 0) {
  858. av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
  859. req_flags);
  860. return AVERROR(EINVAL);
  861. }
  862. alloc_info.memoryTypeIndex = index;
  863. ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
  864. dev_hwctx->alloc, mem);
  865. if (ret != VK_SUCCESS) {
  866. av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
  867. vk_ret2str(ret));
  868. return AVERROR(ENOMEM);
  869. }
  870. *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
  871. return 0;
  872. }
  873. static void vulkan_free_internal(AVVkFrameInternal *internal)
  874. {
  875. if (!internal)
  876. return;
  877. #if CONFIG_CUDA
  878. if (internal->cuda_fc_ref) {
  879. AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
  880. int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
  881. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  882. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  883. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  884. CudaFunctions *cu = cu_internal->cuda_dl;
  885. for (int i = 0; i < planes; i++) {
  886. if (internal->cu_sem[i])
  887. CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
  888. if (internal->cu_mma[i])
  889. CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
  890. if (internal->ext_mem[i])
  891. CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
  892. }
  893. av_buffer_unref(&internal->cuda_fc_ref);
  894. }
  895. #endif
  896. av_free(internal);
  897. }
  898. static void vulkan_frame_free(void *opaque, uint8_t *data)
  899. {
  900. AVVkFrame *f = (AVVkFrame *)data;
  901. AVHWFramesContext *hwfc = opaque;
  902. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  903. int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  904. vulkan_free_internal(f->internal);
  905. for (int i = 0; i < planes; i++) {
  906. vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
  907. vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
  908. vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
  909. }
  910. av_free(f);
  911. }
  912. static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
  913. void *alloc_pnext, size_t alloc_pnext_stride)
  914. {
  915. int err;
  916. VkResult ret;
  917. AVHWDeviceContext *ctx = hwfc->device_ctx;
  918. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  919. VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
  920. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  921. for (int i = 0; i < planes; i++) {
  922. int use_ded_mem;
  923. VkImageMemoryRequirementsInfo2 req_desc = {
  924. .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
  925. .image = f->img[i],
  926. };
  927. VkMemoryDedicatedAllocateInfo ded_alloc = {
  928. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
  929. .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
  930. };
  931. VkMemoryDedicatedRequirements ded_req = {
  932. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
  933. };
  934. VkMemoryRequirements2 req = {
  935. .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
  936. .pNext = &ded_req,
  937. };
  938. vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
  939. /* In case the implementation prefers/requires dedicated allocation */
  940. use_ded_mem = ded_req.prefersDedicatedAllocation |
  941. ded_req.requiresDedicatedAllocation;
  942. if (use_ded_mem)
  943. ded_alloc.image = f->img[i];
  944. /* Allocate memory */
  945. if ((err = alloc_mem(ctx, &req.memoryRequirements,
  946. f->tiling == VK_IMAGE_TILING_LINEAR ?
  947. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
  948. VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
  949. use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
  950. &f->flags, &f->mem[i])))
  951. return err;
  952. f->size[i] = req.memoryRequirements.size;
  953. bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
  954. bind_info[i].image = f->img[i];
  955. bind_info[i].memory = f->mem[i];
  956. }
  957. /* Bind the allocated memory to the images */
  958. ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
  959. if (ret != VK_SUCCESS) {
  960. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
  961. vk_ret2str(ret));
  962. return AVERROR_EXTERNAL;
  963. }
  964. return 0;
  965. }
  966. enum PrepMode {
  967. PREP_MODE_WRITE,
  968. PREP_MODE_RO_SHADER,
  969. };
  970. static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
  971. AVVkFrame *frame, enum PrepMode pmode)
  972. {
  973. VkResult ret;
  974. VkImageLayout new_layout;
  975. VkAccessFlags new_access;
  976. AVHWDeviceContext *ctx = hwfc->device_ctx;
  977. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  978. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  979. VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
  980. VkCommandBufferBeginInfo cmd_start = {
  981. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
  982. .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  983. };
  984. VkSubmitInfo s_info = {
  985. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  986. .commandBufferCount = 1,
  987. .pCommandBuffers = &ectx->buf,
  988. .pSignalSemaphores = frame->sem,
  989. .signalSemaphoreCount = planes,
  990. };
  991. switch (pmode) {
  992. case PREP_MODE_WRITE:
  993. new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  994. new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
  995. break;
  996. case PREP_MODE_RO_SHADER:
  997. new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
  998. new_access = VK_ACCESS_TRANSFER_READ_BIT;
  999. break;
  1000. }
  1001. ret = vkBeginCommandBuffer(ectx->buf, &cmd_start);
  1002. if (ret != VK_SUCCESS)
  1003. return AVERROR_EXTERNAL;
  1004. /* Change the image layout to something more optimal for writes.
  1005. * This also signals the newly created semaphore, making it usable
  1006. * for synchronization */
  1007. for (int i = 0; i < planes; i++) {
  1008. img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  1009. img_bar[i].srcAccessMask = 0x0;
  1010. img_bar[i].dstAccessMask = new_access;
  1011. img_bar[i].oldLayout = frame->layout[i];
  1012. img_bar[i].newLayout = new_layout;
  1013. img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  1014. img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  1015. img_bar[i].image = frame->img[i];
  1016. img_bar[i].subresourceRange.levelCount = 1;
  1017. img_bar[i].subresourceRange.layerCount = 1;
  1018. img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  1019. frame->layout[i] = img_bar[i].newLayout;
  1020. frame->access[i] = img_bar[i].dstAccessMask;
  1021. }
  1022. vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
  1023. VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
  1024. 0, NULL, 0, NULL, planes, img_bar);
  1025. ret = vkEndCommandBuffer(ectx->buf);
  1026. if (ret != VK_SUCCESS)
  1027. return AVERROR_EXTERNAL;
  1028. ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence);
  1029. if (ret != VK_SUCCESS) {
  1030. return AVERROR_EXTERNAL;
  1031. } else {
  1032. vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX);
  1033. vkResetFences(hwctx->act_dev, 1, &ectx->fence);
  1034. }
  1035. return 0;
  1036. }
  1037. static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
  1038. VkImageTiling tiling, VkImageUsageFlagBits usage,
  1039. void *create_pnext)
  1040. {
  1041. int err;
  1042. VkResult ret;
  1043. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1044. VulkanDevicePriv *p = ctx->internal->priv;
  1045. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1046. enum AVPixelFormat format = hwfc->sw_format;
  1047. const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
  1048. const int planes = av_pix_fmt_count_planes(format);
  1049. VkExportSemaphoreCreateInfo ext_sem_info = {
  1050. .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
  1051. .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
  1052. };
  1053. VkSemaphoreCreateInfo sem_spawn = {
  1054. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  1055. .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
  1056. };
  1057. AVVkFrame *f = av_vk_frame_alloc();
  1058. if (!f) {
  1059. av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
  1060. return AVERROR(ENOMEM);
  1061. }
  1062. /* Create the images */
  1063. for (int i = 0; i < planes; i++) {
  1064. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
  1065. int w = hwfc->width;
  1066. int h = hwfc->height;
  1067. const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
  1068. const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
  1069. VkImageCreateInfo image_create_info = {
  1070. .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
  1071. .pNext = create_pnext,
  1072. .imageType = VK_IMAGE_TYPE_2D,
  1073. .format = img_fmts[i],
  1074. .extent.width = p_w,
  1075. .extent.height = p_h,
  1076. .extent.depth = 1,
  1077. .mipLevels = 1,
  1078. .arrayLayers = 1,
  1079. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1080. .tiling = tiling,
  1081. .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
  1082. .usage = usage,
  1083. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  1084. .samples = VK_SAMPLE_COUNT_1_BIT,
  1085. };
  1086. ret = vkCreateImage(hwctx->act_dev, &image_create_info,
  1087. hwctx->alloc, &f->img[i]);
  1088. if (ret != VK_SUCCESS) {
  1089. av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
  1090. vk_ret2str(ret));
  1091. err = AVERROR(EINVAL);
  1092. goto fail;
  1093. }
  1094. /* Create semaphore */
  1095. ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
  1096. hwctx->alloc, &f->sem[i]);
  1097. if (ret != VK_SUCCESS) {
  1098. av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
  1099. vk_ret2str(ret));
  1100. err = AVERROR_EXTERNAL; goto fail;
  1101. }
  1102. f->layout[i] = image_create_info.initialLayout;
  1103. f->access[i] = 0x0;
  1104. }
  1105. f->flags = 0x0;
  1106. f->tiling = tiling;
  1107. *frame = f;
  1108. return 0;
  1109. fail:
  1110. vulkan_frame_free(hwfc, (uint8_t *)f);
  1111. return err;
  1112. }
  1113. /* Checks if an export flag is enabled, and if it is ORs it with *iexp */
  1114. static void try_export_flags(AVHWFramesContext *hwfc,
  1115. VkExternalMemoryHandleTypeFlags *comp_handle_types,
  1116. VkExternalMemoryHandleTypeFlagBits *iexp,
  1117. VkExternalMemoryHandleTypeFlagBits exp)
  1118. {
  1119. VkResult ret;
  1120. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1121. AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
  1122. VkExternalImageFormatProperties eprops = {
  1123. .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
  1124. };
  1125. VkImageFormatProperties2 props = {
  1126. .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
  1127. .pNext = &eprops,
  1128. };
  1129. VkPhysicalDeviceExternalImageFormatInfo enext = {
  1130. .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
  1131. .handleType = exp,
  1132. };
  1133. VkPhysicalDeviceImageFormatInfo2 pinfo = {
  1134. .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
  1135. .pNext = !exp ? NULL : &enext,
  1136. .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
  1137. .type = VK_IMAGE_TYPE_2D,
  1138. .tiling = hwctx->tiling,
  1139. .usage = hwctx->usage,
  1140. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1141. };
  1142. ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
  1143. &pinfo, &props);
  1144. if (ret == VK_SUCCESS) {
  1145. *iexp |= exp;
  1146. *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
  1147. }
  1148. }
  1149. static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
  1150. {
  1151. int err;
  1152. AVVkFrame *f;
  1153. AVBufferRef *avbuf = NULL;
  1154. AVHWFramesContext *hwfc = opaque;
  1155. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1156. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1157. VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
  1158. VkExternalMemoryHandleTypeFlags e = 0x0;
  1159. VkExternalMemoryImageCreateInfo eiinfo = {
  1160. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1161. .pNext = hwctx->create_pnext,
  1162. };
  1163. if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
  1164. try_export_flags(hwfc, &eiinfo.handleTypes, &e,
  1165. VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
  1166. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1167. try_export_flags(hwfc, &eiinfo.handleTypes, &e,
  1168. VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
  1169. for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
  1170. eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
  1171. eminfo[i].pNext = hwctx->alloc_pnext[i];
  1172. eminfo[i].handleTypes = e;
  1173. }
  1174. err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
  1175. eiinfo.handleTypes ? &eiinfo : NULL);
  1176. if (err)
  1177. return NULL;
  1178. err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
  1179. if (err)
  1180. goto fail;
  1181. err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_WRITE);
  1182. if (err)
  1183. goto fail;
  1184. avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
  1185. vulkan_frame_free, hwfc, 0);
  1186. if (!avbuf)
  1187. goto fail;
  1188. return avbuf;
  1189. fail:
  1190. vulkan_frame_free(hwfc, (uint8_t *)f);
  1191. return NULL;
  1192. }
  1193. static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
  1194. {
  1195. VulkanFramesPriv *fp = hwfc->internal->priv;
  1196. free_exec_ctx(hwfc->device_ctx, &fp->cmd);
  1197. }
  1198. static int vulkan_frames_init(AVHWFramesContext *hwfc)
  1199. {
  1200. int err;
  1201. AVVkFrame *f;
  1202. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1203. VulkanFramesPriv *fp = hwfc->internal->priv;
  1204. AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
  1205. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1206. if (hwfc->pool)
  1207. return 0;
  1208. /* Default pool flags */
  1209. hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
  1210. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1211. hwctx->usage |= DEFAULT_USAGE_FLAGS;
  1212. err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
  1213. dev_hwctx->queue_family_tx_index);
  1214. if (err)
  1215. return err;
  1216. /* Test to see if allocation will fail */
  1217. err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
  1218. hwctx->create_pnext);
  1219. if (err) {
  1220. free_exec_ctx(hwfc->device_ctx, &p->cmd);
  1221. return err;
  1222. }
  1223. vulkan_frame_free(hwfc, (uint8_t *)f);
  1224. hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
  1225. hwfc, vulkan_pool_alloc,
  1226. NULL);
  1227. if (!hwfc->internal->pool_internal) {
  1228. free_exec_ctx(hwfc->device_ctx, &p->cmd);
  1229. return AVERROR(ENOMEM);
  1230. }
  1231. return 0;
  1232. }
  1233. static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
  1234. {
  1235. frame->buf[0] = av_buffer_pool_get(hwfc->pool);
  1236. if (!frame->buf[0])
  1237. return AVERROR(ENOMEM);
  1238. frame->data[0] = frame->buf[0]->data;
  1239. frame->format = AV_PIX_FMT_VULKAN;
  1240. frame->width = hwfc->width;
  1241. frame->height = hwfc->height;
  1242. return 0;
  1243. }
  1244. static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
  1245. enum AVHWFrameTransferDirection dir,
  1246. enum AVPixelFormat **formats)
  1247. {
  1248. enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
  1249. if (!fmts)
  1250. return AVERROR(ENOMEM);
  1251. fmts[0] = hwfc->sw_format;
  1252. fmts[1] = AV_PIX_FMT_NONE;
  1253. *formats = fmts;
  1254. return 0;
  1255. }
  1256. typedef struct VulkanMapping {
  1257. AVVkFrame *frame;
  1258. int flags;
  1259. } VulkanMapping;
  1260. static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1261. {
  1262. VulkanMapping *map = hwmap->priv;
  1263. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1264. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1265. /* Check if buffer needs flushing */
  1266. if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
  1267. !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1268. VkResult ret;
  1269. VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1270. for (int i = 0; i < planes; i++) {
  1271. flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1272. flush_ranges[i].memory = map->frame->mem[i];
  1273. flush_ranges[i].size = VK_WHOLE_SIZE;
  1274. }
  1275. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
  1276. flush_ranges);
  1277. if (ret != VK_SUCCESS) {
  1278. av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  1279. vk_ret2str(ret));
  1280. }
  1281. }
  1282. for (int i = 0; i < planes; i++)
  1283. vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
  1284. av_free(map);
  1285. }
  1286. static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  1287. const AVFrame *src, int flags)
  1288. {
  1289. VkResult ret;
  1290. int err, mapped_mem_count = 0;
  1291. AVVkFrame *f = (AVVkFrame *)src->data[0];
  1292. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1293. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1294. VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
  1295. if (!map)
  1296. return AVERROR(ENOMEM);
  1297. if (src->format != AV_PIX_FMT_VULKAN) {
  1298. av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
  1299. av_get_pix_fmt_name(src->format));
  1300. err = AVERROR(EINVAL);
  1301. goto fail;
  1302. }
  1303. if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
  1304. !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
  1305. av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
  1306. "and linear!\n");
  1307. err = AVERROR(EINVAL);
  1308. goto fail;
  1309. }
  1310. dst->width = src->width;
  1311. dst->height = src->height;
  1312. for (int i = 0; i < planes; i++) {
  1313. ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
  1314. VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
  1315. if (ret != VK_SUCCESS) {
  1316. av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
  1317. vk_ret2str(ret));
  1318. err = AVERROR_EXTERNAL;
  1319. goto fail;
  1320. }
  1321. mapped_mem_count++;
  1322. }
  1323. /* Check if the memory contents matter */
  1324. if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
  1325. !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1326. VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1327. for (int i = 0; i < planes; i++) {
  1328. map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1329. map_mem_ranges[i].size = VK_WHOLE_SIZE;
  1330. map_mem_ranges[i].memory = f->mem[i];
  1331. }
  1332. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
  1333. map_mem_ranges);
  1334. if (ret != VK_SUCCESS) {
  1335. av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
  1336. vk_ret2str(ret));
  1337. err = AVERROR_EXTERNAL;
  1338. goto fail;
  1339. }
  1340. }
  1341. for (int i = 0; i < planes; i++) {
  1342. VkImageSubresource sub = {
  1343. .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  1344. };
  1345. VkSubresourceLayout layout;
  1346. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  1347. dst->linesize[i] = layout.rowPitch;
  1348. }
  1349. map->frame = f;
  1350. map->flags = flags;
  1351. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
  1352. &vulkan_unmap_frame, map);
  1353. if (err < 0)
  1354. goto fail;
  1355. return 0;
  1356. fail:
  1357. for (int i = 0; i < mapped_mem_count; i++)
  1358. vkUnmapMemory(hwctx->act_dev, f->mem[i]);
  1359. av_free(map);
  1360. return err;
  1361. }
  1362. #if CONFIG_LIBDRM
  1363. static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1364. {
  1365. VulkanMapping *map = hwmap->priv;
  1366. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1367. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1368. for (int i = 0; i < planes; i++) {
  1369. vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
  1370. vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
  1371. vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
  1372. }
  1373. av_freep(&map->frame);
  1374. }
  1375. static const struct {
  1376. uint32_t drm_fourcc;
  1377. VkFormat vk_format;
  1378. } vulkan_drm_format_map[] = {
  1379. { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
  1380. { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
  1381. { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
  1382. { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
  1383. { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
  1384. { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
  1385. { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1386. { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1387. { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1388. { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1389. };
  1390. static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
  1391. {
  1392. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  1393. if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
  1394. return vulkan_drm_format_map[i].vk_format;
  1395. return VK_FORMAT_UNDEFINED;
  1396. }
  1397. static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
  1398. AVDRMFrameDescriptor *desc)
  1399. {
  1400. int err = 0;
  1401. VkResult ret;
  1402. AVVkFrame *f;
  1403. int bind_counts = 0;
  1404. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1405. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1406. VulkanDevicePriv *p = ctx->internal->priv;
  1407. const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1408. const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
  1409. VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
  1410. VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
  1411. VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
  1412. VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
  1413. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
  1414. for (int i = 0; i < desc->nb_layers; i++) {
  1415. if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
  1416. av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
  1417. desc->layers[i].format);
  1418. return AVERROR(EINVAL);
  1419. }
  1420. }
  1421. if (!(f = av_vk_frame_alloc())) {
  1422. av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
  1423. err = AVERROR(ENOMEM);
  1424. goto fail;
  1425. }
  1426. for (int i = 0; i < desc->nb_objects; i++) {
  1427. VkMemoryFdPropertiesKHR fdmp = {
  1428. .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
  1429. };
  1430. VkMemoryRequirements req = {
  1431. .size = desc->objects[i].size,
  1432. };
  1433. VkImportMemoryFdInfoKHR idesc = {
  1434. .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
  1435. .handleType = htype,
  1436. .fd = dup(desc->objects[i].fd),
  1437. };
  1438. ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
  1439. idesc.fd, &fdmp);
  1440. if (ret != VK_SUCCESS) {
  1441. av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
  1442. vk_ret2str(ret));
  1443. err = AVERROR_EXTERNAL;
  1444. close(idesc.fd);
  1445. goto fail;
  1446. }
  1447. req.memoryTypeBits = fdmp.memoryTypeBits;
  1448. err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
  1449. &idesc, &f->flags, &f->mem[i]);
  1450. if (err) {
  1451. close(idesc.fd);
  1452. return err;
  1453. }
  1454. f->size[i] = desc->objects[i].size;
  1455. }
  1456. f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
  1457. desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
  1458. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1459. for (int i = 0; i < desc->nb_layers; i++) {
  1460. const int planes = desc->layers[i].nb_planes;
  1461. const int signal_p = has_modifiers && (planes > 1);
  1462. VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
  1463. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
  1464. .drmFormatModifier = desc->objects[0].format_modifier,
  1465. .drmFormatModifierPlaneCount = planes,
  1466. .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
  1467. };
  1468. VkExternalMemoryImageCreateInfo einfo = {
  1469. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1470. .pNext = has_modifiers ? &drm_info : NULL,
  1471. .handleTypes = htype,
  1472. };
  1473. VkSemaphoreCreateInfo sem_spawn = {
  1474. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  1475. };
  1476. const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
  1477. const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
  1478. VkImageCreateInfo image_create_info = {
  1479. .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
  1480. .pNext = &einfo,
  1481. .imageType = VK_IMAGE_TYPE_2D,
  1482. .format = drm_to_vulkan_fmt(desc->layers[i].format),
  1483. .extent.width = p_w,
  1484. .extent.height = p_h,
  1485. .extent.depth = 1,
  1486. .mipLevels = 1,
  1487. .arrayLayers = 1,
  1488. .flags = VK_IMAGE_CREATE_ALIAS_BIT |
  1489. (signal_p ? VK_IMAGE_CREATE_DISJOINT_BIT : 0x0),
  1490. .tiling = f->tiling,
  1491. .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
  1492. .usage = DEFAULT_USAGE_FLAGS,
  1493. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  1494. .samples = VK_SAMPLE_COUNT_1_BIT,
  1495. };
  1496. for (int j = 0; j < planes; j++) {
  1497. plane_data[j].offset = desc->layers[i].planes[j].offset;
  1498. plane_data[j].rowPitch = desc->layers[i].planes[j].pitch;
  1499. plane_data[j].size = 0; /* The specs say so for all 3 */
  1500. plane_data[j].arrayPitch = 0;
  1501. plane_data[j].depthPitch = 0;
  1502. }
  1503. /* Create image */
  1504. ret = vkCreateImage(hwctx->act_dev, &image_create_info,
  1505. hwctx->alloc, &f->img[i]);
  1506. if (ret != VK_SUCCESS) {
  1507. av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
  1508. vk_ret2str(ret));
  1509. err = AVERROR(EINVAL);
  1510. goto fail;
  1511. }
  1512. ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
  1513. hwctx->alloc, &f->sem[i]);
1514. if (ret != VK_SUCCESS) {
1515. av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
1516. vk_ret2str(ret));
1517. err = AVERROR_EXTERNAL;
goto fail;
1518. }
1519. /* Ideally we'd import an external semaphore into the one we just created
1520. * using vkImportSemaphoreFdKHR, but unfortunately neither DRM nor VAAPI
1521. * offer us anything we could import and sync with, so instead we
1522. * just signal the semaphore we created ourselves. */
  1523. f->layout[i] = image_create_info.initialLayout;
  1524. f->access[i] = 0x0;
  1525. for (int j = 0; j < planes; j++) {
  1526. VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  1527. j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
  1528. VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
  1529. plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
  1530. plane_info[bind_counts].planeAspect = aspect;
  1531. bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
  1532. bind_info[bind_counts].pNext = signal_p ? &plane_info[bind_counts] : NULL;
  1533. bind_info[bind_counts].image = f->img[i];
  1534. bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
  1535. bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
  1536. bind_counts++;
  1537. }
  1538. }
  1539. /* Bind the allocated memory to the images */
  1540. ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
  1541. if (ret != VK_SUCCESS) {
  1542. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
  1543. vk_ret2str(ret));
1544. err = AVERROR_EXTERNAL;
goto fail;
  1545. }
1546. /* NOTE: This becomes completely unnecessary once we can import
1547. * semaphores from DRM. Until then we have to signal the semaphores ourselves.
1548. * We're reusing the exec context that's also used for uploads/downloads. */
  1549. err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_RO_SHADER);
  1550. if (err)
  1551. goto fail;
  1552. *frame = f;
  1553. return 0;
  1554. fail:
  1555. for (int i = 0; i < desc->nb_layers; i++) {
  1556. vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
  1557. vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
  1558. }
  1559. for (int i = 0; i < desc->nb_objects; i++)
  1560. vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
  1561. av_free(f);
  1562. return err;
  1563. }
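/* Maps an AV_PIX_FMT_DRM_PRIME frame to Vulkan by importing its DMA-BUF objects; the resulting AVVkFrame is released by vulkan_unmap_from() when the mapping is unreferenced. */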
  1564. static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  1565. const AVFrame *src, int flags)
  1566. {
  1567. int err = 0;
  1568. AVVkFrame *f;
  1569. VulkanMapping *map = NULL;
  1570. err = vulkan_map_from_drm_frame_desc(hwfc, &f,
  1571. (AVDRMFrameDescriptor *)src->data[0]);
  1572. if (err)
  1573. return err;
  1574. /* The unmapping function will free this */
  1575. dst->data[0] = (uint8_t *)f;
  1576. dst->width = src->width;
  1577. dst->height = src->height;
1578. map = av_mallocz(sizeof(VulkanMapping));
1579. if (!map) {
1580. err = AVERROR(ENOMEM);
goto fail;
}
  1581. map->frame = f;
  1582. map->flags = flags;
  1583. err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
  1584. &vulkan_unmap_from, map);
  1585. if (err < 0)
  1586. goto fail;
  1587. av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
  1588. return 0;
  1589. fail:
  1590. vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f);
  1591. av_free(map);
  1592. return err;
  1593. }
  1594. #if CONFIG_VAAPI
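/* VAAPI surfaces are mapped in two steps: first to DRM PRIME via av_hwframe_map(), then from DRM PRIME to Vulkan with vulkan_map_from_drm(). */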
  1595. static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
  1596. AVFrame *dst, const AVFrame *src,
  1597. int flags)
  1598. {
  1599. int err;
  1600. AVFrame *tmp = av_frame_alloc();
  1601. AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  1602. AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
  1603. VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
  1604. if (!tmp)
  1605. return AVERROR(ENOMEM);
1606. /* We have to sync since, as the previous comment said, there are no semaphores to import and wait on */
  1607. vaSyncSurface(vaapi_ctx->display, surface_id);
  1608. tmp->format = AV_PIX_FMT_DRM_PRIME;
  1609. err = av_hwframe_map(tmp, src, flags);
  1610. if (err < 0)
  1611. goto fail;
  1612. err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
  1613. if (err < 0)
  1614. goto fail;
  1615. err = ff_hwframe_map_replace(dst, src);
  1616. fail:
  1617. av_frame_free(&tmp);
  1618. return err;
  1619. }
  1620. #endif
  1621. #endif
  1622. #if CONFIG_CUDA
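/* Exports each plane's memory and semaphore as opaque FDs and imports them into CUDA as external memory (mapped to a CUarray) and external semaphores. The imported handles are cached in the frame's internal struct so the export only has to happen once per frame. */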
  1623. static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
  1624. AVBufferRef *cuda_hwfc,
  1625. const AVFrame *frame)
  1626. {
  1627. int err;
  1628. VkResult ret;
  1629. AVVkFrame *dst_f;
  1630. AVVkFrameInternal *dst_int;
  1631. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1632. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1633. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1634. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1635. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  1636. VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
  1637. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
  1638. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1639. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1640. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1641. CudaFunctions *cu = cu_internal->cuda_dl;
  1642. CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
  1643. CU_AD_FORMAT_UNSIGNED_INT8;
  1644. dst_f = (AVVkFrame *)frame->data[0];
  1645. dst_int = dst_f->internal;
  1646. if (!dst_int || !dst_int->cuda_fc_ref) {
  1647. if (!dst_f->internal)
  1648. dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
  1649. if (!dst_int) {
  1650. err = AVERROR(ENOMEM);
  1651. goto fail;
  1652. }
  1653. dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
  1654. if (!dst_int->cuda_fc_ref) {
  1655. err = AVERROR(ENOMEM);
  1656. goto fail;
  1657. }
  1658. for (int i = 0; i < planes; i++) {
  1659. CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
  1660. .offset = 0,
  1661. .arrayDesc = {
  1662. .Width = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  1663. : hwfc->width,
  1664. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  1665. : hwfc->height,
  1666. .Depth = 0,
  1667. .Format = cufmt,
  1668. .NumChannels = 1 + ((planes == 2) && i),
  1669. .Flags = 0,
  1670. },
  1671. .numLevels = 1,
  1672. };
  1673. CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
  1674. .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
  1675. .size = dst_f->size[i],
  1676. };
  1677. VkMemoryGetFdInfoKHR export_info = {
  1678. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  1679. .memory = dst_f->mem[i],
  1680. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
  1681. };
  1682. VkSemaphoreGetFdInfoKHR sem_export = {
  1683. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
  1684. .semaphore = dst_f->sem[i],
  1685. .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
  1686. };
  1687. CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
  1688. .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
  1689. };
  1690. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  1691. &ext_desc.handle.fd);
  1692. if (ret != VK_SUCCESS) {
1693. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
  1694. err = AVERROR_EXTERNAL;
  1695. goto fail;
  1696. }
  1697. ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
  1698. if (ret < 0) {
  1699. err = AVERROR_EXTERNAL;
  1700. goto fail;
  1701. }
  1702. ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
  1703. dst_int->ext_mem[i],
  1704. &tex_desc));
  1705. if (ret < 0) {
  1706. err = AVERROR_EXTERNAL;
  1707. goto fail;
  1708. }
  1709. ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
  1710. dst_int->cu_mma[i], 0));
  1711. if (ret < 0) {
  1712. err = AVERROR_EXTERNAL;
  1713. goto fail;
  1714. }
  1715. ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
  1716. &ext_sem_desc.handle.fd);
  1717. if (ret != VK_SUCCESS) {
  1718. av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
  1719. vk_ret2str(ret));
  1720. err = AVERROR_EXTERNAL;
  1721. goto fail;
  1722. }
  1723. ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
  1724. &ext_sem_desc));
  1725. if (ret < 0) {
  1726. err = AVERROR_EXTERNAL;
  1727. goto fail;
  1728. }
  1729. }
  1730. }
  1731. return 0;
  1732. fail:
  1733. return err;
  1734. }
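/* CUDA -> Vulkan transfer: waits on the per-plane semaphores, copies each plane into the mapped CUarrays with cuMemcpy2DAsync(), then signals the semaphores again on the CUDA stream. */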
  1735. static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
  1736. AVFrame *dst, const AVFrame *src)
  1737. {
  1738. int err;
  1739. VkResult ret;
  1740. CUcontext dummy;
  1741. AVVkFrame *dst_f;
  1742. AVVkFrameInternal *dst_int;
  1743. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1744. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1745. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  1746. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1747. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1748. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1749. CudaFunctions *cu = cu_internal->cuda_dl;
  1750. CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
  1751. CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
  1752. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  1753. if (ret < 0) {
  1754. err = AVERROR_EXTERNAL;
  1755. goto fail;
  1756. }
  1757. dst_f = (AVVkFrame *)dst->data[0];
1758. err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
1759. if (err < 0) {
  1760. goto fail;
  1761. }
  1762. dst_int = dst_f->internal;
  1763. ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
  1764. planes, cuda_dev->stream));
  1765. if (ret < 0) {
  1766. err = AVERROR_EXTERNAL;
  1767. goto fail;
  1768. }
  1769. for (int i = 0; i < planes; i++) {
  1770. CUDA_MEMCPY2D cpy = {
  1771. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  1772. .srcDevice = (CUdeviceptr)src->data[i],
  1773. .srcPitch = src->linesize[i],
  1774. .srcY = 0,
  1775. .dstMemoryType = CU_MEMORYTYPE_ARRAY,
  1776. .dstArray = dst_int->cu_array[i],
  1777. .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  1778. : hwfc->width) * desc->comp[i].step,
  1779. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  1780. : hwfc->height,
  1781. };
  1782. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  1783. if (ret < 0) {
  1784. err = AVERROR_EXTERNAL;
  1785. goto fail;
  1786. }
  1787. }
  1788. ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
  1789. planes, cuda_dev->stream));
  1790. if (ret < 0) {
  1791. err = AVERROR_EXTERNAL;
  1792. goto fail;
  1793. }
  1794. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
1795. av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
  1796. return 0;
  1797. fail:
  1798. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  1799. vulkan_free_internal(dst_int);
  1800. dst_f->internal = NULL;
  1801. av_buffer_unref(&dst->buf[0]);
  1802. return err;
  1803. }
  1804. #endif
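/* Dispatches mapping into Vulkan based on the source pixel format; DRM PRIME and VAAPI sources additionally require the external DMA-BUF memory extension. */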
  1805. static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
  1806. const AVFrame *src, int flags)
  1807. {
  1808. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1809. switch (src->format) {
  1810. #if CONFIG_LIBDRM
  1811. #if CONFIG_VAAPI
  1812. case AV_PIX_FMT_VAAPI:
  1813. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1814. return vulkan_map_from_vaapi(hwfc, dst, src, flags);
  1815. #endif
  1816. case AV_PIX_FMT_DRM_PRIME:
  1817. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1818. return vulkan_map_from_drm(hwfc, dst, src, flags);
  1819. #endif
  1820. default:
  1821. return AVERROR(ENOSYS);
  1822. }
  1823. }
  1824. #if CONFIG_LIBDRM
  1825. typedef struct VulkanDRMMapping {
  1826. AVDRMFrameDescriptor drm_desc;
  1827. AVVkFrame *source;
  1828. } VulkanDRMMapping;
  1829. static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1830. {
  1831. AVDRMFrameDescriptor *drm_desc = hwmap->priv;
  1832. for (int i = 0; i < drm_desc->nb_objects; i++)
  1833. close(drm_desc->objects[i].fd);
  1834. av_free(drm_desc);
  1835. }
  1836. static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
  1837. {
  1838. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  1839. if (vulkan_drm_format_map[i].vk_format == vkfmt)
  1840. return vulkan_drm_format_map[i].drm_fourcc;
  1841. return DRM_FORMAT_INVALID;
  1842. }
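/* Exports an AVVkFrame as a DRM frame descriptor: each plane's memory becomes a DMA-BUF object described as a single-plane layer, with the DRM format modifier queried when the modifier extension is available. */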
  1843. static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  1844. const AVFrame *src, int flags)
  1845. {
  1846. int err = 0;
  1847. VkResult ret;
  1848. AVVkFrame *f = (AVVkFrame *)src->data[0];
  1849. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1850. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1851. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1852. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  1853. VkImageDrmFormatModifierPropertiesEXT drm_mod = {
  1854. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
  1855. };
  1856. AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
  1857. if (!drm_desc)
  1858. return AVERROR(ENOMEM);
  1859. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
  1860. if (err < 0)
  1861. goto end;
  1862. if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
  1863. VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
  1864. ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
  1865. &drm_mod);
  1866. if (ret != VK_SUCCESS) {
  1867. av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
  1868. err = AVERROR_EXTERNAL;
  1869. goto end;
  1870. }
  1871. }
  1872. for (int i = 0; (i < planes) && (f->mem[i]); i++) {
  1873. VkMemoryGetFdInfoKHR export_info = {
  1874. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  1875. .memory = f->mem[i],
  1876. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
  1877. };
  1878. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  1879. &drm_desc->objects[i].fd);
  1880. if (ret != VK_SUCCESS) {
1881. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
  1882. err = AVERROR_EXTERNAL;
  1883. goto end;
  1884. }
  1885. drm_desc->nb_objects++;
  1886. drm_desc->objects[i].size = f->size[i];
  1887. drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
  1888. }
  1889. drm_desc->nb_layers = planes;
  1890. for (int i = 0; i < drm_desc->nb_layers; i++) {
  1891. VkSubresourceLayout layout;
  1892. VkImageSubresource sub = {
  1893. .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
  1894. VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  1895. VK_IMAGE_ASPECT_COLOR_BIT,
  1896. };
  1897. VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
  1898. drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt);
  1899. drm_desc->layers[i].nb_planes = 1;
  1900. if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
  1901. av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
  1902. err = AVERROR_PATCHWELCOME;
  1903. goto end;
  1904. }
  1905. drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
/* The subresource layout can only be queried for linear or DRM-modifier tiled images */
1906. if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
1907. continue;
  1908. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  1909. drm_desc->layers[i].planes[0].offset = layout.offset;
  1910. drm_desc->layers[i].planes[0].pitch = layout.rowPitch;
  1911. }
  1912. dst->width = src->width;
  1913. dst->height = src->height;
  1914. dst->data[0] = (uint8_t *)drm_desc;
  1915. av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
  1916. return 0;
  1917. end:
  1918. av_free(drm_desc);
  1919. return err;
  1920. }
  1921. #if CONFIG_VAAPI
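/* The reverse of vulkan_map_from_vaapi(): map to DRM PRIME first, then let the VAAPI hwcontext map the resulting DRM frame. */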
  1922. static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
  1923. const AVFrame *src, int flags)
  1924. {
  1925. int err;
  1926. AVFrame *tmp = av_frame_alloc();
  1927. if (!tmp)
  1928. return AVERROR(ENOMEM);
  1929. tmp->format = AV_PIX_FMT_DRM_PRIME;
  1930. err = vulkan_map_to_drm(hwfc, tmp, src, flags);
  1931. if (err < 0)
  1932. goto fail;
  1933. err = av_hwframe_map(dst, tmp, flags);
  1934. if (err < 0)
  1935. goto fail;
  1936. err = ff_hwframe_map_replace(dst, src);
  1937. fail:
  1938. av_frame_free(&tmp);
  1939. return err;
  1940. }
  1941. #endif
  1942. #endif
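/* Dispatches mapping from Vulkan to other formats; anything not handled explicitly falls back to a host-memory mapping via vulkan_map_frame_to_mem(). */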
  1943. static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
  1944. const AVFrame *src, int flags)
  1945. {
  1946. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1947. switch (dst->format) {
  1948. #if CONFIG_LIBDRM
  1949. case AV_PIX_FMT_DRM_PRIME:
  1950. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1951. return vulkan_map_to_drm(hwfc, dst, src, flags);
  1952. #if CONFIG_VAAPI
  1953. case AV_PIX_FMT_VAAPI:
  1954. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1955. return vulkan_map_to_vaapi(hwfc, dst, src, flags);
  1956. #endif
  1957. #endif
  1958. default:
  1959. return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
  1960. }
  1961. }
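/* Staging buffer used when transferring data between host memory and Vulkan images. */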
  1962. typedef struct ImageBuffer {
  1963. VkBuffer buf;
  1964. VkDeviceMemory mem;
  1965. VkMemoryPropertyFlagBits flags;
  1966. } ImageBuffer;
  1967. static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
  1968. {
  1969. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1970. if (!buf)
  1971. return;
  1972. vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
  1973. vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
  1974. }
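/* Creates a staging buffer of height * stride bytes; the stride is aligned to optimalBufferCopyRowPitchAlignment first and written back so callers copy with the padded value. */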
  1975. static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height,
  1976. int *stride, VkBufferUsageFlags usage,
  1977. VkMemoryPropertyFlagBits flags, void *create_pnext,
  1978. void *alloc_pnext)
  1979. {
  1980. int err;
  1981. VkResult ret;
  1982. VkMemoryRequirements req;
  1983. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1984. VulkanDevicePriv *p = ctx->internal->priv;
  1985. VkBufferCreateInfo buf_spawn = {
  1986. .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
  1987. .pNext = create_pnext,
  1988. .usage = usage,
  1989. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  1990. };
  1991. *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
  1992. buf_spawn.size = height*(*stride);
  1993. ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
  1994. if (ret != VK_SUCCESS) {
  1995. av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
  1996. vk_ret2str(ret));
  1997. return AVERROR_EXTERNAL;
  1998. }
  1999. vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
  2000. err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
  2001. if (err)
  2002. return err;
  2003. ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
  2004. if (ret != VK_SUCCESS) {
  2005. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
  2006. vk_ret2str(ret));
  2007. free_buf(ctx, buf);
  2008. return AVERROR_EXTERNAL;
  2009. }
  2010. return 0;
  2011. }
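/* Maps each buffer's memory into host address space; when reading back (invalidate set), non-coherent memory is invalidated first so the host sees the device's writes. */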
  2012. static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
  2013. int nb_buffers, int invalidate)
  2014. {
  2015. VkResult ret;
  2016. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2017. VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
  2018. int invalidate_count = 0;
  2019. for (int i = 0; i < nb_buffers; i++) {
  2020. ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
  2021. VK_WHOLE_SIZE, 0, (void **)&mem[i]);
  2022. if (ret != VK_SUCCESS) {
  2023. av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
  2024. vk_ret2str(ret));
  2025. return AVERROR_EXTERNAL;
  2026. }
  2027. }
  2028. if (!invalidate)
  2029. return 0;
  2030. for (int i = 0; i < nb_buffers; i++) {
  2031. const VkMappedMemoryRange ival_buf = {
  2032. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2033. .memory = buf[i].mem,
  2034. .size = VK_WHOLE_SIZE,
  2035. };
  2036. if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2037. continue;
  2038. invalidate_ctx[invalidate_count++] = ival_buf;
  2039. }
  2040. if (invalidate_count) {
  2041. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
  2042. invalidate_ctx);
  2043. if (ret != VK_SUCCESS)
  2044. av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
  2045. vk_ret2str(ret));
  2046. }
  2047. return 0;
  2048. }
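/* Unmaps the buffers; when the host wrote into them (flush set), non-coherent memory is flushed first so the device sees the host's writes. */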
  2049. static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
  2050. int nb_buffers, int flush)
  2051. {
  2052. int err = 0;
  2053. VkResult ret;
  2054. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2055. VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
  2056. int flush_count = 0;
  2057. if (flush) {
  2058. for (int i = 0; i < nb_buffers; i++) {
  2059. const VkMappedMemoryRange flush_buf = {
  2060. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2061. .memory = buf[i].mem,
  2062. .size = VK_WHOLE_SIZE,
  2063. };
  2064. if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2065. continue;
  2066. flush_ctx[flush_count++] = flush_buf;
  2067. }
  2068. }
  2069. if (flush_count) {
  2070. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
  2071. if (ret != VK_SUCCESS) {
  2072. av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  2073. vk_ret2str(ret));
  2074. err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
  2075. }
  2076. }
  2077. for (int i = 0; i < nb_buffers; i++)
  2078. vkUnmapMemory(hwctx->act_dev, buf[i].mem);
  2079. return err;
  2080. }
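/* Records and submits a one-time command buffer that transitions each plane to a transfer layout (if needed) and copies between the staging buffers and the image planes, waiting on and re-signalling the frame's per-plane semaphores and blocking on a fence until the queue is done. */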
  2081. static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
  2082. ImageBuffer *buffer, const int *buf_stride, int w,
  2083. int h, enum AVPixelFormat pix_fmt, int to_buf)
  2084. {
  2085. VkResult ret;
  2086. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2087. VulkanDevicePriv *s = ctx->internal->priv;
  2088. int bar_num = 0;
  2089. VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
  2090. const int planes = av_pix_fmt_count_planes(pix_fmt);
  2091. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
  2092. VkCommandBufferBeginInfo cmd_start = {
  2093. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
  2094. .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  2095. };
  2096. VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
  2097. VkSubmitInfo s_info = {
  2098. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  2099. .commandBufferCount = 1,
  2100. .pCommandBuffers = &s->cmd.buf,
  2101. .pSignalSemaphores = frame->sem,
  2102. .pWaitSemaphores = frame->sem,
  2103. .pWaitDstStageMask = sem_wait_dst,
  2104. .signalSemaphoreCount = planes,
  2105. .waitSemaphoreCount = planes,
  2106. };
  2107. ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
  2108. if (ret != VK_SUCCESS) {
  2109. av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
  2110. vk_ret2str(ret));
  2111. return AVERROR_EXTERNAL;
  2112. }
  2113. /* Change the image layout to something more optimal for transfers */
  2114. for (int i = 0; i < planes; i++) {
  2115. VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
  2116. VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  2117. VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
  2118. VK_ACCESS_TRANSFER_WRITE_BIT;
  2119. sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
2120. /* If the layout matches and we already have the needed access, skip the barrier */
  2121. if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
  2122. continue;
  2123. img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  2124. img_bar[bar_num].srcAccessMask = 0x0;
  2125. img_bar[bar_num].dstAccessMask = new_access;
  2126. img_bar[bar_num].oldLayout = frame->layout[i];
  2127. img_bar[bar_num].newLayout = new_layout;
  2128. img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2129. img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2130. img_bar[bar_num].image = frame->img[i];
  2131. img_bar[bar_num].subresourceRange.levelCount = 1;
  2132. img_bar[bar_num].subresourceRange.layerCount = 1;
  2133. img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  2134. frame->layout[i] = img_bar[bar_num].newLayout;
  2135. frame->access[i] = img_bar[bar_num].dstAccessMask;
  2136. bar_num++;
  2137. }
  2138. if (bar_num)
  2139. vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
  2140. VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
  2141. 0, NULL, 0, NULL, bar_num, img_bar);
  2142. /* Schedule a copy for each plane */
  2143. for (int i = 0; i < planes; i++) {
  2144. const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
  2145. const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
  2146. VkBufferImageCopy buf_reg = {
  2147. .bufferOffset = 0,
2148. /* Buffer stride isn't in bytes, it's in samples: the implementation
2149. * uses the image's VkFormat to know how many bytes per sample
2150. * the buffer has, so we have to convert by dividing. Stupid.
2151. * Won't work with YUVA or other planar formats with alpha. */
  2152. .bufferRowLength = buf_stride[i] / desc->comp[i].step,
  2153. .bufferImageHeight = p_h,
  2154. .imageSubresource.layerCount = 1,
  2155. .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  2156. .imageOffset = { 0, 0, 0, },
  2157. .imageExtent = { p_w, p_h, 1, },
  2158. };
  2159. if (to_buf)
  2160. vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
  2161. buffer[i].buf, 1, &buf_reg);
  2162. else
  2163. vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i],
  2164. frame->layout[i], 1, &buf_reg);
  2165. }
  2166. ret = vkEndCommandBuffer(s->cmd.buf);
  2167. if (ret != VK_SUCCESS) {
  2168. av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
  2169. vk_ret2str(ret));
  2170. return AVERROR_EXTERNAL;
  2171. }
2172. /* Wait for the transfer to finish so the command buffer and staging buffers
2173. * can be reused; the signalled semaphores handle ordering for later GPU users */
  2174. ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence);
  2175. if (ret != VK_SUCCESS) {
  2176. av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
  2177. vk_ret2str(ret));
  2178. return AVERROR_EXTERNAL;
  2179. } else {
  2180. vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX);
  2181. vkResetFences(hwctx->act_dev, 1, &s->cmd.fence);
  2182. }
  2183. return 0;
  2184. }
  2185. /* Technically we can use VK_EXT_external_memory_host to upload and download,
  2186. * however the alignment requirements make this unfeasible as both the pointer
  2187. * and the size of each plane need to be aligned to the minimum alignment
  2188. * requirement, which on all current implementations (anv, radv) is 4096.
  2189. * If the requirement gets relaxed (unlikely) this can easily be implemented. */
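/* Upload path: for linear, host-visible images the frame is mapped and copied into directly; otherwise the data is copied into staging buffers which are then transferred into the image. */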
  2190. static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  2191. const AVFrame *src)
  2192. {
  2193. int err = 0;
  2194. AVFrame tmp;
  2195. AVVkFrame *f = (AVVkFrame *)dst->data[0];
  2196. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2197. ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
  2198. const int planes = av_pix_fmt_count_planes(src->format);
  2199. int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
  2200. if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) {
  2201. av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
  2202. return AVERROR(EINVAL);
  2203. }
  2204. if (src->width > hwfc->width || src->height > hwfc->height)
  2205. return AVERROR(EINVAL);
2206. /* For linear, host visible images */
  2207. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2208. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2209. AVFrame *map = av_frame_alloc();
  2210. if (!map)
  2211. return AVERROR(ENOMEM);
  2212. map->format = src->format;
  2213. err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
2214. if (err) {
av_frame_free(&map);
2215. goto end;
}
  2216. err = av_frame_copy(map, src);
  2217. av_frame_free(&map);
  2218. goto end;
  2219. }
  2220. /* Create buffers */
  2221. for (int i = 0; i < planes; i++) {
  2222. int h = src->height;
  2223. int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
  2224. tmp.linesize[i] = src->linesize[i];
  2225. err = create_buf(dev_ctx, &buf[i], p_height,
  2226. &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
  2227. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
  2228. if (err)
  2229. goto end;
  2230. }
  2231. /* Map, copy image to buffer, unmap */
  2232. if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0)))
  2233. goto end;
  2234. av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
  2235. src->linesize, src->format, src->width, src->height);
  2236. if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
  2237. goto end;
  2238. /* Copy buffers to image */
  2239. err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
  2240. src->width, src->height, src->format, 0);
  2241. end:
  2242. for (int i = 0; i < planes; i++)
  2243. free_buf(dev_ctx, &buf[i]);
  2244. return err;
  2245. }
  2246. static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
  2247. const AVFrame *src)
  2248. {
  2249. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2250. switch (src->format) {
  2251. #if CONFIG_CUDA
  2252. case AV_PIX_FMT_CUDA:
  2253. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2254. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2255. return vulkan_transfer_data_from_cuda(hwfc, dst, src);
  2256. #endif
  2257. default:
  2258. if (src->hw_frames_ctx)
  2259. return AVERROR(ENOSYS);
  2260. else
  2261. return vulkan_transfer_data_from_mem(hwfc, dst, src);
  2262. }
  2263. }
  2264. #if CONFIG_CUDA
  2265. static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
  2266. const AVFrame *src)
  2267. {
  2268. int err;
  2269. VkResult ret;
  2270. CUcontext dummy;
  2271. AVVkFrame *dst_f;
  2272. AVVkFrameInternal *dst_int;
  2273. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2274. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  2275. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
  2276. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  2277. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  2278. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  2279. CudaFunctions *cu = cu_internal->cuda_dl;
  2280. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  2281. if (ret < 0) {
  2282. err = AVERROR_EXTERNAL;
  2283. goto fail;
  2284. }
  2285. dst_f = (AVVkFrame *)src->data[0];
  2286. err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
  2287. if (err < 0) {
  2288. goto fail;
  2289. }
  2290. dst_int = dst_f->internal;
  2291. for (int i = 0; i < planes; i++) {
  2292. CUDA_MEMCPY2D cpy = {
  2293. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  2294. .dstDevice = (CUdeviceptr)dst->data[i],
  2295. .dstPitch = dst->linesize[i],
  2296. .dstY = 0,
  2297. .srcMemoryType = CU_MEMORYTYPE_ARRAY,
  2298. .srcArray = dst_int->cu_array[i],
  2299. .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  2300. : hwfc->width) * desc->comp[i].step,
  2301. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  2302. : hwfc->height,
  2303. };
  2304. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  2305. if (ret < 0) {
  2306. err = AVERROR_EXTERNAL;
  2307. goto fail;
  2308. }
  2309. }
  2310. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2311. av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
  2312. return 0;
  2313. fail:
  2314. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2315. vulkan_free_internal(dst_int);
  2316. dst_f->internal = NULL;
  2317. av_buffer_unref(&dst->buf[0]);
  2318. return err;
  2319. }
  2320. #endif
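/* Download path: the mirror of vulkan_transfer_data_from_mem(), copying the image into staging buffers (or mapping it directly when linear and host-visible) and then into the destination frame. */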
  2321. static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  2322. const AVFrame *src)
  2323. {
  2324. int err = 0;
  2325. AVFrame tmp;
  2326. AVVkFrame *f = (AVVkFrame *)src->data[0];
  2327. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2328. ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
  2329. const int planes = av_pix_fmt_count_planes(dst->format);
  2330. int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
  2331. if (dst->width > hwfc->width || dst->height > hwfc->height)
  2332. return AVERROR(EINVAL);
2333. /* For linear, host visible images */
  2334. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2335. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2336. AVFrame *map = av_frame_alloc();
  2337. if (!map)
  2338. return AVERROR(ENOMEM);
  2339. map->format = dst->format;
  2340. err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
2341. if (err) {
av_frame_free(&map);
2342. return err;
}
  2343. err = av_frame_copy(dst, map);
  2344. av_frame_free(&map);
  2345. return err;
  2346. }
  2347. /* Create buffers */
  2348. for (int i = 0; i < planes; i++) {
  2349. int h = dst->height;
  2350. int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
  2351. tmp.linesize[i] = dst->linesize[i];
  2352. err = create_buf(dev_ctx, &buf[i], p_height,
  2353. &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
2354. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
if (err)
goto end;
2355. }
  2356. /* Copy image to buffer */
  2357. if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
  2358. dst->width, dst->height, dst->format, 1)))
  2359. goto end;
  2360. /* Map, copy buffer to frame, unmap */
  2361. if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
  2362. goto end;
  2363. av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
  2364. tmp.linesize, dst->format, dst->width, dst->height);
  2365. err = unmap_buffers(dev_ctx, buf, planes, 0);
  2366. end:
  2367. for (int i = 0; i < planes; i++)
  2368. free_buf(dev_ctx, &buf[i]);
  2369. return err;
  2370. }
  2371. static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2372. const AVFrame *src)
  2373. {
  2374. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2375. switch (dst->format) {
  2376. #if CONFIG_CUDA
  2377. case AV_PIX_FMT_CUDA:
  2378. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2379. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2380. return vulkan_transfer_data_to_cuda(hwfc, dst, src);
  2381. #endif
  2382. default:
  2383. if (dst->hw_frames_ctx)
  2384. return AVERROR(ENOSYS);
  2385. else
  2386. return vulkan_transfer_data_to_mem(hwfc, dst, src);
  2387. }
  2388. }
  2389. AVVkFrame *av_vk_frame_alloc(void)
  2390. {
  2391. return av_mallocz(sizeof(AVVkFrame));
  2392. }
  2393. const HWContextType ff_hwcontext_type_vulkan = {
  2394. .type = AV_HWDEVICE_TYPE_VULKAN,
  2395. .name = "Vulkan",
  2396. .device_hwctx_size = sizeof(AVVulkanDeviceContext),
  2397. .device_priv_size = sizeof(VulkanDevicePriv),
  2398. .frames_hwctx_size = sizeof(AVVulkanFramesContext),
  2399. .frames_priv_size = sizeof(VulkanFramesPriv),
  2400. .device_init = &vulkan_device_init,
  2401. .device_create = &vulkan_device_create,
  2402. .device_derive = &vulkan_device_derive,
  2403. .frames_get_constraints = &vulkan_frames_get_constraints,
  2404. .frames_init = vulkan_frames_init,
  2405. .frames_get_buffer = vulkan_get_buffer,
  2406. .frames_uninit = vulkan_frames_uninit,
  2407. .transfer_get_formats = vulkan_transfer_get_formats,
  2408. .transfer_data_to = vulkan_transfer_data_to,
  2409. .transfer_data_from = vulkan_transfer_data_from,
  2410. .map_to = vulkan_map_to,
  2411. .map_from = vulkan_map_from,
  2412. .pix_fmts = (const enum AVPixelFormat []) {
  2413. AV_PIX_FMT_VULKAN,
  2414. AV_PIX_FMT_NONE
  2415. },
  2416. };