  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "config.h"
  19. #include "pixdesc.h"
  20. #include "avstring.h"
  21. #include "imgutils.h"
  22. #include "hwcontext.h"
  23. #include "hwcontext_internal.h"
  24. #include "hwcontext_vulkan.h"
  25. #if CONFIG_LIBDRM
  26. #include <unistd.h>
  27. #include <xf86drm.h>
  28. #include <drm_fourcc.h>
  29. #include "hwcontext_drm.h"
  30. #if CONFIG_VAAPI
  31. #include <va/va_drmcommon.h>
  32. #include "hwcontext_vaapi.h"
  33. #endif
  34. #endif
  35. #if CONFIG_CUDA
  36. #include "hwcontext_cuda_internal.h"
  37. #include "cuda_check.h"
  38. #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
  39. #endif
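/* Bundles everything needed for one-off command submissions: a command pool,
 * a single primary command buffer, the queue it is submitted on and a fence
 * used to wait for completion. */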
  40. typedef struct VulkanExecCtx {
  41. VkCommandPool pool;
  42. VkCommandBuffer buf;
  43. VkQueue queue;
  44. VkFence fence;
  45. } VulkanExecCtx;
  46. typedef struct VulkanDevicePriv {
  47. /* Properties */
  48. VkPhysicalDeviceProperties props;
  49. VkPhysicalDeviceMemoryProperties mprops;
  50. /* Debug callback */
  51. VkDebugUtilsMessengerEXT debug_ctx;
  52. /* Image uploading */
  53. VulkanExecCtx cmd;
  54. /* Extensions */
  55. uint64_t extensions;
  56. /* Settings */
  57. int use_linear_images;
  58. /* Nvidia */
  59. int dev_is_nvidia;
  60. } VulkanDevicePriv;
  61. typedef struct VulkanFramesPriv {
  62. VulkanExecCtx cmd;
  63. } VulkanFramesPriv;
  64. typedef struct AVVkFrameInternal {
  65. #if CONFIG_CUDA
  66. /* Importing external memory into cuda is really expensive so we keep the
  67. * memory imported all the time */
  68. AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
  69. CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
  70. CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
  71. CUarray cu_array[AV_NUM_DATA_POINTERS];
  72. CUexternalSemaphore cu_sem;
  73. #endif
  74. } AVVkFrameInternal;
  75. #define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
  76. vkGetInstanceProcAddr(inst, #name)
  77. #define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
  78. VK_IMAGE_USAGE_STORAGE_BIT | \
  79. VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
  80. VK_IMAGE_USAGE_TRANSFER_DST_BIT)
  81. #define ADD_VAL_TO_LIST(list, count, val) \
  82. do { \
  83. list = av_realloc_array(list, sizeof(*list), ++count); \
  84. if (!list) { \
  85. err = AVERROR(ENOMEM); \
  86. goto fail; \
  87. } \
  88. list[count - 1] = av_strdup(val); \
  89. if (!list[count - 1]) { \
  90. err = AVERROR(ENOMEM); \
  91. goto fail; \
  92. } \
  93. } while(0)
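/* Maps each supported AVPixelFormat to the Vulkan format used for each of
 * its planes (one VkFormat entry per plane). */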
  94. static const struct {
  95. enum AVPixelFormat pixfmt;
  96. const VkFormat vkfmts[3];
  97. } vk_pixfmt_map[] = {
  98. { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
  99. { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
  100. { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
  101. { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
  102. { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
  103. { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
  104. { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
  105. { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
  106. { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
  107. { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
  108. { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
  109. { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
  110. { AV_PIX_FMT_ABGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
  111. { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } },
  112. { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } },
  113. { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } },
  114. { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } },
  115. { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } },
  116. { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
  117. { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
  118. { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
  119. { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } },
  120. { AV_PIX_FMT_0BGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
  121. { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } },
  122. { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
  123. };
  124. const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
  125. {
  126. for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
  127. if (vk_pixfmt_map[i].pixfmt == p)
  128. return vk_pixfmt_map[i].vkfmts;
  129. return NULL;
  130. }
  131. static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
  132. int linear)
  133. {
  134. const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
  135. int planes = av_pix_fmt_count_planes(p);
  136. if (!fmt)
  137. return 0;
  138. for (int i = 0; i < planes; i++) {
  139. VkFormatFeatureFlags flags;
  140. VkFormatProperties2 prop = {
  141. .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
  142. };
  143. vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
  144. flags = linear ? prop.formatProperties.linearTilingFeatures :
  145. prop.formatProperties.optimalTilingFeatures;
  146. if (!(flags & DEFAULT_USAGE_FLAGS))
  147. return 0;
  148. }
  149. return 1;
  150. }
  151. enum VulkanExtensions {
  152. EXT_EXTERNAL_DMABUF_MEMORY = 1ULL << 0, /* VK_EXT_external_memory_dma_buf */
  153. EXT_DRM_MODIFIER_FLAGS = 1ULL << 1, /* VK_EXT_image_drm_format_modifier */
  154. EXT_EXTERNAL_FD_MEMORY = 1ULL << 2, /* VK_KHR_external_memory_fd */
  155. EXT_EXTERNAL_FD_SEM = 1ULL << 3, /* VK_KHR_external_semaphore_fd */
  156. EXT_NO_FLAG = 1ULL << 63,
  157. };
  158. typedef struct VulkanOptExtension {
  159. const char *name;
  160. uint64_t flag;
  161. } VulkanOptExtension;
  162. static const VulkanOptExtension optional_instance_exts[] = {
  163. { VK_KHR_SURFACE_EXTENSION_NAME, EXT_NO_FLAG },
  164. };
  165. static const VulkanOptExtension optional_device_exts[] = {
  166. { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_MEMORY, },
  167. { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, EXT_EXTERNAL_DMABUF_MEMORY, },
  168. { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, },
  169. { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_SEM, },
  170. };
  171. /* Converts return values to strings */
  172. static const char *vk_ret2str(VkResult res)
  173. {
  174. #define CASE(VAL) case VAL: return #VAL
  175. switch (res) {
  176. CASE(VK_SUCCESS);
  177. CASE(VK_NOT_READY);
  178. CASE(VK_TIMEOUT);
  179. CASE(VK_EVENT_SET);
  180. CASE(VK_EVENT_RESET);
  181. CASE(VK_INCOMPLETE);
  182. CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
  183. CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
  184. CASE(VK_ERROR_INITIALIZATION_FAILED);
  185. CASE(VK_ERROR_DEVICE_LOST);
  186. CASE(VK_ERROR_MEMORY_MAP_FAILED);
  187. CASE(VK_ERROR_LAYER_NOT_PRESENT);
  188. CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
  189. CASE(VK_ERROR_FEATURE_NOT_PRESENT);
  190. CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
  191. CASE(VK_ERROR_TOO_MANY_OBJECTS);
  192. CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
  193. CASE(VK_ERROR_FRAGMENTED_POOL);
  194. CASE(VK_ERROR_SURFACE_LOST_KHR);
  195. CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
  196. CASE(VK_SUBOPTIMAL_KHR);
  197. CASE(VK_ERROR_OUT_OF_DATE_KHR);
  198. CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
  199. CASE(VK_ERROR_VALIDATION_FAILED_EXT);
  200. CASE(VK_ERROR_INVALID_SHADER_NV);
  201. CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
  202. CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
  203. CASE(VK_ERROR_NOT_PERMITTED_EXT);
  204. CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
  205. CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
  206. CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
  207. default: return "Unknown error";
  208. }
  209. #undef CASE
  210. }
  211. static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
  212. VkDebugUtilsMessageTypeFlagsEXT messageType,
  213. const VkDebugUtilsMessengerCallbackDataEXT *data,
  214. void *priv)
  215. {
  216. int l;
  217. AVHWDeviceContext *ctx = priv;
  218. switch (severity) {
  219. case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
  220. case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break;
  221. case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
  222. case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break;
  223. default: l = AV_LOG_DEBUG; break;
  224. }
  225. av_log(ctx, l, "%s\n", data->pMessage);
  226. for (int i = 0; i < data->cmdBufLabelCount; i++)
  227. av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);
  228. return 0;
  229. }
  230. static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
  231. const char * const **dst, uint32_t *num, int debug)
  232. {
  233. const char *tstr;
  234. const char **extension_names = NULL;
  235. VulkanDevicePriv *p = ctx->internal->priv;
  236. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  237. int err = 0, found, extensions_found = 0;
  238. const char *mod;
  239. int optional_exts_num;
  240. uint32_t sup_ext_count;
  241. char *user_exts_str = NULL;
  242. AVDictionaryEntry *user_exts;
  243. VkExtensionProperties *sup_ext;
  244. const VulkanOptExtension *optional_exts;
  245. if (!dev) {
  246. mod = "instance";
  247. optional_exts = optional_instance_exts;
  248. optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
  249. user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
  250. if (user_exts) {
  251. user_exts_str = av_strdup(user_exts->value);
  252. if (!user_exts_str) {
  253. err = AVERROR(ENOMEM);
  254. goto fail;
  255. }
  256. }
  257. vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
  258. sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
  259. if (!sup_ext)
  260. return AVERROR(ENOMEM);
  261. vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
  262. } else {
  263. mod = "device";
  264. optional_exts = optional_device_exts;
  265. optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
  266. user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
  267. if (user_exts) {
  268. user_exts_str = av_strdup(user_exts->value);
  269. if (!user_exts_str) {
  270. err = AVERROR(ENOMEM);
  271. goto fail;
  272. }
  273. }
  274. vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
  275. &sup_ext_count, NULL);
  276. sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
  277. if (!sup_ext)
  278. return AVERROR(ENOMEM);
  279. vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
  280. &sup_ext_count, sup_ext);
  281. }
  282. for (int i = 0; i < optional_exts_num; i++) {
  283. tstr = optional_exts[i].name;
  284. found = 0;
  285. for (int j = 0; j < sup_ext_count; j++) {
  286. if (!strcmp(tstr, sup_ext[j].extensionName)) {
  287. found = 1;
  288. break;
  289. }
  290. }
  291. if (!found)
  292. continue;
  293. av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
  294. p->extensions |= optional_exts[i].flag;
  295. ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
  296. }
  297. if (debug && !dev) {
  298. tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
  299. found = 0;
  300. for (int j = 0; j < sup_ext_count; j++) {
  301. if (!strcmp(tstr, sup_ext[j].extensionName)) {
  302. found = 1;
  303. break;
  304. }
  305. }
  306. if (found) {
  307. av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
  308. ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
  309. } else {
  310. av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
  311. tstr);
  312. err = AVERROR(EINVAL);
  313. goto fail;
  314. }
  315. }
  316. if (user_exts_str) {
  317. char *save, *token = av_strtok(user_exts_str, "+", &save);
  318. while (token) {
  319. found = 0;
  320. for (int j = 0; j < sup_ext_count; j++) {
  321. if (!strcmp(token, sup_ext[j].extensionName)) {
  322. found = 1;
  323. break;
  324. }
  325. }
  326. if (found) {
  327. av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
  328. ADD_VAL_TO_LIST(extension_names, extensions_found, token);
  329. } else {
  330. av_log(ctx, AV_LOG_ERROR, "%s extension \"%s\" not found!\n",
  331. mod, token);
  332. err = AVERROR(EINVAL);
  333. goto fail;
  334. }
  335. token = av_strtok(NULL, "+", &save);
  336. }
  337. }
  338. *dst = extension_names;
  339. *num = extensions_found;
  340. av_free(user_exts_str);
  341. av_free(sup_ext);
  342. return 0;
  343. fail:
  344. if (extension_names)
  345. for (int i = 0; i < extensions_found; i++)
  346. av_free((void *)extension_names[i]);
  347. av_free(extension_names);
  348. av_free(user_exts_str);
  349. av_free(sup_ext);
  350. return err;
  351. }
  352. /* Creates a VkInstance */
  353. static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
  354. {
  355. int err = 0;
  356. VkResult ret;
  357. VulkanDevicePriv *p = ctx->internal->priv;
  358. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  359. AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
  360. const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
  361. VkApplicationInfo application_info = {
  362. .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
  363. .pEngineName = "libavutil",
  364. .apiVersion = VK_API_VERSION_1_1,
  365. .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
  366. LIBAVUTIL_VERSION_MINOR,
  367. LIBAVUTIL_VERSION_MICRO),
  368. };
  369. VkInstanceCreateInfo inst_props = {
  370. .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
  371. .pApplicationInfo = &application_info,
  372. };
  373. /* Check for present/missing extensions */
  374. err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
  375. &inst_props.enabledExtensionCount, debug_mode);
  376. if (err < 0)
  377. return err;
  378. if (debug_mode) {
  379. static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
  380. inst_props.ppEnabledLayerNames = layers;
  381. inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
  382. }
  383. /* Try to create the instance */
  384. ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);
  385. /* Check for errors */
  386. if (ret != VK_SUCCESS) {
  387. av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
  388. vk_ret2str(ret));
  389. for (int i = 0; i < inst_props.enabledExtensionCount; i++)
  390. av_free((void *)inst_props.ppEnabledExtensionNames[i]);
  391. av_free((void *)inst_props.ppEnabledExtensionNames);
  392. return AVERROR_EXTERNAL;
  393. }
  394. if (debug_mode) {
  395. VkDebugUtilsMessengerCreateInfoEXT dbg = {
  396. .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
  397. .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
  398. VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
  399. VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
  400. VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
  401. .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
  402. VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
  403. VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
  404. .pfnUserCallback = vk_dbg_callback,
  405. .pUserData = ctx,
  406. };
  407. VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);
  408. pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
  409. hwctx->alloc, &p->debug_ctx);
  410. }
  411. hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
  412. hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;
  413. return 0;
  414. }
  415. typedef struct VulkanDeviceSelection {
  416. uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
  417. int has_uuid;
  418. const char *name; /* Will use this second unless NULL */
  419. uint32_t pci_device; /* Will use this third unless 0x0 */
  420. uint32_t vendor_id; /* Last resort to find something deterministic */
  421. int index; /* Finally fall back to index */
  422. } VulkanDeviceSelection;
  423. static const char *vk_dev_type(enum VkPhysicalDeviceType type)
  424. {
  425. switch (type) {
  426. case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
  427. case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete";
  428. case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual";
  429. case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software";
  430. default: return "unknown";
  431. }
  432. }
  433. /* Finds a device */
  434. static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
  435. {
  436. int err = 0, choice = -1;
  437. uint32_t num;
  438. VkResult ret;
  439. VkPhysicalDevice *devices = NULL;
  440. VkPhysicalDeviceIDProperties *idp = NULL;
  441. VkPhysicalDeviceProperties2 *prop = NULL;
  442. VulkanDevicePriv *p = ctx->internal->priv;
  443. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  444. ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
  445. if (ret != VK_SUCCESS || !num) {
  446. av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
  447. return AVERROR(ENODEV);
  448. }
  449. devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
  450. if (!devices)
  451. return AVERROR(ENOMEM);
  452. ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
  453. if (ret != VK_SUCCESS) {
  454. av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
  455. vk_ret2str(ret));
  456. err = AVERROR(ENODEV);
  457. goto end;
  458. }
  459. prop = av_mallocz_array(num, sizeof(*prop));
  460. if (!prop) {
  461. err = AVERROR(ENOMEM);
  462. goto end;
  463. }
  464. idp = av_mallocz_array(num, sizeof(*idp));
  465. if (!idp) {
  466. err = AVERROR(ENOMEM);
  467. goto end;
  468. }
  469. av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
  470. for (int i = 0; i < num; i++) {
  471. idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
  472. prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
  473. prop[i].pNext = &idp[i];
  474. vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
  475. av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i,
  476. prop[i].properties.deviceName,
  477. vk_dev_type(prop[i].properties.deviceType),
  478. prop[i].properties.deviceID);
  479. }
  480. if (select->has_uuid) {
  481. for (int i = 0; i < num; i++) {
  482. if (!memcmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
  483. choice = i;
  484. goto end;
  485. }
  486. }
  487. av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
  488. err = AVERROR(ENODEV);
  489. goto end;
  490. } else if (select->name) {
  491. av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
  492. for (int i = 0; i < num; i++) {
  493. if (strstr(prop[i].properties.deviceName, select->name)) {
  494. choice = i;
  495. goto end;
  496. }
  497. }
  498. av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
  499. select->name);
  500. err = AVERROR(ENODEV);
  501. goto end;
  502. } else if (select->pci_device) {
  503. av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
  504. for (int i = 0; i < num; i++) {
  505. if (select->pci_device == prop[i].properties.deviceID) {
  506. choice = i;
  507. goto end;
  508. }
  509. }
  510. av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
  511. select->pci_device);
  512. err = AVERROR(EINVAL);
  513. goto end;
  514. } else if (select->vendor_id) {
  515. av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
  516. for (int i = 0; i < num; i++) {
  517. if (select->vendor_id == prop[i].properties.vendorID) {
  518. choice = i;
  519. goto end;
  520. }
  521. }
  522. av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
  523. select->vendor_id);
  524. err = AVERROR(ENODEV);
  525. goto end;
  526. } else {
  527. if (select->index < num) {
  528. choice = select->index;
  529. goto end;
  530. }
  531. av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
  532. select->index);
  533. err = AVERROR(ENODEV);
  534. goto end;
  535. }
  536. end:
  537. if (choice > -1) {
  538. p->dev_is_nvidia = (prop[choice].properties.vendorID == 0x10de);
  539. hwctx->phys_dev = devices[choice];
  540. }
  541. av_free(devices);
  542. av_free(prop);
  543. av_free(idp);
  544. return err;
  545. }
  546. static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
  547. {
  548. uint32_t num;
  549. VkQueueFamilyProperties *qs = NULL;
  550. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  551. int graph_index = -1, comp_index = -1, tx_index = -1;
  552. VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
  553. /* First get the number of queue families */
  554. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
  555. if (!num) {
  556. av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
  557. return AVERROR_EXTERNAL;
  558. }
  559. /* Then allocate memory */
  560. qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
  561. if (!qs)
  562. return AVERROR(ENOMEM);
  563. /* Finally retrieve the queue families */
  564. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);
  565. #define SEARCH_FLAGS(expr, out) \
  566. for (int i = 0; i < num; i++) { \
  567. const VkQueueFlagBits flags = qs[i].queueFlags; \
  568. if (expr) { \
  569. out = i; \
  570. break; \
  571. } \
  572. }
  573. SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)
  574. SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
  575. comp_index)
  576. SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
  577. (i != comp_index), tx_index)
  578. #undef SEARCH_FLAGS
  579. #define QF_FLAGS(flags) \
  580. ((flags) & VK_QUEUE_GRAPHICS_BIT ) ? "(graphics) " : "", \
  581. ((flags) & VK_QUEUE_COMPUTE_BIT ) ? "(compute) " : "", \
  582. ((flags) & VK_QUEUE_TRANSFER_BIT ) ? "(transfer) " : "", \
  583. ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : ""
  584. av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, "
  585. "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags));
  586. hwctx->queue_family_index = graph_index;
  587. hwctx->queue_family_tx_index = graph_index;
  588. hwctx->queue_family_comp_index = graph_index;
  589. pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index;
  590. if (comp_index != -1) {
  591. av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, "
  592. "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags));
  593. hwctx->queue_family_tx_index = comp_index;
  594. hwctx->queue_family_comp_index = comp_index;
  595. pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index;
  596. }
  597. if (tx_index != -1) {
  598. av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, "
  599. "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags));
  600. hwctx->queue_family_tx_index = tx_index;
  601. pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index;
  602. }
  603. #undef QF_FLAGS
  604. av_free(qs);
  605. return 0;
  606. }
  607. static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
  608. int queue_family_index)
  609. {
  610. VkResult ret;
  611. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  612. VkCommandPoolCreateInfo cqueue_create = {
  613. .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
  614. .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
  615. .queueFamilyIndex = queue_family_index,
  616. };
  617. VkCommandBufferAllocateInfo cbuf_create = {
  618. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
  619. .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
  620. .commandBufferCount = 1,
  621. };
  622. VkFenceCreateInfo fence_spawn = {
  623. .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
  624. };
  625. ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
  626. hwctx->alloc, &cmd->fence);
  627. if (ret != VK_SUCCESS) {
  628. av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
  629. vk_ret2str(ret));
  630. return AVERROR_EXTERNAL;
  631. }
  632. ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
  633. hwctx->alloc, &cmd->pool);
  634. if (ret != VK_SUCCESS) {
  635. av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
  636. vk_ret2str(ret));
  637. return AVERROR_EXTERNAL;
  638. }
  639. cbuf_create.commandPool = cmd->pool;
  640. ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
  641. if (ret != VK_SUCCESS) {
  642. av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
  643. vk_ret2str(ret));
  644. return AVERROR_EXTERNAL;
  645. }
  646. vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
  647. &cmd->queue);
  648. return 0;
  649. }
  650. static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
  651. {
  652. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  653. if (cmd->fence)
  654. vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
  655. if (cmd->buf)
  656. vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
  657. if (cmd->pool)
  658. vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
  659. }
  660. static void vulkan_device_free(AVHWDeviceContext *ctx)
  661. {
  662. VulkanDevicePriv *p = ctx->internal->priv;
  663. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  664. free_exec_ctx(ctx, &p->cmd);
  665. vkDestroyDevice(hwctx->act_dev, hwctx->alloc);
  666. if (p->debug_ctx) {
  667. VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
  668. pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
  669. hwctx->alloc);
  670. }
  671. vkDestroyInstance(hwctx->inst, hwctx->alloc);
  672. for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
  673. av_free((void *)hwctx->enabled_inst_extensions[i]);
  674. av_free((void *)hwctx->enabled_inst_extensions);
  675. for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
  676. av_free((void *)hwctx->enabled_dev_extensions[i]);
  677. av_free((void *)hwctx->enabled_dev_extensions);
  678. }
  679. static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
  680. VulkanDeviceSelection *dev_select,
  681. AVDictionary *opts, int flags)
  682. {
  683. int err = 0;
  684. VkResult ret;
  685. AVDictionaryEntry *opt_d;
  686. VulkanDevicePriv *p = ctx->internal->priv;
  687. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  688. VkDeviceQueueCreateInfo queue_create_info[3] = {
  689. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
  690. .pQueuePriorities = (float []){ 1.0f },
  691. .queueCount = 1, },
  692. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
  693. .pQueuePriorities = (float []){ 1.0f },
  694. .queueCount = 1, },
  695. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
  696. .pQueuePriorities = (float []){ 1.0f },
  697. .queueCount = 1, },
  698. };
  699. VkDeviceCreateInfo dev_info = {
  700. .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
  701. .pQueueCreateInfos = queue_create_info,
  702. .queueCreateInfoCount = 0,
  703. };
  704. ctx->free = vulkan_device_free;
  705. /* Create an instance if not given one */
  706. if ((err = create_instance(ctx, opts)))
  707. goto end;
  708. /* Find a device (if not given one) */
  709. if ((err = find_device(ctx, dev_select)))
  710. goto end;
  711. vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
  712. av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
  713. av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
  714. av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyOffsetAlignment: %li\n",
  715. p->props.limits.optimalBufferCopyOffsetAlignment);
  716. av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %li\n",
  717. p->props.limits.optimalBufferCopyRowPitchAlignment);
  718. av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %li\n",
  719. p->props.limits.minMemoryMapAlignment);
  720. /* Search queue family */
  721. if ((err = search_queue_families(ctx, &dev_info)))
  722. goto end;
  723. if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
  724. &dev_info.enabledExtensionCount, 0)))
  725. goto end;
  726. ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
  727. &hwctx->act_dev);
  728. if (ret != VK_SUCCESS) {
  729. av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
  730. vk_ret2str(ret));
  731. for (int i = 0; i < dev_info.enabledExtensionCount; i++)
  732. av_free((void *)dev_info.ppEnabledExtensionNames[i]);
  733. av_free((void *)dev_info.ppEnabledExtensionNames);
  734. err = AVERROR_EXTERNAL;
  735. goto end;
  736. }
  737. /* Images are tiled (optimal) by default; the "linear_images" option makes them linear */
  738. opt_d = av_dict_get(opts, "linear_images", NULL, 0);
  739. if (opt_d)
  740. p->use_linear_images = strtol(opt_d->value, NULL, 10);
  741. hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
  742. hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;
  743. end:
  744. return err;
  745. }
  746. static int vulkan_device_init(AVHWDeviceContext *ctx)
  747. {
  748. int err;
  749. uint32_t queue_num;
  750. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  751. VulkanDevicePriv *p = ctx->internal->priv;
  752. /* Set device extension flags */
  753. for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
  754. for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
  755. if (!strcmp(hwctx->enabled_dev_extensions[i],
  756. optional_device_exts[j].name)) {
  757. p->extensions |= optional_device_exts[j].flag;
  758. break;
  759. }
  760. }
  761. }
  762. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
  763. if (!queue_num) {
  764. av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
  765. return AVERROR_EXTERNAL;
  766. }
  767. #define CHECK_QUEUE(type, n) \
  768. if (n >= queue_num) { \
  769. av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
  770. type, n, queue_num); \
  771. return AVERROR(EINVAL); \
  772. }
  773. CHECK_QUEUE("graphics", hwctx->queue_family_index)
  774. CHECK_QUEUE("upload", hwctx->queue_family_tx_index)
  775. CHECK_QUEUE("compute", hwctx->queue_family_comp_index)
  776. #undef CHECK_QUEUE
  777. /* Create exec context - if there's something invalid this will error out */
  778. err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
  779. if (err)
  780. return err;
  781. /* Get device capabilities */
  782. vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
  783. return 0;
  784. }
  785. static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
  786. AVDictionary *opts, int flags)
  787. {
  788. VulkanDeviceSelection dev_select = { 0 };
  789. if (device && device[0]) {
  790. char *end = NULL;
  791. dev_select.index = strtol(device, &end, 10);
  792. if (end == device) {
  793. dev_select.index = 0;
  794. dev_select.name = device;
  795. }
  796. }
  797. return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
  798. }
  799. static int vulkan_device_derive(AVHWDeviceContext *ctx,
  800. AVHWDeviceContext *src_ctx, int flags)
  801. {
  802. av_unused VulkanDeviceSelection dev_select = { 0 };
  803. /* If there's only one device on the system, then even if it's not covered
  804. * by the following checks (e.g. non-PCIe ARM GPU), having an empty
  805. * dev_select will mean it'll get picked. */
  806. switch(src_ctx->type) {
  807. #if CONFIG_LIBDRM
  808. #if CONFIG_VAAPI
  809. case AV_HWDEVICE_TYPE_VAAPI: {
  810. AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
  811. const char *vendor = vaQueryVendorString(src_hwctx->display);
  812. if (!vendor) {
  813. av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
  814. return AVERROR_EXTERNAL;
  815. }
  816. if (strstr(vendor, "Intel"))
  817. dev_select.vendor_id = 0x8086;
  818. if (strstr(vendor, "AMD"))
  819. dev_select.vendor_id = 0x1002;
  820. return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
  821. }
  822. #endif
  823. case AV_HWDEVICE_TYPE_DRM: {
  824. AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
  825. drmDevice *drm_dev_info;
  826. int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
  827. if (err) {
  828. av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
  829. return AVERROR_EXTERNAL;
  830. }
  831. if (drm_dev_info->bustype == DRM_BUS_PCI)
  832. dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
  833. drmFreeDevice(&drm_dev_info);
  834. return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
  835. }
  836. #endif
  837. #if CONFIG_CUDA
  838. case AV_HWDEVICE_TYPE_CUDA: {
  839. AVHWDeviceContext *cuda_cu = src_ctx;
  840. AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
  841. AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
  842. CudaFunctions *cu = cu_internal->cuda_dl;
  843. int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
  844. cu_internal->cuda_device));
  845. if (ret < 0) {
  846. av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
  847. return AVERROR_EXTERNAL;
  848. }
  849. dev_select.has_uuid = 1;
  850. return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
  851. }
  852. #endif
  853. default:
  854. return AVERROR(ENOSYS);
  855. }
  856. }
  857. static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
  858. const void *hwconfig,
  859. AVHWFramesConstraints *constraints)
  860. {
  861. int count = 0;
  862. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  863. VulkanDevicePriv *p = ctx->internal->priv;
  864. for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
  865. count += pixfmt_is_supported(hwctx, i, p->use_linear_images);
  866. #if CONFIG_CUDA
  867. if (p->dev_is_nvidia)
  868. count++;
  869. #endif
  870. constraints->valid_sw_formats = av_malloc_array(count + 1,
  871. sizeof(enum AVPixelFormat));
  872. if (!constraints->valid_sw_formats)
  873. return AVERROR(ENOMEM);
  874. count = 0;
  875. for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
  876. if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
  877. constraints->valid_sw_formats[count++] = i;
  878. #if CONFIG_CUDA
  879. if (p->dev_is_nvidia)
  880. constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
  881. #endif
  882. constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
  883. constraints->min_width = 0;
  884. constraints->min_height = 0;
  885. constraints->max_width = p->props.limits.maxImageDimension2D;
  886. constraints->max_height = p->props.limits.maxImageDimension2D;
  887. constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
  888. if (!constraints->valid_hw_formats)
  889. return AVERROR(ENOMEM);
  890. constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
  891. constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
  892. return 0;
  893. }
  894. static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
  895. VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
  896. VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
  897. {
  898. VkResult ret;
  899. int index = -1;
  900. VulkanDevicePriv *p = ctx->internal->priv;
  901. AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
  902. VkMemoryAllocateInfo alloc_info = {
  903. .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
  904. .pNext = alloc_extension,
  905. };
  906. /* Align if we need to */
  907. if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
  908. req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);
  909. alloc_info.allocationSize = req->size;
  910. /* The vulkan spec requires memory types to be sorted in the "optimal"
  911. * order, so the first matching type we find will be the best/fastest one */
  912. for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
  913. /* The memory type must be supported by the requirements (bitfield) */
  914. if (!(req->memoryTypeBits & (1 << i)))
  915. continue;
  916. /* The memory type flags must include our properties */
  917. if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
  918. continue;
  919. /* Found a suitable memory type */
  920. index = i;
  921. break;
  922. }
  923. if (index < 0) {
  924. av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
  925. req_flags);
  926. return AVERROR(EINVAL);
  927. }
  928. alloc_info.memoryTypeIndex = index;
  929. ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
  930. dev_hwctx->alloc, mem);
  931. if (ret != VK_SUCCESS) {
  932. av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
  933. vk_ret2str(ret));
  934. return AVERROR(ENOMEM);
  935. }
  936. *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
  937. return 0;
  938. }
  939. static void vulkan_free_internal(AVVkFrameInternal *internal)
  940. {
  941. if (!internal)
  942. return;
  943. #if CONFIG_CUDA
  944. if (internal->cuda_fc_ref) {
  945. AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
  946. int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
  947. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  948. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  949. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  950. CudaFunctions *cu = cu_internal->cuda_dl;
  951. if (internal->cu_sem)
  952. CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem));
  953. for (int i = 0; i < planes; i++) {
  954. if (internal->cu_mma[i])
  955. CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
  956. if (internal->ext_mem[i])
  957. CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
  958. }
  959. av_buffer_unref(&internal->cuda_fc_ref);
  960. }
  961. #endif
  962. av_free(internal);
  963. }
  964. static void vulkan_frame_free(void *opaque, uint8_t *data)
  965. {
  966. AVVkFrame *f = (AVVkFrame *)data;
  967. AVHWFramesContext *hwfc = opaque;
  968. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  969. int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  970. vulkan_free_internal(f->internal);
  971. for (int i = 0; i < planes; i++) {
  972. vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
  973. vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
  974. }
  975. vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
  976. av_free(f);
  977. }
  978. static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
  979. void *alloc_pnext, size_t alloc_pnext_stride)
  980. {
  981. int err;
  982. VkResult ret;
  983. AVHWDeviceContext *ctx = hwfc->device_ctx;
  984. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  985. VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
  986. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  987. for (int i = 0; i < planes; i++) {
  988. int use_ded_mem;
  989. VkImageMemoryRequirementsInfo2 req_desc = {
  990. .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
  991. .image = f->img[i],
  992. };
  993. VkMemoryDedicatedAllocateInfo ded_alloc = {
  994. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
  995. .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
  996. };
  997. VkMemoryDedicatedRequirements ded_req = {
  998. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
  999. };
  1000. VkMemoryRequirements2 req = {
  1001. .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
  1002. .pNext = &ded_req,
  1003. };
  1004. vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
  1005. /* In case the implementation prefers/requires dedicated allocation */
  1006. use_ded_mem = ded_req.prefersDedicatedAllocation |
  1007. ded_req.requiresDedicatedAllocation;
  1008. if (use_ded_mem)
  1009. ded_alloc.image = f->img[i];
  1010. /* Allocate memory */
  1011. if ((err = alloc_mem(ctx, &req.memoryRequirements,
  1012. f->tiling == VK_IMAGE_TILING_LINEAR ?
  1013. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
  1014. VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
  1015. use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
  1016. &f->flags, &f->mem[i])))
  1017. return err;
  1018. f->size[i] = req.memoryRequirements.size;
  1019. bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
  1020. bind_info[i].image = f->img[i];
  1021. bind_info[i].memory = f->mem[i];
  1022. }
  1023. /* Bind the allocated memory to the images */
  1024. ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
  1025. if (ret != VK_SUCCESS) {
  1026. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
  1027. vk_ret2str(ret));
  1028. return AVERROR_EXTERNAL;
  1029. }
  1030. return 0;
  1031. }
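/* Layout transitions performed by prepare_frame(): either make the images
 * ready to be written to (transfer destination) or ready to be read from
 * (transfer source). */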
  1032. enum PrepMode {
  1033. PREP_MODE_WRITE,
  1034. PREP_MODE_RO_SHADER,
  1035. };
  1036. static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
  1037. AVVkFrame *frame, enum PrepMode pmode)
  1038. {
  1039. VkResult ret;
  1040. VkImageLayout new_layout;
  1041. VkAccessFlags new_access;
  1042. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1043. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1044. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1045. VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
  1046. VkCommandBufferBeginInfo cmd_start = {
  1047. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
  1048. .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  1049. };
  1050. VkSubmitInfo s_info = {
  1051. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  1052. .commandBufferCount = 1,
  1053. .pCommandBuffers = &ectx->buf,
  1054. .pSignalSemaphores = &frame->sem,
  1055. .signalSemaphoreCount = 1,
  1056. };
  1057. switch (pmode) {
  1058. case PREP_MODE_WRITE:
  1059. new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  1060. new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
  1061. break;
  1062. case PREP_MODE_RO_SHADER:
  1063. new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
  1064. new_access = VK_ACCESS_TRANSFER_READ_BIT;
  1065. break;
  1066. }
  1067. ret = vkBeginCommandBuffer(ectx->buf, &cmd_start);
  1068. if (ret != VK_SUCCESS)
  1069. return AVERROR_EXTERNAL;
  1070. /* Change the image layout to something more optimal for writes.
  1071. * This also signals the newly created semaphore, making it usable
  1072. * for synchronization */
  1073. for (int i = 0; i < planes; i++) {
  1074. img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  1075. img_bar[i].srcAccessMask = 0x0;
  1076. img_bar[i].dstAccessMask = new_access;
  1077. img_bar[i].oldLayout = frame->layout[i];
  1078. img_bar[i].newLayout = new_layout;
  1079. img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  1080. img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  1081. img_bar[i].image = frame->img[i];
  1082. img_bar[i].subresourceRange.levelCount = 1;
  1083. img_bar[i].subresourceRange.layerCount = 1;
  1084. img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  1085. frame->layout[i] = img_bar[i].newLayout;
  1086. frame->access[i] = img_bar[i].dstAccessMask;
  1087. }
  1088. vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
  1089. VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
  1090. 0, NULL, 0, NULL, planes, img_bar);
  1091. ret = vkEndCommandBuffer(ectx->buf);
  1092. if (ret != VK_SUCCESS)
  1093. return AVERROR_EXTERNAL;
  1094. ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence);
  1095. if (ret != VK_SUCCESS) {
  1096. return AVERROR_EXTERNAL;
  1097. } else {
  1098. vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX);
  1099. vkResetFences(hwctx->act_dev, 1, &ectx->fence);
  1100. }
  1101. return 0;
  1102. }
  1103. static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
  1104. VkImageTiling tiling, VkImageUsageFlagBits usage,
  1105. void *create_pnext)
  1106. {
  1107. int err;
  1108. VkResult ret;
  1109. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1110. VulkanDevicePriv *p = ctx->internal->priv;
  1111. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1112. enum AVPixelFormat format = hwfc->sw_format;
  1113. const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
  1114. const int planes = av_pix_fmt_count_planes(format);
  1115. VkExportSemaphoreCreateInfo ext_sem_info = {
  1116. .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
  1117. .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
  1118. };
  1119. VkSemaphoreCreateInfo sem_spawn = {
  1120. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  1121. .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
  1122. };
  1123. AVVkFrame *f = av_vk_frame_alloc();
  1124. if (!f) {
  1125. av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
  1126. return AVERROR(ENOMEM);
  1127. }
  1128. /* Create the images */
  1129. for (int i = 0; i < planes; i++) {
  1130. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
  1131. int w = hwfc->width;
  1132. int h = hwfc->height;
  1133. const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
  1134. const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
  1135. VkImageCreateInfo image_create_info = {
  1136. .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
  1137. .pNext = create_pnext,
  1138. .imageType = VK_IMAGE_TYPE_2D,
  1139. .format = img_fmts[i],
  1140. .extent.width = p_w,
  1141. .extent.height = p_h,
  1142. .extent.depth = 1,
  1143. .mipLevels = 1,
  1144. .arrayLayers = 1,
  1145. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1146. .tiling = tiling,
  1147. .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
  1148. .usage = usage,
  1149. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  1150. .samples = VK_SAMPLE_COUNT_1_BIT,
  1151. };
  1152. ret = vkCreateImage(hwctx->act_dev, &image_create_info,
  1153. hwctx->alloc, &f->img[i]);
  1154. if (ret != VK_SUCCESS) {
  1155. av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
  1156. vk_ret2str(ret));
  1157. err = AVERROR(EINVAL);
  1158. goto fail;
  1159. }
  1160. f->layout[i] = image_create_info.initialLayout;
  1161. f->access[i] = 0x0;
  1162. }
  1163. /* Create semaphore */
  1164. ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
  1165. hwctx->alloc, &f->sem);
  1166. if (ret != VK_SUCCESS) {
  1167. av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
  1168. vk_ret2str(ret));
  1169. err = AVERROR_EXTERNAL;
  goto fail;
  1170. }
  1171. f->flags = 0x0;
  1172. f->tiling = tiling;
  1173. *frame = f;
  1174. return 0;
  1175. fail:
  1176. vulkan_frame_free(hwfc, (uint8_t *)f);
  1177. return err;
  1178. }
  1179. /* Checks if an export handle type is supported, and if it is, ORs it into *iexp */
  1180. static void try_export_flags(AVHWFramesContext *hwfc,
  1181. VkExternalMemoryHandleTypeFlags *comp_handle_types,
  1182. VkExternalMemoryHandleTypeFlagBits *iexp,
  1183. VkExternalMemoryHandleTypeFlagBits exp)
  1184. {
  1185. VkResult ret;
  1186. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1187. AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
  1188. VkExternalImageFormatProperties eprops = {
  1189. .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
  1190. };
  1191. VkImageFormatProperties2 props = {
  1192. .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
  1193. .pNext = &eprops,
  1194. };
  1195. VkPhysicalDeviceExternalImageFormatInfo enext = {
  1196. .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
  1197. .handleType = exp,
  1198. };
  1199. VkPhysicalDeviceImageFormatInfo2 pinfo = {
  1200. .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
  1201. .pNext = !exp ? NULL : &enext,
  1202. .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
  1203. .type = VK_IMAGE_TYPE_2D,
  1204. .tiling = hwctx->tiling,
  1205. .usage = hwctx->usage,
  1206. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1207. };
  1208. ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
  1209. &pinfo, &props);
  1210. if (ret == VK_SUCCESS) {
  1211. *iexp |= exp;
  1212. *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
  1213. }
  1214. }
  1215. static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
  1216. {
  1217. int err;
  1218. AVVkFrame *f;
  1219. AVBufferRef *avbuf = NULL;
  1220. AVHWFramesContext *hwfc = opaque;
  1221. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1222. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1223. VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
  1224. VkExternalMemoryHandleTypeFlags e = 0x0;
  1225. VkExternalMemoryImageCreateInfo eiinfo = {
  1226. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1227. .pNext = hwctx->create_pnext,
  1228. };
  1229. if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
  1230. try_export_flags(hwfc, &eiinfo.handleTypes, &e,
  1231. VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
  1232. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1233. try_export_flags(hwfc, &eiinfo.handleTypes, &e,
  1234. VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
  1235. for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
  1236. eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
  1237. eminfo[i].pNext = hwctx->alloc_pnext[i];
  1238. eminfo[i].handleTypes = e;
  1239. }
  1240. err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
  1241. eiinfo.handleTypes ? &eiinfo : NULL);
  1242. if (err)
  1243. return NULL;
  1244. err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
  1245. if (err)
  1246. goto fail;
  1247. err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_WRITE);
  1248. if (err)
  1249. goto fail;
  1250. avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
  1251. vulkan_frame_free, hwfc, 0);
  1252. if (!avbuf)
  1253. goto fail;
  1254. return avbuf;
  1255. fail:
  1256. vulkan_frame_free(hwfc, (uint8_t *)f);
  1257. return NULL;
  1258. }
  1259. static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
  1260. {
  1261. VulkanFramesPriv *fp = hwfc->internal->priv;
  1262. free_exec_ctx(hwfc->device_ctx, &fp->cmd);
  1263. }
  1264. static int vulkan_frames_init(AVHWFramesContext *hwfc)
  1265. {
  1266. int err;
  1267. AVVkFrame *f;
  1268. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1269. VulkanFramesPriv *fp = hwfc->internal->priv;
  1270. AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
  1271. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1272. if (hwfc->pool)
  1273. return 0;
  1274. /* Default pool flags */
  1275. hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
  1276. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1277. hwctx->usage |= DEFAULT_USAGE_FLAGS;
  1278. err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
  1279. dev_hwctx->queue_family_tx_index);
  1280. if (err)
  1281. return err;
  1282. /* Test to see if allocation will fail */
  1283. err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
  1284. hwctx->create_pnext);
  1285. if (err) {
  1286. free_exec_ctx(hwfc->device_ctx, &fp->cmd);
  1287. return err;
  1288. }
  1289. vulkan_frame_free(hwfc, (uint8_t *)f);
  1290. hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
  1291. hwfc, vulkan_pool_alloc,
  1292. NULL);
  1293. if (!hwfc->internal->pool_internal) {
  1294. free_exec_ctx(hwfc->device_ctx, &fp->cmd);
  1295. return AVERROR(ENOMEM);
  1296. }
  1297. return 0;
  1298. }
  1299. static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
  1300. {
  1301. frame->buf[0] = av_buffer_pool_get(hwfc->pool);
  1302. if (!frame->buf[0])
  1303. return AVERROR(ENOMEM);
  1304. frame->data[0] = frame->buf[0]->data;
  1305. frame->format = AV_PIX_FMT_VULKAN;
  1306. frame->width = hwfc->width;
  1307. frame->height = hwfc->height;
  1308. return 0;
  1309. }
  1310. static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
  1311. enum AVHWFrameTransferDirection dir,
  1312. enum AVPixelFormat **formats)
  1313. {
  1314. enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
  1315. if (!fmts)
  1316. return AVERROR(ENOMEM);
  1317. fmts[0] = hwfc->sw_format;
  1318. fmts[1] = AV_PIX_FMT_NONE;
  1319. *formats = fmts;
  1320. return 0;
  1321. }
  1322. typedef struct VulkanMapping {
  1323. AVVkFrame *frame;
  1324. int flags;
  1325. } VulkanMapping;
  1326. static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1327. {
  1328. VulkanMapping *map = hwmap->priv;
  1329. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1330. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1331. /* Check if buffer needs flushing */
  1332. if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
  1333. !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1334. VkResult ret;
  1335. VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1336. for (int i = 0; i < planes; i++) {
  1337. flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1338. flush_ranges[i].memory = map->frame->mem[i];
  1339. flush_ranges[i].size = VK_WHOLE_SIZE;
  1340. }
  1341. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
  1342. flush_ranges);
  1343. if (ret != VK_SUCCESS) {
  1344. av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  1345. vk_ret2str(ret));
  1346. }
  1347. }
  1348. for (int i = 0; i < planes; i++)
  1349. vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
  1350. av_free(map);
  1351. }
  1352. static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  1353. const AVFrame *src, int flags)
  1354. {
  1355. VkResult ret;
  1356. int err, mapped_mem_count = 0;
  1357. AVVkFrame *f = (AVVkFrame *)src->data[0];
  1358. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1359. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1360. VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
  1361. if (!map)
  1362. return AVERROR(ENOMEM);
  1363. if (src->format != AV_PIX_FMT_VULKAN) {
  1364. av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
  1365. av_get_pix_fmt_name(src->format));
  1366. err = AVERROR(EINVAL);
  1367. goto fail;
  1368. }
  1369. if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
  1370. !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
  1371. av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
  1372. "and linear!\n");
  1373. err = AVERROR(EINVAL);
  1374. goto fail;
  1375. }
  1376. dst->width = src->width;
  1377. dst->height = src->height;
  1378. for (int i = 0; i < planes; i++) {
  1379. ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
  1380. VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
  1381. if (ret != VK_SUCCESS) {
  1382. av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
  1383. vk_ret2str(ret));
  1384. err = AVERROR_EXTERNAL;
  1385. goto fail;
  1386. }
  1387. mapped_mem_count++;
  1388. }
  1389. /* Check if the memory contents matter */
  1390. if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
  1391. !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1392. VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1393. for (int i = 0; i < planes; i++) {
  1394. map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1395. map_mem_ranges[i].size = VK_WHOLE_SIZE;
  1396. map_mem_ranges[i].memory = f->mem[i];
  1397. }
  1398. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
  1399. map_mem_ranges);
  1400. if (ret != VK_SUCCESS) {
  1401. av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
  1402. vk_ret2str(ret));
  1403. err = AVERROR_EXTERNAL;
  1404. goto fail;
  1405. }
  1406. }
  1407. for (int i = 0; i < planes; i++) {
  1408. VkImageSubresource sub = {
  1409. .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  1410. };
  1411. VkSubresourceLayout layout;
  1412. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  1413. dst->linesize[i] = layout.rowPitch;
  1414. }
  1415. map->frame = f;
  1416. map->flags = flags;
  1417. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
  1418. &vulkan_unmap_frame, map);
  1419. if (err < 0)
  1420. goto fail;
  1421. return 0;
  1422. fail:
  1423. for (int i = 0; i < mapped_mem_count; i++)
  1424. vkUnmapMemory(hwctx->act_dev, f->mem[i]);
  1425. av_free(map);
  1426. return err;
  1427. }
  1428. #if CONFIG_LIBDRM
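/* Unmap callback for frames imported from DRM: destroys the per-plane
 * images, frees the imported memory and the semaphore, then frees the
 * AVVkFrame itself. */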
  1429. static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1430. {
  1431. VulkanMapping *map = hwmap->priv;
  1432. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1433. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1434. for (int i = 0; i < planes; i++) {
  1435. vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
  1436. vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
  1437. }
  1438. vkDestroySemaphore(hwctx->act_dev, map->frame->sem, hwctx->alloc);
1439. av_freep(&map->frame);
av_free(map);
  1440. }
  1441. static const struct {
  1442. uint32_t drm_fourcc;
  1443. VkFormat vk_format;
  1444. } vulkan_drm_format_map[] = {
  1445. { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
  1446. { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
  1447. { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
  1448. { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
  1449. { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
  1450. { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
  1451. { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1452. { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1453. { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1454. { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1455. };
  1456. static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
  1457. {
  1458. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  1459. if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
  1460. return vulkan_drm_format_map[i].vk_format;
  1461. return VK_FORMAT_UNDEFINED;
  1462. }
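/* Imports a DRM frame descriptor: each DRM object FD is imported as
 * VkDeviceMemory through VK_EXT_external_memory_dma_buf, a VkImage is
 * created per layer, and the memory planes are bound to it. DRM format
 * modifiers are used when the extension is available. */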
  1463. static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
  1464. AVDRMFrameDescriptor *desc)
  1465. {
  1466. int err = 0;
  1467. VkResult ret;
  1468. AVVkFrame *f;
  1469. int bind_counts = 0;
  1470. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1471. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1472. VulkanDevicePriv *p = ctx->internal->priv;
  1473. const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1474. const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
  1475. VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
  1476. VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
  1477. VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
  1478. VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
  1479. VkSemaphoreCreateInfo sem_spawn = {
  1480. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  1481. };
  1482. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
  1483. for (int i = 0; i < desc->nb_layers; i++) {
  1484. if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
  1485. av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
  1486. desc->layers[i].format);
  1487. return AVERROR(EINVAL);
  1488. }
  1489. }
  1490. if (!(f = av_vk_frame_alloc())) {
  1491. av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
  1492. err = AVERROR(ENOMEM);
  1493. goto fail;
  1494. }
  1495. for (int i = 0; i < desc->nb_objects; i++) {
  1496. VkMemoryFdPropertiesKHR fdmp = {
  1497. .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
  1498. };
  1499. VkMemoryRequirements req = {
  1500. .size = desc->objects[i].size,
  1501. };
  1502. VkImportMemoryFdInfoKHR idesc = {
  1503. .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
  1504. .handleType = htype,
  1505. .fd = dup(desc->objects[i].fd),
  1506. };
  1507. ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
  1508. idesc.fd, &fdmp);
  1509. if (ret != VK_SUCCESS) {
  1510. av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
  1511. vk_ret2str(ret));
  1512. err = AVERROR_EXTERNAL;
  1513. close(idesc.fd);
  1514. goto fail;
  1515. }
  1516. req.memoryTypeBits = fdmp.memoryTypeBits;
  1517. err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
  1518. &idesc, &f->flags, &f->mem[i]);
  1519. if (err) {
  1520. close(idesc.fd);
1521. goto fail;
  1522. }
  1523. f->size[i] = desc->objects[i].size;
  1524. }
  1525. f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
  1526. desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
  1527. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1528. for (int i = 0; i < desc->nb_layers; i++) {
  1529. const int planes = desc->layers[i].nb_planes;
  1530. const int signal_p = has_modifiers && (planes > 1);
  1531. VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
  1532. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
  1533. .drmFormatModifier = desc->objects[0].format_modifier,
  1534. .drmFormatModifierPlaneCount = planes,
  1535. .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
  1536. };
  1537. VkExternalMemoryImageCreateInfo einfo = {
  1538. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1539. .pNext = has_modifiers ? &drm_info : NULL,
  1540. .handleTypes = htype,
  1541. };
  1542. const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
  1543. const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
  1544. VkImageCreateInfo image_create_info = {
  1545. .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
  1546. .pNext = &einfo,
  1547. .imageType = VK_IMAGE_TYPE_2D,
  1548. .format = drm_to_vulkan_fmt(desc->layers[i].format),
  1549. .extent.width = p_w,
  1550. .extent.height = p_h,
  1551. .extent.depth = 1,
  1552. .mipLevels = 1,
  1553. .arrayLayers = 1,
  1554. .flags = VK_IMAGE_CREATE_ALIAS_BIT |
  1555. (signal_p ? VK_IMAGE_CREATE_DISJOINT_BIT : 0x0),
  1556. .tiling = f->tiling,
  1557. .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
  1558. .usage = DEFAULT_USAGE_FLAGS,
  1559. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  1560. .samples = VK_SAMPLE_COUNT_1_BIT,
  1561. };
  1562. for (int j = 0; j < planes; j++) {
  1563. plane_data[j].offset = desc->layers[i].planes[j].offset;
  1564. plane_data[j].rowPitch = desc->layers[i].planes[j].pitch;
  1565. plane_data[j].size = 0; /* The specs say so for all 3 */
  1566. plane_data[j].arrayPitch = 0;
  1567. plane_data[j].depthPitch = 0;
  1568. }
  1569. /* Create image */
  1570. ret = vkCreateImage(hwctx->act_dev, &image_create_info,
  1571. hwctx->alloc, &f->img[i]);
  1572. if (ret != VK_SUCCESS) {
  1573. av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
  1574. vk_ret2str(ret));
  1575. err = AVERROR(EINVAL);
  1576. goto fail;
  1577. }
  1578. f->layout[i] = image_create_info.initialLayout;
  1579. f->access[i] = 0x0;
  1580. for (int j = 0; j < planes; j++) {
  1581. VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  1582. j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
  1583. VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
  1584. plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
  1585. plane_info[bind_counts].planeAspect = aspect;
  1586. bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
  1587. bind_info[bind_counts].pNext = signal_p ? &plane_info[bind_counts] : NULL;
  1588. bind_info[bind_counts].image = f->img[i];
  1589. bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
  1590. bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
  1591. bind_counts++;
  1592. }
  1593. }
  1594. ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
  1595. hwctx->alloc, &f->sem);
  1596. if (ret != VK_SUCCESS) {
1597. av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
1598. vk_ret2str(ret));
1599. err = AVERROR_EXTERNAL;
goto fail;
  1600. }
1601. /* Ideally we'd import a semaphore from the exporting API using
1602. * vkImportSemaphoreFdKHR, but unfortunately neither DRM nor VAAPI
1603. * offer us anything we could import and sync with, so instead we
1604. * just signal the semaphore we created. */
  1605. /* Bind the allocated memory to the images */
  1606. ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
  1607. if (ret != VK_SUCCESS) {
  1608. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
  1609. vk_ret2str(ret));
1610. err = AVERROR_EXTERNAL;
goto fail;
  1611. }
1612. /* NOTE: This is completely unnecessary once we can import semaphores
1613. * from DRM. Until then we have to signal the semaphores ourselves.
1614. * We're reusing the exec context that's also used for uploads/downloads. */
  1615. err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_RO_SHADER);
  1616. if (err)
  1617. goto fail;
  1618. *frame = f;
  1619. return 0;
  1620. fail:
  1621. for (int i = 0; i < desc->nb_layers; i++)
  1622. vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
  1623. for (int i = 0; i < desc->nb_objects; i++)
  1624. vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
  1625. vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
  1626. av_free(f);
  1627. return err;
  1628. }
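/* Maps a DRM PRIME frame to Vulkan by importing its descriptor and
 * registering the unmap callback above. */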
  1629. static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  1630. const AVFrame *src, int flags)
  1631. {
  1632. int err = 0;
  1633. AVVkFrame *f;
  1634. VulkanMapping *map = NULL;
  1635. err = vulkan_map_from_drm_frame_desc(hwfc, &f,
  1636. (AVDRMFrameDescriptor *)src->data[0]);
  1637. if (err)
  1638. return err;
  1639. /* The unmapping function will free this */
  1640. dst->data[0] = (uint8_t *)f;
  1641. dst->width = src->width;
  1642. dst->height = src->height;
map = av_mallocz(sizeof(VulkanMapping));
if (!map) {
err = AVERROR(ENOMEM);
goto fail;
}
  1646. map->frame = f;
  1647. map->flags = flags;
  1648. err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
  1649. &vulkan_unmap_from, map);
  1650. if (err < 0)
  1651. goto fail;
  1652. av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
  1653. return 0;
  1654. fail:
  1655. vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f);
  1656. av_free(map);
  1657. return err;
  1658. }
  1659. #if CONFIG_VAAPI
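/* VAAPI surfaces are mapped by first exporting them to DRM PRIME via
 * av_hwframe_map() and then importing the resulting descriptor. */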
  1660. static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
  1661. AVFrame *dst, const AVFrame *src,
  1662. int flags)
  1663. {
  1664. int err;
  1665. AVFrame *tmp = av_frame_alloc();
  1666. AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  1667. AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
  1668. VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
  1669. if (!tmp)
  1670. return AVERROR(ENOMEM);
1671. /* We have to sync since, as the previous comment said, there are no semaphores */
  1672. vaSyncSurface(vaapi_ctx->display, surface_id);
  1673. tmp->format = AV_PIX_FMT_DRM_PRIME;
  1674. err = av_hwframe_map(tmp, src, flags);
  1675. if (err < 0)
  1676. goto fail;
  1677. err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
  1678. if (err < 0)
  1679. goto fail;
  1680. err = ff_hwframe_map_replace(dst, src);
  1681. fail:
  1682. av_frame_free(&tmp);
  1683. return err;
  1684. }
  1685. #endif
  1686. #endif
  1687. #if CONFIG_CUDA
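/* Exports each plane's memory and the frame's semaphore as opaque FDs and
 * imports them into CUDA as external memory and an external semaphore.
 * The result is cached in AVVkFrameInternal so the import only has to be
 * done once per frame. */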
  1688. static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
  1689. AVBufferRef *cuda_hwfc,
  1690. const AVFrame *frame)
  1691. {
  1692. int err;
  1693. VkResult ret;
  1694. AVVkFrame *dst_f;
  1695. AVVkFrameInternal *dst_int;
  1696. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1697. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1698. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1699. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1700. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  1701. VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
  1702. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
  1703. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1704. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1705. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1706. CudaFunctions *cu = cu_internal->cuda_dl;
  1707. CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
  1708. CU_AD_FORMAT_UNSIGNED_INT8;
  1709. dst_f = (AVVkFrame *)frame->data[0];
  1710. dst_int = dst_f->internal;
  1711. if (!dst_int || !dst_int->cuda_fc_ref) {
  1712. VkSemaphoreGetFdInfoKHR sem_export = {
  1713. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
  1714. .semaphore = dst_f->sem,
  1715. .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
  1716. };
  1717. CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
  1718. .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
  1719. };
  1720. if (!dst_f->internal)
  1721. dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
  1722. if (!dst_int) {
  1723. err = AVERROR(ENOMEM);
  1724. goto fail;
  1725. }
  1726. dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
  1727. if (!dst_int->cuda_fc_ref) {
  1728. err = AVERROR(ENOMEM);
  1729. goto fail;
  1730. }
  1731. for (int i = 0; i < planes; i++) {
  1732. CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
  1733. .offset = 0,
  1734. .arrayDesc = {
  1735. .Width = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  1736. : hwfc->width,
  1737. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  1738. : hwfc->height,
  1739. .Depth = 0,
  1740. .Format = cufmt,
  1741. .NumChannels = 1 + ((planes == 2) && i),
  1742. .Flags = 0,
  1743. },
  1744. .numLevels = 1,
  1745. };
  1746. CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
  1747. .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
  1748. .size = dst_f->size[i],
  1749. };
  1750. VkMemoryGetFdInfoKHR export_info = {
  1751. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  1752. .memory = dst_f->mem[i],
  1753. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
  1754. };
  1755. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  1756. &ext_desc.handle.fd);
  1757. if (ret != VK_SUCCESS) {
1758. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
  1759. err = AVERROR_EXTERNAL;
  1760. goto fail;
  1761. }
  1762. ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
  1763. if (ret < 0) {
  1764. err = AVERROR_EXTERNAL;
  1765. goto fail;
  1766. }
  1767. ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
  1768. dst_int->ext_mem[i],
  1769. &tex_desc));
  1770. if (ret < 0) {
  1771. err = AVERROR_EXTERNAL;
  1772. goto fail;
  1773. }
  1774. ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
  1775. dst_int->cu_mma[i], 0));
  1776. if (ret < 0) {
  1777. err = AVERROR_EXTERNAL;
  1778. goto fail;
  1779. }
  1780. }
  1781. ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
  1782. &ext_sem_desc.handle.fd);
  1783. if (ret != VK_SUCCESS) {
  1784. av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
  1785. vk_ret2str(ret));
  1786. err = AVERROR_EXTERNAL;
  1787. goto fail;
  1788. }
  1789. ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem,
  1790. &ext_sem_desc));
  1791. if (ret < 0) {
  1792. err = AVERROR_EXTERNAL;
  1793. goto fail;
  1794. }
  1795. }
  1796. return 0;
  1797. fail:
  1798. return err;
  1799. }
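/* CUDA -> Vulkan transfer: waits on the shared semaphore, does one
 * cuMemcpy2DAsync per plane from the CUDA frame into the imported arrays,
 * then signals the semaphore again on the CUDA stream. */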
  1800. static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
  1801. AVFrame *dst, const AVFrame *src)
  1802. {
  1803. int err;
  1804. VkResult ret;
  1805. CUcontext dummy;
  1806. AVVkFrame *dst_f;
  1807. AVVkFrameInternal *dst_int;
  1808. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1809. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1810. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  1811. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1812. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1813. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1814. CudaFunctions *cu = cu_internal->cuda_dl;
  1815. CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par = { 0 };
  1816. CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par = { 0 };
  1817. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  1818. if (ret < 0) {
  1819. err = AVERROR_EXTERNAL;
  1820. goto fail;
  1821. }
  1822. dst_f = (AVVkFrame *)dst->data[0];
1823. err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
1824. if (err < 0) {
1825. goto fail;
1826. }
  1827. dst_int = dst_f->internal;
  1828. ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&dst_int->cu_sem, &s_w_par,
  1829. 1, cuda_dev->stream));
  1830. if (ret < 0) {
  1831. err = AVERROR_EXTERNAL;
  1832. goto fail;
  1833. }
  1834. for (int i = 0; i < planes; i++) {
  1835. CUDA_MEMCPY2D cpy = {
  1836. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  1837. .srcDevice = (CUdeviceptr)src->data[i],
  1838. .srcPitch = src->linesize[i],
  1839. .srcY = 0,
  1840. .dstMemoryType = CU_MEMORYTYPE_ARRAY,
  1841. .dstArray = dst_int->cu_array[i],
  1842. .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  1843. : hwfc->width) * desc->comp[i].step,
  1844. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  1845. : hwfc->height,
  1846. };
  1847. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  1848. if (ret < 0) {
  1849. err = AVERROR_EXTERNAL;
  1850. goto fail;
  1851. }
  1852. }
  1853. ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&dst_int->cu_sem, &s_s_par,
  1854. 1, cuda_dev->stream));
  1855. if (ret < 0) {
  1856. err = AVERROR_EXTERNAL;
  1857. goto fail;
  1858. }
  1859. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
1860. av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
  1861. return 0;
  1862. fail:
  1863. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  1864. vulkan_free_internal(dst_int);
  1865. dst_f->internal = NULL;
  1866. av_buffer_unref(&dst->buf[0]);
  1867. return err;
  1868. }
  1869. #endif
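/* Dispatches av_hwframe_map() calls into Vulkan based on the source
 * format; DRM and VAAPI mappings require the DMA-BUF memory extensions. */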
  1870. static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
  1871. const AVFrame *src, int flags)
  1872. {
  1873. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1874. switch (src->format) {
  1875. #if CONFIG_LIBDRM
  1876. #if CONFIG_VAAPI
  1877. case AV_PIX_FMT_VAAPI:
  1878. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1879. return vulkan_map_from_vaapi(hwfc, dst, src, flags);
  1880. #endif
  1881. case AV_PIX_FMT_DRM_PRIME:
  1882. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1883. return vulkan_map_from_drm(hwfc, dst, src, flags);
  1884. #endif
  1885. default:
  1886. return AVERROR(ENOSYS);
  1887. }
  1888. }
  1889. #if CONFIG_LIBDRM
  1890. typedef struct VulkanDRMMapping {
  1891. AVDRMFrameDescriptor drm_desc;
  1892. AVVkFrame *source;
  1893. } VulkanDRMMapping;
  1894. static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1895. {
  1896. AVDRMFrameDescriptor *drm_desc = hwmap->priv;
  1897. for (int i = 0; i < drm_desc->nb_objects; i++)
  1898. close(drm_desc->objects[i].fd);
  1899. av_free(drm_desc);
  1900. }
  1901. static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
  1902. {
  1903. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  1904. if (vulkan_drm_format_map[i].vk_format == vkfmt)
  1905. return vulkan_drm_format_map[i].drm_fourcc;
  1906. return DRM_FORMAT_INVALID;
  1907. }
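/* Exports an AVVkFrame as a DRM frame descriptor: every plane's memory is
 * exported as a DMA-BUF FD and the DRM format modifier is queried when the
 * extension is available. Plane offsets and pitches can only be filled in
 * for non-optimal tiling, since optimally tiled images have no queryable
 * subresource layout. */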
  1908. static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  1909. const AVFrame *src, int flags)
  1910. {
  1911. int err = 0;
  1912. VkResult ret;
  1913. AVVkFrame *f = (AVVkFrame *)src->data[0];
  1914. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1915. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1916. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1917. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  1918. VkImageDrmFormatModifierPropertiesEXT drm_mod = {
  1919. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
  1920. };
  1921. AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
  1922. if (!drm_desc)
  1923. return AVERROR(ENOMEM);
  1924. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
  1925. if (err < 0)
  1926. goto end;
  1927. if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
  1928. VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
  1929. ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
  1930. &drm_mod);
  1931. if (ret != VK_SUCCESS) {
  1932. av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
  1933. err = AVERROR_EXTERNAL;
  1934. goto end;
  1935. }
  1936. }
  1937. for (int i = 0; (i < planes) && (f->mem[i]); i++) {
  1938. VkMemoryGetFdInfoKHR export_info = {
  1939. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  1940. .memory = f->mem[i],
  1941. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
  1942. };
  1943. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  1944. &drm_desc->objects[i].fd);
  1945. if (ret != VK_SUCCESS) {
1946. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
  1947. err = AVERROR_EXTERNAL;
  1948. goto end;
  1949. }
  1950. drm_desc->nb_objects++;
  1951. drm_desc->objects[i].size = f->size[i];
  1952. drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
  1953. }
  1954. drm_desc->nb_layers = planes;
  1955. for (int i = 0; i < drm_desc->nb_layers; i++) {
  1956. VkSubresourceLayout layout;
  1957. VkImageSubresource sub = {
  1958. .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
  1959. VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  1960. VK_IMAGE_ASPECT_COLOR_BIT,
  1961. };
  1962. VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
  1963. drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt);
  1964. drm_desc->layers[i].nb_planes = 1;
  1965. if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
  1966. av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
  1967. err = AVERROR_PATCHWELCOME;
  1968. goto end;
  1969. }
  1970. drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
1971. if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
  1972. continue;
  1973. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  1974. drm_desc->layers[i].planes[0].offset = layout.offset;
  1975. drm_desc->layers[i].planes[0].pitch = layout.rowPitch;
  1976. }
  1977. dst->width = src->width;
  1978. dst->height = src->height;
  1979. dst->data[0] = (uint8_t *)drm_desc;
  1980. av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
  1981. return 0;
  1982. end:
  1983. av_free(drm_desc);
  1984. return err;
  1985. }
  1986. #if CONFIG_VAAPI
  1987. static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
  1988. const AVFrame *src, int flags)
  1989. {
  1990. int err;
  1991. AVFrame *tmp = av_frame_alloc();
  1992. if (!tmp)
  1993. return AVERROR(ENOMEM);
  1994. tmp->format = AV_PIX_FMT_DRM_PRIME;
  1995. err = vulkan_map_to_drm(hwfc, tmp, src, flags);
  1996. if (err < 0)
  1997. goto fail;
  1998. err = av_hwframe_map(dst, tmp, flags);
  1999. if (err < 0)
  2000. goto fail;
  2001. err = ff_hwframe_map_replace(dst, src);
  2002. fail:
  2003. av_frame_free(&tmp);
  2004. return err;
  2005. }
  2006. #endif
  2007. #endif
  2008. static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2009. const AVFrame *src, int flags)
  2010. {
  2011. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2012. switch (dst->format) {
  2013. #if CONFIG_LIBDRM
  2014. case AV_PIX_FMT_DRM_PRIME:
  2015. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2016. return vulkan_map_to_drm(hwfc, dst, src, flags);
  2017. #if CONFIG_VAAPI
  2018. case AV_PIX_FMT_VAAPI:
  2019. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2020. return vulkan_map_to_vaapi(hwfc, dst, src, flags);
  2021. #endif
  2022. #endif
  2023. default:
  2024. return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
  2025. }
  2026. }
  2027. typedef struct ImageBuffer {
  2028. VkBuffer buf;
  2029. VkDeviceMemory mem;
  2030. VkMemoryPropertyFlagBits flags;
  2031. } ImageBuffer;
  2032. static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
  2033. {
  2034. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2035. if (!buf)
  2036. return;
  2037. vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
  2038. vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
  2039. }
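/* Creates a host-mappable staging buffer of height * stride bytes; the
 * stride is aligned up to optimalBufferCopyRowPitchAlignment so the buffer
 * can be used directly in buffer<->image copy commands. */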
  2040. static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height,
  2041. int *stride, VkBufferUsageFlags usage,
  2042. VkMemoryPropertyFlagBits flags, void *create_pnext,
  2043. void *alloc_pnext)
  2044. {
  2045. int err;
  2046. VkResult ret;
  2047. VkMemoryRequirements req;
  2048. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2049. VulkanDevicePriv *p = ctx->internal->priv;
  2050. VkBufferCreateInfo buf_spawn = {
  2051. .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
  2052. .pNext = create_pnext,
  2053. .usage = usage,
  2054. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  2055. };
  2056. *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
  2057. buf_spawn.size = height*(*stride);
  2058. ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
  2059. if (ret != VK_SUCCESS) {
  2060. av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
  2061. vk_ret2str(ret));
  2062. return AVERROR_EXTERNAL;
  2063. }
  2064. vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
  2065. err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
  2066. if (err)
  2067. return err;
  2068. ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
  2069. if (ret != VK_SUCCESS) {
  2070. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
  2071. vk_ret2str(ret));
  2072. free_buf(ctx, buf);
  2073. return AVERROR_EXTERNAL;
  2074. }
  2075. return 0;
  2076. }
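/* Maps the staging buffers into host memory, invalidating the ranges of
 * non-coherent memory when their current contents are about to be read. */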
  2077. static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
  2078. int nb_buffers, int invalidate)
  2079. {
  2080. VkResult ret;
  2081. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2082. VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
  2083. int invalidate_count = 0;
  2084. for (int i = 0; i < nb_buffers; i++) {
  2085. ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
  2086. VK_WHOLE_SIZE, 0, (void **)&mem[i]);
  2087. if (ret != VK_SUCCESS) {
  2088. av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
  2089. vk_ret2str(ret));
  2090. return AVERROR_EXTERNAL;
  2091. }
  2092. }
  2093. if (!invalidate)
  2094. return 0;
  2095. for (int i = 0; i < nb_buffers; i++) {
  2096. const VkMappedMemoryRange ival_buf = {
  2097. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2098. .memory = buf[i].mem,
  2099. .size = VK_WHOLE_SIZE,
  2100. };
  2101. if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2102. continue;
  2103. invalidate_ctx[invalidate_count++] = ival_buf;
  2104. }
  2105. if (invalidate_count) {
  2106. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
  2107. invalidate_ctx);
  2108. if (ret != VK_SUCCESS)
  2109. av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
  2110. vk_ret2str(ret));
  2111. }
  2112. return 0;
  2113. }
  2114. static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
  2115. int nb_buffers, int flush)
  2116. {
  2117. int err = 0;
  2118. VkResult ret;
  2119. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2120. VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
  2121. int flush_count = 0;
  2122. if (flush) {
  2123. for (int i = 0; i < nb_buffers; i++) {
  2124. const VkMappedMemoryRange flush_buf = {
  2125. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2126. .memory = buf[i].mem,
  2127. .size = VK_WHOLE_SIZE,
  2128. };
  2129. if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2130. continue;
  2131. flush_ctx[flush_count++] = flush_buf;
  2132. }
  2133. }
  2134. if (flush_count) {
  2135. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
  2136. if (ret != VK_SUCCESS) {
  2137. av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  2138. vk_ret2str(ret));
  2139. err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
  2140. }
  2141. }
  2142. for (int i = 0; i < nb_buffers; i++)
  2143. vkUnmapMemory(hwctx->act_dev, buf[i].mem);
  2144. return err;
  2145. }
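/* Records and submits a one-time command buffer that transitions every
 * plane to a transfer layout, copies between the staging buffers and the
 * image planes, waits on/signals the frame's semaphore and blocks on a
 * fence until the copy has finished. */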
  2146. static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
  2147. ImageBuffer *buffer, const int *buf_stride, int w,
  2148. int h, enum AVPixelFormat pix_fmt, int to_buf)
  2149. {
  2150. VkResult ret;
  2151. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2152. VulkanDevicePriv *s = ctx->internal->priv;
  2153. int bar_num = 0;
  2154. VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
  2155. const int planes = av_pix_fmt_count_planes(pix_fmt);
  2156. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
  2157. VkCommandBufferBeginInfo cmd_start = {
  2158. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
  2159. .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  2160. };
  2161. VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
  2162. VkSubmitInfo s_info = {
  2163. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  2164. .commandBufferCount = 1,
  2165. .pCommandBuffers = &s->cmd.buf,
  2166. .pSignalSemaphores = &frame->sem,
  2167. .pWaitSemaphores = &frame->sem,
  2168. .pWaitDstStageMask = sem_wait_dst,
  2169. .signalSemaphoreCount = 1,
  2170. .waitSemaphoreCount = 1,
  2171. };
  2172. ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
  2173. if (ret != VK_SUCCESS) {
  2174. av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
  2175. vk_ret2str(ret));
  2176. return AVERROR_EXTERNAL;
  2177. }
  2178. /* Change the image layout to something more optimal for transfers */
  2179. for (int i = 0; i < planes; i++) {
  2180. VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
  2181. VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  2182. VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
  2183. VK_ACCESS_TRANSFER_WRITE_BIT;
  2184. sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
  2185. /* If the layout matches and we have read access skip the barrier */
  2186. if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
  2187. continue;
  2188. img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  2189. img_bar[bar_num].srcAccessMask = 0x0;
  2190. img_bar[bar_num].dstAccessMask = new_access;
  2191. img_bar[bar_num].oldLayout = frame->layout[i];
  2192. img_bar[bar_num].newLayout = new_layout;
  2193. img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2194. img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2195. img_bar[bar_num].image = frame->img[i];
  2196. img_bar[bar_num].subresourceRange.levelCount = 1;
  2197. img_bar[bar_num].subresourceRange.layerCount = 1;
  2198. img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  2199. frame->layout[i] = img_bar[bar_num].newLayout;
  2200. frame->access[i] = img_bar[bar_num].dstAccessMask;
  2201. bar_num++;
  2202. }
  2203. if (bar_num)
  2204. vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
  2205. VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
  2206. 0, NULL, 0, NULL, bar_num, img_bar);
  2207. /* Schedule a copy for each plane */
  2208. for (int i = 0; i < planes; i++) {
  2209. const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
  2210. const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
  2211. VkBufferImageCopy buf_reg = {
  2212. .bufferOffset = 0,
2213. /* Buffer stride isn't in bytes, it's in samples; the implementation
2214. * uses the image's VkFormat to know how many bytes per sample
2215. * the buffer has, so we have to convert by dividing. Stupid.
2216. * Won't work with YUVA or other planar formats with alpha. */
  2217. .bufferRowLength = buf_stride[i] / desc->comp[i].step,
  2218. .bufferImageHeight = p_h,
  2219. .imageSubresource.layerCount = 1,
  2220. .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  2221. .imageOffset = { 0, 0, 0, },
  2222. .imageExtent = { p_w, p_h, 1, },
  2223. };
  2224. if (to_buf)
  2225. vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
  2226. buffer[i].buf, 1, &buf_reg);
  2227. else
  2228. vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i],
  2229. frame->layout[i], 1, &buf_reg);
  2230. }
  2231. ret = vkEndCommandBuffer(s->cmd.buf);
  2232. if (ret != VK_SUCCESS) {
  2233. av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
  2234. vk_ret2str(ret));
  2235. return AVERROR_EXTERNAL;
  2236. }
2237. /* Block on the fence so the copy is guaranteed to have finished before we
2238. * return; the semaphore only orders this transfer against other GPU work */
  2239. ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence);
  2240. if (ret != VK_SUCCESS) {
  2241. av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
  2242. vk_ret2str(ret));
  2243. return AVERROR_EXTERNAL;
  2244. } else {
  2245. vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX);
  2246. vkResetFences(hwctx->act_dev, 1, &s->cmd.fence);
  2247. }
  2248. return 0;
  2249. }
  2250. /* Technically we can use VK_EXT_external_memory_host to upload and download,
  2251. * however the alignment requirements make this unfeasible as both the pointer
  2252. * and the size of each plane need to be aligned to the minimum alignment
  2253. * requirement, which on all current implementations (anv, radv) is 4096.
  2254. * If the requirement gets relaxed (unlikely) this can easily be implemented. */
  2255. static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  2256. const AVFrame *src)
  2257. {
  2258. int err = 0;
  2259. AVFrame tmp;
  2260. AVVkFrame *f = (AVVkFrame *)dst->data[0];
  2261. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2262. ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
  2263. const int planes = av_pix_fmt_count_planes(src->format);
  2264. int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
  2265. if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) {
  2266. av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
  2267. return AVERROR(EINVAL);
  2268. }
  2269. if (src->width > hwfc->width || src->height > hwfc->height)
  2270. return AVERROR(EINVAL);
2271. /* For linear, host visible images */
  2272. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2273. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2274. AVFrame *map = av_frame_alloc();
  2275. if (!map)
  2276. return AVERROR(ENOMEM);
  2277. map->format = src->format;
  2278. err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
  2279. if (err)
  2280. goto end;
  2281. err = av_frame_copy(map, src);
  2282. av_frame_free(&map);
  2283. goto end;
  2284. }
  2285. /* Create buffers */
  2286. for (int i = 0; i < planes; i++) {
  2287. int h = src->height;
  2288. int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
  2289. tmp.linesize[i] = FFABS(src->linesize[i]);
  2290. err = create_buf(dev_ctx, &buf[i], p_height,
  2291. &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
  2292. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
  2293. if (err)
  2294. goto end;
  2295. }
  2296. /* Map, copy image to buffer, unmap */
  2297. if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0)))
  2298. goto end;
  2299. av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
  2300. src->linesize, src->format, src->width, src->height);
  2301. if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
  2302. goto end;
  2303. /* Copy buffers to image */
  2304. err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
  2305. src->width, src->height, src->format, 0);
  2306. end:
  2307. for (int i = 0; i < planes; i++)
  2308. free_buf(dev_ctx, &buf[i]);
  2309. return err;
  2310. }
  2311. static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
  2312. const AVFrame *src)
  2313. {
  2314. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2315. switch (src->format) {
  2316. #if CONFIG_CUDA
  2317. case AV_PIX_FMT_CUDA:
  2318. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2319. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2320. return vulkan_transfer_data_from_cuda(hwfc, dst, src);
  2321. #endif
  2322. default:
  2323. if (src->hw_frames_ctx)
  2324. return AVERROR(ENOSYS);
  2325. else
  2326. return vulkan_transfer_data_from_mem(hwfc, dst, src);
  2327. }
  2328. }
  2329. #if CONFIG_CUDA
  2330. static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
  2331. const AVFrame *src)
  2332. {
  2333. int err;
  2334. VkResult ret;
  2335. CUcontext dummy;
  2336. AVVkFrame *dst_f;
  2337. AVVkFrameInternal *dst_int;
  2338. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2339. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  2340. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
  2341. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  2342. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  2343. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  2344. CudaFunctions *cu = cu_internal->cuda_dl;
  2345. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  2346. if (ret < 0) {
  2347. err = AVERROR_EXTERNAL;
  2348. goto fail;
  2349. }
  2350. dst_f = (AVVkFrame *)src->data[0];
  2351. err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
  2352. if (err < 0) {
  2353. goto fail;
  2354. }
  2355. dst_int = dst_f->internal;
  2356. for (int i = 0; i < planes; i++) {
  2357. CUDA_MEMCPY2D cpy = {
  2358. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  2359. .dstDevice = (CUdeviceptr)dst->data[i],
  2360. .dstPitch = dst->linesize[i],
  2361. .dstY = 0,
  2362. .srcMemoryType = CU_MEMORYTYPE_ARRAY,
  2363. .srcArray = dst_int->cu_array[i],
  2364. .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  2365. : hwfc->width) * desc->comp[i].step,
  2366. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  2367. : hwfc->height,
  2368. };
  2369. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  2370. if (ret < 0) {
  2371. err = AVERROR_EXTERNAL;
  2372. goto fail;
  2373. }
  2374. }
  2375. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2376. av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
  2377. return 0;
  2378. fail:
  2379. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2380. vulkan_free_internal(dst_int);
  2381. dst_f->internal = NULL;
  2382. av_buffer_unref(&dst->buf[0]);
  2383. return err;
  2384. }
  2385. #endif
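/* Download path, mirroring the upload: host-mappable linear images are
 * mapped and copied directly, otherwise the image is first copied into
 * staging buffers which are then mapped and copied to the destination. */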
  2386. static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  2387. const AVFrame *src)
  2388. {
  2389. int err = 0;
  2390. AVFrame tmp;
  2391. AVVkFrame *f = (AVVkFrame *)src->data[0];
  2392. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2393. ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
  2394. const int planes = av_pix_fmt_count_planes(dst->format);
  2395. int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
  2396. if (dst->width > hwfc->width || dst->height > hwfc->height)
  2397. return AVERROR(EINVAL);
2398. /* For linear, host visible images */
  2399. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2400. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2401. AVFrame *map = av_frame_alloc();
  2402. if (!map)
  2403. return AVERROR(ENOMEM);
  2404. map->format = dst->format;
  2405. err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
  2406. if (err)
  2407. return err;
  2408. err = av_frame_copy(dst, map);
  2409. av_frame_free(&map);
  2410. return err;
  2411. }
  2412. /* Create buffers */
  2413. for (int i = 0; i < planes; i++) {
  2414. int h = dst->height;
  2415. int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
  2416. tmp.linesize[i] = FFABS(dst->linesize[i]);
  2417. err = create_buf(dev_ctx, &buf[i], p_height,
  2418. &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
2419. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
if (err)
goto end;
2420. }
  2421. /* Copy image to buffer */
  2422. if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
  2423. dst->width, dst->height, dst->format, 1)))
  2424. goto end;
  2425. /* Map, copy buffer to frame, unmap */
  2426. if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
  2427. goto end;
  2428. av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
  2429. tmp.linesize, dst->format, dst->width, dst->height);
  2430. err = unmap_buffers(dev_ctx, buf, planes, 0);
  2431. end:
  2432. for (int i = 0; i < planes; i++)
  2433. free_buf(dev_ctx, &buf[i]);
  2434. return err;
  2435. }
  2436. static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2437. const AVFrame *src)
  2438. {
  2439. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2440. switch (dst->format) {
  2441. #if CONFIG_CUDA
  2442. case AV_PIX_FMT_CUDA:
  2443. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2444. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2445. return vulkan_transfer_data_to_cuda(hwfc, dst, src);
  2446. #endif
  2447. default:
  2448. if (dst->hw_frames_ctx)
  2449. return AVERROR(ENOSYS);
  2450. else
  2451. return vulkan_transfer_data_to_mem(hwfc, dst, src);
  2452. }
  2453. }
  2454. AVVkFrame *av_vk_frame_alloc(void)
  2455. {
  2456. return av_mallocz(sizeof(AVVkFrame));
  2457. }
  2458. const HWContextType ff_hwcontext_type_vulkan = {
  2459. .type = AV_HWDEVICE_TYPE_VULKAN,
  2460. .name = "Vulkan",
  2461. .device_hwctx_size = sizeof(AVVulkanDeviceContext),
  2462. .device_priv_size = sizeof(VulkanDevicePriv),
  2463. .frames_hwctx_size = sizeof(AVVulkanFramesContext),
  2464. .frames_priv_size = sizeof(VulkanFramesPriv),
  2465. .device_init = &vulkan_device_init,
  2466. .device_create = &vulkan_device_create,
  2467. .device_derive = &vulkan_device_derive,
  2468. .frames_get_constraints = &vulkan_frames_get_constraints,
  2469. .frames_init = vulkan_frames_init,
  2470. .frames_get_buffer = vulkan_get_buffer,
  2471. .frames_uninit = vulkan_frames_uninit,
  2472. .transfer_get_formats = vulkan_transfer_get_formats,
  2473. .transfer_data_to = vulkan_transfer_data_to,
  2474. .transfer_data_from = vulkan_transfer_data_from,
  2475. .map_to = vulkan_map_to,
  2476. .map_from = vulkan_map_from,
  2477. .pix_fmts = (const enum AVPixelFormat []) {
  2478. AV_PIX_FMT_VULKAN,
  2479. AV_PIX_FMT_NONE
  2480. },
  2481. };