/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#if CONFIG_LIBDRM
#include <unistd.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif

typedef struct VulkanQueueCtx {
    VkFence fence;
    VkQueue queue;
    int was_synchronous;

    /* Buffer dependencies */
    AVBufferRef **buf_deps;
    int nb_buf_deps;
    int buf_deps_alloc_size;
} VulkanQueueCtx;

typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer *bufs;
    VulkanQueueCtx *queues;
    int nb_queues;
    int cur_queue_idx;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Properties */
    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;

    /* Queues */
    uint32_t qfs[3];
    int num_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Extensions */
    uint64_t extensions;

    /* Settings */
    int use_linear_images;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    /* Image conversions */
    VulkanExecCtx conv_ctx;

    /* Image transfers */
    VulkanExecCtx upload_ctx;
    VulkanExecCtx download_ctx;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#endif
} AVVkFrameInternal;

#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
    graph ? hwctx->nb_graphics_queues :                             \
    comp  ? (hwctx->nb_comp_queues ?                                \
             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
             (hwctx->nb_comp_queues ?                               \
              hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
    0                                                               \
)
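
/* A rough illustration of the fallback above: a request for transfer queues
 * falls back to compute queues, and then to graphics queues, so on a device
 * that only exposes a graphics family every request resolves to
 * nb_graphics_queues. For example (hypothetical counts, not from this file):
 *
 *     hwctx->nb_graphics_queues = 16, nb_comp_queues = 0, nb_tx_queues = 2
 *     GET_QUEUE_COUNT(hwctx, 0, 1, 0)  ->  16  (compute falls back to graphics)
 *     GET_QUEUE_COUNT(hwctx, 0, 0, 1)  ->   2  (dedicated transfer queues)
 */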

#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
    vkGetInstanceProcAddr(inst, #name)

#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      | \
                             VK_IMAGE_USAGE_STORAGE_BIT      | \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)

#define ADD_VAL_TO_LIST(list, count, val)                          \
    do {                                                           \
        list = av_realloc_array(list, sizeof(*list), ++count);     \
        if (!list) {                                               \
            err = AVERROR(ENOMEM);                                 \
            goto fail;                                             \
        }                                                          \
        list[count - 1] = av_strdup(val);                          \
        if (!list[count - 1]) {                                    \
            err = AVERROR(ENOMEM);                                 \
            goto fail;                                             \
        }                                                          \
    } while(0)

static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[3];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },

    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_ABGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },

    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}
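
/* Minimal caller-side sketch for the mapping above (not used in this file):
 * multi-planar formats map to one VkFormat per plane, so NV12 yields R8 for
 * the luma plane and R8G8 for the interleaved chroma plane.
 *
 *     const VkFormat *fmts = av_vkfmt_from_pixfmt(AV_PIX_FMT_NV12);
 *     if (fmts) {
 *         // fmts[0] == VK_FORMAT_R8_UNORM, fmts[1] == VK_FORMAT_R8G8_UNORM
 *     }
 */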

static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
                               int linear)
{
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}

enum VulkanExtensions {
    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL <<  0, /* VK_EXT_external_memory_dma_buf */
    EXT_DRM_MODIFIER_FLAGS     = 1ULL <<  1, /* VK_EXT_image_drm_format_modifier */
    EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  2, /* VK_KHR_external_memory_fd */
    EXT_EXTERNAL_FD_SEM        = 1ULL <<  3, /* VK_KHR_external_semaphore_fd */
    EXT_EXTERNAL_HOST_MEMORY   = 1ULL <<  4, /* VK_EXT_external_memory_host */

    EXT_NO_FLAG                = 1ULL << 63,
};

typedef struct VulkanOptExtension {
    const char *name;
    uint64_t flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    /* For future use */
};

static const VulkanOptExtension optional_device_exts[] = {
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,        EXT_EXTERNAL_FD_MEMORY,     },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,   EXT_EXTERNAL_DMABUF_MEMORY, },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS,     },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,     EXT_EXTERNAL_FD_SEM,        },
    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,      EXT_EXTERNAL_HOST_MEMORY,   },
};
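
/* The flags above are accumulated into VulkanDevicePriv.extensions, so later
 * code can gate optional paths with a plain bitmask test, roughly:
 *
 *     if (p->extensions & EXT_EXTERNAL_FD_SEM) {
 *         // safe to export semaphores as file descriptors
 *     }
 */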

/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                VkDebugUtilsMessageTypeFlagsEXT messageType,
                                const VkDebugUtilsMessengerCallbackDataEXT *data,
                                void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}

static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
        p->extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    if (extension_names)
        for (int i = 0; i < extensions_found; i++)
            av_free((void *)extension_names[i]);
    av_free(extension_names);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}
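
/* Extra extensions can be requested through the options dictionary as a
 * '+'-separated list under "instance_extensions" / "device_extensions";
 * names the driver does not support are skipped with a warning. A
 * hypothetical caller-side sketch (the extension names are only examples):
 *
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "instance_extensions",
 *                 "VK_KHR_surface+VK_KHR_xcb_surface", 0);
 */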

/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
    VkApplicationInfo application_info = {
        .sType         = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName   = "libavutil",
        .apiVersion    = VK_API_VERSION_1_1,
        .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                         LIBAVUTIL_VERSION_MINOR,
                                         LIBAVUTIL_VERSION_MICRO),
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);
    if (err < 0)
        return err;

    if (debug_mode) {
        static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
        inst_props.ppEnabledLayerNames = layers;
        inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
    }

    /* Try to create the instance */
    ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < inst_props.enabledExtensionCount; i++)
            av_free((void *)inst_props.ppEnabledExtensionNames[i]);
        av_free((void *)inst_props.ppEnabledExtensionNames);
        return AVERROR_EXTERNAL;
    }

    if (debug_mode) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT     |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT  |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };
        VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);

        pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                           hwctx->alloc, &p->debug_ctx);
    }

    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;

    return 0;
}
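
/* Setting the "debug" option to a non-zero integer enables the
 * VK_LAYER_KHRONOS_validation layer and routes validation messages through
 * vk_dbg_callback() into av_log(). A minimal caller-side sketch:
 *
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "debug", "1", 0);
 *     // then pass opts when creating an AV_HWDEVICE_TYPE_VULKAN device
 */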

typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    const char *name;           /* Will use this second unless NULL */
    uint32_t pci_device;        /* Will use this third unless 0x0 */
    uint32_t vendor_id;         /* Last resort to find something deterministic */
    int index;                  /* Finally fall back to index */
} VulkanDeviceSelection;

static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}

/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_mallocz_array(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_mallocz_array(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        idp[i].sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1)
        hwctx->phys_dev = devices[choice];

    av_free(devices);
    av_free(prop);
    av_free(idp);

    return err;
}

static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    float *weights;
    VkQueueFamilyProperties *qs = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int graph_index = -1, comp_index = -1, tx_index = -1;
    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;

    /* First get the number of queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
    if (!qs)
        return AVERROR(ENOMEM);

    /* Finally retrieve the queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);

#define SEARCH_FLAGS(expr, out)                             \
    for (int i = 0; i < num; i++) {                         \
        const VkQueueFlagBits flags = qs[i].queueFlags;     \
        if (expr) {                                         \
            out = i;                                        \
            break;                                          \
        }                                                   \
    }

    SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)

    SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
                 comp_index)

    SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
                 (i != comp_index), tx_index)

#undef SEARCH_FLAGS

#define ADD_QUEUE(fidx, graph, comp, tx)                                                 \
    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
           fidx, qs[fidx].queueCount, graph ? "graphics " : "",                          \
           comp ? "compute " : "", tx ? "transfers " : "");                              \
    av_log(ctx, AV_LOG_VERBOSE, " QF %i flags: %s%s%s%s\n", fidx,                        \
           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "",         \
           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "",           \
           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "",        \
           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "");    \
    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx;                                \
    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount;                       \
    weights = av_malloc(qs[fidx].queueCount * sizeof(float));                            \
    pc[cd->queueCreateInfoCount].pQueuePriorities = weights;                             \
    if (!weights)                                                                        \
        goto fail;                                                                       \
    for (int i = 0; i < qs[fidx].queueCount; i++)                                        \
        weights[i] = 1.0f;                                                               \
    cd->queueCreateInfoCount++;

    ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
    hwctx->queue_family_index      = graph_index;
    hwctx->queue_family_comp_index = graph_index;
    hwctx->queue_family_tx_index   = graph_index;
    hwctx->nb_graphics_queues      = qs[graph_index].queueCount;

    if (comp_index != -1) {
        ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
        hwctx->queue_family_tx_index   = comp_index;
        hwctx->queue_family_comp_index = comp_index;
        hwctx->nb_comp_queues          = qs[comp_index].queueCount;
    }

    if (tx_index != -1) {
        ADD_QUEUE(tx_index, 0, 0, 1)
        hwctx->queue_family_tx_index = tx_index;
        hwctx->nb_tx_queues          = qs[tx_index].queueCount;
    }

#undef ADD_QUEUE
    av_free(qs);

    return 0;

fail:
    av_freep(&pc[0].pQueuePriorities);
    av_freep(&pc[1].pQueuePriorities);
    av_freep(&pc[2].pQueuePriorities);
    av_free(qs);
    return AVERROR(ENOMEM);
}
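
/* The net effect of the search above: the graphics family is always used,
 * and dedicated compute/transfer families are preferred when they exist.
 * On a hypothetical device with families {0: graphics+compute+transfer,
 * 1: compute, 2: transfer}, the result would be roughly:
 *
 *     hwctx->queue_family_index      == 0
 *     hwctx->queue_family_comp_index == 1
 *     hwctx->queue_family_tx_index   == 2
 */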

static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                           int queue_family_index, int num_queues)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = num_queues,
    };

    cmd->nb_queues = num_queues;

    /* Create command pool */
    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                              hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
    if (!cmd->bufs)
        return AVERROR(ENOMEM);

    cbuf_create.commandPool = cmd->pool;

    /* Allocate command buffer */
    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        av_freep(&cmd->bufs);
        return AVERROR_EXTERNAL;
    }

    cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
    if (!cmd->queues)
        return AVERROR(ENOMEM);

    for (int i = 0; i < num_queues; i++) {
        VulkanQueueCtx *q = &cmd->queues[i];
        vkGetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
        q->was_synchronous = 1;
    }

    return 0;
}

static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;

    if (cmd->queues) {
        for (int i = 0; i < cmd->nb_queues; i++) {
            VulkanQueueCtx *q = &cmd->queues[i];

            /* Make sure all queues have finished executing */
            if (q->fence && !q->was_synchronous) {
                vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
                vkResetFences(hwctx->act_dev, 1, &q->fence);
            }

            /* Free the fence */
            if (q->fence)
                vkDestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);

            /* Free buffer dependencies */
            for (int j = 0; j < q->nb_buf_deps; j++)
                av_buffer_unref(&q->buf_deps[j]);
            av_free(q->buf_deps);
        }
    }

    if (cmd->bufs)
        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs);
    if (cmd->pool)
        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);

    av_freep(&cmd->queues);
    av_freep(&cmd->bufs);
    cmd->pool = NULL;
}

static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    return cmd->bufs[cmd->cur_queue_idx];
}

static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    for (int j = 0; j < q->nb_buf_deps; j++)
        av_buffer_unref(&q->buf_deps[j]);
    q->nb_buf_deps = 0;
}

static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vkCreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
                            &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                   vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else if (!q->was_synchronous) {
        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &q->fence);
    }

    /* Discard queue dependencies */
    unref_exec_ctx_deps(hwfc, cmd);

    ret = vkBeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                                AVBufferRef * const *deps, int nb_deps)
{
    AVBufferRef **dst;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    if (!deps || !nb_deps)
        return 0;

    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst)
        goto err;

    q->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
        if (!q->buf_deps[q->nb_buf_deps])
            goto err;
        q->nb_buf_deps++;
    }

    return 0;

err:
    unref_exec_ctx_deps(hwfc, cmd);
    return AVERROR(ENOMEM);
}

static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                           VkSubmitInfo *s_info, int synchronous)
{
    VkResult ret;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    ret = vkEndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               vk_ret2str(ret));
        unref_exec_ctx_deps(hwfc, cmd);
        return AVERROR_EXTERNAL;
    }

    s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
    s_info->commandBufferCount = 1;

    ret = vkQueueSubmit(q->queue, 1, s_info, q->fence);
    if (ret != VK_SUCCESS) {
        unref_exec_ctx_deps(hwfc, cmd);
        return AVERROR_EXTERNAL;
    }

    q->was_synchronous = synchronous;

    if (synchronous) {
        AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &q->fence);
        unref_exec_ctx_deps(hwfc, cmd);
    } else { /* Rotate queues */
        cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
    }

    return 0;
}
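
/* The execution-context helpers above form a small record-and-submit
 * pattern; a rough sketch of how a caller inside this file uses them
 * (error handling omitted):
 *
 *     wait_start_exec_ctx(hwfc, ectx);                  // begin command buffer
 *     vkCmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx), ...);
 *     add_buf_dep_exec_ctx(hwfc, ectx, deps, nb_deps);  // keep refs alive
 *     submit_exec_ctx(hwfc, ectx, &s_info, 0);          // async, rotates queues
 */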

static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    vkDestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx) {
        VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
        pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                            hwctx->alloc);
    }

    vkDestroyInstance(hwctx->inst, hwctx->alloc);

    for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
        av_free((void *)hwctx->enabled_inst_extensions[i]);
    av_free((void *)hwctx->enabled_inst_extensions);

    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
        av_free((void *)hwctx->enabled_dev_extensions[i]);
    av_free((void *)hwctx->enabled_dev_extensions);
}

static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VkPhysicalDeviceFeatures dev_features = { 0 };
    VkDeviceQueueCreateInfo queue_create_info[3] = {
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
    };

    VkDeviceCreateInfo dev_info = {
        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pNext                = &hwctx->device_features,
        .pQueueCreateInfos    = queue_create_info,
        .queueCreateInfoCount = 0,
    };

    hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    ctx->free = vulkan_device_free;

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts)))
        goto end;

    /* Find a device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    vkGetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);
#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;
    COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
    COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
    COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
    COPY_FEATURE(hwctx->device_features, shaderInt64)
#undef COPY_FEATURE

    /* Search queue family */
    if ((err = search_queue_families(ctx, &dev_info)))
        goto end;

    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, 0))) {
        av_free((void *)queue_create_info[0].pQueuePriorities);
        av_free((void *)queue_create_info[1].pQueuePriorities);
        av_free((void *)queue_create_info[2].pQueuePriorities);
        goto end;
    }

    ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                         &hwctx->act_dev);

    av_free((void *)queue_create_info[0].pQueuePriorities);
    av_free((void *)queue_create_info[1].pQueuePriorities);
    av_free((void *)queue_create_info[2].pQueuePriorities);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Tiled images setting, use them by default */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

end:
    return err;
}

static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    uint32_t queue_num;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                av_log(ctx, AV_LOG_VERBOSE, "Using device extension %s\n",
                       hwctx->enabled_dev_extensions[i]);
                p->extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    p->props.pNext = &p->hprops;
    p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;

    vkGetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
           p->props.properties.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %li\n",
           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %li\n",
           p->props.properties.limits.minMemoryMapAlignment);
    if (p->extensions & EXT_EXTERNAL_HOST_MEMORY)
        av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %li\n",
               p->hprops.minImportedHostPointerAlignment);

    p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
    if (!queue_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

#define CHECK_QUEUE(type, n)                                                             \
    if (n >= queue_num) {                                                                \
        av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
               type, n, queue_num);                                                      \
        return AVERROR(EINVAL);                                                          \
    }

    CHECK_QUEUE("graphics", hwctx->queue_family_index)
    CHECK_QUEUE("upload",   hwctx->queue_family_tx_index)
    CHECK_QUEUE("compute",  hwctx->queue_family_comp_index)

#undef CHECK_QUEUE

    p->qfs[p->num_qfs++] = hwctx->queue_family_index;
    if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index;
    if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;

    /* Get device capabilities */
    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    return 0;
}

static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}
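
/* The "device" string is interpreted as a GPU index when it is numeric,
 * otherwise as a substring of the device name. A minimal caller-side sketch
 * using the public API:
 *
 *     AVBufferRef *dev_ref = NULL;
 *     av_hwdevice_ctx_create(&dev_ref, AV_HWDEVICE_TYPE_VULKAN,
 *                            "0", NULL, 0);   // first enumerated GPU
 */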

static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx,
                                AVDictionary *opts, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch(src_ctx->type) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;

        const char *vendor = vaQueryVendorString(src_hwctx->display);
        if (!vendor) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
            return AVERROR_EXTERNAL;
        }

        if (strstr(vendor, "Intel"))
            dev_select.vendor_id = 0x8086;
        if (strstr(vendor, "AMD"))
            dev_select.vendor_id = 0x1002;

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
    case AV_HWDEVICE_TYPE_DRM: {
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        drmDevice *drm_dev_info;
        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        count += pixfmt_is_supported(hwctx, i, p->use_linear_images);

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        count++;
#endif

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
            constraints->valid_sw_formats[count++] = i;

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 0;
    constraints->min_height = 0;
    constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
    constraints->max_height = p->props.properties.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}

static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
    VkMemoryAllocateInfo alloc_info = {
        .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext          = alloc_extension,
        .allocationSize = req->size,
    };

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        const VkMemoryType *type = &p->mprops.memoryTypes[i];

        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((type->propertyFlags & req_flags) != req_flags)
            continue;

        /* The memory type must be large enough */
        if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
                           dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
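
/* Callers pass the property flags they need and alloc_mem() picks the first
 * matching memory type (the spec orders types from most to least optimal).
 * A rough sketch of how it is used elsewhere in this file:
 *
 *     alloc_mem(ctx, &req.memoryRequirements,
 *               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,  // or HOST_VISIBLE for linear
 *               NULL, &f->flags, &f->mem[i]);
 */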

static void vulkan_free_internal(AVVkFrameInternal *internal)
{
    if (!internal)
        return;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    av_free(internal);
}

static void vulkan_frame_free(void *opaque, uint8_t *data)
{
    AVVkFrame *f = (AVVkFrame *)data;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    vulkan_free_internal(f->internal);

    for (int i = 0; i < planes; i++) {
        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    for (int i = 0; i < planes; i++) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        if (f->tiling == VK_IMAGE_TILING_LINEAR)
            req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                  p->props.properties.limits.minMemoryMapAlignment);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[i];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[i])))
            return err;

        f->size[i] = req.memoryRequirements.size;
        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[i].image  = f->img[i];
        bind_info[i].memory = f->mem[i];
    }

    /* Bind the allocated memory to the images */
    ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

enum PrepMode {
    PREP_MODE_WRITE,
    PREP_MODE_RO_SHADER,
    PREP_MODE_EXTERNAL_EXPORT,
};

static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    int err;
    uint32_t dst_qf;
    VkImageLayout new_layout;
    VkAccessFlags new_access;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pSignalSemaphores    = frame->sem,
        .signalSemaphoreCount = planes,
    };

    VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
    for (int i = 0; i < planes; i++)
        wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

    switch (pmode) {
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_RO_SHADER:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_READ_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_EXTERNAL_EXPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        s_info.pWaitSemaphores = frame->sem;
        s_info.pWaitDstStageMask = wait_st;
        s_info.waitSemaphoreCount = planes;
        break;
    }

    if ((err = wait_start_exec_ctx(hwfc, ectx)))
        return err;

    /* Change the image layout to something more optimal for writes.
     * This also signals the newly created semaphore, making it usable
     * for synchronization */
    for (int i = 0; i < planes; i++) {
        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[i].srcAccessMask = 0x0;
        img_bar[i].dstAccessMask = new_access;
        img_bar[i].oldLayout = frame->layout[i];
        img_bar[i].newLayout = new_layout;
        img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[i].dstQueueFamilyIndex = dst_qf;
        img_bar[i].image = frame->img[i];
        img_bar[i].subresourceRange.levelCount = 1;
        img_bar[i].subresourceRange.layerCount = 1;
        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[i].newLayout;
        frame->access[i] = img_bar[i].dstAccessMask;
    }

    vkCmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT,
                         0, 0, NULL, 0, NULL, planes, img_bar);

    return submit_exec_ctx(hwfc, ectx, &s_info, 0);
}

static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
                                int frame_w, int frame_h, int plane)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);

    /* Currently always true unless gray + alpha support is added */
    if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
        !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
        *w = frame_w;
        *h = frame_h;
        return;
    }

    *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
    *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
}
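
/* Example of the chroma rounding above: for a 1920x1081 YUV420P frame,
 * plane 0 stays 1920x1081 while planes 1 and 2 become
 * AV_CEIL_RSHIFT(1920, 1) x AV_CEIL_RSHIFT(1081, 1) = 960x541. */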

static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    enum AVPixelFormat format = hwfc->sw_format;
    const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
    const int planes = av_pix_fmt_count_planes(format);

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
    };

    AVVkFrame *f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }

    /* Create the images */
    for (int i = 0; i < planes; i++) {
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = create_pnext,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = img_fmts[i],
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = 1,
            .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
            .tiling                = tiling,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage                 = usage,
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->qfs,
            .queueFamilyIndexCount = p->num_qfs,
            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                      VK_SHARING_MODE_EXCLUSIVE,
        };

        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     format, hwfc->width, hwfc->height, i);

        ret = vkCreateImage(hwctx->act_dev, &create_info,
                            hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Create semaphore */
        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
                                hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   vk_ret2str(ret));
            /* Go through the cleanup path rather than returning directly,
             * so the images and semaphores created so far are not leaked */
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
    }

    f->flags     = 0x0;
    f->tiling    = tiling;

    *frame = f;
    return 0;

fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
    return err;
}
  1373. /* Checks if an export flag is enabled, and if it is ORs it with *iexp */
  1374. static void try_export_flags(AVHWFramesContext *hwfc,
  1375. VkExternalMemoryHandleTypeFlags *comp_handle_types,
  1376. VkExternalMemoryHandleTypeFlagBits *iexp,
  1377. VkExternalMemoryHandleTypeFlagBits exp)
  1378. {
  1379. VkResult ret;
  1380. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1381. AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
  1382. VkExternalImageFormatProperties eprops = {
  1383. .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
  1384. };
  1385. VkImageFormatProperties2 props = {
  1386. .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
  1387. .pNext = &eprops,
  1388. };
  1389. VkPhysicalDeviceExternalImageFormatInfo enext = {
  1390. .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
  1391. .handleType = exp,
  1392. };
  1393. VkPhysicalDeviceImageFormatInfo2 pinfo = {
  1394. .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
  1395. .pNext = !exp ? NULL : &enext,
  1396. .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
  1397. .type = VK_IMAGE_TYPE_2D,
  1398. .tiling = hwctx->tiling,
  1399. .usage = hwctx->usage,
  1400. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1401. };
  1402. ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
  1403. &pinfo, &props);
  1404. if (ret == VK_SUCCESS) {
  1405. *iexp |= exp;
  1406. *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
  1407. }
  1408. }
  1409. static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
  1410. {
  1411. int err;
  1412. AVVkFrame *f;
  1413. AVBufferRef *avbuf = NULL;
  1414. AVHWFramesContext *hwfc = opaque;
  1415. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1416. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1417. VulkanFramesPriv *fp = hwfc->internal->priv;
  1418. VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
  1419. VkExternalMemoryHandleTypeFlags e = 0x0;
  1420. VkExternalMemoryImageCreateInfo eiinfo = {
  1421. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1422. .pNext = hwctx->create_pnext,
  1423. };
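/* Probe which external memory handle types can actually be exported for this image format and usage */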
  1424. if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
  1425. try_export_flags(hwfc, &eiinfo.handleTypes, &e,
  1426. VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
  1427. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1428. try_export_flags(hwfc, &eiinfo.handleTypes, &e,
  1429. VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
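/* Request the supported export handle types for every plane's memory allocation */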
  1430. for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
  1431. eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
  1432. eminfo[i].pNext = hwctx->alloc_pnext[i];
  1433. eminfo[i].handleTypes = e;
  1434. }
  1435. err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
  1436. eiinfo.handleTypes ? &eiinfo : NULL);
  1437. if (err)
  1438. return NULL;
  1439. err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
  1440. if (err)
  1441. goto fail;
  1442. err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
  1443. if (err)
  1444. goto fail;
  1445. avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
  1446. vulkan_frame_free, hwfc, 0);
  1447. if (!avbuf)
  1448. goto fail;
  1449. return avbuf;
  1450. fail:
  1451. vulkan_frame_free(hwfc, (uint8_t *)f);
  1452. return NULL;
  1453. }
  1454. static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
  1455. {
  1456. VulkanFramesPriv *fp = hwfc->internal->priv;
  1457. free_exec_ctx(hwfc, &fp->conv_ctx);
  1458. free_exec_ctx(hwfc, &fp->upload_ctx);
  1459. free_exec_ctx(hwfc, &fp->download_ctx);
  1460. }
  1461. static int vulkan_frames_init(AVHWFramesContext *hwfc)
  1462. {
  1463. int err;
  1464. AVVkFrame *f;
  1465. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1466. VulkanFramesPriv *fp = hwfc->internal->priv;
  1467. AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
  1468. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1469. /* Default pool flags */
  1470. hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
  1471. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1472. if (!hwctx->usage)
  1473. hwctx->usage = DEFAULT_USAGE_FLAGS;
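/* One execution context per task: image layout conversions (compute queue), uploads and downloads (transfer queue) */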
  1474. err = create_exec_ctx(hwfc, &fp->conv_ctx,
  1475. dev_hwctx->queue_family_comp_index,
  1476. GET_QUEUE_COUNT(dev_hwctx, 0, 1, 0));
  1477. if (err)
  1478. return err;
  1479. err = create_exec_ctx(hwfc, &fp->upload_ctx,
  1480. dev_hwctx->queue_family_tx_index,
  1481. GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
  1482. if (err)
  1483. return err;
  1484. err = create_exec_ctx(hwfc, &fp->download_ctx,
  1485. dev_hwctx->queue_family_tx_index, 1);
  1486. if (err)
  1487. return err;
  1488. /* Test to see if allocation will fail */
  1489. err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
  1490. hwctx->create_pnext);
  1491. if (err)
  1492. return err;
  1493. vulkan_frame_free(hwfc, (uint8_t *)f);
1494. /* If the user did not specify a pool, hwfc->pool will be set to the internal one
  1495. * in hwcontext.c just after this gets called */
  1496. if (!hwfc->pool) {
  1497. hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
  1498. hwfc, vulkan_pool_alloc,
  1499. NULL);
  1500. if (!hwfc->internal->pool_internal)
  1501. return AVERROR(ENOMEM);
  1502. }
  1503. return 0;
  1504. }
  1505. static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
  1506. {
  1507. frame->buf[0] = av_buffer_pool_get(hwfc->pool);
  1508. if (!frame->buf[0])
  1509. return AVERROR(ENOMEM);
  1510. frame->data[0] = frame->buf[0]->data;
  1511. frame->format = AV_PIX_FMT_VULKAN;
  1512. frame->width = hwfc->width;
  1513. frame->height = hwfc->height;
  1514. return 0;
  1515. }
  1516. static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
  1517. enum AVHWFrameTransferDirection dir,
  1518. enum AVPixelFormat **formats)
  1519. {
  1520. enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
  1521. if (!fmts)
  1522. return AVERROR(ENOMEM);
  1523. fmts[0] = hwfc->sw_format;
  1524. fmts[1] = AV_PIX_FMT_NONE;
  1525. *formats = fmts;
  1526. return 0;
  1527. }
  1528. typedef struct VulkanMapping {
  1529. AVVkFrame *frame;
  1530. int flags;
  1531. } VulkanMapping;
  1532. static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1533. {
  1534. VulkanMapping *map = hwmap->priv;
  1535. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1536. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1537. /* Check if buffer needs flushing */
  1538. if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
  1539. !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1540. VkResult ret;
  1541. VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1542. for (int i = 0; i < planes; i++) {
  1543. flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1544. flush_ranges[i].memory = map->frame->mem[i];
  1545. flush_ranges[i].size = VK_WHOLE_SIZE;
  1546. }
  1547. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
  1548. flush_ranges);
  1549. if (ret != VK_SUCCESS) {
  1550. av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  1551. vk_ret2str(ret));
  1552. }
  1553. }
  1554. for (int i = 0; i < planes; i++)
  1555. vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
  1556. av_free(map);
  1557. }
  1558. static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  1559. const AVFrame *src, int flags)
  1560. {
  1561. VkResult ret;
  1562. int err, mapped_mem_count = 0;
  1563. AVVkFrame *f = (AVVkFrame *)src->data[0];
  1564. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1565. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1566. VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
  1567. if (!map)
1568. return AVERROR(ENOMEM);
  1569. if (src->format != AV_PIX_FMT_VULKAN) {
  1570. av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
  1571. av_get_pix_fmt_name(src->format));
  1572. err = AVERROR(EINVAL);
  1573. goto fail;
  1574. }
  1575. if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
  1576. !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
  1577. av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
  1578. "and linear!\n");
  1579. err = AVERROR(EINVAL);
  1580. goto fail;
  1581. }
  1582. dst->width = src->width;
  1583. dst->height = src->height;
  1584. for (int i = 0; i < planes; i++) {
  1585. ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
  1586. VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
  1587. if (ret != VK_SUCCESS) {
  1588. av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
  1589. vk_ret2str(ret));
  1590. err = AVERROR_EXTERNAL;
  1591. goto fail;
  1592. }
  1593. mapped_mem_count++;
  1594. }
  1595. /* Check if the memory contents matter */
  1596. if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
  1597. !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1598. VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1599. for (int i = 0; i < planes; i++) {
  1600. map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1601. map_mem_ranges[i].size = VK_WHOLE_SIZE;
  1602. map_mem_ranges[i].memory = f->mem[i];
  1603. }
  1604. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
  1605. map_mem_ranges);
  1606. if (ret != VK_SUCCESS) {
  1607. av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
  1608. vk_ret2str(ret));
  1609. err = AVERROR_EXTERNAL;
  1610. goto fail;
  1611. }
  1612. }
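/* Query each plane's subresource layout to get its row pitch (linesize) */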
  1613. for (int i = 0; i < planes; i++) {
  1614. VkImageSubresource sub = {
  1615. .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  1616. };
  1617. VkSubresourceLayout layout;
  1618. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  1619. dst->linesize[i] = layout.rowPitch;
  1620. }
  1621. map->frame = f;
  1622. map->flags = flags;
  1623. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
  1624. &vulkan_unmap_frame, map);
  1625. if (err < 0)
  1626. goto fail;
  1627. return 0;
  1628. fail:
  1629. for (int i = 0; i < mapped_mem_count; i++)
  1630. vkUnmapMemory(hwctx->act_dev, f->mem[i]);
  1631. av_free(map);
  1632. return err;
  1633. }
  1634. #if CONFIG_LIBDRM
  1635. static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1636. {
  1637. VulkanMapping *map = hwmap->priv;
  1638. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1639. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1640. for (int i = 0; i < planes; i++) {
  1641. vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
  1642. vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
  1643. vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
  1644. }
  1645. av_freep(&map->frame);
  1646. }
  1647. static const struct {
  1648. uint32_t drm_fourcc;
  1649. VkFormat vk_format;
  1650. } vulkan_drm_format_map[] = {
  1651. { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
  1652. { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
  1653. { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
  1654. { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
  1655. { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
  1656. { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
  1657. { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1658. { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1659. { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1660. { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1661. };
  1662. static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
  1663. {
  1664. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  1665. if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
  1666. return vulkan_drm_format_map[i].vk_format;
  1667. return VK_FORMAT_UNDEFINED;
  1668. }
  1669. static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
  1670. AVDRMFrameDescriptor *desc)
  1671. {
  1672. int err = 0;
  1673. VkResult ret;
  1674. AVVkFrame *f;
  1675. int bind_counts = 0;
  1676. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1677. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1678. VulkanDevicePriv *p = ctx->internal->priv;
  1679. VulkanFramesPriv *fp = hwfc->internal->priv;
  1680. AVVulkanFramesContext *frames_hwctx = hwfc->hwctx;
  1681. const int has_modifiers = !!(p->extensions & EXT_DRM_MODIFIER_FLAGS);
  1682. VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
  1683. VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
  1684. VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
  1685. VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
  1686. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
  1687. for (int i = 0; i < desc->nb_layers; i++) {
  1688. if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
  1689. av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
  1690. desc->layers[i].format);
  1691. return AVERROR(EINVAL);
  1692. }
  1693. }
  1694. if (!(f = av_vk_frame_alloc())) {
  1695. av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
  1696. err = AVERROR(ENOMEM);
  1697. goto fail;
  1698. }
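/* Pick the image tiling: DRM format modifier tiling when the extension is available, otherwise infer linear/optimal from the modifier itself */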
  1699. f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
  1700. desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
  1701. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1702. for (int i = 0; i < desc->nb_layers; i++) {
  1703. const int planes = desc->layers[i].nb_planes;
  1704. VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
  1705. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
  1706. .drmFormatModifier = desc->objects[0].format_modifier,
  1707. .drmFormatModifierPlaneCount = planes,
  1708. .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
  1709. };
  1710. VkExternalMemoryImageCreateInfo einfo = {
  1711. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1712. .pNext = has_modifiers ? &drm_info : NULL,
  1713. .handleTypes = htype,
  1714. };
  1715. VkSemaphoreCreateInfo sem_spawn = {
  1716. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  1717. };
  1718. VkImageCreateInfo create_info = {
  1719. .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
  1720. .pNext = &einfo,
  1721. .imageType = VK_IMAGE_TYPE_2D,
  1722. .format = drm_to_vulkan_fmt(desc->layers[i].format),
  1723. .extent.depth = 1,
  1724. .mipLevels = 1,
  1725. .arrayLayers = 1,
  1726. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1727. .tiling = f->tiling,
  1728. .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
  1729. .usage = frames_hwctx->usage,
  1730. .samples = VK_SAMPLE_COUNT_1_BIT,
  1731. .pQueueFamilyIndices = p->qfs,
  1732. .queueFamilyIndexCount = p->num_qfs,
  1733. .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
  1734. VK_SHARING_MODE_EXCLUSIVE,
  1735. };
  1736. get_plane_wh(&create_info.extent.width, &create_info.extent.height,
  1737. hwfc->sw_format, hwfc->width, hwfc->height, i);
  1738. for (int j = 0; j < planes; j++) {
  1739. plane_data[j].offset = desc->layers[i].planes[j].offset;
  1740. plane_data[j].rowPitch = desc->layers[i].planes[j].pitch;
  1741. plane_data[j].size = 0; /* The specs say so for all 3 */
  1742. plane_data[j].arrayPitch = 0;
  1743. plane_data[j].depthPitch = 0;
  1744. }
  1745. /* Create image */
  1746. ret = vkCreateImage(hwctx->act_dev, &create_info,
  1747. hwctx->alloc, &f->img[i]);
  1748. if (ret != VK_SUCCESS) {
  1749. av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
  1750. vk_ret2str(ret));
  1751. err = AVERROR(EINVAL);
  1752. goto fail;
  1753. }
  1754. ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
  1755. hwctx->alloc, &f->sem[i]);
  1756. if (ret != VK_SUCCESS) {
1757. av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
  1758. vk_ret2str(ret));
  1759. return AVERROR_EXTERNAL;
  1760. }
1761. /* Ideally we'd import a semaphore into the one we created using
1762. * vkImportSemaphoreFdKHR, but unfortunately neither DRM nor VAAPI
1763. * offers us anything we could import and sync with, so instead
1764. * we just signal the semaphore we created. */
  1765. f->layout[i] = create_info.initialLayout;
  1766. f->access[i] = 0x0;
  1767. }
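/* Import each DRM object's dmabuf FD as Vulkan device memory */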
  1768. for (int i = 0; i < desc->nb_objects; i++) {
  1769. int use_ded_mem = 0;
  1770. VkMemoryFdPropertiesKHR fdmp = {
  1771. .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
  1772. };
  1773. VkMemoryRequirements req = {
  1774. .size = desc->objects[i].size,
  1775. };
  1776. VkImportMemoryFdInfoKHR idesc = {
  1777. .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
  1778. .handleType = htype,
  1779. .fd = dup(desc->objects[i].fd),
  1780. };
  1781. VkMemoryDedicatedAllocateInfo ded_alloc = {
  1782. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
  1783. .pNext = &idesc,
  1784. };
  1785. ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
  1786. idesc.fd, &fdmp);
  1787. if (ret != VK_SUCCESS) {
  1788. av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
  1789. vk_ret2str(ret));
  1790. err = AVERROR_EXTERNAL;
  1791. close(idesc.fd);
  1792. goto fail;
  1793. }
  1794. req.memoryTypeBits = fdmp.memoryTypeBits;
  1795. /* Dedicated allocation only makes sense if there's a one to one mapping
  1796. * between images and the memory backing them, so only check in this
  1797. * case. */
  1798. if (desc->nb_layers == desc->nb_objects) {
  1799. VkImageMemoryRequirementsInfo2 req_desc = {
  1800. .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
  1801. .image = f->img[i],
  1802. };
  1803. VkMemoryDedicatedRequirements ded_req = {
  1804. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
  1805. };
  1806. VkMemoryRequirements2 req2 = {
  1807. .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
  1808. .pNext = &ded_req,
  1809. };
  1810. vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);
  1811. use_ded_mem = ded_req.prefersDedicatedAllocation |
  1812. ded_req.requiresDedicatedAllocation;
  1813. if (use_ded_mem)
  1814. ded_alloc.image = f->img[i];
  1815. }
  1816. err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
  1817. use_ded_mem ? &ded_alloc : ded_alloc.pNext,
  1818. &f->flags, &f->mem[i]);
  1819. if (err) {
  1820. close(idesc.fd);
  1821. return err;
  1822. }
  1823. f->size[i] = desc->objects[i].size;
  1824. }
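/* Prepare the image/memory binding info; per-plane memory aspects are attached when multi-plane DRM format modifiers are used */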
  1825. for (int i = 0; i < desc->nb_layers; i++) {
  1826. const int planes = desc->layers[i].nb_planes;
  1827. const int signal_p = has_modifiers && (planes > 1);
  1828. for (int j = 0; j < planes; j++) {
  1829. VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  1830. j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
  1831. VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
  1832. plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
  1833. plane_info[bind_counts].planeAspect = aspect;
  1834. bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
  1835. bind_info[bind_counts].pNext = signal_p ? &plane_info[bind_counts] : NULL;
  1836. bind_info[bind_counts].image = f->img[i];
  1837. bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
  1838. bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
  1839. bind_counts++;
  1840. }
  1841. }
  1842. /* Bind the allocated memory to the images */
  1843. ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
  1844. if (ret != VK_SUCCESS) {
  1845. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
  1846. vk_ret2str(ret));
  1847. return AVERROR_EXTERNAL;
  1848. }
1849. /* NOTE: This becomes completely unnecessary once we can import semaphores
1850. * from DRM. Until then, we have to activate the semaphores ourselves.
1851. * We're reusing the exec context that's also used for uploads/downloads. */
  1852. err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_RO_SHADER);
  1853. if (err)
  1854. goto fail;
  1855. *frame = f;
  1856. return 0;
  1857. fail:
  1858. for (int i = 0; i < desc->nb_layers; i++) {
  1859. vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
  1860. vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
  1861. }
  1862. for (int i = 0; i < desc->nb_objects; i++)
  1863. vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
  1864. av_free(f);
  1865. return err;
  1866. }
  1867. static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  1868. const AVFrame *src, int flags)
  1869. {
  1870. int err = 0;
  1871. AVVkFrame *f;
  1872. VulkanMapping *map = NULL;
  1873. err = vulkan_map_from_drm_frame_desc(hwfc, &f,
  1874. (AVDRMFrameDescriptor *)src->data[0]);
  1875. if (err)
  1876. return err;
  1877. /* The unmapping function will free this */
  1878. dst->data[0] = (uint8_t *)f;
  1879. dst->width = src->width;
  1880. dst->height = src->height;
  1881. map = av_mallocz(sizeof(VulkanMapping));
  1882. if (!map)
1883. { err = AVERROR(ENOMEM); goto fail; }
  1884. map->frame = f;
  1885. map->flags = flags;
  1886. err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
  1887. &vulkan_unmap_from, map);
  1888. if (err < 0)
  1889. goto fail;
  1890. av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
  1891. return 0;
  1892. fail:
1893. vulkan_frame_free(hwfc, (uint8_t *)f);
  1894. av_free(map);
  1895. return err;
  1896. }
  1897. #if CONFIG_VAAPI
  1898. static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
  1899. AVFrame *dst, const AVFrame *src,
  1900. int flags)
  1901. {
  1902. int err;
  1903. AVFrame *tmp = av_frame_alloc();
  1904. AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  1905. AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
  1906. VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
  1907. if (!tmp)
  1908. return AVERROR(ENOMEM);
1909. /* We have to sync since, as the previous comment explains, there are no semaphores to wait on */
  1910. vaSyncSurface(vaapi_ctx->display, surface_id);
  1911. tmp->format = AV_PIX_FMT_DRM_PRIME;
  1912. err = av_hwframe_map(tmp, src, flags);
  1913. if (err < 0)
  1914. goto fail;
  1915. err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
  1916. if (err < 0)
  1917. goto fail;
  1918. err = ff_hwframe_map_replace(dst, src);
  1919. fail:
  1920. av_frame_free(&tmp);
  1921. return err;
  1922. }
  1923. #endif
  1924. #endif
  1925. #if CONFIG_CUDA
  1926. static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
  1927. AVBufferRef *cuda_hwfc,
  1928. const AVFrame *frame)
  1929. {
  1930. int err;
  1931. VkResult ret;
  1932. AVVkFrame *dst_f;
  1933. AVVkFrameInternal *dst_int;
  1934. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1935. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1936. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1937. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1938. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  1939. VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
  1940. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
  1941. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1942. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1943. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1944. CudaFunctions *cu = cu_internal->cuda_dl;
  1945. CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
  1946. CU_AD_FORMAT_UNSIGNED_INT8;
  1947. dst_f = (AVVkFrame *)frame->data[0];
  1948. dst_int = dst_f->internal;
  1949. if (!dst_int || !dst_int->cuda_fc_ref) {
  1950. if (!dst_f->internal)
  1951. dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
  1952. if (!dst_int) {
  1953. err = AVERROR(ENOMEM);
  1954. goto fail;
  1955. }
  1956. dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
  1957. if (!dst_int->cuda_fc_ref) {
  1958. err = AVERROR(ENOMEM);
  1959. goto fail;
  1960. }
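/* Export each plane's memory and semaphore as opaque FDs and import them into CUDA */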
  1961. for (int i = 0; i < planes; i++) {
  1962. CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
  1963. .offset = 0,
  1964. .arrayDesc = {
  1965. .Depth = 0,
  1966. .Format = cufmt,
  1967. .NumChannels = 1 + ((planes == 2) && i),
  1968. .Flags = 0,
  1969. },
  1970. .numLevels = 1,
  1971. };
  1972. CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
  1973. .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
  1974. .size = dst_f->size[i],
  1975. };
  1976. VkMemoryGetFdInfoKHR export_info = {
  1977. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  1978. .memory = dst_f->mem[i],
  1979. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
  1980. };
  1981. VkSemaphoreGetFdInfoKHR sem_export = {
  1982. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
  1983. .semaphore = dst_f->sem[i],
  1984. .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
  1985. };
  1986. CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
  1987. .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
  1988. };
  1989. int p_w, p_h;
  1990. get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
  1991. tex_desc.arrayDesc.Width = p_w;
  1992. tex_desc.arrayDesc.Height = p_h;
  1993. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  1994. &ext_desc.handle.fd);
  1995. if (ret != VK_SUCCESS) {
  1996. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
  1997. err = AVERROR_EXTERNAL;
  1998. goto fail;
  1999. }
  2000. ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
  2001. if (ret < 0) {
  2002. err = AVERROR_EXTERNAL;
  2003. goto fail;
  2004. }
  2005. ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
  2006. dst_int->ext_mem[i],
  2007. &tex_desc));
  2008. if (ret < 0) {
  2009. err = AVERROR_EXTERNAL;
  2010. goto fail;
  2011. }
  2012. ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
  2013. dst_int->cu_mma[i], 0));
  2014. if (ret < 0) {
  2015. err = AVERROR_EXTERNAL;
  2016. goto fail;
  2017. }
  2018. ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
  2019. &ext_sem_desc.handle.fd);
  2020. if (ret != VK_SUCCESS) {
  2021. av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
  2022. vk_ret2str(ret));
  2023. err = AVERROR_EXTERNAL;
  2024. goto fail;
  2025. }
  2026. ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
  2027. &ext_sem_desc));
  2028. if (ret < 0) {
  2029. err = AVERROR_EXTERNAL;
  2030. goto fail;
  2031. }
  2032. }
  2033. }
  2034. return 0;
  2035. fail:
  2036. return err;
  2037. }
  2038. static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
  2039. AVFrame *dst, const AVFrame *src)
  2040. {
  2041. int err;
  2042. VkResult ret;
  2043. CUcontext dummy;
  2044. AVVkFrame *dst_f;
  2045. AVVkFrameInternal *dst_int;
  2046. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2047. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  2048. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  2049. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  2050. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  2051. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  2052. CudaFunctions *cu = cu_internal->cuda_dl;
  2053. CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
  2054. CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
  2055. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  2056. if (ret < 0)
  2057. return AVERROR_EXTERNAL;
  2058. dst_f = (AVVkFrame *)dst->data[0];
  2059. ret = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
  2060. if (ret < 0) {
  2061. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2062. return ret;
  2063. }
  2064. dst_int = dst_f->internal;
  2065. ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
  2066. planes, cuda_dev->stream));
  2067. if (ret < 0) {
  2068. err = AVERROR_EXTERNAL;
  2069. goto fail;
  2070. }
  2071. for (int i = 0; i < planes; i++) {
  2072. CUDA_MEMCPY2D cpy = {
  2073. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  2074. .srcDevice = (CUdeviceptr)src->data[i],
  2075. .srcPitch = src->linesize[i],
  2076. .srcY = 0,
  2077. .dstMemoryType = CU_MEMORYTYPE_ARRAY,
  2078. .dstArray = dst_int->cu_array[i],
  2079. };
  2080. int p_w, p_h;
  2081. get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
  2082. cpy.WidthInBytes = p_w * desc->comp[i].step;
  2083. cpy.Height = p_h;
  2084. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  2085. if (ret < 0) {
  2086. err = AVERROR_EXTERNAL;
  2087. goto fail;
  2088. }
  2089. }
  2090. ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
  2091. planes, cuda_dev->stream));
  2092. if (ret < 0) {
  2093. err = AVERROR_EXTERNAL;
  2094. goto fail;
  2095. }
  2096. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2097. av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
  2098. return 0;
  2099. fail:
  2100. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2101. vulkan_free_internal(dst_int);
  2102. dst_f->internal = NULL;
  2103. av_buffer_unref(&dst->buf[0]);
  2104. return err;
  2105. }
  2106. #endif
  2107. static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
  2108. const AVFrame *src, int flags)
  2109. {
  2110. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2111. switch (src->format) {
  2112. #if CONFIG_LIBDRM
  2113. #if CONFIG_VAAPI
  2114. case AV_PIX_FMT_VAAPI:
  2115. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2116. return vulkan_map_from_vaapi(hwfc, dst, src, flags);
  2117. #endif
  2118. case AV_PIX_FMT_DRM_PRIME:
  2119. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2120. return vulkan_map_from_drm(hwfc, dst, src, flags);
  2121. #endif
  2122. default:
  2123. return AVERROR(ENOSYS);
  2124. }
  2125. }
  2126. #if CONFIG_LIBDRM
  2127. typedef struct VulkanDRMMapping {
  2128. AVDRMFrameDescriptor drm_desc;
  2129. AVVkFrame *source;
  2130. } VulkanDRMMapping;
  2131. static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  2132. {
  2133. AVDRMFrameDescriptor *drm_desc = hwmap->priv;
  2134. for (int i = 0; i < drm_desc->nb_objects; i++)
  2135. close(drm_desc->objects[i].fd);
  2136. av_free(drm_desc);
  2137. }
  2138. static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
  2139. {
  2140. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  2141. if (vulkan_drm_format_map[i].vk_format == vkfmt)
  2142. return vulkan_drm_format_map[i].drm_fourcc;
  2143. return DRM_FORMAT_INVALID;
  2144. }
  2145. static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  2146. const AVFrame *src, int flags)
  2147. {
  2148. int err = 0;
  2149. VkResult ret;
  2150. AVVkFrame *f = (AVVkFrame *)src->data[0];
  2151. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2152. VulkanFramesPriv *fp = hwfc->internal->priv;
  2153. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  2154. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2155. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  2156. VkImageDrmFormatModifierPropertiesEXT drm_mod = {
  2157. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
  2158. };
  2159. AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
  2160. if (!drm_desc)
  2161. return AVERROR(ENOMEM);
  2162. err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT);
  2163. if (err < 0)
  2164. goto end;
  2165. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
  2166. if (err < 0)
  2167. goto end;
  2168. if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
  2169. VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
  2170. ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
  2171. &drm_mod);
  2172. if (ret != VK_SUCCESS) {
  2173. av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
  2174. err = AVERROR_EXTERNAL;
  2175. goto end;
  2176. }
  2177. }
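/* Export a DMA-BUF FD for every memory object backing the image */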
  2178. for (int i = 0; (i < planes) && (f->mem[i]); i++) {
  2179. VkMemoryGetFdInfoKHR export_info = {
  2180. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  2181. .memory = f->mem[i],
  2182. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
  2183. };
  2184. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  2185. &drm_desc->objects[i].fd);
  2186. if (ret != VK_SUCCESS) {
  2187. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
  2188. err = AVERROR_EXTERNAL;
  2189. goto end;
  2190. }
  2191. drm_desc->nb_objects++;
  2192. drm_desc->objects[i].size = f->size[i];
  2193. drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
  2194. }
  2195. drm_desc->nb_layers = planes;
  2196. for (int i = 0; i < drm_desc->nb_layers; i++) {
  2197. VkSubresourceLayout layout;
  2198. VkImageSubresource sub = {
  2199. .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
  2200. VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  2201. VK_IMAGE_ASPECT_COLOR_BIT,
  2202. };
  2203. VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
  2204. drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt);
  2205. drm_desc->layers[i].nb_planes = 1;
  2206. if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
  2207. av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
  2208. err = AVERROR_PATCHWELCOME;
  2209. goto end;
  2210. }
  2211. drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
  2212. if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
  2213. continue;
  2214. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  2215. drm_desc->layers[i].planes[0].offset = layout.offset;
  2216. drm_desc->layers[i].planes[0].pitch = layout.rowPitch;
  2217. }
  2218. dst->width = src->width;
  2219. dst->height = src->height;
  2220. dst->data[0] = (uint8_t *)drm_desc;
  2221. av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
  2222. return 0;
  2223. end:
  2224. av_free(drm_desc);
  2225. return err;
  2226. }
  2227. #if CONFIG_VAAPI
  2228. static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
  2229. const AVFrame *src, int flags)
  2230. {
  2231. int err;
  2232. AVFrame *tmp = av_frame_alloc();
  2233. if (!tmp)
  2234. return AVERROR(ENOMEM);
  2235. tmp->format = AV_PIX_FMT_DRM_PRIME;
  2236. err = vulkan_map_to_drm(hwfc, tmp, src, flags);
  2237. if (err < 0)
  2238. goto fail;
  2239. err = av_hwframe_map(dst, tmp, flags);
  2240. if (err < 0)
  2241. goto fail;
  2242. err = ff_hwframe_map_replace(dst, src);
  2243. fail:
  2244. av_frame_free(&tmp);
  2245. return err;
  2246. }
  2247. #endif
  2248. #endif
  2249. static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2250. const AVFrame *src, int flags)
  2251. {
  2252. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2253. switch (dst->format) {
  2254. #if CONFIG_LIBDRM
  2255. case AV_PIX_FMT_DRM_PRIME:
  2256. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2257. return vulkan_map_to_drm(hwfc, dst, src, flags);
  2258. #if CONFIG_VAAPI
  2259. case AV_PIX_FMT_VAAPI:
  2260. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2261. return vulkan_map_to_vaapi(hwfc, dst, src, flags);
  2262. #endif
  2263. #endif
  2264. default:
  2265. return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
  2266. }
  2267. }
  2268. typedef struct ImageBuffer {
  2269. VkBuffer buf;
  2270. VkDeviceMemory mem;
  2271. VkMemoryPropertyFlagBits flags;
  2272. int mapped_mem;
  2273. } ImageBuffer;
  2274. static void free_buf(void *opaque, uint8_t *data)
  2275. {
  2276. AVHWDeviceContext *ctx = opaque;
  2277. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2278. ImageBuffer *vkbuf = (ImageBuffer *)data;
  2279. if (vkbuf->buf)
  2280. vkDestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc);
  2281. if (vkbuf->mem)
  2282. vkFreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc);
  2283. av_free(data);
  2284. }
  2285. static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
  2286. {
  2287. size_t size;
  2288. *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
  2289. size = height*(*stride);
  2290. size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment);
  2291. return size;
  2292. }
  2293. static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
  2294. VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
  2295. size_t size, uint32_t req_memory_bits, int host_mapped,
  2296. void *create_pnext, void *alloc_pnext)
  2297. {
  2298. int err;
  2299. VkResult ret;
  2300. int use_ded_mem;
  2301. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2302. VkBufferCreateInfo buf_spawn = {
  2303. .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
  2304. .pNext = create_pnext,
  2305. .usage = usage,
  2306. .size = size,
  2307. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  2308. };
  2309. VkBufferMemoryRequirementsInfo2 req_desc = {
  2310. .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
  2311. };
  2312. VkMemoryDedicatedAllocateInfo ded_alloc = {
  2313. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
  2314. .pNext = alloc_pnext,
  2315. };
  2316. VkMemoryDedicatedRequirements ded_req = {
  2317. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
  2318. };
  2319. VkMemoryRequirements2 req = {
  2320. .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
  2321. .pNext = &ded_req,
  2322. };
  2323. ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf));
  2324. if (!vkbuf)
  2325. return AVERROR(ENOMEM);
  2326. vkbuf->mapped_mem = host_mapped;
  2327. ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf);
  2328. if (ret != VK_SUCCESS) {
  2329. av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
  2330. vk_ret2str(ret));
  2331. err = AVERROR_EXTERNAL;
  2332. goto fail;
  2333. }
  2334. req_desc.buffer = vkbuf->buf;
  2335. vkGetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
  2336. /* In case the implementation prefers/requires dedicated allocation */
  2337. use_ded_mem = ded_req.prefersDedicatedAllocation |
  2338. ded_req.requiresDedicatedAllocation;
  2339. if (use_ded_mem)
  2340. ded_alloc.buffer = vkbuf->buf;
  2341. /* Additional requirements imposed on us */
  2342. if (req_memory_bits)
  2343. req.memoryRequirements.memoryTypeBits &= req_memory_bits;
  2344. err = alloc_mem(ctx, &req.memoryRequirements, flags,
  2345. use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
  2346. &vkbuf->flags, &vkbuf->mem);
  2347. if (err)
  2348. goto fail;
  2349. ret = vkBindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0);
  2350. if (ret != VK_SUCCESS) {
  2351. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
  2352. vk_ret2str(ret));
  2353. err = AVERROR_EXTERNAL;
  2354. goto fail;
  2355. }
  2356. *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0);
  2357. if (!(*buf)) {
  2358. err = AVERROR(ENOMEM);
  2359. goto fail;
  2360. }
  2361. return 0;
  2362. fail:
  2363. free_buf(ctx, (uint8_t *)vkbuf);
  2364. return err;
  2365. }
  2366. /* Skips mapping of host mapped buffers but still invalidates them */
  2367. static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[],
  2368. int nb_buffers, int invalidate)
  2369. {
  2370. VkResult ret;
  2371. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2372. VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
  2373. int invalidate_count = 0;
  2374. for (int i = 0; i < nb_buffers; i++) {
  2375. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2376. if (vkbuf->mapped_mem)
  2377. continue;
  2378. ret = vkMapMemory(hwctx->act_dev, vkbuf->mem, 0,
  2379. VK_WHOLE_SIZE, 0, (void **)&mem[i]);
  2380. if (ret != VK_SUCCESS) {
  2381. av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
  2382. vk_ret2str(ret));
  2383. return AVERROR_EXTERNAL;
  2384. }
  2385. }
  2386. if (!invalidate)
  2387. return 0;
  2388. for (int i = 0; i < nb_buffers; i++) {
  2389. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2390. const VkMappedMemoryRange ival_buf = {
  2391. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2392. .memory = vkbuf->mem,
  2393. .size = VK_WHOLE_SIZE,
  2394. };
  2395. /* For host imported memory Vulkan says to use platform-defined
  2396. * sync methods, but doesn't really say not to call flush or invalidate
2397. * on the original host pointers. It does explicitly allow doing that on
  2398. * host-mapped pointers which are then mapped again using vkMapMemory,
  2399. * but known implementations return the original pointers when mapped
  2400. * again. */
  2401. if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2402. continue;
  2403. invalidate_ctx[invalidate_count++] = ival_buf;
  2404. }
  2405. if (invalidate_count) {
  2406. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
  2407. invalidate_ctx);
  2408. if (ret != VK_SUCCESS)
  2409. av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
  2410. vk_ret2str(ret));
  2411. }
  2412. return 0;
  2413. }
  2414. static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
  2415. int nb_buffers, int flush)
  2416. {
  2417. int err = 0;
  2418. VkResult ret;
  2419. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2420. VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
  2421. int flush_count = 0;
  2422. if (flush) {
  2423. for (int i = 0; i < nb_buffers; i++) {
  2424. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2425. const VkMappedMemoryRange flush_buf = {
  2426. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2427. .memory = vkbuf->mem,
  2428. .size = VK_WHOLE_SIZE,
  2429. };
  2430. if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2431. continue;
  2432. flush_ctx[flush_count++] = flush_buf;
  2433. }
  2434. }
  2435. if (flush_count) {
  2436. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
  2437. if (ret != VK_SUCCESS) {
  2438. av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  2439. vk_ret2str(ret));
  2440. err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
  2441. }
  2442. }
  2443. for (int i = 0; i < nb_buffers; i++) {
  2444. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2445. if (vkbuf->mapped_mem)
  2446. continue;
  2447. vkUnmapMemory(hwctx->act_dev, vkbuf->mem);
  2448. }
  2449. return err;
  2450. }
  2451. static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
  2452. AVBufferRef **bufs, size_t *buf_offsets,
  2453. const int *buf_stride, int w,
  2454. int h, enum AVPixelFormat pix_fmt, int to_buf)
  2455. {
  2456. int err;
  2457. AVVkFrame *frame = (AVVkFrame *)f->data[0];
  2458. VulkanFramesPriv *fp = hwfc->internal->priv;
  2459. int bar_num = 0;
  2460. VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
  2461. const int planes = av_pix_fmt_count_planes(pix_fmt);
  2462. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
  2463. VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
  2464. VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx;
  2465. VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx);
  2466. VkSubmitInfo s_info = {
  2467. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  2468. .pSignalSemaphores = frame->sem,
  2469. .pWaitSemaphores = frame->sem,
  2470. .pWaitDstStageMask = sem_wait_dst,
  2471. .signalSemaphoreCount = planes,
  2472. .waitSemaphoreCount = planes,
  2473. };
  2474. if ((err = wait_start_exec_ctx(hwfc, ectx)))
  2475. return err;
  2476. /* Change the image layout to something more optimal for transfers */
  2477. for (int i = 0; i < planes; i++) {
  2478. VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
  2479. VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  2480. VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
  2481. VK_ACCESS_TRANSFER_WRITE_BIT;
  2482. sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
  2483. /* If the layout matches and we have read access skip the barrier */
  2484. if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
  2485. continue;
  2486. img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  2487. img_bar[bar_num].srcAccessMask = 0x0;
  2488. img_bar[bar_num].dstAccessMask = new_access;
  2489. img_bar[bar_num].oldLayout = frame->layout[i];
  2490. img_bar[bar_num].newLayout = new_layout;
  2491. img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2492. img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2493. img_bar[bar_num].image = frame->img[i];
  2494. img_bar[bar_num].subresourceRange.levelCount = 1;
  2495. img_bar[bar_num].subresourceRange.layerCount = 1;
  2496. img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  2497. frame->layout[i] = img_bar[bar_num].newLayout;
  2498. frame->access[i] = img_bar[bar_num].dstAccessMask;
  2499. bar_num++;
  2500. }
  2501. if (bar_num)
  2502. vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
  2503. VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
  2504. 0, NULL, 0, NULL, bar_num, img_bar);
  2505. /* Schedule a copy for each plane */
  2506. for (int i = 0; i < planes; i++) {
  2507. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2508. VkBufferImageCopy buf_reg = {
  2509. .bufferOffset = buf_offsets[i],
  2510. .bufferRowLength = buf_stride[i] / desc->comp[i].step,
  2511. .imageSubresource.layerCount = 1,
  2512. .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  2513. .imageOffset = { 0, 0, 0, },
  2514. };
  2515. int p_w, p_h;
  2516. get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);
  2517. buf_reg.bufferImageHeight = p_h;
  2518. buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
  2519. if (to_buf)
  2520. vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
  2521. vkbuf->buf, 1, &buf_reg);
  2522. else
  2523. vkCmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
  2524. frame->layout[i], 1, &buf_reg);
  2525. }
  2526. /* When uploading, do this asynchronously if the source is refcounted by
  2527. * keeping the buffers as a submission dependency.
2528. * The hwcontext is guaranteed not to be freed until all frames are freed
2529. * in the frames_uninit function.
  2530. * When downloading to buffer, do this synchronously and wait for the
  2531. * queue submission to finish executing */
  2532. if (!to_buf) {
  2533. int ref;
  2534. for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
  2535. if (!f->buf[ref])
  2536. break;
  2537. if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
  2538. return err;
  2539. }
  2540. if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
  2541. return err;
  2542. return submit_exec_ctx(hwfc, ectx, &s_info, !ref);
  2543. } else {
  2544. return submit_exec_ctx(hwfc, ectx, &s_info, 1);
  2545. }
  2546. }
  2547. static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
  2548. const AVFrame *swf, int from)
  2549. {
  2550. int err = 0;
  2551. VkResult ret;
  2552. AVVkFrame *f = (AVVkFrame *)vkf->data[0];
  2553. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2554. AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
  2555. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2556. AVFrame tmp;
  2557. AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
  2558. size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };
  2559. int p_w, p_h;
  2560. const int planes = av_pix_fmt_count_planes(swf->format);
  2561. int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
  2562. const int map_host = !!(p->extensions & EXT_EXTERNAL_HOST_MEMORY);
  2563. VK_LOAD_PFN(hwctx->inst, vkGetMemoryHostPointerPropertiesEXT);
  2564. if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
  2565. av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
  2566. return AVERROR(EINVAL);
  2567. }
  2568. if (swf->width > hwfc->width || swf->height > hwfc->height)
  2569. return AVERROR(EINVAL);
2570. /* For linear, host-visible images */
  2571. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2572. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2573. AVFrame *map = av_frame_alloc();
  2574. if (!map)
  2575. return AVERROR(ENOMEM);
  2576. map->format = swf->format;
  2577. err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE);
  2578. if (err)
  2579. return err;
  2580. err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf);
  2581. av_frame_free(&map);
  2582. return err;
  2583. }
  2584. /* Create buffers */
  2585. for (int i = 0; i < planes; i++) {
  2586. size_t req_size;
  2587. VkExternalMemoryBufferCreateInfo create_desc = {
  2588. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
  2589. .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
  2590. };
  2591. VkImportMemoryHostPointerInfoEXT import_desc = {
  2592. .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
  2593. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
  2594. };
  2595. VkMemoryHostPointerPropertiesEXT p_props = {
  2596. .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
  2597. };
  2598. get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
  2599. tmp.linesize[i] = FFABS(swf->linesize[i]);
2600. /* Do not host-map planes with a negative stride */
  2601. if (map_host && swf->linesize[i] > 0) {
  2602. size_t offs;
  2603. offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
  2604. import_desc.pHostPointer = swf->data[i] - offs;
  2605. /* We have to compensate for the few extra bytes of padding we
  2606. * completely ignore at the start */
  2607. req_size = FFALIGN(offs + tmp.linesize[i] * p_h,
  2608. p->hprops.minImportedHostPointerAlignment);
  2609. ret = pfn_vkGetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
  2610. import_desc.handleType,
  2611. import_desc.pHostPointer,
  2612. &p_props);
  2613. if (ret == VK_SUCCESS) {
  2614. host_mapped[i] = 1;
  2615. buf_offsets[i] = offs;
  2616. }
  2617. }
  2618. if (!host_mapped[i])
  2619. req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);
  2620. err = create_buf(dev_ctx, &bufs[i],
  2621. from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
  2622. VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
  2623. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
  2624. req_size, p_props.memoryTypeBits, host_mapped[i],
  2625. host_mapped[i] ? &create_desc : NULL,
  2626. host_mapped[i] ? &import_desc : NULL);
  2627. if (err)
  2628. goto end;
  2629. }
  2630. if (!from) {
  2631. /* Map, copy image to buffer, unmap */
  2632. if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
  2633. goto end;
  2634. for (int i = 0; i < planes; i++) {
  2635. if (host_mapped[i])
  2636. continue;
  2637. get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
  2638. av_image_copy_plane(tmp.data[i], tmp.linesize[i],
  2639. (const uint8_t *)swf->data[i], swf->linesize[i],
  2640. FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
  2641. p_h);
  2642. }
  2643. if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
  2644. goto end;
  2645. }
  2646. /* Copy buffers into/from image */
  2647. err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
  2648. swf->width, swf->height, swf->format, from);
  2649. if (from) {
  2650. /* Map, copy image to buffer, unmap */
  2651. if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
  2652. goto end;
  2653. for (int i = 0; i < planes; i++) {
  2654. if (host_mapped[i])
  2655. continue;
  2656. get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
  2657. av_image_copy_plane(swf->data[i], swf->linesize[i],
  2658. (const uint8_t *)tmp.data[i], tmp.linesize[i],
  2659. FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
  2660. p_h);
  2661. }
  2662. if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
  2663. goto end;
  2664. }
  2665. end:
  2666. for (int i = 0; i < planes; i++)
  2667. av_buffer_unref(&bufs[i]);
  2668. return err;
  2669. }
  2670. static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
  2671. const AVFrame *src)
  2672. {
  2673. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2674. switch (src->format) {
  2675. #if CONFIG_CUDA
  2676. case AV_PIX_FMT_CUDA:
  2677. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2678. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2679. return vulkan_transfer_data_from_cuda(hwfc, dst, src);
  2680. #endif
  2681. default:
  2682. if (src->hw_frames_ctx)
  2683. return AVERROR(ENOSYS);
  2684. else
  2685. return vulkan_transfer_data(hwfc, dst, src, 0);
  2686. }
  2687. }
  2688. #if CONFIG_CUDA
  2689. static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
  2690. const AVFrame *src)
  2691. {
  2692. int err;
  2693. VkResult ret;
  2694. CUcontext dummy;
  2695. AVVkFrame *dst_f;
  2696. AVVkFrameInternal *dst_int;
  2697. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2698. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  2699. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
  2700. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  2701. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  2702. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  2703. CudaFunctions *cu = cu_internal->cuda_dl;
  2704. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  2705. if (ret < 0)
  2706. return AVERROR_EXTERNAL;
  2707. dst_f = (AVVkFrame *)src->data[0];
  2708. err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
  2709. if (err < 0) {
  2710. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2711. return err;
  2712. }
  2713. dst_int = dst_f->internal;
  2714. for (int i = 0; i < planes; i++) {
  2715. CUDA_MEMCPY2D cpy = {
  2716. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  2717. .dstDevice = (CUdeviceptr)dst->data[i],
  2718. .dstPitch = dst->linesize[i],
  2719. .dstY = 0,
  2720. .srcMemoryType = CU_MEMORYTYPE_ARRAY,
  2721. .srcArray = dst_int->cu_array[i],
  2722. };
  2723. int w, h;
  2724. get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);
  2725. cpy.WidthInBytes = w * desc->comp[i].step;
  2726. cpy.Height = h;
  2727. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  2728. if (ret < 0) {
  2729. err = AVERROR_EXTERNAL;
  2730. goto fail;
  2731. }
  2732. }
  2733. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2734. av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
  2735. return 0;
  2736. fail:
  2737. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2738. vulkan_free_internal(dst_int);
  2739. dst_f->internal = NULL;
  2740. av_buffer_unref(&dst->buf[0]);
  2741. return err;
  2742. }
  2743. #endif
  2744. static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2745. const AVFrame *src)
  2746. {
  2747. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2748. switch (dst->format) {
  2749. #if CONFIG_CUDA
  2750. case AV_PIX_FMT_CUDA:
  2751. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2752. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2753. return vulkan_transfer_data_to_cuda(hwfc, dst, src);
  2754. #endif
  2755. default:
  2756. if (dst->hw_frames_ctx)
  2757. return AVERROR(ENOSYS);
  2758. else
  2759. return vulkan_transfer_data(hwfc, src, dst, 1);
  2760. }
  2761. }
  2762. static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
  2763. AVHWFramesContext *src_fc, int flags)
  2764. {
  2765. return vulkan_frames_init(dst_fc);
  2766. }
  2767. AVVkFrame *av_vk_frame_alloc(void)
  2768. {
  2769. return av_mallocz(sizeof(AVVkFrame));
  2770. }
  2771. const HWContextType ff_hwcontext_type_vulkan = {
  2772. .type = AV_HWDEVICE_TYPE_VULKAN,
  2773. .name = "Vulkan",
  2774. .device_hwctx_size = sizeof(AVVulkanDeviceContext),
  2775. .device_priv_size = sizeof(VulkanDevicePriv),
  2776. .frames_hwctx_size = sizeof(AVVulkanFramesContext),
  2777. .frames_priv_size = sizeof(VulkanFramesPriv),
  2778. .device_init = &vulkan_device_init,
  2779. .device_create = &vulkan_device_create,
  2780. .device_derive = &vulkan_device_derive,
  2781. .frames_get_constraints = &vulkan_frames_get_constraints,
  2782. .frames_init = vulkan_frames_init,
  2783. .frames_get_buffer = vulkan_get_buffer,
  2784. .frames_uninit = vulkan_frames_uninit,
  2785. .transfer_get_formats = vulkan_transfer_get_formats,
  2786. .transfer_data_to = vulkan_transfer_data_to,
  2787. .transfer_data_from = vulkan_transfer_data_from,
  2788. .map_to = vulkan_map_to,
  2789. .map_from = vulkan_map_from,
  2790. .frames_derive_to = &vulkan_frames_derive_to,
  2791. .pix_fmts = (const enum AVPixelFormat []) {
  2792. AV_PIX_FMT_VULKAN,
  2793. AV_PIX_FMT_NONE
  2794. },
  2795. };