You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3189 lines
111KB

  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "config.h"
  19. #include "pixdesc.h"
  20. #include "avstring.h"
  21. #include "imgutils.h"
  22. #include "hwcontext.h"
  23. #include "hwcontext_internal.h"
  24. #include "hwcontext_vulkan.h"
  25. #if CONFIG_LIBDRM
  26. #include <unistd.h>
  27. #include <xf86drm.h>
  28. #include <drm_fourcc.h>
  29. #include "hwcontext_drm.h"
  30. #if CONFIG_VAAPI
  31. #include <va/va_drmcommon.h>
  32. #include "hwcontext_vaapi.h"
  33. #endif
  34. #endif
  35. #if CONFIG_CUDA
  36. #include "hwcontext_cuda_internal.h"
  37. #include "cuda_check.h"
  38. #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
  39. #endif
  40. typedef struct VulkanQueueCtx {
  41. VkFence fence;
  42. VkQueue queue;
  43. int was_synchronous;
  44. /* Buffer dependencies */
  45. AVBufferRef **buf_deps;
  46. int nb_buf_deps;
  47. int buf_deps_alloc_size;
  48. } VulkanQueueCtx;
  49. typedef struct VulkanExecCtx {
  50. VkCommandPool pool;
  51. VkCommandBuffer *bufs;
  52. VulkanQueueCtx *queues;
  53. int nb_queues;
  54. int cur_queue_idx;
  55. } VulkanExecCtx;
  56. typedef struct VulkanDevicePriv {
  57. /* Properties */
  58. VkPhysicalDeviceProperties props;
  59. VkPhysicalDeviceMemoryProperties mprops;
  60. /* Queues */
  61. uint32_t qfs[3];
  62. int num_qfs;
  63. /* Debug callback */
  64. VkDebugUtilsMessengerEXT debug_ctx;
  65. /* Extensions */
  66. uint64_t extensions;
  67. /* Settings */
  68. int use_linear_images;
  69. /* Nvidia */
  70. int dev_is_nvidia;
  71. } VulkanDevicePriv;
  72. typedef struct VulkanFramesPriv {
  73. /* Image conversions */
  74. VulkanExecCtx conv_ctx;
  75. /* Image transfers */
  76. VulkanExecCtx upload_ctx;
  77. VulkanExecCtx download_ctx;
  78. } VulkanFramesPriv;
  79. typedef struct AVVkFrameInternal {
  80. #if CONFIG_CUDA
  81. /* Importing external memory into cuda is really expensive so we keep the
  82. * memory imported all the time */
  83. AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
  84. CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
  85. CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
  86. CUarray cu_array[AV_NUM_DATA_POINTERS];
  87. CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
  88. #endif
  89. } AVVkFrameInternal;
  90. #define GET_QUEUE_COUNT(hwctx, graph, comp, tx) ( \
  91. graph ? hwctx->nb_graphics_queues : \
  92. comp ? (hwctx->nb_comp_queues ? \
  93. hwctx->nb_comp_queues : hwctx->nb_graphics_queues) : \
  94. tx ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues : \
  95. (hwctx->nb_comp_queues ? \
  96. hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
  97. 0 \
  98. )
  99. #define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
  100. vkGetInstanceProcAddr(inst, #name)
  101. #define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
  102. VK_IMAGE_USAGE_STORAGE_BIT | \
  103. VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
  104. VK_IMAGE_USAGE_TRANSFER_DST_BIT)
  105. #define ADD_VAL_TO_LIST(list, count, val) \
  106. do { \
  107. list = av_realloc_array(list, sizeof(*list), ++count); \
  108. if (!list) { \
  109. err = AVERROR(ENOMEM); \
  110. goto fail; \
  111. } \
  112. list[count - 1] = av_strdup(val); \
  113. if (!list[count - 1]) { \
  114. err = AVERROR(ENOMEM); \
  115. goto fail; \
  116. } \
  117. } while(0)
  118. static const struct {
  119. enum AVPixelFormat pixfmt;
  120. const VkFormat vkfmts[3];
  121. } vk_pixfmt_map[] = {
  122. { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
  123. { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
  124. { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
  125. { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
  126. { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
  127. { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
  128. { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
  129. { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
  130. { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
  131. { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
  132. { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
  133. { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
  134. { AV_PIX_FMT_ABGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
  135. { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } },
  136. { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } },
  137. { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } },
  138. { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } },
  139. { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } },
  140. { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
  141. { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
  142. { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
  143. { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } },
  144. { AV_PIX_FMT_0BGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
  145. { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } },
  146. { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
  147. };
  148. const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
  149. {
  150. for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
  151. if (vk_pixfmt_map[i].pixfmt == p)
  152. return vk_pixfmt_map[i].vkfmts;
  153. return NULL;
  154. }
  155. static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
  156. int linear)
  157. {
  158. const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
  159. int planes = av_pix_fmt_count_planes(p);
  160. if (!fmt)
  161. return 0;
  162. for (int i = 0; i < planes; i++) {
  163. VkFormatFeatureFlags flags;
  164. VkFormatProperties2 prop = {
  165. .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
  166. };
  167. vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
  168. flags = linear ? prop.formatProperties.linearTilingFeatures :
  169. prop.formatProperties.optimalTilingFeatures;
  170. if (!(flags & DEFAULT_USAGE_FLAGS))
  171. return 0;
  172. }
  173. return 1;
  174. }
  175. enum VulkanExtensions {
  176. EXT_EXTERNAL_DMABUF_MEMORY = 1ULL << 0, /* VK_EXT_external_memory_dma_buf */
  177. EXT_DRM_MODIFIER_FLAGS = 1ULL << 1, /* VK_EXT_image_drm_format_modifier */
  178. EXT_EXTERNAL_FD_MEMORY = 1ULL << 2, /* VK_KHR_external_memory_fd */
  179. EXT_EXTERNAL_FD_SEM = 1ULL << 3, /* VK_KHR_external_semaphore_fd */
  180. EXT_NO_FLAG = 1ULL << 63,
  181. };
  182. typedef struct VulkanOptExtension {
  183. const char *name;
  184. uint64_t flag;
  185. } VulkanOptExtension;
  186. static const VulkanOptExtension optional_instance_exts[] = {
  187. /* For future use */
  188. };
  189. static const VulkanOptExtension optional_device_exts[] = {
  190. { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_MEMORY, },
  191. { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, EXT_EXTERNAL_DMABUF_MEMORY, },
  192. { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, },
  193. { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_SEM, },
  194. };
  195. /* Converts return values to strings */
  196. static const char *vk_ret2str(VkResult res)
  197. {
  198. #define CASE(VAL) case VAL: return #VAL
  199. switch (res) {
  200. CASE(VK_SUCCESS);
  201. CASE(VK_NOT_READY);
  202. CASE(VK_TIMEOUT);
  203. CASE(VK_EVENT_SET);
  204. CASE(VK_EVENT_RESET);
  205. CASE(VK_INCOMPLETE);
  206. CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
  207. CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
  208. CASE(VK_ERROR_INITIALIZATION_FAILED);
  209. CASE(VK_ERROR_DEVICE_LOST);
  210. CASE(VK_ERROR_MEMORY_MAP_FAILED);
  211. CASE(VK_ERROR_LAYER_NOT_PRESENT);
  212. CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
  213. CASE(VK_ERROR_FEATURE_NOT_PRESENT);
  214. CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
  215. CASE(VK_ERROR_TOO_MANY_OBJECTS);
  216. CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
  217. CASE(VK_ERROR_FRAGMENTED_POOL);
  218. CASE(VK_ERROR_SURFACE_LOST_KHR);
  219. CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
  220. CASE(VK_SUBOPTIMAL_KHR);
  221. CASE(VK_ERROR_OUT_OF_DATE_KHR);
  222. CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
  223. CASE(VK_ERROR_VALIDATION_FAILED_EXT);
  224. CASE(VK_ERROR_INVALID_SHADER_NV);
  225. CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
  226. CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
  227. CASE(VK_ERROR_NOT_PERMITTED_EXT);
  228. CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
  229. CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
  230. CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
  231. default: return "Unknown error";
  232. }
  233. #undef CASE
  234. }
  235. static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
  236. VkDebugUtilsMessageTypeFlagsEXT messageType,
  237. const VkDebugUtilsMessengerCallbackDataEXT *data,
  238. void *priv)
  239. {
  240. int l;
  241. AVHWDeviceContext *ctx = priv;
  242. switch (severity) {
  243. case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
  244. case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break;
  245. case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
  246. case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break;
  247. default: l = AV_LOG_DEBUG; break;
  248. }
  249. av_log(ctx, l, "%s\n", data->pMessage);
  250. for (int i = 0; i < data->cmdBufLabelCount; i++)
  251. av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);
  252. return 0;
  253. }
  254. static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
  255. const char * const **dst, uint32_t *num, int debug)
  256. {
  257. const char *tstr;
  258. const char **extension_names = NULL;
  259. VulkanDevicePriv *p = ctx->internal->priv;
  260. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  261. int err = 0, found, extensions_found = 0;
  262. const char *mod;
  263. int optional_exts_num;
  264. uint32_t sup_ext_count;
  265. char *user_exts_str = NULL;
  266. AVDictionaryEntry *user_exts;
  267. VkExtensionProperties *sup_ext;
  268. const VulkanOptExtension *optional_exts;
  269. if (!dev) {
  270. mod = "instance";
  271. optional_exts = optional_instance_exts;
  272. optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
  273. user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
  274. if (user_exts) {
  275. user_exts_str = av_strdup(user_exts->value);
  276. if (!user_exts_str) {
  277. err = AVERROR(ENOMEM);
  278. goto fail;
  279. }
  280. }
  281. vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
  282. sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
  283. if (!sup_ext)
  284. return AVERROR(ENOMEM);
  285. vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
  286. } else {
  287. mod = "device";
  288. optional_exts = optional_device_exts;
  289. optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
  290. user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
  291. if (user_exts) {
  292. user_exts_str = av_strdup(user_exts->value);
  293. if (!user_exts_str) {
  294. err = AVERROR(ENOMEM);
  295. goto fail;
  296. }
  297. }
  298. vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
  299. &sup_ext_count, NULL);
  300. sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
  301. if (!sup_ext)
  302. return AVERROR(ENOMEM);
  303. vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
  304. &sup_ext_count, sup_ext);
  305. }
  306. for (int i = 0; i < optional_exts_num; i++) {
  307. tstr = optional_exts[i].name;
  308. found = 0;
  309. for (int j = 0; j < sup_ext_count; j++) {
  310. if (!strcmp(tstr, sup_ext[j].extensionName)) {
  311. found = 1;
  312. break;
  313. }
  314. }
  315. if (!found)
  316. continue;
  317. av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
  318. p->extensions |= optional_exts[i].flag;
  319. ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
  320. }
  321. if (debug && !dev) {
  322. tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
  323. found = 0;
  324. for (int j = 0; j < sup_ext_count; j++) {
  325. if (!strcmp(tstr, sup_ext[j].extensionName)) {
  326. found = 1;
  327. break;
  328. }
  329. }
  330. if (found) {
  331. av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
  332. ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
  333. } else {
  334. av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
  335. tstr);
  336. err = AVERROR(EINVAL);
  337. goto fail;
  338. }
  339. }
  340. if (user_exts_str) {
  341. char *save, *token = av_strtok(user_exts_str, "+", &save);
  342. while (token) {
  343. found = 0;
  344. for (int j = 0; j < sup_ext_count; j++) {
  345. if (!strcmp(token, sup_ext[j].extensionName)) {
  346. found = 1;
  347. break;
  348. }
  349. }
  350. if (found) {
  351. av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
  352. ADD_VAL_TO_LIST(extension_names, extensions_found, token);
  353. } else {
  354. av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
  355. mod, token);
  356. }
  357. token = av_strtok(NULL, "+", &save);
  358. }
  359. }
  360. *dst = extension_names;
  361. *num = extensions_found;
  362. av_free(user_exts_str);
  363. av_free(sup_ext);
  364. return 0;
  365. fail:
  366. if (extension_names)
  367. for (int i = 0; i < extensions_found; i++)
  368. av_free((void *)extension_names[i]);
  369. av_free(extension_names);
  370. av_free(user_exts_str);
  371. av_free(sup_ext);
  372. return err;
  373. }
  374. /* Creates a VkInstance */
  375. static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
  376. {
  377. int err = 0;
  378. VkResult ret;
  379. VulkanDevicePriv *p = ctx->internal->priv;
  380. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  381. AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
  382. const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
  383. VkApplicationInfo application_info = {
  384. .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
  385. .pEngineName = "libavutil",
  386. .apiVersion = VK_API_VERSION_1_1,
  387. .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
  388. LIBAVUTIL_VERSION_MINOR,
  389. LIBAVUTIL_VERSION_MICRO),
  390. };
  391. VkInstanceCreateInfo inst_props = {
  392. .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
  393. .pApplicationInfo = &application_info,
  394. };
  395. /* Check for present/missing extensions */
  396. err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
  397. &inst_props.enabledExtensionCount, debug_mode);
  398. if (err < 0)
  399. return err;
  400. if (debug_mode) {
  401. static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
  402. inst_props.ppEnabledLayerNames = layers;
  403. inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
  404. }
  405. /* Try to create the instance */
  406. ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);
  407. /* Check for errors */
  408. if (ret != VK_SUCCESS) {
  409. av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
  410. vk_ret2str(ret));
  411. for (int i = 0; i < inst_props.enabledExtensionCount; i++)
  412. av_free((void *)inst_props.ppEnabledExtensionNames[i]);
  413. av_free((void *)inst_props.ppEnabledExtensionNames);
  414. return AVERROR_EXTERNAL;
  415. }
  416. if (debug_mode) {
  417. VkDebugUtilsMessengerCreateInfoEXT dbg = {
  418. .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
  419. .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
  420. VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
  421. VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
  422. VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
  423. .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
  424. VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
  425. VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
  426. .pfnUserCallback = vk_dbg_callback,
  427. .pUserData = ctx,
  428. };
  429. VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);
  430. pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
  431. hwctx->alloc, &p->debug_ctx);
  432. }
  433. hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
  434. hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;
  435. return 0;
  436. }
  437. typedef struct VulkanDeviceSelection {
  438. uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
  439. int has_uuid;
  440. const char *name; /* Will use this second unless NULL */
  441. uint32_t pci_device; /* Will use this third unless 0x0 */
  442. uint32_t vendor_id; /* Last resort to find something deterministic */
  443. int index; /* Finally fall back to index */
  444. } VulkanDeviceSelection;
  445. static const char *vk_dev_type(enum VkPhysicalDeviceType type)
  446. {
  447. switch (type) {
  448. case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
  449. case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete";
  450. case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual";
  451. case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software";
  452. default: return "unknown";
  453. }
  454. }
  455. /* Finds a device */
  456. static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
  457. {
  458. int err = 0, choice = -1;
  459. uint32_t num;
  460. VkResult ret;
  461. VkPhysicalDevice *devices = NULL;
  462. VkPhysicalDeviceIDProperties *idp = NULL;
  463. VkPhysicalDeviceProperties2 *prop = NULL;
  464. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  465. ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
  466. if (ret != VK_SUCCESS || !num) {
  467. av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
  468. return AVERROR(ENODEV);
  469. }
  470. devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
  471. if (!devices)
  472. return AVERROR(ENOMEM);
  473. ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
  474. if (ret != VK_SUCCESS) {
  475. av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
  476. vk_ret2str(ret));
  477. err = AVERROR(ENODEV);
  478. goto end;
  479. }
  480. prop = av_mallocz_array(num, sizeof(*prop));
  481. if (!prop) {
  482. err = AVERROR(ENOMEM);
  483. goto end;
  484. }
  485. idp = av_mallocz_array(num, sizeof(*idp));
  486. if (!idp) {
  487. err = AVERROR(ENOMEM);
  488. goto end;
  489. }
  490. av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
  491. for (int i = 0; i < num; i++) {
  492. idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
  493. prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
  494. prop[i].pNext = &idp[i];
  495. vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
  496. av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i,
  497. prop[i].properties.deviceName,
  498. vk_dev_type(prop[i].properties.deviceType),
  499. prop[i].properties.deviceID);
  500. }
  501. if (select->has_uuid) {
  502. for (int i = 0; i < num; i++) {
  503. if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
  504. choice = i;
  505. goto end;
  506. }
  507. }
  508. av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
  509. err = AVERROR(ENODEV);
  510. goto end;
  511. } else if (select->name) {
  512. av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
  513. for (int i = 0; i < num; i++) {
  514. if (strstr(prop[i].properties.deviceName, select->name)) {
  515. choice = i;
  516. goto end;
  517. }
  518. }
  519. av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
  520. select->name);
  521. err = AVERROR(ENODEV);
  522. goto end;
  523. } else if (select->pci_device) {
  524. av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
  525. for (int i = 0; i < num; i++) {
  526. if (select->pci_device == prop[i].properties.deviceID) {
  527. choice = i;
  528. goto end;
  529. }
  530. }
  531. av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
  532. select->pci_device);
  533. err = AVERROR(EINVAL);
  534. goto end;
  535. } else if (select->vendor_id) {
  536. av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
  537. for (int i = 0; i < num; i++) {
  538. if (select->vendor_id == prop[i].properties.vendorID) {
  539. choice = i;
  540. goto end;
  541. }
  542. }
  543. av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
  544. select->vendor_id);
  545. err = AVERROR(ENODEV);
  546. goto end;
  547. } else {
  548. if (select->index < num) {
  549. choice = select->index;
  550. goto end;
  551. }
  552. av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
  553. select->index);
  554. err = AVERROR(ENODEV);
  555. goto end;
  556. }
  557. end:
  558. if (choice > -1)
  559. hwctx->phys_dev = devices[choice];
  560. av_free(devices);
  561. av_free(prop);
  562. av_free(idp);
  563. return err;
  564. }
  565. static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
  566. {
  567. uint32_t num;
  568. float *weights;
  569. VkQueueFamilyProperties *qs = NULL;
  570. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  571. int graph_index = -1, comp_index = -1, tx_index = -1;
  572. VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
  573. /* First get the number of queue families */
  574. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
  575. if (!num) {
  576. av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
  577. return AVERROR_EXTERNAL;
  578. }
  579. /* Then allocate memory */
  580. qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
  581. if (!qs)
  582. return AVERROR(ENOMEM);
  583. /* Finally retrieve the queue families */
  584. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);
  585. #define SEARCH_FLAGS(expr, out) \
  586. for (int i = 0; i < num; i++) { \
  587. const VkQueueFlagBits flags = qs[i].queueFlags; \
  588. if (expr) { \
  589. out = i; \
  590. break; \
  591. } \
  592. }
  593. SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)
  594. SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
  595. comp_index)
  596. SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
  597. (i != comp_index), tx_index)
  598. #undef SEARCH_FLAGS
  599. #define ADD_QUEUE(fidx, graph, comp, tx) \
  600. av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
  601. fidx, qs[fidx].queueCount, graph ? "graphics " : "", \
  602. comp ? "compute " : "", tx ? "transfers " : ""); \
  603. av_log(ctx, AV_LOG_VERBOSE, " QF %i flags: %s%s%s%s\n", fidx, \
  604. ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "", \
  605. ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "", \
  606. ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "", \
  607. ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : ""); \
  608. pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx; \
  609. pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount; \
  610. weights = av_malloc(qs[fidx].queueCount * sizeof(float)); \
  611. pc[cd->queueCreateInfoCount].pQueuePriorities = weights; \
  612. if (!weights) \
  613. goto fail; \
  614. for (int i = 0; i < qs[fidx].queueCount; i++) \
  615. weights[i] = 1.0f; \
  616. cd->queueCreateInfoCount++;
  617. ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
  618. hwctx->queue_family_index = graph_index;
  619. hwctx->queue_family_comp_index = graph_index;
  620. hwctx->queue_family_tx_index = graph_index;
  621. hwctx->nb_graphics_queues = qs[graph_index].queueCount;
  622. if (comp_index != -1) {
  623. ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
  624. hwctx->queue_family_tx_index = comp_index;
  625. hwctx->queue_family_comp_index = comp_index;
  626. hwctx->nb_comp_queues = qs[comp_index].queueCount;
  627. }
  628. if (tx_index != -1) {
  629. ADD_QUEUE(tx_index, 0, 0, 1)
  630. hwctx->queue_family_tx_index = tx_index;
  631. hwctx->nb_tx_queues = qs[tx_index].queueCount;
  632. }
  633. #undef ADD_QUEUE
  634. av_free(qs);
  635. return 0;
  636. fail:
  637. av_freep(&pc[0].pQueuePriorities);
  638. av_freep(&pc[1].pQueuePriorities);
  639. av_freep(&pc[2].pQueuePriorities);
  640. av_free(qs);
  641. return AVERROR(ENOMEM);
  642. }
  643. static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
  644. int queue_family_index, int num_queues)
  645. {
  646. VkResult ret;
  647. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  648. VkCommandPoolCreateInfo cqueue_create = {
  649. .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
  650. .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
  651. .queueFamilyIndex = queue_family_index,
  652. };
  653. VkCommandBufferAllocateInfo cbuf_create = {
  654. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
  655. .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
  656. .commandBufferCount = num_queues,
  657. };
  658. cmd->nb_queues = num_queues;
  659. cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
  660. if (!cmd->queues)
  661. return AVERROR(ENOMEM);
  662. cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
  663. if (!cmd->bufs)
  664. return AVERROR(ENOMEM);
  665. /* Create command pool */
  666. ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
  667. hwctx->alloc, &cmd->pool);
  668. if (ret != VK_SUCCESS) {
  669. av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n",
  670. vk_ret2str(ret));
  671. return AVERROR_EXTERNAL;
  672. }
  673. cbuf_create.commandPool = cmd->pool;
  674. /* Allocate command buffer */
  675. ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
  676. if (ret != VK_SUCCESS) {
  677. av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
  678. vk_ret2str(ret));
  679. return AVERROR_EXTERNAL;
  680. }
  681. for (int i = 0; i < num_queues; i++) {
  682. VulkanQueueCtx *q = &cmd->queues[i];
  683. vkGetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
  684. q->was_synchronous = 1;
  685. }
  686. return 0;
  687. }
  688. static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
  689. {
  690. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  691. /* Make sure all queues have finished executing */
  692. for (int i = 0; i < cmd->nb_queues; i++) {
  693. VulkanQueueCtx *q = &cmd->queues[i];
  694. if (q->fence && !q->was_synchronous) {
  695. vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
  696. vkResetFences(hwctx->act_dev, 1, &q->fence);
  697. }
  698. /* Free the fence */
  699. if (q->fence)
  700. vkDestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);
  701. /* Free buffer dependencies */
  702. for (int j = 0; j < q->nb_buf_deps; j++)
  703. av_buffer_unref(&q->buf_deps[j]);
  704. av_free(q->buf_deps);
  705. }
  706. if (cmd->bufs)
  707. vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs);
  708. if (cmd->pool)
  709. vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
  710. av_freep(&cmd->bufs);
  711. av_freep(&cmd->queues);
  712. }
  713. static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
  714. {
  715. return cmd->bufs[cmd->cur_queue_idx];
  716. }
  717. static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
  718. {
  719. VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
  720. for (int j = 0; j < q->nb_buf_deps; j++)
  721. av_buffer_unref(&q->buf_deps[j]);
  722. q->nb_buf_deps = 0;
  723. }
  724. static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
  725. {
  726. VkResult ret;
  727. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  728. VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
  729. VkCommandBufferBeginInfo cmd_start = {
  730. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
  731. .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  732. };
  733. /* Create the fence and don't wait for it initially */
  734. if (!q->fence) {
  735. VkFenceCreateInfo fence_spawn = {
  736. .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
  737. };
  738. ret = vkCreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
  739. &q->fence);
  740. if (ret != VK_SUCCESS) {
  741. av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
  742. vk_ret2str(ret));
  743. return AVERROR_EXTERNAL;
  744. }
  745. } else if (!q->was_synchronous) {
  746. vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
  747. vkResetFences(hwctx->act_dev, 1, &q->fence);
  748. }
  749. /* Discard queue dependencies */
  750. unref_exec_ctx_deps(hwfc, cmd);
  751. ret = vkBeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
  752. if (ret != VK_SUCCESS) {
  753. av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
  754. vk_ret2str(ret));
  755. return AVERROR_EXTERNAL;
  756. }
  757. return 0;
  758. }
  759. static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
  760. AVBufferRef * const *deps, int nb_deps)
  761. {
  762. AVBufferRef **dst;
  763. VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
  764. if (!deps || !nb_deps)
  765. return 0;
  766. dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
  767. (q->nb_buf_deps + nb_deps) * sizeof(*dst));
  768. if (!dst)
  769. goto err;
  770. q->buf_deps = dst;
  771. for (int i = 0; i < nb_deps; i++) {
  772. q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
  773. if (!q->buf_deps[q->nb_buf_deps])
  774. goto err;
  775. q->nb_buf_deps++;
  776. }
  777. return 0;
  778. err:
  779. unref_exec_ctx_deps(hwfc, cmd);
  780. return AVERROR(ENOMEM);
  781. }
  782. static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
  783. VkSubmitInfo *s_info, int synchronous)
  784. {
  785. VkResult ret;
  786. VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
  787. ret = vkEndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
  788. if (ret != VK_SUCCESS) {
  789. av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
  790. vk_ret2str(ret));
  791. unref_exec_ctx_deps(hwfc, cmd);
  792. return AVERROR_EXTERNAL;
  793. }
  794. s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
  795. s_info->commandBufferCount = 1;
  796. ret = vkQueueSubmit(q->queue, 1, s_info, q->fence);
  797. if (ret != VK_SUCCESS) {
  798. unref_exec_ctx_deps(hwfc, cmd);
  799. return AVERROR_EXTERNAL;
  800. }
  801. q->was_synchronous = synchronous;
  802. if (synchronous) {
  803. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  804. vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
  805. vkResetFences(hwctx->act_dev, 1, &q->fence);
  806. unref_exec_ctx_deps(hwfc, cmd);
  807. } else { /* Rotate queues */
  808. cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
  809. }
  810. return 0;
  811. }
  812. static void vulkan_device_free(AVHWDeviceContext *ctx)
  813. {
  814. VulkanDevicePriv *p = ctx->internal->priv;
  815. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  816. vkDestroyDevice(hwctx->act_dev, hwctx->alloc);
  817. if (p->debug_ctx) {
  818. VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
  819. pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
  820. hwctx->alloc);
  821. }
  822. vkDestroyInstance(hwctx->inst, hwctx->alloc);
  823. for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
  824. av_free((void *)hwctx->enabled_inst_extensions[i]);
  825. av_free((void *)hwctx->enabled_inst_extensions);
  826. for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
  827. av_free((void *)hwctx->enabled_dev_extensions[i]);
  828. av_free((void *)hwctx->enabled_dev_extensions);
  829. }
  830. static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
  831. VulkanDeviceSelection *dev_select,
  832. AVDictionary *opts, int flags)
  833. {
  834. int err = 0;
  835. VkResult ret;
  836. AVDictionaryEntry *opt_d;
  837. VulkanDevicePriv *p = ctx->internal->priv;
  838. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  839. VkPhysicalDeviceFeatures dev_features = { 0 };
  840. VkDeviceQueueCreateInfo queue_create_info[3] = {
  841. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
  842. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
  843. { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
  844. };
  845. VkDeviceCreateInfo dev_info = {
  846. .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
  847. .pNext = &hwctx->device_features,
  848. .pQueueCreateInfos = queue_create_info,
  849. .queueCreateInfoCount = 0,
  850. };
  851. hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
  852. ctx->free = vulkan_device_free;
  853. /* Create an instance if not given one */
  854. if ((err = create_instance(ctx, opts)))
  855. goto end;
  856. /* Find a device (if not given one) */
  857. if ((err = find_device(ctx, dev_select)))
  858. goto end;
  859. vkGetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);
  860. #define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;
  861. COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
  862. COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
  863. COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
  864. COPY_FEATURE(hwctx->device_features, shaderInt64)
  865. #undef COPY_FEATURE
  866. /* Search queue family */
  867. if ((err = search_queue_families(ctx, &dev_info)))
  868. goto end;
  869. if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
  870. &dev_info.enabledExtensionCount, 0))) {
  871. av_free((void *)queue_create_info[0].pQueuePriorities);
  872. av_free((void *)queue_create_info[1].pQueuePriorities);
  873. av_free((void *)queue_create_info[2].pQueuePriorities);
  874. goto end;
  875. }
  876. ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
  877. &hwctx->act_dev);
  878. av_free((void *)queue_create_info[0].pQueuePriorities);
  879. av_free((void *)queue_create_info[1].pQueuePriorities);
  880. av_free((void *)queue_create_info[2].pQueuePriorities);
  881. if (ret != VK_SUCCESS) {
  882. av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
  883. vk_ret2str(ret));
  884. for (int i = 0; i < dev_info.enabledExtensionCount; i++)
  885. av_free((void *)dev_info.ppEnabledExtensionNames[i]);
  886. av_free((void *)dev_info.ppEnabledExtensionNames);
  887. err = AVERROR_EXTERNAL;
  888. goto end;
  889. }
  890. /* Tiled images setting, use them by default */
  891. opt_d = av_dict_get(opts, "linear_images", NULL, 0);
  892. if (opt_d)
  893. p->use_linear_images = strtol(opt_d->value, NULL, 10);
  894. hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
  895. hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;
  896. end:
  897. return err;
  898. }
  899. static int vulkan_device_init(AVHWDeviceContext *ctx)
  900. {
  901. uint32_t queue_num;
  902. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  903. VulkanDevicePriv *p = ctx->internal->priv;
  904. vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
  905. av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
  906. av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
  907. av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyOffsetAlignment: %li\n",
  908. p->props.limits.optimalBufferCopyOffsetAlignment);
  909. av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %li\n",
  910. p->props.limits.optimalBufferCopyRowPitchAlignment);
  911. av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %li\n",
  912. p->props.limits.minMemoryMapAlignment);
  913. /* Set device extension flags */
  914. for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
  915. for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
  916. if (!strcmp(hwctx->enabled_dev_extensions[i],
  917. optional_device_exts[j].name)) {
  918. av_log(ctx, AV_LOG_VERBOSE, "Using device extension %s\n",
  919. hwctx->enabled_dev_extensions[i]);
  920. p->extensions |= optional_device_exts[j].flag;
  921. break;
  922. }
  923. }
  924. }
  925. p->dev_is_nvidia = (p->props.vendorID == 0x10de);
  926. vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
  927. if (!queue_num) {
  928. av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
  929. return AVERROR_EXTERNAL;
  930. }
  931. #define CHECK_QUEUE(type, n) \
  932. if (n >= queue_num) { \
  933. av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
  934. type, n, queue_num); \
  935. return AVERROR(EINVAL); \
  936. }
  937. CHECK_QUEUE("graphics", hwctx->queue_family_index)
  938. CHECK_QUEUE("upload", hwctx->queue_family_tx_index)
  939. CHECK_QUEUE("compute", hwctx->queue_family_comp_index)
  940. #undef CHECK_QUEUE
  941. p->qfs[p->num_qfs++] = hwctx->queue_family_index;
  942. if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) &&
  943. (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index))
  944. p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index;
  945. if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) &&
  946. (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
  947. p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;
  948. /* Get device capabilities */
  949. vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
  950. return 0;
  951. }
  952. static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
  953. AVDictionary *opts, int flags)
  954. {
  955. VulkanDeviceSelection dev_select = { 0 };
  956. if (device && device[0]) {
  957. char *end = NULL;
  958. dev_select.index = strtol(device, &end, 10);
  959. if (end == device) {
  960. dev_select.index = 0;
  961. dev_select.name = device;
  962. }
  963. }
  964. return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
  965. }
  966. static int vulkan_device_derive(AVHWDeviceContext *ctx,
  967. AVHWDeviceContext *src_ctx,
  968. AVDictionary *opts, int flags)
  969. {
  970. av_unused VulkanDeviceSelection dev_select = { 0 };
  971. /* If there's only one device on the system, then even if its not covered
  972. * by the following checks (e.g. non-PCIe ARM GPU), having an empty
  973. * dev_select will mean it'll get picked. */
  974. switch(src_ctx->type) {
  975. #if CONFIG_LIBDRM
  976. #if CONFIG_VAAPI
  977. case AV_HWDEVICE_TYPE_VAAPI: {
  978. AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
  979. const char *vendor = vaQueryVendorString(src_hwctx->display);
  980. if (!vendor) {
  981. av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
  982. return AVERROR_EXTERNAL;
  983. }
  984. if (strstr(vendor, "Intel"))
  985. dev_select.vendor_id = 0x8086;
  986. if (strstr(vendor, "AMD"))
  987. dev_select.vendor_id = 0x1002;
  988. return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
  989. }
  990. #endif
  991. case AV_HWDEVICE_TYPE_DRM: {
  992. AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
  993. drmDevice *drm_dev_info;
  994. int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
  995. if (err) {
  996. av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
  997. return AVERROR_EXTERNAL;
  998. }
  999. if (drm_dev_info->bustype == DRM_BUS_PCI)
  1000. dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
  1001. drmFreeDevice(&drm_dev_info);
  1002. return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
  1003. }
  1004. #endif
  1005. #if CONFIG_CUDA
  1006. case AV_HWDEVICE_TYPE_CUDA: {
  1007. AVHWDeviceContext *cuda_cu = src_ctx;
  1008. AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
  1009. AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
  1010. CudaFunctions *cu = cu_internal->cuda_dl;
  1011. int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
  1012. cu_internal->cuda_device));
  1013. if (ret < 0) {
  1014. av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
  1015. return AVERROR_EXTERNAL;
  1016. }
  1017. dev_select.has_uuid = 1;
  1018. return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
  1019. }
  1020. #endif
  1021. default:
  1022. return AVERROR(ENOSYS);
  1023. }
  1024. }
  1025. static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
  1026. const void *hwconfig,
  1027. AVHWFramesConstraints *constraints)
  1028. {
  1029. int count = 0;
  1030. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1031. VulkanDevicePriv *p = ctx->internal->priv;
  1032. for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
  1033. count += pixfmt_is_supported(hwctx, i, p->use_linear_images);
  1034. #if CONFIG_CUDA
  1035. if (p->dev_is_nvidia)
  1036. count++;
  1037. #endif
  1038. constraints->valid_sw_formats = av_malloc_array(count + 1,
  1039. sizeof(enum AVPixelFormat));
  1040. if (!constraints->valid_sw_formats)
  1041. return AVERROR(ENOMEM);
  1042. count = 0;
  1043. for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
  1044. if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
  1045. constraints->valid_sw_formats[count++] = i;
  1046. #if CONFIG_CUDA
  1047. if (p->dev_is_nvidia)
  1048. constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
  1049. #endif
  1050. constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
  1051. constraints->min_width = 0;
  1052. constraints->min_height = 0;
  1053. constraints->max_width = p->props.limits.maxImageDimension2D;
  1054. constraints->max_height = p->props.limits.maxImageDimension2D;
  1055. constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
  1056. if (!constraints->valid_hw_formats)
  1057. return AVERROR(ENOMEM);
  1058. constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
  1059. constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
  1060. return 0;
  1061. }
  1062. static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
  1063. VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
  1064. VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
  1065. {
  1066. VkResult ret;
  1067. int index = -1;
  1068. VulkanDevicePriv *p = ctx->internal->priv;
  1069. AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
  1070. VkMemoryAllocateInfo alloc_info = {
  1071. .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
  1072. .pNext = alloc_extension,
  1073. };
  1074. /* Align if we need to */
  1075. if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
  1076. req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);
  1077. alloc_info.allocationSize = req->size;
  1078. /* The vulkan spec requires memory types to be sorted in the "optimal"
  1079. * order, so the first matching type we find will be the best/fastest one */
  1080. for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
  1081. /* The memory type must be supported by the requirements (bitfield) */
  1082. if (!(req->memoryTypeBits & (1 << i)))
  1083. continue;
  1084. /* The memory type flags must include our properties */
  1085. if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
  1086. continue;
  1087. /* Found a suitable memory type */
  1088. index = i;
  1089. break;
  1090. }
  1091. if (index < 0) {
  1092. av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
  1093. req_flags);
  1094. return AVERROR(EINVAL);
  1095. }
  1096. alloc_info.memoryTypeIndex = index;
  1097. ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
  1098. dev_hwctx->alloc, mem);
  1099. if (ret != VK_SUCCESS) {
  1100. av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
  1101. vk_ret2str(ret));
  1102. return AVERROR(ENOMEM);
  1103. }
  1104. *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
  1105. return 0;
  1106. }
  1107. static void vulkan_free_internal(AVVkFrameInternal *internal)
  1108. {
  1109. if (!internal)
  1110. return;
  1111. #if CONFIG_CUDA
  1112. if (internal->cuda_fc_ref) {
  1113. AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
  1114. int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
  1115. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1116. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1117. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1118. CudaFunctions *cu = cu_internal->cuda_dl;
  1119. for (int i = 0; i < planes; i++) {
  1120. if (internal->cu_sem[i])
  1121. CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
  1122. if (internal->cu_mma[i])
  1123. CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
  1124. if (internal->ext_mem[i])
  1125. CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
  1126. }
  1127. av_buffer_unref(&internal->cuda_fc_ref);
  1128. }
  1129. #endif
  1130. av_free(internal);
  1131. }
  1132. static void vulkan_frame_free(void *opaque, uint8_t *data)
  1133. {
  1134. AVVkFrame *f = (AVVkFrame *)data;
  1135. AVHWFramesContext *hwfc = opaque;
  1136. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1137. int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1138. vulkan_free_internal(f->internal);
  1139. for (int i = 0; i < planes; i++) {
  1140. vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
  1141. vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
  1142. vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
  1143. }
  1144. av_free(f);
  1145. }
  1146. static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
  1147. void *alloc_pnext, size_t alloc_pnext_stride)
  1148. {
  1149. int err;
  1150. VkResult ret;
  1151. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1152. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1153. VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
  1154. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1155. for (int i = 0; i < planes; i++) {
  1156. int use_ded_mem;
  1157. VkImageMemoryRequirementsInfo2 req_desc = {
  1158. .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
  1159. .image = f->img[i],
  1160. };
  1161. VkMemoryDedicatedAllocateInfo ded_alloc = {
  1162. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
  1163. .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
  1164. };
  1165. VkMemoryDedicatedRequirements ded_req = {
  1166. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
  1167. };
  1168. VkMemoryRequirements2 req = {
  1169. .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
  1170. .pNext = &ded_req,
  1171. };
  1172. vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
  1173. /* In case the implementation prefers/requires dedicated allocation */
  1174. use_ded_mem = ded_req.prefersDedicatedAllocation |
  1175. ded_req.requiresDedicatedAllocation;
  1176. if (use_ded_mem)
  1177. ded_alloc.image = f->img[i];
  1178. /* Allocate memory */
  1179. if ((err = alloc_mem(ctx, &req.memoryRequirements,
  1180. f->tiling == VK_IMAGE_TILING_LINEAR ?
  1181. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
  1182. VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
  1183. use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
  1184. &f->flags, &f->mem[i])))
  1185. return err;
  1186. f->size[i] = req.memoryRequirements.size;
  1187. bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
  1188. bind_info[i].image = f->img[i];
  1189. bind_info[i].memory = f->mem[i];
  1190. }
  1191. /* Bind the allocated memory to the images */
  1192. ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
  1193. if (ret != VK_SUCCESS) {
  1194. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
  1195. vk_ret2str(ret));
  1196. return AVERROR_EXTERNAL;
  1197. }
  1198. return 0;
  1199. }
  1200. enum PrepMode {
  1201. PREP_MODE_WRITE,
  1202. PREP_MODE_RO_SHADER,
  1203. PREP_MODE_EXTERNAL_EXPORT,
  1204. };
  1205. static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
  1206. AVVkFrame *frame, enum PrepMode pmode)
  1207. {
  1208. int err;
  1209. uint32_t dst_qf;
  1210. VkImageLayout new_layout;
  1211. VkAccessFlags new_access;
  1212. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1213. VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
  1214. VkSubmitInfo s_info = {
  1215. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  1216. .pSignalSemaphores = frame->sem,
  1217. .signalSemaphoreCount = planes,
  1218. };
  1219. VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
  1220. for (int i = 0; i < planes; i++)
  1221. wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
  1222. switch (pmode) {
  1223. case PREP_MODE_WRITE:
  1224. new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  1225. new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
  1226. dst_qf = VK_QUEUE_FAMILY_IGNORED;
  1227. break;
  1228. case PREP_MODE_RO_SHADER:
  1229. new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
  1230. new_access = VK_ACCESS_TRANSFER_READ_BIT;
  1231. dst_qf = VK_QUEUE_FAMILY_IGNORED;
  1232. break;
  1233. case PREP_MODE_EXTERNAL_EXPORT:
  1234. new_layout = VK_IMAGE_LAYOUT_GENERAL;
  1235. new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
  1236. dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
  1237. s_info.pWaitSemaphores = frame->sem;
  1238. s_info.pWaitDstStageMask = wait_st;
  1239. s_info.waitSemaphoreCount = planes;
  1240. break;
  1241. }
  1242. if ((err = wait_start_exec_ctx(hwfc, ectx)))
  1243. return err;
  1244. /* Change the image layout to something more optimal for writes.
  1245. * This also signals the newly created semaphore, making it usable
  1246. * for synchronization */
  1247. for (int i = 0; i < planes; i++) {
  1248. img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  1249. img_bar[i].srcAccessMask = 0x0;
  1250. img_bar[i].dstAccessMask = new_access;
  1251. img_bar[i].oldLayout = frame->layout[i];
  1252. img_bar[i].newLayout = new_layout;
  1253. img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  1254. img_bar[i].dstQueueFamilyIndex = dst_qf;
  1255. img_bar[i].image = frame->img[i];
  1256. img_bar[i].subresourceRange.levelCount = 1;
  1257. img_bar[i].subresourceRange.layerCount = 1;
  1258. img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  1259. frame->layout[i] = img_bar[i].newLayout;
  1260. frame->access[i] = img_bar[i].dstAccessMask;
  1261. }
  1262. vkCmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
  1263. VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
  1264. VK_PIPELINE_STAGE_TRANSFER_BIT,
  1265. 0, 0, NULL, 0, NULL, planes, img_bar);
  1266. return submit_exec_ctx(hwfc, ectx, &s_info, 0);
  1267. }
  1268. static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
  1269. VkImageTiling tiling, VkImageUsageFlagBits usage,
  1270. void *create_pnext)
  1271. {
  1272. int err;
  1273. VkResult ret;
  1274. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1275. VulkanDevicePriv *p = ctx->internal->priv;
  1276. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1277. enum AVPixelFormat format = hwfc->sw_format;
  1278. const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
  1279. const int planes = av_pix_fmt_count_planes(format);
  1280. VkExportSemaphoreCreateInfo ext_sem_info = {
  1281. .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
  1282. .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
  1283. };
  1284. VkSemaphoreCreateInfo sem_spawn = {
  1285. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  1286. .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
  1287. };
  1288. AVVkFrame *f = av_vk_frame_alloc();
  1289. if (!f) {
  1290. av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
  1291. return AVERROR(ENOMEM);
  1292. }
  1293. /* Create the images */
  1294. for (int i = 0; i < planes; i++) {
  1295. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
  1296. int w = hwfc->width;
  1297. int h = hwfc->height;
  1298. const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
  1299. const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
  1300. VkImageCreateInfo image_create_info = {
  1301. .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
  1302. .pNext = create_pnext,
  1303. .imageType = VK_IMAGE_TYPE_2D,
  1304. .format = img_fmts[i],
  1305. .extent.width = p_w,
  1306. .extent.height = p_h,
  1307. .extent.depth = 1,
  1308. .mipLevels = 1,
  1309. .arrayLayers = 1,
  1310. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1311. .tiling = tiling,
  1312. .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
  1313. .usage = usage,
  1314. .samples = VK_SAMPLE_COUNT_1_BIT,
  1315. .pQueueFamilyIndices = p->qfs,
  1316. .queueFamilyIndexCount = p->num_qfs,
  1317. .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
  1318. VK_SHARING_MODE_EXCLUSIVE,
  1319. };
  1320. ret = vkCreateImage(hwctx->act_dev, &image_create_info,
  1321. hwctx->alloc, &f->img[i]);
  1322. if (ret != VK_SUCCESS) {
  1323. av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
  1324. vk_ret2str(ret));
  1325. err = AVERROR(EINVAL);
  1326. goto fail;
  1327. }
  1328. /* Create semaphore */
  1329. ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
  1330. hwctx->alloc, &f->sem[i]);
  1331. if (ret != VK_SUCCESS) {
  1332. av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
  1333. vk_ret2str(ret));
  1334. return AVERROR_EXTERNAL;
  1335. }
  1336. f->layout[i] = image_create_info.initialLayout;
  1337. f->access[i] = 0x0;
  1338. }
  1339. f->flags = 0x0;
  1340. f->tiling = tiling;
  1341. *frame = f;
  1342. return 0;
  1343. fail:
  1344. vulkan_frame_free(hwfc, (uint8_t *)f);
  1345. return err;
  1346. }
  1347. /* Checks if an export flag is enabled, and if it is ORs it with *iexp */
  1348. static void try_export_flags(AVHWFramesContext *hwfc,
  1349. VkExternalMemoryHandleTypeFlags *comp_handle_types,
  1350. VkExternalMemoryHandleTypeFlagBits *iexp,
  1351. VkExternalMemoryHandleTypeFlagBits exp)
  1352. {
  1353. VkResult ret;
  1354. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1355. AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
  1356. VkExternalImageFormatProperties eprops = {
  1357. .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
  1358. };
  1359. VkImageFormatProperties2 props = {
  1360. .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
  1361. .pNext = &eprops,
  1362. };
  1363. VkPhysicalDeviceExternalImageFormatInfo enext = {
  1364. .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
  1365. .handleType = exp,
  1366. };
  1367. VkPhysicalDeviceImageFormatInfo2 pinfo = {
  1368. .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
  1369. .pNext = !exp ? NULL : &enext,
  1370. .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
  1371. .type = VK_IMAGE_TYPE_2D,
  1372. .tiling = hwctx->tiling,
  1373. .usage = hwctx->usage,
  1374. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1375. };
  1376. ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
  1377. &pinfo, &props);
  1378. if (ret == VK_SUCCESS) {
  1379. *iexp |= exp;
  1380. *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
  1381. }
  1382. }
  1383. static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
  1384. {
  1385. int err;
  1386. AVVkFrame *f;
  1387. AVBufferRef *avbuf = NULL;
  1388. AVHWFramesContext *hwfc = opaque;
  1389. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1390. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1391. VulkanFramesPriv *fp = hwfc->internal->priv;
  1392. VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
  1393. VkExternalMemoryHandleTypeFlags e = 0x0;
  1394. VkExternalMemoryImageCreateInfo eiinfo = {
  1395. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1396. .pNext = hwctx->create_pnext,
  1397. };
  1398. if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
  1399. try_export_flags(hwfc, &eiinfo.handleTypes, &e,
  1400. VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
  1401. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1402. try_export_flags(hwfc, &eiinfo.handleTypes, &e,
  1403. VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
  1404. for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
  1405. eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
  1406. eminfo[i].pNext = hwctx->alloc_pnext[i];
  1407. eminfo[i].handleTypes = e;
  1408. }
  1409. err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
  1410. eiinfo.handleTypes ? &eiinfo : NULL);
  1411. if (err)
  1412. return NULL;
  1413. err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
  1414. if (err)
  1415. goto fail;
  1416. err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
  1417. if (err)
  1418. goto fail;
  1419. avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
  1420. vulkan_frame_free, hwfc, 0);
  1421. if (!avbuf)
  1422. goto fail;
  1423. return avbuf;
  1424. fail:
  1425. vulkan_frame_free(hwfc, (uint8_t *)f);
  1426. return NULL;
  1427. }
  1428. static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
  1429. {
  1430. VulkanFramesPriv *fp = hwfc->internal->priv;
  1431. free_exec_ctx(hwfc, &fp->conv_ctx);
  1432. free_exec_ctx(hwfc, &fp->upload_ctx);
  1433. free_exec_ctx(hwfc, &fp->download_ctx);
  1434. }
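/* Frames context init: picks default tiling/usage if the user left them unset,
 * creates the three execution contexts (conversion, upload, download), does a
 * throwaway test allocation to catch failures early, and installs the internal
 * buffer pool when the user did not supply one. */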
  1435. static int vulkan_frames_init(AVHWFramesContext *hwfc)
  1436. {
  1437. int err;
  1438. AVVkFrame *f;
  1439. AVVulkanFramesContext *hwctx = hwfc->hwctx;
  1440. VulkanFramesPriv *fp = hwfc->internal->priv;
  1441. AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
  1442. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1443. /* Default pool flags */
  1444. hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
  1445. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1446. if (!hwctx->usage)
  1447. hwctx->usage = DEFAULT_USAGE_FLAGS;
  1448. err = create_exec_ctx(hwfc, &fp->conv_ctx,
  1449. dev_hwctx->queue_family_comp_index,
  1450. GET_QUEUE_COUNT(dev_hwctx, 0, 1, 0));
  1451. if (err)
  1452. goto fail;
  1453. err = create_exec_ctx(hwfc, &fp->upload_ctx,
  1454. dev_hwctx->queue_family_tx_index,
  1455. GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
  1456. if (err)
  1457. goto fail;
  1458. err = create_exec_ctx(hwfc, &fp->download_ctx,
  1459. dev_hwctx->queue_family_tx_index, 1);
  1460. if (err)
  1461. goto fail;
  1462. /* Test to see if allocation will fail */
  1463. err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
  1464. hwctx->create_pnext);
  1465. if (err)
  1466. goto fail;
  1467. vulkan_frame_free(hwfc, (uint8_t *)f);
1468. /* If the user did not specify a pool, hwfc->pool will be set to the internal one
1469. * in hwcontext.c just after this gets called */
  1470. if (!hwfc->pool) {
  1471. hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
  1472. hwfc, vulkan_pool_alloc,
  1473. NULL);
  1474. if (!hwfc->internal->pool_internal) {
  1475. err = AVERROR(ENOMEM);
  1476. goto fail;
  1477. }
  1478. }
  1479. return 0;
  1480. fail:
  1481. free_exec_ctx(hwfc, &fp->conv_ctx);
  1482. free_exec_ctx(hwfc, &fp->upload_ctx);
  1483. free_exec_ctx(hwfc, &fp->download_ctx);
  1484. return err;
  1485. }
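/* Caller-side sketch (illustrative only, not part of this file): a typical
 * user reaches vulkan_get_buffer() below through the public hwcontext API,
 * roughly as follows (error checking omitted for brevity):
 *
 *     AVBufferRef *dev = NULL, *frames = NULL;
 *     av_hwdevice_ctx_create(&dev, AV_HWDEVICE_TYPE_VULKAN, NULL, NULL, 0);
 *     frames = av_hwframe_ctx_alloc(dev);
 *     AVHWFramesContext *fc = (AVHWFramesContext *)frames->data;
 *     fc->format    = AV_PIX_FMT_VULKAN;
 *     fc->sw_format = AV_PIX_FMT_NV12;   // any format with a Vulkan mapping
 *     fc->width     = 1920;
 *     fc->height    = 1080;
 *     av_hwframe_ctx_init(frames);
 *     av_hwframe_get_buffer(frames, frame, 0);
 */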
  1486. static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
  1487. {
  1488. frame->buf[0] = av_buffer_pool_get(hwfc->pool);
  1489. if (!frame->buf[0])
  1490. return AVERROR(ENOMEM);
  1491. frame->data[0] = frame->buf[0]->data;
  1492. frame->format = AV_PIX_FMT_VULKAN;
  1493. frame->width = hwfc->width;
  1494. frame->height = hwfc->height;
  1495. return 0;
  1496. }
  1497. static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
  1498. enum AVHWFrameTransferDirection dir,
  1499. enum AVPixelFormat **formats)
  1500. {
  1501. enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
  1502. if (!fmts)
  1503. return AVERROR(ENOMEM);
  1504. fmts[0] = hwfc->sw_format;
  1505. fmts[1] = AV_PIX_FMT_NONE;
  1506. *formats = fmts;
  1507. return 0;
  1508. }
  1509. typedef struct VulkanMapping {
  1510. AVVkFrame *frame;
  1511. int flags;
  1512. } VulkanMapping;
  1513. static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1514. {
  1515. VulkanMapping *map = hwmap->priv;
  1516. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1517. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1518. /* Check if buffer needs flushing */
  1519. if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
  1520. !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1521. VkResult ret;
  1522. VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1523. for (int i = 0; i < planes; i++) {
  1524. flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1525. flush_ranges[i].memory = map->frame->mem[i];
  1526. flush_ranges[i].size = VK_WHOLE_SIZE;
  1527. }
  1528. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
  1529. flush_ranges);
  1530. if (ret != VK_SUCCESS) {
  1531. av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  1532. vk_ret2str(ret));
  1533. }
  1534. }
  1535. for (int i = 0; i < planes; i++)
  1536. vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
  1537. av_free(map);
  1538. }
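/* Maps an AVVkFrame's memory directly into host-addressable AVFrame planes.
 * Only works for images created with linear tiling and host-visible memory;
 * non-coherent memory is invalidated after mapping (when the contents are
 * read) and flushed on unmap (when written to) by vulkan_unmap_frame() above. */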
  1539. static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  1540. const AVFrame *src, int flags)
  1541. {
  1542. VkResult ret;
  1543. int err, mapped_mem_count = 0;
  1544. AVVkFrame *f = (AVVkFrame *)src->data[0];
  1545. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1546. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1547. VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
  1548. if (!map)
1549. return AVERROR(ENOMEM);
  1550. if (src->format != AV_PIX_FMT_VULKAN) {
  1551. av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
  1552. av_get_pix_fmt_name(src->format));
  1553. err = AVERROR(EINVAL);
  1554. goto fail;
  1555. }
  1556. if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
  1557. !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
  1558. av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
  1559. "and linear!\n");
  1560. err = AVERROR(EINVAL);
  1561. goto fail;
  1562. }
  1563. dst->width = src->width;
  1564. dst->height = src->height;
  1565. for (int i = 0; i < planes; i++) {
  1566. ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
  1567. VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
  1568. if (ret != VK_SUCCESS) {
  1569. av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
  1570. vk_ret2str(ret));
  1571. err = AVERROR_EXTERNAL;
  1572. goto fail;
  1573. }
  1574. mapped_mem_count++;
  1575. }
  1576. /* Check if the memory contents matter */
  1577. if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
  1578. !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1579. VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1580. for (int i = 0; i < planes; i++) {
  1581. map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1582. map_mem_ranges[i].size = VK_WHOLE_SIZE;
  1583. map_mem_ranges[i].memory = f->mem[i];
  1584. }
  1585. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
  1586. map_mem_ranges);
  1587. if (ret != VK_SUCCESS) {
  1588. av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
  1589. vk_ret2str(ret));
  1590. err = AVERROR_EXTERNAL;
  1591. goto fail;
  1592. }
  1593. }
  1594. for (int i = 0; i < planes; i++) {
  1595. VkImageSubresource sub = {
  1596. .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  1597. };
  1598. VkSubresourceLayout layout;
  1599. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  1600. dst->linesize[i] = layout.rowPitch;
  1601. }
  1602. map->frame = f;
  1603. map->flags = flags;
  1604. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
  1605. &vulkan_unmap_frame, map);
  1606. if (err < 0)
  1607. goto fail;
  1608. return 0;
  1609. fail:
  1610. for (int i = 0; i < mapped_mem_count; i++)
  1611. vkUnmapMemory(hwctx->act_dev, f->mem[i]);
  1612. av_free(map);
  1613. return err;
  1614. }
  1615. #if CONFIG_LIBDRM
  1616. static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1617. {
  1618. VulkanMapping *map = hwmap->priv;
  1619. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1620. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1621. for (int i = 0; i < planes; i++) {
  1622. vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
  1623. vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
  1624. vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
  1625. }
  1626. av_freep(&map->frame);
  1627. }
  1628. static const struct {
  1629. uint32_t drm_fourcc;
  1630. VkFormat vk_format;
  1631. } vulkan_drm_format_map[] = {
  1632. { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
  1633. { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
  1634. { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
  1635. { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
  1636. { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
  1637. { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
  1638. { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1639. { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1640. { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1641. { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1642. };
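/* Both component orders of the two-channel DRM formats (GR88/RG88,
 * GR1616/RG1616) map to the same VkFormat; anything not listed above
 * resolves to VK_FORMAT_UNDEFINED and is rejected by the importer below. */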
  1643. static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
  1644. {
  1645. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  1646. if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
  1647. return vulkan_drm_format_map[i].vk_format;
  1648. return VK_FORMAT_UNDEFINED;
  1649. }
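/* Imports a DRM frame descriptor into an AVVkFrame: validates each layer's
 * format, dup()s and imports every DRM object's fd as VkDeviceMemory, creates
 * one VkImage per layer (with explicit DRM modifier info when the extension is
 * available), binds the memory planes with vkBindImageMemory2() and finally
 * signals the per-plane semaphores via prepare_frame(). */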
  1650. static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
  1651. AVDRMFrameDescriptor *desc)
  1652. {
  1653. int err = 0;
  1654. VkResult ret;
  1655. AVVkFrame *f;
  1656. int bind_counts = 0;
  1657. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1658. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1659. VulkanDevicePriv *p = ctx->internal->priv;
  1660. VulkanFramesPriv *fp = hwfc->internal->priv;
  1661. AVVulkanFramesContext *frames_hwctx = hwfc->hwctx;
  1662. const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1663. const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
  1664. VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
  1665. VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
  1666. VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
  1667. VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
  1668. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
  1669. for (int i = 0; i < desc->nb_layers; i++) {
  1670. if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
  1671. av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
  1672. desc->layers[i].format);
  1673. return AVERROR(EINVAL);
  1674. }
  1675. }
  1676. if (!(f = av_vk_frame_alloc())) {
  1677. av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
  1678. err = AVERROR(ENOMEM);
  1679. goto fail;
  1680. }
  1681. for (int i = 0; i < desc->nb_objects; i++) {
  1682. VkMemoryFdPropertiesKHR fdmp = {
  1683. .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
  1684. };
  1685. VkMemoryRequirements req = {
  1686. .size = desc->objects[i].size,
  1687. };
  1688. VkImportMemoryFdInfoKHR idesc = {
  1689. .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
  1690. .handleType = htype,
  1691. .fd = dup(desc->objects[i].fd),
  1692. };
  1693. ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
  1694. idesc.fd, &fdmp);
  1695. if (ret != VK_SUCCESS) {
  1696. av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
  1697. vk_ret2str(ret));
  1698. err = AVERROR_EXTERNAL;
  1699. close(idesc.fd);
  1700. goto fail;
  1701. }
  1702. req.memoryTypeBits = fdmp.memoryTypeBits;
  1703. err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
  1704. &idesc, &f->flags, &f->mem[i]);
  1705. if (err) {
  1706. close(idesc.fd);
  1707. return err;
  1708. }
  1709. f->size[i] = desc->objects[i].size;
  1710. }
  1711. f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
  1712. desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
  1713. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1714. for (int i = 0; i < desc->nb_layers; i++) {
  1715. const int planes = desc->layers[i].nb_planes;
  1716. const int signal_p = has_modifiers && (planes > 1);
  1717. VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
  1718. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
  1719. .drmFormatModifier = desc->objects[0].format_modifier,
  1720. .drmFormatModifierPlaneCount = planes,
  1721. .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
  1722. };
  1723. VkExternalMemoryImageCreateInfo einfo = {
  1724. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1725. .pNext = has_modifiers ? &drm_info : NULL,
  1726. .handleTypes = htype,
  1727. };
  1728. VkSemaphoreCreateInfo sem_spawn = {
  1729. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  1730. };
  1731. const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
  1732. const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
  1733. VkImageCreateInfo image_create_info = {
  1734. .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
  1735. .pNext = &einfo,
  1736. .imageType = VK_IMAGE_TYPE_2D,
  1737. .format = drm_to_vulkan_fmt(desc->layers[i].format),
  1738. .extent.width = p_w,
  1739. .extent.height = p_h,
  1740. .extent.depth = 1,
  1741. .mipLevels = 1,
  1742. .arrayLayers = 1,
  1743. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1744. .tiling = f->tiling,
  1745. .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
  1746. .usage = frames_hwctx->usage,
  1747. .samples = VK_SAMPLE_COUNT_1_BIT,
  1748. .pQueueFamilyIndices = p->qfs,
  1749. .queueFamilyIndexCount = p->num_qfs,
  1750. .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
  1751. VK_SHARING_MODE_EXCLUSIVE,
  1752. };
  1753. for (int j = 0; j < planes; j++) {
  1754. plane_data[j].offset = desc->layers[i].planes[j].offset;
  1755. plane_data[j].rowPitch = desc->layers[i].planes[j].pitch;
  1756. plane_data[j].size = 0; /* The specs say so for all 3 */
  1757. plane_data[j].arrayPitch = 0;
  1758. plane_data[j].depthPitch = 0;
  1759. }
  1760. /* Create image */
  1761. ret = vkCreateImage(hwctx->act_dev, &image_create_info,
  1762. hwctx->alloc, &f->img[i]);
  1763. if (ret != VK_SUCCESS) {
  1764. av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
  1765. vk_ret2str(ret));
  1766. err = AVERROR(EINVAL);
  1767. goto fail;
  1768. }
  1769. ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
  1770. hwctx->alloc, &f->sem[i]);
  1771. if (ret != VK_SUCCESS) {
1772. av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
1773. vk_ret2str(ret));
1774. err = AVERROR_EXTERNAL;
goto fail;
  1775. }
1776. /* We'd import a semaphore into the one we created using
1777. * vkImportSemaphoreFdKHR, but unfortunately neither DRM nor VAAPI
1778. * offer us anything we could import and sync with, so instead
1779. * we just signal the semaphore we created. */
  1780. f->layout[i] = image_create_info.initialLayout;
  1781. f->access[i] = 0x0;
  1782. for (int j = 0; j < planes; j++) {
  1783. VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  1784. j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
  1785. VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
  1786. plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
  1787. plane_info[bind_counts].planeAspect = aspect;
  1788. bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
  1789. bind_info[bind_counts].pNext = signal_p ? &plane_info[bind_counts] : NULL;
  1790. bind_info[bind_counts].image = f->img[i];
  1791. bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
  1792. bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
  1793. bind_counts++;
  1794. }
  1795. }
  1796. /* Bind the allocated memory to the images */
  1797. ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
  1798. if (ret != VK_SUCCESS) {
  1799. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
  1800. vk_ret2str(ret));
1801. err = AVERROR_EXTERNAL;
goto fail;
  1802. }
1803. /* NOTE: This is completely unnecessary once we can import semaphores
1804. * from DRM. Until then we have to signal the semaphores ourselves.
1805. * We're reusing the exec context that's also used for uploads/downloads. */
  1806. err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_RO_SHADER);
  1807. if (err)
  1808. goto fail;
  1809. *frame = f;
  1810. return 0;
  1811. fail:
  1812. for (int i = 0; i < desc->nb_layers; i++) {
  1813. vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
  1814. vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
  1815. }
  1816. for (int i = 0; i < desc->nb_objects; i++)
  1817. vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
  1818. av_free(f);
  1819. return err;
  1820. }
  1821. static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  1822. const AVFrame *src, int flags)
  1823. {
  1824. int err = 0;
  1825. AVVkFrame *f;
  1826. VulkanMapping *map = NULL;
  1827. err = vulkan_map_from_drm_frame_desc(hwfc, &f,
  1828. (AVDRMFrameDescriptor *)src->data[0]);
  1829. if (err)
  1830. return err;
  1831. /* The unmapping function will free this */
  1832. dst->data[0] = (uint8_t *)f;
  1833. dst->width = src->width;
  1834. dst->height = src->height;
  1835. map = av_mallocz(sizeof(VulkanMapping));
1836. if (!map) {
1837. err = AVERROR(ENOMEM);
goto fail;
}
  1838. map->frame = f;
  1839. map->flags = flags;
  1840. err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
  1841. &vulkan_unmap_from, map);
  1842. if (err < 0)
  1843. goto fail;
  1844. av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
  1845. return 0;
  1846. fail:
1847. vulkan_frame_free(hwfc, (uint8_t *)f);
  1848. av_free(map);
  1849. return err;
  1850. }
  1851. #if CONFIG_VAAPI
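/* VAAPI mapping goes through DRM PRIME: the surface is synced first (there are
 * no importable semaphores), mapped to a temporary DRM PRIME frame and then
 * handed to vulkan_map_from_drm() above. */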
  1852. static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
  1853. AVFrame *dst, const AVFrame *src,
  1854. int flags)
  1855. {
  1856. int err;
  1857. AVFrame *tmp = av_frame_alloc();
  1858. AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  1859. AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
  1860. VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
  1861. if (!tmp)
  1862. return AVERROR(ENOMEM);
1863. /* We have to sync since, as the previous comment said, there are no semaphores to import */
  1864. vaSyncSurface(vaapi_ctx->display, surface_id);
  1865. tmp->format = AV_PIX_FMT_DRM_PRIME;
  1866. err = av_hwframe_map(tmp, src, flags);
  1867. if (err < 0)
  1868. goto fail;
  1869. err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
  1870. if (err < 0)
  1871. goto fail;
  1872. err = ff_hwframe_map_replace(dst, src);
  1873. fail:
  1874. av_frame_free(&tmp);
  1875. return err;
  1876. }
  1877. #endif
  1878. #endif
  1879. #if CONFIG_CUDA
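/* Exports each plane's memory and semaphore as opaque FDs and imports them
 * into CUDA as external memory/semaphore objects; the imported handles are
 * cached in AVVkFrameInternal so the export/import only runs once per frame. */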
  1880. static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
  1881. AVBufferRef *cuda_hwfc,
  1882. const AVFrame *frame)
  1883. {
  1884. int err;
  1885. VkResult ret;
  1886. AVVkFrame *dst_f;
  1887. AVVkFrameInternal *dst_int;
  1888. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1889. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1890. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1891. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1892. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  1893. VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
  1894. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
  1895. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1896. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1897. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1898. CudaFunctions *cu = cu_internal->cuda_dl;
  1899. CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
  1900. CU_AD_FORMAT_UNSIGNED_INT8;
  1901. dst_f = (AVVkFrame *)frame->data[0];
  1902. dst_int = dst_f->internal;
  1903. if (!dst_int || !dst_int->cuda_fc_ref) {
  1904. if (!dst_f->internal)
  1905. dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
  1906. if (!dst_int) {
  1907. err = AVERROR(ENOMEM);
  1908. goto fail;
  1909. }
  1910. dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
  1911. if (!dst_int->cuda_fc_ref) {
  1912. err = AVERROR(ENOMEM);
  1913. goto fail;
  1914. }
  1915. for (int i = 0; i < planes; i++) {
  1916. CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
  1917. .offset = 0,
  1918. .arrayDesc = {
  1919. .Width = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  1920. : hwfc->width,
  1921. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  1922. : hwfc->height,
  1923. .Depth = 0,
  1924. .Format = cufmt,
  1925. .NumChannels = 1 + ((planes == 2) && i),
  1926. .Flags = 0,
  1927. },
  1928. .numLevels = 1,
  1929. };
  1930. CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
  1931. .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
  1932. .size = dst_f->size[i],
  1933. };
  1934. VkMemoryGetFdInfoKHR export_info = {
  1935. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  1936. .memory = dst_f->mem[i],
  1937. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
  1938. };
  1939. VkSemaphoreGetFdInfoKHR sem_export = {
  1940. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
  1941. .semaphore = dst_f->sem[i],
  1942. .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
  1943. };
  1944. CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
  1945. .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
  1946. };
  1947. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  1948. &ext_desc.handle.fd);
  1949. if (ret != VK_SUCCESS) {
  1950. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
  1951. err = AVERROR_EXTERNAL;
  1952. goto fail;
  1953. }
  1954. ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
  1955. if (ret < 0) {
  1956. err = AVERROR_EXTERNAL;
  1957. goto fail;
  1958. }
  1959. ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
  1960. dst_int->ext_mem[i],
  1961. &tex_desc));
  1962. if (ret < 0) {
  1963. err = AVERROR_EXTERNAL;
  1964. goto fail;
  1965. }
  1966. ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
  1967. dst_int->cu_mma[i], 0));
  1968. if (ret < 0) {
  1969. err = AVERROR_EXTERNAL;
  1970. goto fail;
  1971. }
  1972. ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
  1973. &ext_sem_desc.handle.fd);
  1974. if (ret != VK_SUCCESS) {
  1975. av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
  1976. vk_ret2str(ret));
  1977. err = AVERROR_EXTERNAL;
  1978. goto fail;
  1979. }
  1980. ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
  1981. &ext_sem_desc));
  1982. if (ret < 0) {
  1983. err = AVERROR_EXTERNAL;
  1984. goto fail;
  1985. }
  1986. }
  1987. }
  1988. return 0;
  1989. fail:
  1990. return err;
  1991. }
  1992. static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
  1993. AVFrame *dst, const AVFrame *src)
  1994. {
  1995. int err;
  1996. VkResult ret;
  1997. CUcontext dummy;
  1998. AVVkFrame *dst_f;
  1999. AVVkFrameInternal *dst_int;
  2000. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2001. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  2002. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  2003. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  2004. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  2005. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  2006. CudaFunctions *cu = cu_internal->cuda_dl;
  2007. CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
  2008. CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
  2009. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  2010. if (ret < 0) {
  2011. err = AVERROR_EXTERNAL;
  2012. goto fail;
  2013. }
  2014. dst_f = (AVVkFrame *)dst->data[0];
2015. err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
2016. if (err < 0) {
2017. goto fail;
2018. }
  2019. dst_int = dst_f->internal;
  2020. ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
  2021. planes, cuda_dev->stream));
  2022. if (ret < 0) {
  2023. err = AVERROR_EXTERNAL;
  2024. goto fail;
  2025. }
  2026. for (int i = 0; i < planes; i++) {
  2027. CUDA_MEMCPY2D cpy = {
  2028. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  2029. .srcDevice = (CUdeviceptr)src->data[i],
  2030. .srcPitch = src->linesize[i],
  2031. .srcY = 0,
  2032. .dstMemoryType = CU_MEMORYTYPE_ARRAY,
  2033. .dstArray = dst_int->cu_array[i],
  2034. .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  2035. : hwfc->width) * desc->comp[i].step,
  2036. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  2037. : hwfc->height,
  2038. };
  2039. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  2040. if (ret < 0) {
  2041. err = AVERROR_EXTERNAL;
  2042. goto fail;
  2043. }
  2044. }
  2045. ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
  2046. planes, cuda_dev->stream));
  2047. if (ret < 0) {
  2048. err = AVERROR_EXTERNAL;
  2049. goto fail;
  2050. }
  2051. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2052. av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
  2053. return 0;
  2054. fail:
  2055. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2056. vulkan_free_internal(dst_int);
  2057. dst_f->internal = NULL;
  2058. av_buffer_unref(&dst->buf[0]);
  2059. return err;
  2060. }
  2061. #endif
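/* Dispatch for av_hwframe_map() into Vulkan: VAAPI and DRM PRIME sources are
 * handled when the dma-buf memory extension is available, everything else
 * returns ENOSYS. */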
  2062. static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
  2063. const AVFrame *src, int flags)
  2064. {
  2065. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2066. switch (src->format) {
  2067. #if CONFIG_LIBDRM
  2068. #if CONFIG_VAAPI
  2069. case AV_PIX_FMT_VAAPI:
  2070. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2071. return vulkan_map_from_vaapi(hwfc, dst, src, flags);
  2072. #endif
  2073. case AV_PIX_FMT_DRM_PRIME:
  2074. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2075. return vulkan_map_from_drm(hwfc, dst, src, flags);
  2076. #endif
  2077. default:
  2078. return AVERROR(ENOSYS);
  2079. }
  2080. }
  2081. #if CONFIG_LIBDRM
  2082. typedef struct VulkanDRMMapping {
  2083. AVDRMFrameDescriptor drm_desc;
  2084. AVVkFrame *source;
  2085. } VulkanDRMMapping;
  2086. static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  2087. {
  2088. AVDRMFrameDescriptor *drm_desc = hwmap->priv;
  2089. for (int i = 0; i < drm_desc->nb_objects; i++)
  2090. close(drm_desc->objects[i].fd);
  2091. av_free(drm_desc);
  2092. }
  2093. static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
  2094. {
  2095. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  2096. if (vulkan_drm_format_map[i].vk_format == vkfmt)
  2097. return vulkan_drm_format_map[i].drm_fourcc;
  2098. return DRM_FORMAT_INVALID;
  2099. }
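/* Exports an AVVkFrame as an AVDRMFrameDescriptor: transitions the image for
 * external use, queries the DRM format modifier when supported, exports each
 * plane's memory as a dma-buf fd and fills in the per-layer offsets/pitches
 * (only meaningful for linearly tiled images). */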
  2100. static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  2101. const AVFrame *src, int flags)
  2102. {
  2103. int err = 0;
  2104. VkResult ret;
  2105. AVVkFrame *f = (AVVkFrame *)src->data[0];
  2106. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2107. VulkanFramesPriv *fp = hwfc->internal->priv;
  2108. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  2109. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2110. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  2111. VkImageDrmFormatModifierPropertiesEXT drm_mod = {
  2112. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
  2113. };
  2114. AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
  2115. if (!drm_desc)
  2116. return AVERROR(ENOMEM);
  2117. err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT);
  2118. if (err < 0)
  2119. goto end;
  2120. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
  2121. if (err < 0)
  2122. goto end;
  2123. if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
  2124. VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
  2125. ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
  2126. &drm_mod);
  2127. if (ret != VK_SUCCESS) {
  2128. av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
  2129. err = AVERROR_EXTERNAL;
  2130. goto end;
  2131. }
  2132. }
  2133. for (int i = 0; (i < planes) && (f->mem[i]); i++) {
  2134. VkMemoryGetFdInfoKHR export_info = {
  2135. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  2136. .memory = f->mem[i],
  2137. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
  2138. };
  2139. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  2140. &drm_desc->objects[i].fd);
  2141. if (ret != VK_SUCCESS) {
  2142. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
  2143. err = AVERROR_EXTERNAL;
  2144. goto end;
  2145. }
  2146. drm_desc->nb_objects++;
  2147. drm_desc->objects[i].size = f->size[i];
  2148. drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
  2149. }
  2150. drm_desc->nb_layers = planes;
  2151. for (int i = 0; i < drm_desc->nb_layers; i++) {
  2152. VkSubresourceLayout layout;
  2153. VkImageSubresource sub = {
  2154. .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
  2155. VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  2156. VK_IMAGE_ASPECT_COLOR_BIT,
  2157. };
  2158. VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
  2159. drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt);
  2160. drm_desc->layers[i].nb_planes = 1;
  2161. if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
  2162. av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
  2163. err = AVERROR_PATCHWELCOME;
  2164. goto end;
  2165. }
  2166. drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
  2167. if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
  2168. continue;
  2169. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  2170. drm_desc->layers[i].planes[0].offset = layout.offset;
  2171. drm_desc->layers[i].planes[0].pitch = layout.rowPitch;
  2172. }
  2173. dst->width = src->width;
  2174. dst->height = src->height;
  2175. dst->data[0] = (uint8_t *)drm_desc;
  2176. av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
  2177. return 0;
  2178. end:
  2179. av_free(drm_desc);
  2180. return err;
  2181. }
  2182. #if CONFIG_VAAPI
  2183. static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
  2184. const AVFrame *src, int flags)
  2185. {
  2186. int err;
  2187. AVFrame *tmp = av_frame_alloc();
  2188. if (!tmp)
  2189. return AVERROR(ENOMEM);
  2190. tmp->format = AV_PIX_FMT_DRM_PRIME;
  2191. err = vulkan_map_to_drm(hwfc, tmp, src, flags);
  2192. if (err < 0)
  2193. goto fail;
  2194. err = av_hwframe_map(dst, tmp, flags);
  2195. if (err < 0)
  2196. goto fail;
  2197. err = ff_hwframe_map_replace(dst, src);
  2198. fail:
  2199. av_frame_free(&tmp);
  2200. return err;
  2201. }
  2202. #endif
  2203. #endif
  2204. static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2205. const AVFrame *src, int flags)
  2206. {
  2207. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2208. switch (dst->format) {
  2209. #if CONFIG_LIBDRM
  2210. case AV_PIX_FMT_DRM_PRIME:
  2211. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2212. return vulkan_map_to_drm(hwfc, dst, src, flags);
  2213. #if CONFIG_VAAPI
  2214. case AV_PIX_FMT_VAAPI:
  2215. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2216. return vulkan_map_to_vaapi(hwfc, dst, src, flags);
  2217. #endif
  2218. #endif
  2219. default:
  2220. return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
  2221. }
  2222. }
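/* Staging buffers used by the upload/download paths below: create_buf() makes
 * a host-visible VkBuffer sized height * aligned stride, map_buffers() and
 * unmap_buffers() handle host access (with invalidation/flushing for
 * non-coherent memory), and transfer_image_buf() records the actual
 * buffer<->image copies. */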
  2223. typedef struct ImageBuffer {
  2224. VkBuffer buf;
  2225. VkDeviceMemory mem;
  2226. VkMemoryPropertyFlagBits flags;
  2227. } ImageBuffer;
  2228. static void free_buf(void *opaque, uint8_t *data)
  2229. {
  2230. AVHWDeviceContext *ctx = opaque;
  2231. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2232. ImageBuffer *vkbuf = (ImageBuffer *)data;
  2233. if (vkbuf->buf)
  2234. vkDestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc);
  2235. if (vkbuf->mem)
  2236. vkFreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc);
  2237. av_free(data);
  2238. }
  2239. static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
  2240. int height, int *stride, VkBufferUsageFlags usage,
  2241. VkMemoryPropertyFlagBits flags, void *create_pnext,
  2242. void *alloc_pnext)
  2243. {
  2244. int err;
  2245. VkResult ret;
  2246. VkMemoryRequirements req;
  2247. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2248. VulkanDevicePriv *p = ctx->internal->priv;
  2249. VkBufferCreateInfo buf_spawn = {
  2250. .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
  2251. .pNext = create_pnext,
  2252. .usage = usage,
  2253. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  2254. };
  2255. ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf));
  2256. if (!vkbuf)
  2257. return AVERROR(ENOMEM);
  2258. *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
  2259. buf_spawn.size = height*(*stride);
  2260. ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf);
  2261. if (ret != VK_SUCCESS) {
  2262. av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
  2263. vk_ret2str(ret));
  2264. return AVERROR_EXTERNAL;
  2265. }
  2266. vkGetBufferMemoryRequirements(hwctx->act_dev, vkbuf->buf, &req);
  2267. err = alloc_mem(ctx, &req, flags, alloc_pnext, &vkbuf->flags, &vkbuf->mem);
  2268. if (err)
  2269. return err;
  2270. ret = vkBindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0);
  2271. if (ret != VK_SUCCESS) {
  2272. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
  2273. vk_ret2str(ret));
  2274. free_buf(ctx, (uint8_t *)vkbuf);
  2275. return AVERROR_EXTERNAL;
  2276. }
  2277. *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0);
  2278. if (!(*buf)) {
  2279. free_buf(ctx, (uint8_t *)vkbuf);
  2280. return AVERROR(ENOMEM);
  2281. }
  2282. return 0;
  2283. }
  2284. static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[],
  2285. int nb_buffers, int invalidate)
  2286. {
  2287. VkResult ret;
  2288. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2289. VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
  2290. int invalidate_count = 0;
  2291. for (int i = 0; i < nb_buffers; i++) {
  2292. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2293. ret = vkMapMemory(hwctx->act_dev, vkbuf->mem, 0,
  2294. VK_WHOLE_SIZE, 0, (void **)&mem[i]);
  2295. if (ret != VK_SUCCESS) {
  2296. av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
  2297. vk_ret2str(ret));
  2298. return AVERROR_EXTERNAL;
  2299. }
  2300. }
  2301. if (!invalidate)
  2302. return 0;
  2303. for (int i = 0; i < nb_buffers; i++) {
  2304. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2305. const VkMappedMemoryRange ival_buf = {
  2306. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2307. .memory = vkbuf->mem,
  2308. .size = VK_WHOLE_SIZE,
  2309. };
  2310. if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2311. continue;
  2312. invalidate_ctx[invalidate_count++] = ival_buf;
  2313. }
  2314. if (invalidate_count) {
  2315. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
  2316. invalidate_ctx);
  2317. if (ret != VK_SUCCESS)
  2318. av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
  2319. vk_ret2str(ret));
  2320. }
  2321. return 0;
  2322. }
  2323. static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
  2324. int nb_buffers, int flush)
  2325. {
  2326. int err = 0;
  2327. VkResult ret;
  2328. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2329. VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
  2330. int flush_count = 0;
  2331. if (flush) {
  2332. for (int i = 0; i < nb_buffers; i++) {
  2333. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2334. const VkMappedMemoryRange flush_buf = {
  2335. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2336. .memory = vkbuf->mem,
  2337. .size = VK_WHOLE_SIZE,
  2338. };
  2339. if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2340. continue;
  2341. flush_ctx[flush_count++] = flush_buf;
  2342. }
  2343. }
  2344. if (flush_count) {
  2345. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
  2346. if (ret != VK_SUCCESS) {
  2347. av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  2348. vk_ret2str(ret));
  2349. err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
  2350. }
  2351. }
  2352. for (int i = 0; i < nb_buffers; i++) {
  2353. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2354. vkUnmapMemory(hwctx->act_dev, vkbuf->mem);
  2355. }
  2356. return err;
  2357. }
  2358. static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
  2359. AVBufferRef **bufs, const int *buf_stride, int w,
  2360. int h, enum AVPixelFormat pix_fmt, int to_buf)
  2361. {
  2362. int err;
  2363. AVVkFrame *frame = (AVVkFrame *)f->data[0];
  2364. VulkanFramesPriv *fp = hwfc->internal->priv;
  2365. int bar_num = 0;
  2366. VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
  2367. const int planes = av_pix_fmt_count_planes(pix_fmt);
  2368. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
  2369. VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
  2370. VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx;
  2371. VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx);
  2372. VkSubmitInfo s_info = {
  2373. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  2374. .pSignalSemaphores = frame->sem,
  2375. .pWaitSemaphores = frame->sem,
  2376. .pWaitDstStageMask = sem_wait_dst,
  2377. .signalSemaphoreCount = planes,
  2378. .waitSemaphoreCount = planes,
  2379. };
  2380. if ((err = wait_start_exec_ctx(hwfc, ectx)))
  2381. return err;
  2382. /* Change the image layout to something more optimal for transfers */
  2383. for (int i = 0; i < planes; i++) {
  2384. VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
  2385. VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  2386. VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
  2387. VK_ACCESS_TRANSFER_WRITE_BIT;
  2388. sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
2389. /* If the layout already matches and we already have the required access, skip the barrier */
  2390. if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
  2391. continue;
  2392. img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  2393. img_bar[bar_num].srcAccessMask = 0x0;
  2394. img_bar[bar_num].dstAccessMask = new_access;
  2395. img_bar[bar_num].oldLayout = frame->layout[i];
  2396. img_bar[bar_num].newLayout = new_layout;
  2397. img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2398. img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2399. img_bar[bar_num].image = frame->img[i];
  2400. img_bar[bar_num].subresourceRange.levelCount = 1;
  2401. img_bar[bar_num].subresourceRange.layerCount = 1;
  2402. img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  2403. frame->layout[i] = img_bar[bar_num].newLayout;
  2404. frame->access[i] = img_bar[bar_num].dstAccessMask;
  2405. bar_num++;
  2406. }
  2407. if (bar_num)
  2408. vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
  2409. VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
  2410. 0, NULL, 0, NULL, bar_num, img_bar);
  2411. /* Schedule a copy for each plane */
  2412. for (int i = 0; i < planes; i++) {
  2413. ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
  2414. const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
  2415. const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
  2416. VkBufferImageCopy buf_reg = {
  2417. .bufferOffset = 0,
2418. /* Buffer stride isn't in bytes, it's in texels; the implementation
2419. * uses the image's VkFormat to know how many bytes per texel
2420. * the buffer has, so we have to convert by dividing.
2421. * Won't work with YUVA or other planar formats with alpha. */
  2422. .bufferRowLength = buf_stride[i] / desc->comp[i].step,
  2423. .bufferImageHeight = p_h,
  2424. .imageSubresource.layerCount = 1,
  2425. .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  2426. .imageOffset = { 0, 0, 0, },
  2427. .imageExtent = { p_w, p_h, 1, },
  2428. };
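/* The bufferRowLength conversion above, by example: for yuv420p every
 * component has comp.step == 1, so the row length equals the stride in
 * bytes; for p010le comp.step == 2, so a 3840-byte luma stride becomes a
 * bufferRowLength of 1920 texels. */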
  2429. if (to_buf)
  2430. vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
  2431. vkbuf->buf, 1, &buf_reg);
  2432. else
  2433. vkCmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
  2434. frame->layout[i], 1, &buf_reg);
  2435. }
2436. /* When uploading, do this asynchronously if the source is refcounted by
2437. * keeping the buffers as a submission dependency.
2438. * The hwcontext is guaranteed not to be freed until all frames are freed
2439. * in the frames_uninit function.
2440. * When downloading to a buffer, do this synchronously and wait for the
2441. * queue submission to finish executing. */
  2442. if (!to_buf) {
  2443. int ref;
  2444. for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
  2445. if (!f->buf[ref])
  2446. break;
  2447. if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
  2448. return err;
  2449. }
  2450. if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
  2451. return err;
  2452. return submit_exec_ctx(hwfc, ectx, &s_info, !ref);
  2453. } else {
  2454. return submit_exec_ctx(hwfc, ectx, &s_info, 1);
  2455. }
  2456. }
2457. /* Technically we could use VK_EXT_external_memory_host to upload and download,
2458. * however the alignment requirements make this infeasible, as both the pointer
2459. * and the size of each plane need to be aligned to the minimum alignment
2460. * requirement, which on all current implementations (anv, radv) is 4096.
2461. * If the requirement gets relaxed (unlikely) this can easily be implemented. */
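/* Upload path (software frame -> Vulkan image): linearly tiled, host-visible
 * images are simply mapped and copied into; everything else goes through
 * host-visible staging buffers and transfer_image_buf(). */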
  2462. static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  2463. const AVFrame *src)
  2464. {
  2465. int err = 0;
  2466. AVFrame tmp;
  2467. AVVkFrame *f = (AVVkFrame *)dst->data[0];
  2468. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2469. AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
  2470. const int planes = av_pix_fmt_count_planes(src->format);
  2471. int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
  2472. if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) {
  2473. av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
  2474. return AVERROR(EINVAL);
  2475. }
  2476. if (src->width > hwfc->width || src->height > hwfc->height)
  2477. return AVERROR(EINVAL);
2478. /* For linear, host-visible images */
  2479. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2480. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2481. AVFrame *map = av_frame_alloc();
  2482. if (!map)
  2483. return AVERROR(ENOMEM);
  2484. map->format = src->format;
  2485. err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
2486. if (err) {
2487. av_frame_free(&map);
return err;
}
  2488. err = av_frame_copy(map, src);
  2489. av_frame_free(&map);
  2490. return err;
  2491. }
  2492. /* Create buffers */
  2493. for (int i = 0; i < planes; i++) {
  2494. int h = src->height;
  2495. int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
  2496. tmp.linesize[i] = FFABS(src->linesize[i]);
  2497. err = create_buf(dev_ctx, &bufs[i], p_height,
  2498. &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
  2499. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
  2500. if (err)
  2501. goto end;
  2502. }
  2503. /* Map, copy image to buffer, unmap */
  2504. if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
  2505. goto end;
  2506. av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
  2507. src->linesize, src->format, src->width, src->height);
  2508. if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
  2509. goto end;
  2510. /* Copy buffers to image */
  2511. err = transfer_image_buf(hwfc, dst, bufs, tmp.linesize,
  2512. src->width, src->height, src->format, 0);
  2513. end:
  2514. for (int i = 0; i < planes; i++)
  2515. av_buffer_unref(&bufs[i]);
  2516. return err;
  2517. }
  2518. static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
  2519. const AVFrame *src)
  2520. {
  2521. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2522. switch (src->format) {
  2523. #if CONFIG_CUDA
  2524. case AV_PIX_FMT_CUDA:
  2525. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2526. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2527. return vulkan_transfer_data_from_cuda(hwfc, dst, src);
  2528. #endif
  2529. default:
  2530. if (src->hw_frames_ctx)
  2531. return AVERROR(ENOSYS);
  2532. else
  2533. return vulkan_transfer_data_from_mem(hwfc, dst, src);
  2534. }
  2535. }
  2536. #if CONFIG_CUDA
  2537. static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
  2538. const AVFrame *src)
  2539. {
  2540. int err;
  2541. VkResult ret;
  2542. CUcontext dummy;
  2543. AVVkFrame *dst_f;
  2544. AVVkFrameInternal *dst_int;
  2545. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2546. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  2547. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
  2548. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  2549. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  2550. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  2551. CudaFunctions *cu = cu_internal->cuda_dl;
  2552. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  2553. if (ret < 0) {
  2554. err = AVERROR_EXTERNAL;
  2555. goto fail;
  2556. }
  2557. dst_f = (AVVkFrame *)src->data[0];
  2558. err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
  2559. if (err < 0) {
  2560. goto fail;
  2561. }
  2562. dst_int = dst_f->internal;
  2563. for (int i = 0; i < planes; i++) {
  2564. CUDA_MEMCPY2D cpy = {
  2565. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  2566. .dstDevice = (CUdeviceptr)dst->data[i],
  2567. .dstPitch = dst->linesize[i],
  2568. .dstY = 0,
  2569. .srcMemoryType = CU_MEMORYTYPE_ARRAY,
  2570. .srcArray = dst_int->cu_array[i],
  2571. .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  2572. : hwfc->width) * desc->comp[i].step,
  2573. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  2574. : hwfc->height,
  2575. };
  2576. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  2577. if (ret < 0) {
  2578. err = AVERROR_EXTERNAL;
  2579. goto fail;
  2580. }
  2581. }
  2582. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2583. av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
  2584. return 0;
  2585. fail:
  2586. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2587. vulkan_free_internal(dst_int);
  2588. dst_f->internal = NULL;
  2589. av_buffer_unref(&dst->buf[0]);
  2590. return err;
  2591. }
  2592. #endif
  2593. static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  2594. const AVFrame *src)
  2595. {
  2596. int err = 0;
  2597. AVFrame tmp;
  2598. AVVkFrame *f = (AVVkFrame *)src->data[0];
  2599. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2600. AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
  2601. const int planes = av_pix_fmt_count_planes(dst->format);
  2602. int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
  2603. if (dst->width > hwfc->width || dst->height > hwfc->height)
  2604. return AVERROR(EINVAL);
2605. /* For linear, host-visible images */
  2606. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2607. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2608. AVFrame *map = av_frame_alloc();
  2609. if (!map)
  2610. return AVERROR(ENOMEM);
  2611. map->format = dst->format;
  2612. err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
2613. if (err) {
2614. av_frame_free(&map);
return err;
}
  2615. err = av_frame_copy(dst, map);
  2616. av_frame_free(&map);
  2617. return err;
  2618. }
  2619. /* Create buffers */
  2620. for (int i = 0; i < planes; i++) {
  2621. int h = dst->height;
  2622. int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
  2623. tmp.linesize[i] = FFABS(dst->linesize[i]);
  2624. err = create_buf(dev_ctx, &bufs[i], p_height,
  2625. &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
  2626. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
  2627. if (err)
  2628. goto end;
  2629. }
  2630. /* Copy image to buffer */
  2631. if ((err = transfer_image_buf(hwfc, src, bufs, tmp.linesize,
  2632. dst->width, dst->height, dst->format, 1)))
  2633. goto end;
  2634. /* Map, copy buffer to frame, unmap */
  2635. if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 1)))
  2636. goto end;
  2637. av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
  2638. tmp.linesize, dst->format, dst->width, dst->height);
  2639. err = unmap_buffers(dev_ctx, bufs, planes, 0);
  2640. end:
  2641. for (int i = 0; i < planes; i++)
  2642. av_buffer_unref(&bufs[i]);
  2643. return err;
  2644. }
  2645. static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2646. const AVFrame *src)
  2647. {
  2648. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2649. switch (dst->format) {
  2650. #if CONFIG_CUDA
  2651. case AV_PIX_FMT_CUDA:
  2652. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2653. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2654. return vulkan_transfer_data_to_cuda(hwfc, dst, src);
  2655. #endif
  2656. default:
  2657. if (dst->hw_frames_ctx)
  2658. return AVERROR(ENOSYS);
  2659. else
  2660. return vulkan_transfer_data_to_mem(hwfc, dst, src);
  2661. }
  2662. }
  2663. AVVkFrame *av_vk_frame_alloc(void)
  2664. {
  2665. return av_mallocz(sizeof(AVVkFrame));
  2666. }
  2667. const HWContextType ff_hwcontext_type_vulkan = {
  2668. .type = AV_HWDEVICE_TYPE_VULKAN,
  2669. .name = "Vulkan",
  2670. .device_hwctx_size = sizeof(AVVulkanDeviceContext),
  2671. .device_priv_size = sizeof(VulkanDevicePriv),
  2672. .frames_hwctx_size = sizeof(AVVulkanFramesContext),
  2673. .frames_priv_size = sizeof(VulkanFramesPriv),
  2674. .device_init = &vulkan_device_init,
  2675. .device_create = &vulkan_device_create,
  2676. .device_derive = &vulkan_device_derive,
  2677. .frames_get_constraints = &vulkan_frames_get_constraints,
  2678. .frames_init = vulkan_frames_init,
  2679. .frames_get_buffer = vulkan_get_buffer,
  2680. .frames_uninit = vulkan_frames_uninit,
  2681. .transfer_get_formats = vulkan_transfer_get_formats,
  2682. .transfer_data_to = vulkan_transfer_data_to,
  2683. .transfer_data_from = vulkan_transfer_data_from,
  2684. .map_to = vulkan_map_to,
  2685. .map_from = vulkan_map_from,
  2686. .pix_fmts = (const enum AVPixelFormat []) {
  2687. AV_PIX_FMT_VULKAN,
  2688. AV_PIX_FMT_NONE
  2689. },
  2690. };