/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#if CONFIG_LIBDRM
#include <unistd.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif
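
/* Small execution context: a command pool holding a single primary command
 * buffer, the queue it is submitted on and a fence used to wait for completion. */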
typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer buf;
    VkQueue queue;
    VkFence fence;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Properties */
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;

    /* Queues */
    uint32_t qfs[3];
    int num_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Image uploading */
    VulkanExecCtx cmd;

    /* Extensions */
    uint64_t extensions;

    /* Settings */
    int use_linear_images;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    VulkanExecCtx cmd;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#endif
} AVVkFrameInternal;

#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
    vkGetInstanceProcAddr(inst, #name)

#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
                             VK_IMAGE_USAGE_STORAGE_BIT | \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)

#define ADD_VAL_TO_LIST(list, count, val) \
    do { \
        list = av_realloc_array(list, sizeof(*list), ++count); \
        if (!list) { \
            err = AVERROR(ENOMEM); \
            goto fail; \
        } \
        list[count - 1] = av_strdup(val); \
        if (!list[count - 1]) { \
            err = AVERROR(ENOMEM); \
            goto fail; \
        } \
    } while(0)
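
/* Pixel format to per-plane VkFormat mapping; multi-planar software formats
 * are represented as one single-plane VkImage per plane. */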
static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[3];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },

    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_ABGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_0BGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } },

    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}
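
/* Returns 1 if every plane of the format supports the default usage flags
 * under the requested tiling (linear or optimal), 0 otherwise. */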
static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
                               int linear)
{
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}

enum VulkanExtensions {
    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL << 0, /* VK_EXT_external_memory_dma_buf */
    EXT_DRM_MODIFIER_FLAGS     = 1ULL << 1, /* VK_EXT_image_drm_format_modifier */
    EXT_EXTERNAL_FD_MEMORY     = 1ULL << 2, /* VK_KHR_external_memory_fd */
    EXT_EXTERNAL_FD_SEM        = 1ULL << 3, /* VK_KHR_external_semaphore_fd */

    EXT_NO_FLAG                = 1ULL << 63,
};

typedef struct VulkanOptExtension {
    const char *name;
    uint64_t flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    /* For future use */
};

static const VulkanOptExtension optional_device_exts[] = {
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,        EXT_EXTERNAL_FD_MEMORY, },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,   EXT_EXTERNAL_DMABUF_MEMORY, },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,     EXT_EXTERNAL_FD_SEM, },
};
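
/* Extensions that end up enabled are also recorded as VulkanExtensions bits
 * in VulkanDevicePriv.extensions, so their presence can be checked at runtime. */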
/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                VkDebugUtilsMessageTypeFlagsEXT messageType,
                                const VkDebugUtilsMessengerCallbackDataEXT *data,
                                void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}
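
/* Builds the list of extensions to enable: every supported optional extension
 * from the tables above, the debug utils extension when requested, and any
 * user-supplied '+'-separated names from the "instance_extensions" or
 * "device_extensions" options. */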
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
        p->extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    if (extension_names)
        for (int i = 0; i < extensions_found; i++)
            av_free((void *)extension_names[i]);
    av_free(extension_names);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}

/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
    VkApplicationInfo application_info = {
        .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName = "libavutil",
        .apiVersion = VK_API_VERSION_1_1,
        .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                         LIBAVUTIL_VERSION_MINOR,
                                         LIBAVUTIL_VERSION_MICRO),
    };
    VkInstanceCreateInfo inst_props = {
        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);
    if (err < 0)
        return err;

    if (debug_mode) {
        static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
        inst_props.ppEnabledLayerNames = layers;
        inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
    }

    /* Try to create the instance */
    ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < inst_props.enabledExtensionCount; i++)
            av_free((void *)inst_props.ppEnabledExtensionNames[i]);
        av_free((void *)inst_props.ppEnabledExtensionNames);
        return AVERROR_EXTERNAL;
    }

    if (debug_mode) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };
        VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);
        pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                           hwctx->alloc, &p->debug_ctx);
    }

    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;

    return 0;
}

typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    const char *name; /* Will use this second unless NULL */
    uint32_t pci_device; /* Will use this third unless 0x0 */
    uint32_t vendor_id; /* Last resort to find something deterministic */
    int index; /* Finally fall back to index */
} VulkanDeviceSelection;

static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}

/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_mallocz_array(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_mallocz_array(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        idp[i].sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1) {
        p->dev_is_nvidia = (prop[choice].properties.vendorID == 0x10de);
        hwctx->phys_dev = devices[choice];
    }
    av_free(devices);
    av_free(prop);
    av_free(idp);

    return err;
}
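
/* Picks queue families for graphics, compute and transfer work, preferring a
 * distinct family for each role when available, and fills in the
 * VkDeviceQueueCreateInfo array referenced by cd. */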
static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    float *weights;
    VkQueueFamilyProperties *qs = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int graph_index = -1, comp_index = -1, tx_index = -1;
    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;

    /* First get the number of queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
    if (!qs)
        return AVERROR(ENOMEM);

    /* Finally retrieve the queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);

#define SEARCH_FLAGS(expr, out) \
    for (int i = 0; i < num; i++) { \
        const VkQueueFlagBits flags = qs[i].queueFlags; \
        if (expr) { \
            out = i; \
            break; \
        } \
    }

    SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)

    SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
                 comp_index)

    SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
                 (i != comp_index), tx_index)

#undef SEARCH_FLAGS

#define ADD_QUEUE(fidx, graph, comp, tx) \
    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
           fidx, qs[fidx].queueCount, graph ? "graphics " : "", \
           comp ? "compute " : "", tx ? "transfers " : ""); \
    av_log(ctx, AV_LOG_VERBOSE, "    QF %i flags: %s%s%s%s\n", fidx, \
           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "", \
           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "", \
           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "", \
           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : ""); \
    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx; \
    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount; \
    weights = av_malloc(qs[fidx].queueCount * sizeof(float)); \
    pc[cd->queueCreateInfoCount].pQueuePriorities = weights; \
    if (!weights) \
        goto fail; \
    for (int i = 0; i < qs[fidx].queueCount; i++) \
        weights[i] = 1.0f; \
    cd->queueCreateInfoCount++;

    ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
    hwctx->queue_family_index      = graph_index;
    hwctx->queue_family_comp_index = graph_index;
    hwctx->queue_family_tx_index   = graph_index;
    hwctx->nb_graphics_queues      = qs[graph_index].queueCount;

    if (comp_index != -1) {
        ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
        hwctx->queue_family_tx_index   = comp_index;
        hwctx->queue_family_comp_index = comp_index;
        hwctx->nb_comp_queues          = qs[comp_index].queueCount;
    }

    if (tx_index != -1) {
        ADD_QUEUE(tx_index, 0, 0, 1)
        hwctx->queue_family_tx_index = tx_index;
        hwctx->nb_tx_queues          = qs[tx_index].queueCount;
    }

#undef ADD_QUEUE

    av_free(qs);

    return 0;

fail:
    av_freep(&pc[0].pQueuePriorities);
    av_freep(&pc[1].pQueuePriorities);
    av_freep(&pc[2].pQueuePriorities);
    av_free(qs);
    return AVERROR(ENOMEM);
}
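
/* Creates a fence, a command pool and a single primary command buffer on the
 * given queue family; used internally for uploads and layout transitions. */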
static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
                           int queue_family_index)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    VkFenceCreateInfo fence_spawn = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
    };

    ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
                        hwctx->alloc, &cmd->fence);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                              hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = cmd->pool;

    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
                     &cmd->queue);

    return 0;
}

static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
{
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    if (cmd->fence)
        vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
    if (cmd->buf)
        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
    if (cmd->pool)
        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
}

static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    free_exec_ctx(ctx, &p->cmd);

    vkDestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx) {
        VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
        pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                            hwctx->alloc);
    }

    vkDestroyInstance(hwctx->inst, hwctx->alloc);

    for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
        av_free((void *)hwctx->enabled_inst_extensions[i]);
    av_free((void *)hwctx->enabled_inst_extensions);

    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
        av_free((void *)hwctx->enabled_dev_extensions[i]);
    av_free((void *)hwctx->enabled_dev_extensions);
}

static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VkDeviceQueueCreateInfo queue_create_info[3] = {
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
    };

    VkDeviceCreateInfo dev_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pQueueCreateInfos = queue_create_info,
        .queueCreateInfoCount = 0,
    };

    ctx->free = vulkan_device_free;

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts)))
        goto end;

    /* Find a device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyOffsetAlignment: %li\n",
           p->props.limits.optimalBufferCopyOffsetAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %li\n",
           p->props.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment: %li\n",
           p->props.limits.minMemoryMapAlignment);

    /* Search queue family */
    if ((err = search_queue_families(ctx, &dev_info)))
        goto end;

    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, 0))) {
        av_free((void *)queue_create_info[0].pQueuePriorities);
        av_free((void *)queue_create_info[1].pQueuePriorities);
        av_free((void *)queue_create_info[2].pQueuePriorities);
        goto end;
    }

    ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                         &hwctx->act_dev);

    av_free((void *)queue_create_info[0].pQueuePriorities);
    av_free((void *)queue_create_info[1].pQueuePriorities);
    av_free((void *)queue_create_info[2].pQueuePriorities);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Tiled images setting, use them by default */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

end:
    return err;
}
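
/* Post-creation initialization: translates the enabled device extensions back
 * into flag bits, validates the queue family indices set by the user or by
 * vulkan_device_create_internal(), and creates the internal transfer
 * execution context. */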
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err;
    uint32_t queue_num;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
    if (!queue_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

#define CHECK_QUEUE(type, n) \
    if (n >= queue_num) { \
        av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
               type, n, queue_num); \
        return AVERROR(EINVAL); \
    }

    CHECK_QUEUE("graphics", hwctx->queue_family_index)
    CHECK_QUEUE("upload",   hwctx->queue_family_tx_index)
    CHECK_QUEUE("compute",  hwctx->queue_family_comp_index)

#undef CHECK_QUEUE

    p->qfs[p->num_qfs++] = hwctx->queue_family_index;
    if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index;
    if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;

    /* Create exec context - if there's something invalid this will error out */
    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
    if (err)
        return err;

    /* Get device capabilities */
    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    return 0;
}

static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}

static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx,
                                AVDictionary *opts, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch (src_ctx->type) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;

        const char *vendor = vaQueryVendorString(src_hwctx->display);
        if (!vendor) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
            return AVERROR_EXTERNAL;
        }

        if (strstr(vendor, "Intel"))
            dev_select.vendor_id = 0x8086;
        if (strstr(vendor, "AMD"))
            dev_select.vendor_id = 0x1002;

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
    case AV_HWDEVICE_TYPE_DRM: {
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        drmDevice *drm_dev_info;
        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        count += pixfmt_is_supported(hwctx, i, p->use_linear_images);

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        count++;
#endif

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
            constraints->valid_sw_formats[count++] = i;

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 0;
    constraints->min_height = 0;
    constraints->max_width  = p->props.limits.maxImageDimension2D;
    constraints->max_height = p->props.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}
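
/* Allocates device memory from the first memory type that both satisfies the
 * requirement bitmask in req and includes all of req_flags; the chosen type's
 * property flags are ORed into mem_flags. */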
static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
                           dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}

static void vulkan_free_internal(AVVkFrameInternal *internal)
{
    if (!internal)
        return;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    av_free(internal);
}

static void vulkan_frame_free(void *opaque, uint8_t *data)
{
    AVVkFrame *f = (AVVkFrame *)data;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    vulkan_free_internal(f->internal);

    for (int i = 0; i < planes; i++) {
        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    for (int i = 0; i < planes; i++) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[i];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[i])))
            return err;

        f->size[i] = req.memoryRequirements.size;
        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[i].image  = f->img[i];
        bind_info[i].memory = f->mem[i];
    }

    /* Bind the allocated memory to the images */
    ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

enum PrepMode {
    PREP_MODE_WRITE,
    PREP_MODE_RO_SHADER,
    PREP_MODE_EXTERNAL_EXPORT,
};
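
/* Records and submits a one-time command buffer that transitions every plane
 * to the layout/access required by pmode, signalling the per-plane semaphores
 * in the process. */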
static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    VkResult ret;
    uint32_t dst_qf;
    VkImageLayout new_layout;
    VkAccessFlags new_access;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    VkSubmitInfo s_info = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount = 1,
        .pCommandBuffers = &ectx->buf,

        .pSignalSemaphores = frame->sem,
        .signalSemaphoreCount = planes,
    };

    VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
    for (int i = 0; i < planes; i++)
        wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

    switch (pmode) {
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_RO_SHADER:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_READ_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_EXTERNAL_EXPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        s_info.pWaitSemaphores = frame->sem;
        s_info.pWaitDstStageMask = wait_st;
        s_info.waitSemaphoreCount = planes;
        break;
    }

    ret = vkBeginCommandBuffer(ectx->buf, &cmd_start);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    /* Change the image layout to something more optimal for writes.
     * This also signals the newly created semaphore, making it usable
     * for synchronization */
    for (int i = 0; i < planes; i++) {
        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[i].srcAccessMask = 0x0;
        img_bar[i].dstAccessMask = new_access;
        img_bar[i].oldLayout = frame->layout[i];
        img_bar[i].newLayout = new_layout;
        img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[i].dstQueueFamilyIndex = dst_qf;
        img_bar[i].image = frame->img[i];
        img_bar[i].subresourceRange.levelCount = 1;
        img_bar[i].subresourceRange.layerCount = 1;
        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[i].newLayout;
        frame->access[i] = img_bar[i].dstAccessMask;
    }

    vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                         0, NULL, 0, NULL, planes, img_bar);

    ret = vkEndCommandBuffer(ectx->buf);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence);
    if (ret != VK_SUCCESS) {
        return AVERROR_EXTERNAL;
    } else {
        vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &ectx->fence);
    }

    return 0;
}
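
/* Creates one VkImage and one semaphore per plane; memory is allocated and
 * bound separately by alloc_bind_mem(). */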
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    enum AVPixelFormat format = hwfc->sw_format;
    const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
    const int planes = av_pix_fmt_count_planes(format);

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
    };

    AVVkFrame *f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }

    /* Create the images */
    for (int i = 0; i < planes; i++) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
        int w = hwfc->width;
        int h = hwfc->height;
        const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
        const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;

        VkImageCreateInfo image_create_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext = create_pnext,
            .imageType = VK_IMAGE_TYPE_2D,
            .format = img_fmts[i],
            .extent.width = p_w,
            .extent.height = p_h,
            .extent.depth = 1,
            .mipLevels = 1,
            .arrayLayers = 1,
            .flags = VK_IMAGE_CREATE_ALIAS_BIT,
            .tiling = tiling,
            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage = usage,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices = p->qfs,
            .queueFamilyIndexCount = p->num_qfs,
            .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                            VK_SHARING_MODE_EXCLUSIVE,
        };

        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
                            hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Create semaphore */
        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
                                hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }

        f->layout[i] = image_create_info.initialLayout;
        f->access[i] = 0x0;
    }

    f->flags = 0x0;
    f->tiling = tiling;

    *frame = f;
    return 0;

fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
    return err;
}

/* Checks if an export flag is enabled, and if it is ORs it with *iexp */
static void try_export_flags(AVHWFramesContext *hwfc,
                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
                             VkExternalMemoryHandleTypeFlagBits *iexp,
                             VkExternalMemoryHandleTypeFlagBits exp)
{
    VkResult ret;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
    VkExternalImageFormatProperties eprops = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };
    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
        .pNext = &eprops,
    };
    VkPhysicalDeviceExternalImageFormatInfo enext = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
        .handleType = exp,
    };
    VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .pNext  = !exp ? NULL : &enext,
        .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
        .type   = VK_IMAGE_TYPE_2D,
        .tiling = hwctx->tiling,
        .usage  = hwctx->usage,
        .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
    };

    ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
                                                    &pinfo, &props);
    if (ret == VK_SUCCESS) {
        *iexp |= exp;
        *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
    }
}
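
/* AVBufferPool allocator: creates a frame, binds (optionally exportable)
 * memory and transitions it into a writable layout. */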
static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
{
    int err;
    AVVkFrame *f;
    AVBufferRef *avbuf = NULL;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
    VkExternalMemoryHandleTypeFlags e = 0x0;

    VkExternalMemoryImageCreateInfo eiinfo = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext = hwctx->create_pnext,
    };

    if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);

    if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

    for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
        eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
        eminfo[i].pNext       = hwctx->alloc_pnext[i];
        eminfo[i].handleTypes = e;
    }

    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
                       eiinfo.handleTypes ? &eiinfo : NULL);
    if (err)
        return NULL;

    err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
    if (err)
        goto fail;

    err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_WRITE);
    if (err)
        goto fail;

    avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
                             vulkan_frame_free, hwfc, 0);
    if (!avbuf)
        goto fail;

    return avbuf;

fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
    return NULL;
}

static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
    VulkanFramesPriv *fp = hwfc->internal->priv;

    free_exec_ctx(hwfc->device_ctx, &fp->cmd);
}

static int vulkan_frames_init(AVHWFramesContext *hwfc)
{
    int err;
    AVVkFrame *f;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    VulkanFramesPriv *fp = hwfc->internal->priv;
    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;

    if (hwfc->pool)
        return 0;

    /* Default pool flags */
    hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
                    VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;

    hwctx->usage |= DEFAULT_USAGE_FLAGS;

    err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
                          dev_hwctx->queue_family_tx_index);
    if (err)
        return err;

    /* Test to see if allocation will fail */
    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
                       hwctx->create_pnext);
    if (err) {
        free_exec_ctx(hwfc->device_ctx, &fp->cmd);
        return err;
    }

    vulkan_frame_free(hwfc, (uint8_t *)f);

    hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
                                                         hwfc, vulkan_pool_alloc,
                                                         NULL);
    if (!hwfc->internal->pool_internal) {
        free_exec_ctx(hwfc->device_ctx, &fp->cmd);
        return AVERROR(ENOMEM);
    }

    return 0;
}

static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
{
    frame->buf[0] = av_buffer_pool_get(hwfc->pool);
    if (!frame->buf[0])
        return AVERROR(ENOMEM);

    frame->data[0] = frame->buf[0]->data;
    frame->format  = AV_PIX_FMT_VULKAN;
    frame->width   = hwfc->width;
    frame->height  = hwfc->height;

    return 0;
}

static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
                                       enum AVHWFrameTransferDirection dir,
                                       enum AVPixelFormat **formats)
{
    enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
    if (!fmts)
        return AVERROR(ENOMEM);

    fmts[0] = hwfc->sw_format;
    fmts[1] = AV_PIX_FMT_NONE;

    *formats = fmts;
    return 0;
}

typedef struct VulkanMapping {
    AVVkFrame *frame;
    int flags;
} VulkanMapping;

static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    VulkanMapping *map = hwmap->priv;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    /* Check if buffer needs flushing */
    if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
        !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        VkResult ret;
        VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };

        for (int i = 0; i < planes; i++) {
            flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
            flush_ranges[i].memory = map->frame->mem[i];
            flush_ranges[i].size   = VK_WHOLE_SIZE;
        }

        ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
                                        flush_ranges);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   vk_ret2str(ret));
        }
    }

    for (int i = 0; i < planes; i++)
        vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);

    av_free(map);
}
  1397. static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  1398. const AVFrame *src, int flags)
  1399. {
  1400. VkResult ret;
  1401. int err, mapped_mem_count = 0;
  1402. AVVkFrame *f = (AVVkFrame *)src->data[0];
  1403. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1404. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1405. VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
  1406. if (!map)
1407. return AVERROR(ENOMEM);
  1408. if (src->format != AV_PIX_FMT_VULKAN) {
  1409. av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
  1410. av_get_pix_fmt_name(src->format));
  1411. err = AVERROR(EINVAL);
  1412. goto fail;
  1413. }
  1414. if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
  1415. !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
  1416. av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
  1417. "and linear!\n");
  1418. err = AVERROR(EINVAL);
  1419. goto fail;
  1420. }
  1421. dst->width = src->width;
  1422. dst->height = src->height;
  1423. for (int i = 0; i < planes; i++) {
  1424. ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
  1425. VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
  1426. if (ret != VK_SUCCESS) {
  1427. av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
  1428. vk_ret2str(ret));
  1429. err = AVERROR_EXTERNAL;
  1430. goto fail;
  1431. }
  1432. mapped_mem_count++;
  1433. }
  1434. /* Check if the memory contents matter */
  1435. if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
  1436. !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
  1437. VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
  1438. for (int i = 0; i < planes; i++) {
  1439. map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  1440. map_mem_ranges[i].size = VK_WHOLE_SIZE;
  1441. map_mem_ranges[i].memory = f->mem[i];
  1442. }
  1443. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
  1444. map_mem_ranges);
  1445. if (ret != VK_SUCCESS) {
  1446. av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
  1447. vk_ret2str(ret));
  1448. err = AVERROR_EXTERNAL;
  1449. goto fail;
  1450. }
  1451. }
  1452. for (int i = 0; i < planes; i++) {
  1453. VkImageSubresource sub = {
  1454. .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  1455. };
  1456. VkSubresourceLayout layout;
  1457. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  1458. dst->linesize[i] = layout.rowPitch;
  1459. }
  1460. map->frame = f;
  1461. map->flags = flags;
  1462. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
  1463. &vulkan_unmap_frame, map);
  1464. if (err < 0)
  1465. goto fail;
  1466. return 0;
  1467. fail:
  1468. for (int i = 0; i < mapped_mem_count; i++)
  1469. vkUnmapMemory(hwctx->act_dev, f->mem[i]);
  1470. av_free(map);
  1471. return err;
  1472. }
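/* A minimal sketch of the calling side of this mapping path, via the generic
 * av_hwframe_map() API (error handling omitted), assuming vk_frame is an
 * existing AV_PIX_FMT_VULKAN frame; it only succeeds for frames allocated
 * with linear tiling and host-visible memory:
 *
 *     AVFrame *cpu = av_frame_alloc();
 *     cpu->format = sw_format; // the frames context's sw_format
 *     av_hwframe_map(cpu, vk_frame, AV_HWFRAME_MAP_READ);
 *     // read cpu->data[] / cpu->linesize[], then:
 *     av_frame_free(&cpu); // runs vulkan_unmap_frame()
 */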
  1473. #if CONFIG_LIBDRM
  1474. static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1475. {
  1476. VulkanMapping *map = hwmap->priv;
  1477. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1478. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1479. for (int i = 0; i < planes; i++) {
  1480. vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
  1481. vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
  1482. vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
  1483. }
1484. av_freep(&map->frame);
av_free(map);
  1485. }
  1486. static const struct {
  1487. uint32_t drm_fourcc;
  1488. VkFormat vk_format;
  1489. } vulkan_drm_format_map[] = {
  1490. { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
  1491. { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
  1492. { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
  1493. { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
  1494. { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
  1495. { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
  1496. { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1497. { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
  1498. { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1499. { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
  1500. };
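/* Note: multi-plane formats such as NV12 are expected to arrive here as
 * separate single-plane layers (e.g. R8 + GR88, as VAAPI exports them with
 * separate layers), each of which is imported as its own VkImage below;
 * that is why only per-plane and packed RGB formats appear in this table. */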
  1501. static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
  1502. {
  1503. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  1504. if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
  1505. return vulkan_drm_format_map[i].vk_format;
  1506. return VK_FORMAT_UNDEFINED;
  1507. }
  1508. static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
  1509. AVDRMFrameDescriptor *desc)
  1510. {
  1511. int err = 0;
  1512. VkResult ret;
  1513. AVVkFrame *f;
  1514. int bind_counts = 0;
  1515. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1516. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1517. VulkanDevicePriv *p = ctx->internal->priv;
  1518. const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1519. const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
  1520. VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
  1521. VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
  1522. VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
  1523. VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
  1524. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
  1525. for (int i = 0; i < desc->nb_layers; i++) {
  1526. if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
  1527. av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
  1528. desc->layers[i].format);
  1529. return AVERROR(EINVAL);
  1530. }
  1531. }
  1532. if (!(f = av_vk_frame_alloc())) {
  1533. av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
  1534. err = AVERROR(ENOMEM);
  1535. goto fail;
  1536. }
  1537. for (int i = 0; i < desc->nb_objects; i++) {
  1538. VkMemoryFdPropertiesKHR fdmp = {
  1539. .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
  1540. };
  1541. VkMemoryRequirements req = {
  1542. .size = desc->objects[i].size,
  1543. };
  1544. VkImportMemoryFdInfoKHR idesc = {
  1545. .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
  1546. .handleType = htype,
  1547. .fd = dup(desc->objects[i].fd),
  1548. };
  1549. ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
  1550. idesc.fd, &fdmp);
  1551. if (ret != VK_SUCCESS) {
  1552. av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
  1553. vk_ret2str(ret));
  1554. err = AVERROR_EXTERNAL;
  1555. close(idesc.fd);
  1556. goto fail;
  1557. }
  1558. req.memoryTypeBits = fdmp.memoryTypeBits;
  1559. err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
  1560. &idesc, &f->flags, &f->mem[i]);
  1561. if (err) {
  1562. close(idesc.fd);
  1563. return err;
  1564. }
  1565. f->size[i] = desc->objects[i].size;
  1566. }
  1567. f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
  1568. desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
  1569. VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
  1570. for (int i = 0; i < desc->nb_layers; i++) {
  1571. const int planes = desc->layers[i].nb_planes;
  1572. const int signal_p = has_modifiers && (planes > 1);
  1573. VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
  1574. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
  1575. .drmFormatModifier = desc->objects[0].format_modifier,
  1576. .drmFormatModifierPlaneCount = planes,
  1577. .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
  1578. };
  1579. VkExternalMemoryImageCreateInfo einfo = {
  1580. .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
  1581. .pNext = has_modifiers ? &drm_info : NULL,
  1582. .handleTypes = htype,
  1583. };
  1584. VkSemaphoreCreateInfo sem_spawn = {
  1585. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
  1586. };
  1587. const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
  1588. const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
  1589. VkImageCreateInfo image_create_info = {
  1590. .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
  1591. .pNext = &einfo,
  1592. .imageType = VK_IMAGE_TYPE_2D,
  1593. .format = drm_to_vulkan_fmt(desc->layers[i].format),
  1594. .extent.width = p_w,
  1595. .extent.height = p_h,
  1596. .extent.depth = 1,
  1597. .mipLevels = 1,
  1598. .arrayLayers = 1,
  1599. .flags = VK_IMAGE_CREATE_ALIAS_BIT,
  1600. .tiling = f->tiling,
  1601. .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
  1602. .usage = DEFAULT_USAGE_FLAGS,
  1603. .samples = VK_SAMPLE_COUNT_1_BIT,
  1604. .pQueueFamilyIndices = p->qfs,
  1605. .queueFamilyIndexCount = p->num_qfs,
  1606. .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
  1607. VK_SHARING_MODE_EXCLUSIVE,
  1608. };
  1609. for (int j = 0; j < planes; j++) {
  1610. plane_data[j].offset = desc->layers[i].planes[j].offset;
  1611. plane_data[j].rowPitch = desc->layers[i].planes[j].pitch;
  1612. plane_data[j].size = 0; /* The specs say so for all 3 */
  1613. plane_data[j].arrayPitch = 0;
  1614. plane_data[j].depthPitch = 0;
  1615. }
  1616. /* Create image */
  1617. ret = vkCreateImage(hwctx->act_dev, &image_create_info,
  1618. hwctx->alloc, &f->img[i]);
  1619. if (ret != VK_SUCCESS) {
  1620. av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
  1621. vk_ret2str(ret));
  1622. err = AVERROR(EINVAL);
  1623. goto fail;
  1624. }
  1625. ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
  1626. hwctx->alloc, &f->sem[i]);
  1627. if (ret != VK_SUCCESS) {
  1628. av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
  1629. vk_ret2str(ret));
  1630. return AVERROR_EXTERNAL;
  1631. }
1632. /* We'd import an external semaphore into the one we just created using
1633. * vkImportSemaphoreFdKHR, but unfortunately neither DRM nor VAAPI
1634. * offer us anything we could import and sync with, so instead
1635. * we just signal the semaphore we created. */
  1636. f->layout[i] = image_create_info.initialLayout;
  1637. f->access[i] = 0x0;
  1638. for (int j = 0; j < planes; j++) {
  1639. VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  1640. j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
  1641. VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
  1642. plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
  1643. plane_info[bind_counts].planeAspect = aspect;
  1644. bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
  1645. bind_info[bind_counts].pNext = signal_p ? &plane_info[bind_counts] : NULL;
  1646. bind_info[bind_counts].image = f->img[i];
  1647. bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
  1648. bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
  1649. bind_counts++;
  1650. }
  1651. }
  1652. /* Bind the allocated memory to the images */
  1653. ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
  1654. if (ret != VK_SUCCESS) {
  1655. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
  1656. vk_ret2str(ret));
  1657. return AVERROR_EXTERNAL;
  1658. }
1659. /* NOTE: This becomes unnecessary once we can import semaphores
1660. * from DRM. Until then we have to signal the semaphores ourselves.
1661. * We're reusing the exec context that's also used for uploads/downloads. */
  1662. err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_RO_SHADER);
  1663. if (err)
  1664. goto fail;
  1665. *frame = f;
  1666. return 0;
  1667. fail:
  1668. for (int i = 0; i < desc->nb_layers; i++) {
  1669. vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
  1670. vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
  1671. }
  1672. for (int i = 0; i < desc->nb_objects; i++)
  1673. vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
  1674. av_free(f);
  1675. return err;
  1676. }
  1677. static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  1678. const AVFrame *src, int flags)
  1679. {
  1680. int err = 0;
  1681. AVVkFrame *f;
  1682. VulkanMapping *map = NULL;
  1683. err = vulkan_map_from_drm_frame_desc(hwfc, &f,
  1684. (AVDRMFrameDescriptor *)src->data[0]);
  1685. if (err)
  1686. return err;
  1687. /* The unmapping function will free this */
  1688. dst->data[0] = (uint8_t *)f;
  1689. dst->width = src->width;
  1690. dst->height = src->height;
1691. map = av_mallocz(sizeof(VulkanMapping));
1692. if (!map) {
1693. err = AVERROR(ENOMEM);
goto fail;
}
  1694. map->frame = f;
  1695. map->flags = flags;
  1696. err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
  1697. &vulkan_unmap_from, map);
  1698. if (err < 0)
  1699. goto fail;
  1700. av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
  1701. return 0;
  1702. fail:
1703. vulkan_frame_free(hwfc, (uint8_t *)f);
  1704. av_free(map);
  1705. return err;
  1706. }
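/* A minimal sketch of importing a DRM PRIME frame via the generic API
 * (error handling omitted), assuming vk_frames_ref is an initialized
 * AV_PIX_FMT_VULKAN frames context; av_hwframe_map() falls through to
 * vulkan_map_to()/vulkan_map_from_drm() for this combination:
 *
 *     AVFrame *vk = av_frame_alloc();
 *     vk->format        = AV_PIX_FMT_VULKAN;
 *     vk->hw_frames_ctx = av_buffer_ref(vk_frames_ref);
 *     av_hwframe_map(vk, drm_frame, AV_HWFRAME_MAP_READ);
 *     // use ((AVVkFrame *)vk->data[0]), then:
 *     av_frame_free(&vk); // runs vulkan_unmap_from()
 */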
  1707. #if CONFIG_VAAPI
  1708. static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
  1709. AVFrame *dst, const AVFrame *src,
  1710. int flags)
  1711. {
  1712. int err;
  1713. AVFrame *tmp = av_frame_alloc();
  1714. AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  1715. AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
  1716. VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
  1717. if (!tmp)
  1718. return AVERROR(ENOMEM);
1719. /* We have to sync here since, as the previous comment said, there are no semaphores to import and wait on */
  1720. vaSyncSurface(vaapi_ctx->display, surface_id);
  1721. tmp->format = AV_PIX_FMT_DRM_PRIME;
  1722. err = av_hwframe_map(tmp, src, flags);
  1723. if (err < 0)
  1724. goto fail;
  1725. err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
  1726. if (err < 0)
  1727. goto fail;
  1728. err = ff_hwframe_map_replace(dst, src);
  1729. fail:
  1730. av_frame_free(&tmp);
  1731. return err;
  1732. }
  1733. #endif
  1734. #endif
  1735. #if CONFIG_CUDA
  1736. static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
  1737. AVBufferRef *cuda_hwfc,
  1738. const AVFrame *frame)
  1739. {
  1740. int err;
  1741. VkResult ret;
  1742. AVVkFrame *dst_f;
  1743. AVVkFrameInternal *dst_int;
  1744. AVHWDeviceContext *ctx = hwfc->device_ctx;
  1745. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  1746. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1747. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1748. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  1749. VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
  1750. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
  1751. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1752. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1753. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1754. CudaFunctions *cu = cu_internal->cuda_dl;
  1755. CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
  1756. CU_AD_FORMAT_UNSIGNED_INT8;
  1757. dst_f = (AVVkFrame *)frame->data[0];
  1758. dst_int = dst_f->internal;
  1759. if (!dst_int || !dst_int->cuda_fc_ref) {
  1760. if (!dst_f->internal)
  1761. dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
  1762. if (!dst_int) {
  1763. err = AVERROR(ENOMEM);
  1764. goto fail;
  1765. }
  1766. dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
  1767. if (!dst_int->cuda_fc_ref) {
  1768. err = AVERROR(ENOMEM);
  1769. goto fail;
  1770. }
  1771. for (int i = 0; i < planes; i++) {
  1772. CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
  1773. .offset = 0,
  1774. .arrayDesc = {
  1775. .Width = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  1776. : hwfc->width,
  1777. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  1778. : hwfc->height,
  1779. .Depth = 0,
  1780. .Format = cufmt,
  1781. .NumChannels = 1 + ((planes == 2) && i),
  1782. .Flags = 0,
  1783. },
  1784. .numLevels = 1,
  1785. };
  1786. CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
  1787. .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
  1788. .size = dst_f->size[i],
  1789. };
  1790. VkMemoryGetFdInfoKHR export_info = {
  1791. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  1792. .memory = dst_f->mem[i],
  1793. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
  1794. };
  1795. VkSemaphoreGetFdInfoKHR sem_export = {
  1796. .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
  1797. .semaphore = dst_f->sem[i],
  1798. .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
  1799. };
  1800. CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
  1801. .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
  1802. };
  1803. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  1804. &ext_desc.handle.fd);
  1805. if (ret != VK_SUCCESS) {
  1806. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
  1807. err = AVERROR_EXTERNAL;
  1808. goto fail;
  1809. }
  1810. ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
  1811. if (ret < 0) {
  1812. err = AVERROR_EXTERNAL;
  1813. goto fail;
  1814. }
  1815. ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
  1816. dst_int->ext_mem[i],
  1817. &tex_desc));
  1818. if (ret < 0) {
  1819. err = AVERROR_EXTERNAL;
  1820. goto fail;
  1821. }
  1822. ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
  1823. dst_int->cu_mma[i], 0));
  1824. if (ret < 0) {
  1825. err = AVERROR_EXTERNAL;
  1826. goto fail;
  1827. }
  1828. ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
  1829. &ext_sem_desc.handle.fd);
  1830. if (ret != VK_SUCCESS) {
  1831. av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
  1832. vk_ret2str(ret));
  1833. err = AVERROR_EXTERNAL;
  1834. goto fail;
  1835. }
  1836. ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
  1837. &ext_sem_desc));
  1838. if (ret < 0) {
  1839. err = AVERROR_EXTERNAL;
  1840. goto fail;
  1841. }
  1842. }
  1843. }
  1844. return 0;
  1845. fail:
  1846. return err;
  1847. }
  1848. static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
  1849. AVFrame *dst, const AVFrame *src)
  1850. {
  1851. int err;
  1852. VkResult ret;
  1853. CUcontext dummy;
  1854. AVVkFrame *dst_f;
  1855. AVVkFrameInternal *dst_int;
  1856. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1857. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  1858. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
  1859. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  1860. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  1861. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  1862. CudaFunctions *cu = cu_internal->cuda_dl;
  1863. CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
  1864. CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
  1865. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  1866. if (ret < 0) {
  1867. err = AVERROR_EXTERNAL;
  1868. goto fail;
  1869. }
  1870. dst_f = (AVVkFrame *)dst->data[0];
1871. err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
1872. if (err < 0) {
  1873. goto fail;
  1874. }
  1875. dst_int = dst_f->internal;
  1876. ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
  1877. planes, cuda_dev->stream));
  1878. if (ret < 0) {
  1879. err = AVERROR_EXTERNAL;
  1880. goto fail;
  1881. }
  1882. for (int i = 0; i < planes; i++) {
  1883. CUDA_MEMCPY2D cpy = {
  1884. .srcMemoryType = CU_MEMORYTYPE_DEVICE,
  1885. .srcDevice = (CUdeviceptr)src->data[i],
  1886. .srcPitch = src->linesize[i],
  1887. .srcY = 0,
  1888. .dstMemoryType = CU_MEMORYTYPE_ARRAY,
  1889. .dstArray = dst_int->cu_array[i],
  1890. .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  1891. : hwfc->width) * desc->comp[i].step,
  1892. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  1893. : hwfc->height,
  1894. };
  1895. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  1896. if (ret < 0) {
  1897. err = AVERROR_EXTERNAL;
  1898. goto fail;
  1899. }
  1900. }
  1901. ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
  1902. planes, cuda_dev->stream));
  1903. if (ret < 0) {
  1904. err = AVERROR_EXTERNAL;
  1905. goto fail;
  1906. }
  1907. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
1908. av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
  1909. return 0;
  1910. fail:
  1911. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  1912. vulkan_free_internal(dst_int);
  1913. dst_f->internal = NULL;
  1914. av_buffer_unref(&dst->buf[0]);
  1915. return err;
  1916. }
  1917. #endif
  1918. static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
  1919. const AVFrame *src, int flags)
  1920. {
  1921. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1922. switch (src->format) {
  1923. #if CONFIG_LIBDRM
  1924. #if CONFIG_VAAPI
  1925. case AV_PIX_FMT_VAAPI:
  1926. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1927. return vulkan_map_from_vaapi(hwfc, dst, src, flags);
  1928. #endif
  1929. case AV_PIX_FMT_DRM_PRIME:
  1930. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  1931. return vulkan_map_from_drm(hwfc, dst, src, flags);
  1932. #endif
  1933. default:
  1934. return AVERROR(ENOSYS);
  1935. }
  1936. }
  1937. #if CONFIG_LIBDRM
  1938. typedef struct VulkanDRMMapping {
  1939. AVDRMFrameDescriptor drm_desc;
  1940. AVVkFrame *source;
  1941. } VulkanDRMMapping;
  1942. static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
  1943. {
  1944. AVDRMFrameDescriptor *drm_desc = hwmap->priv;
  1945. for (int i = 0; i < drm_desc->nb_objects; i++)
  1946. close(drm_desc->objects[i].fd);
  1947. av_free(drm_desc);
  1948. }
  1949. static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
  1950. {
  1951. for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
  1952. if (vulkan_drm_format_map[i].vk_format == vkfmt)
  1953. return vulkan_drm_format_map[i].drm_fourcc;
  1954. return DRM_FORMAT_INVALID;
  1955. }
  1956. static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
  1957. const AVFrame *src, int flags)
  1958. {
  1959. int err = 0;
  1960. VkResult ret;
  1961. AVVkFrame *f = (AVVkFrame *)src->data[0];
  1962. VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  1963. AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
  1964. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  1965. VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
  1966. VkImageDrmFormatModifierPropertiesEXT drm_mod = {
  1967. .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
  1968. };
  1969. AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
  1970. if (!drm_desc)
  1971. return AVERROR(ENOMEM);
  1972. err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_EXTERNAL_EXPORT);
  1973. if (err < 0)
  1974. goto end;
  1975. err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
  1976. if (err < 0)
  1977. goto end;
  1978. if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
  1979. VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
  1980. ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
  1981. &drm_mod);
  1982. if (ret != VK_SUCCESS) {
  1983. av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
  1984. err = AVERROR_EXTERNAL;
  1985. goto end;
  1986. }
  1987. }
  1988. for (int i = 0; (i < planes) && (f->mem[i]); i++) {
  1989. VkMemoryGetFdInfoKHR export_info = {
  1990. .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
  1991. .memory = f->mem[i],
  1992. .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
  1993. };
  1994. ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
  1995. &drm_desc->objects[i].fd);
  1996. if (ret != VK_SUCCESS) {
  1997. av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
  1998. err = AVERROR_EXTERNAL;
  1999. goto end;
  2000. }
  2001. drm_desc->nb_objects++;
  2002. drm_desc->objects[i].size = f->size[i];
  2003. drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
  2004. }
  2005. drm_desc->nb_layers = planes;
  2006. for (int i = 0; i < drm_desc->nb_layers; i++) {
  2007. VkSubresourceLayout layout;
  2008. VkImageSubresource sub = {
  2009. .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
  2010. VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
  2011. VK_IMAGE_ASPECT_COLOR_BIT,
  2012. };
  2013. VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
  2014. drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt);
  2015. drm_desc->layers[i].nb_planes = 1;
  2016. if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
  2017. av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
  2018. err = AVERROR_PATCHWELCOME;
  2019. goto end;
  2020. }
  2021. drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
  2022. if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
  2023. continue;
  2024. vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
  2025. drm_desc->layers[i].planes[0].offset = layout.offset;
  2026. drm_desc->layers[i].planes[0].pitch = layout.rowPitch;
  2027. }
  2028. dst->width = src->width;
  2029. dst->height = src->height;
  2030. dst->data[0] = (uint8_t *)drm_desc;
  2031. av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
  2032. return 0;
  2033. end:
  2034. av_free(drm_desc);
  2035. return err;
  2036. }
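/* A minimal sketch of the reverse direction via the generic API (error
 * handling omitted): for a source AV_PIX_FMT_VULKAN frame, setting the
 * destination format is enough to end up in vulkan_map_from() and here:
 *
 *     AVFrame *drm = av_frame_alloc();
 *     drm->format = AV_PIX_FMT_DRM_PRIME;
 *     av_hwframe_map(drm, vk_frame, AV_HWFRAME_MAP_READ);
 *     // use (AVDRMFrameDescriptor *)drm->data[0], then:
 *     av_frame_free(&drm); // runs vulkan_unmap_to_drm()
 */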
  2037. #if CONFIG_VAAPI
  2038. static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
  2039. const AVFrame *src, int flags)
  2040. {
  2041. int err;
  2042. AVFrame *tmp = av_frame_alloc();
  2043. if (!tmp)
  2044. return AVERROR(ENOMEM);
  2045. tmp->format = AV_PIX_FMT_DRM_PRIME;
  2046. err = vulkan_map_to_drm(hwfc, tmp, src, flags);
  2047. if (err < 0)
  2048. goto fail;
  2049. err = av_hwframe_map(dst, tmp, flags);
  2050. if (err < 0)
  2051. goto fail;
  2052. err = ff_hwframe_map_replace(dst, src);
  2053. fail:
  2054. av_frame_free(&tmp);
  2055. return err;
  2056. }
  2057. #endif
  2058. #endif
  2059. static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2060. const AVFrame *src, int flags)
  2061. {
  2062. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2063. switch (dst->format) {
  2064. #if CONFIG_LIBDRM
  2065. case AV_PIX_FMT_DRM_PRIME:
  2066. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2067. return vulkan_map_to_drm(hwfc, dst, src, flags);
  2068. #if CONFIG_VAAPI
  2069. case AV_PIX_FMT_VAAPI:
  2070. if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
  2071. return vulkan_map_to_vaapi(hwfc, dst, src, flags);
  2072. #endif
  2073. #endif
  2074. default:
  2075. return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
  2076. }
  2077. }
  2078. typedef struct ImageBuffer {
  2079. VkBuffer buf;
  2080. VkDeviceMemory mem;
  2081. VkMemoryPropertyFlagBits flags;
  2082. } ImageBuffer;
  2083. static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
  2084. {
  2085. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2086. if (!buf)
  2087. return;
  2088. vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
  2089. vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
  2090. }
  2091. static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height,
  2092. int *stride, VkBufferUsageFlags usage,
  2093. VkMemoryPropertyFlagBits flags, void *create_pnext,
  2094. void *alloc_pnext)
  2095. {
  2096. int err;
  2097. VkResult ret;
  2098. VkMemoryRequirements req;
  2099. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2100. VulkanDevicePriv *p = ctx->internal->priv;
  2101. VkBufferCreateInfo buf_spawn = {
  2102. .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
  2103. .pNext = create_pnext,
  2104. .usage = usage,
  2105. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  2106. };
  2107. *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
  2108. buf_spawn.size = height*(*stride);
  2109. ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
  2110. if (ret != VK_SUCCESS) {
  2111. av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
  2112. vk_ret2str(ret));
  2113. return AVERROR_EXTERNAL;
  2114. }
  2115. vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
  2116. err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
  2117. if (err)
  2118. return err;
  2119. ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
  2120. if (ret != VK_SUCCESS) {
  2121. av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
  2122. vk_ret2str(ret));
  2123. free_buf(ctx, buf);
  2124. return AVERROR_EXTERNAL;
  2125. }
  2126. return 0;
  2127. }
  2128. static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
  2129. int nb_buffers, int invalidate)
  2130. {
  2131. VkResult ret;
  2132. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2133. VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
  2134. int invalidate_count = 0;
  2135. for (int i = 0; i < nb_buffers; i++) {
  2136. ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
  2137. VK_WHOLE_SIZE, 0, (void **)&mem[i]);
  2138. if (ret != VK_SUCCESS) {
  2139. av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
  2140. vk_ret2str(ret));
  2141. return AVERROR_EXTERNAL;
  2142. }
  2143. }
  2144. if (!invalidate)
  2145. return 0;
  2146. for (int i = 0; i < nb_buffers; i++) {
  2147. const VkMappedMemoryRange ival_buf = {
  2148. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2149. .memory = buf[i].mem,
  2150. .size = VK_WHOLE_SIZE,
  2151. };
  2152. if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2153. continue;
  2154. invalidate_ctx[invalidate_count++] = ival_buf;
  2155. }
  2156. if (invalidate_count) {
  2157. ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
  2158. invalidate_ctx);
  2159. if (ret != VK_SUCCESS)
  2160. av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
  2161. vk_ret2str(ret));
  2162. }
  2163. return 0;
  2164. }
  2165. static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
  2166. int nb_buffers, int flush)
  2167. {
  2168. int err = 0;
  2169. VkResult ret;
  2170. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2171. VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
  2172. int flush_count = 0;
  2173. if (flush) {
  2174. for (int i = 0; i < nb_buffers; i++) {
  2175. const VkMappedMemoryRange flush_buf = {
  2176. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  2177. .memory = buf[i].mem,
  2178. .size = VK_WHOLE_SIZE,
  2179. };
  2180. if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  2181. continue;
  2182. flush_ctx[flush_count++] = flush_buf;
  2183. }
  2184. }
  2185. if (flush_count) {
  2186. ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
  2187. if (ret != VK_SUCCESS) {
  2188. av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  2189. vk_ret2str(ret));
  2190. err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
  2191. }
  2192. }
  2193. for (int i = 0; i < nb_buffers; i++)
  2194. vkUnmapMemory(hwctx->act_dev, buf[i].mem);
  2195. return err;
  2196. }
  2197. static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
  2198. ImageBuffer *buffer, const int *buf_stride, int w,
  2199. int h, enum AVPixelFormat pix_fmt, int to_buf)
  2200. {
  2201. VkResult ret;
  2202. AVVulkanDeviceContext *hwctx = ctx->hwctx;
  2203. VulkanDevicePriv *s = ctx->internal->priv;
  2204. int bar_num = 0;
  2205. VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
  2206. const int planes = av_pix_fmt_count_planes(pix_fmt);
  2207. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
  2208. VkCommandBufferBeginInfo cmd_start = {
  2209. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
  2210. .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  2211. };
  2212. VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
  2213. VkSubmitInfo s_info = {
  2214. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  2215. .commandBufferCount = 1,
  2216. .pCommandBuffers = &s->cmd.buf,
  2217. .pSignalSemaphores = frame->sem,
  2218. .pWaitSemaphores = frame->sem,
  2219. .pWaitDstStageMask = sem_wait_dst,
  2220. .signalSemaphoreCount = planes,
  2221. .waitSemaphoreCount = planes,
  2222. };
  2223. ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
  2224. if (ret != VK_SUCCESS) {
  2225. av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
  2226. vk_ret2str(ret));
  2227. return AVERROR_EXTERNAL;
  2228. }
  2229. /* Change the image layout to something more optimal for transfers */
  2230. for (int i = 0; i < planes; i++) {
  2231. VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
  2232. VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  2233. VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
  2234. VK_ACCESS_TRANSFER_WRITE_BIT;
  2235. sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
  2236. /* If the layout matches and we have read access skip the barrier */
  2237. if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
  2238. continue;
  2239. img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  2240. img_bar[bar_num].srcAccessMask = 0x0;
  2241. img_bar[bar_num].dstAccessMask = new_access;
  2242. img_bar[bar_num].oldLayout = frame->layout[i];
  2243. img_bar[bar_num].newLayout = new_layout;
  2244. img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2245. img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  2246. img_bar[bar_num].image = frame->img[i];
  2247. img_bar[bar_num].subresourceRange.levelCount = 1;
  2248. img_bar[bar_num].subresourceRange.layerCount = 1;
  2249. img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  2250. frame->layout[i] = img_bar[bar_num].newLayout;
  2251. frame->access[i] = img_bar[bar_num].dstAccessMask;
  2252. bar_num++;
  2253. }
  2254. if (bar_num)
  2255. vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
  2256. VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
  2257. 0, NULL, 0, NULL, bar_num, img_bar);
  2258. /* Schedule a copy for each plane */
  2259. for (int i = 0; i < planes; i++) {
  2260. const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
  2261. const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
  2262. VkBufferImageCopy buf_reg = {
  2263. .bufferOffset = 0,
  2264. /* Buffer stride isn't in bytes, it's in samples, the implementation
  2265. * uses the image's VkFormat to know how many bytes per sample
  2266. * the buffer has. So we have to convert by dividing. Stupid.
  2267. * Won't work with YUVA or other planar formats with alpha. */
  2268. .bufferRowLength = buf_stride[i] / desc->comp[i].step,
  2269. .bufferImageHeight = p_h,
  2270. .imageSubresource.layerCount = 1,
  2271. .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  2272. .imageOffset = { 0, 0, 0, },
  2273. .imageExtent = { p_w, p_h, 1, },
  2274. };
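/* As a worked example of the division above: for NV12 the chroma plane is a
 * two-channel image (VK_FORMAT_R8G8_UNORM), so desc->comp[i].step is 2 and a
 * 2048-byte buffer stride becomes a bufferRowLength of 1024 texels. */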
  2275. if (to_buf)
  2276. vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
  2277. buffer[i].buf, 1, &buf_reg);
  2278. else
  2279. vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i],
  2280. frame->layout[i], 1, &buf_reg);
  2281. }
  2282. ret = vkEndCommandBuffer(s->cmd.buf);
  2283. if (ret != VK_SUCCESS) {
  2284. av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
  2285. vk_ret2str(ret));
  2286. return AVERROR_EXTERNAL;
  2287. }
2288. /* Wait for the transfer to finish: downloads need the data ready on return,
2289. * and uploads free their staging buffers as soon as this function returns. */
  2290. ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence);
  2291. if (ret != VK_SUCCESS) {
  2292. av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
  2293. vk_ret2str(ret));
  2294. return AVERROR_EXTERNAL;
  2295. } else {
  2296. vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX);
  2297. vkResetFences(hwctx->act_dev, 1, &s->cmd.fence);
  2298. }
  2299. return 0;
  2300. }
  2301. /* Technically we can use VK_EXT_external_memory_host to upload and download,
  2302. * however the alignment requirements make this unfeasible as both the pointer
  2303. * and the size of each plane need to be aligned to the minimum alignment
  2304. * requirement, which on all current implementations (anv, radv) is 4096.
  2305. * If the requirement gets relaxed (unlikely) this can easily be implemented. */
  2306. static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  2307. const AVFrame *src)
  2308. {
  2309. int err = 0;
  2310. AVFrame tmp;
  2311. AVVkFrame *f = (AVVkFrame *)dst->data[0];
  2312. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2313. ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
  2314. const int planes = av_pix_fmt_count_planes(src->format);
  2315. int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
  2316. if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) {
  2317. av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
  2318. return AVERROR(EINVAL);
  2319. }
  2320. if (src->width > hwfc->width || src->height > hwfc->height)
  2321. return AVERROR(EINVAL);
2322. /* For linear, host-visible images */
  2323. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2324. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2325. AVFrame *map = av_frame_alloc();
  2326. if (!map)
  2327. return AVERROR(ENOMEM);
  2328. map->format = src->format;
  2329. err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
2330. if (err) {
2331. av_frame_free(&map);
goto end;
}
  2332. err = av_frame_copy(map, src);
  2333. av_frame_free(&map);
  2334. goto end;
  2335. }
  2336. /* Create buffers */
  2337. for (int i = 0; i < planes; i++) {
  2338. int h = src->height;
  2339. int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
  2340. tmp.linesize[i] = FFABS(src->linesize[i]);
  2341. err = create_buf(dev_ctx, &buf[i], p_height,
  2342. &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
  2343. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
  2344. if (err)
  2345. goto end;
  2346. }
  2347. /* Map, copy image to buffer, unmap */
  2348. if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0)))
  2349. goto end;
  2350. av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
  2351. src->linesize, src->format, src->width, src->height);
  2352. if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
  2353. goto end;
  2354. /* Copy buffers to image */
  2355. err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
  2356. src->width, src->height, src->format, 0);
  2357. end:
  2358. for (int i = 0; i < planes; i++)
  2359. free_buf(dev_ctx, &buf[i]);
  2360. return err;
  2361. }
  2362. static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
  2363. const AVFrame *src)
  2364. {
  2365. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2366. switch (src->format) {
  2367. #if CONFIG_CUDA
  2368. case AV_PIX_FMT_CUDA:
  2369. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2370. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2371. return vulkan_transfer_data_from_cuda(hwfc, dst, src);
  2372. #endif
  2373. default:
  2374. if (src->hw_frames_ctx)
  2375. return AVERROR(ENOSYS);
  2376. else
  2377. return vulkan_transfer_data_from_mem(hwfc, dst, src);
  2378. }
  2379. }
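/* A minimal upload sketch via the generic transfer API (error handling
 * omitted), assuming vk_frames_ref is an initialized Vulkan frames context;
 * av_hwframe_transfer_data() dispatches to vulkan_transfer_data_to():
 *
 *     AVFrame *dst = av_frame_alloc();
 *     av_hwframe_get_buffer(vk_frames_ref, dst, 0); // vulkan_get_buffer()
 *     av_hwframe_transfer_data(dst, sw_frame, 0);
 *     // ... use dst, then:
 *     av_frame_free(&dst);
 */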
  2380. #if CONFIG_CUDA
  2381. static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
  2382. const AVFrame *src)
  2383. {
  2384. int err;
  2385. VkResult ret;
  2386. CUcontext dummy;
  2387. AVVkFrame *dst_f;
  2388. AVVkFrameInternal *dst_int;
  2389. const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
  2390. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
  2391. AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
  2392. AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
  2393. AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
  2394. AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
  2395. CudaFunctions *cu = cu_internal->cuda_dl;
  2396. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
  2397. if (ret < 0) {
  2398. err = AVERROR_EXTERNAL;
  2399. goto fail;
  2400. }
  2401. dst_f = (AVVkFrame *)src->data[0];
  2402. err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
  2403. if (err < 0) {
  2404. goto fail;
  2405. }
  2406. dst_int = dst_f->internal;
  2407. for (int i = 0; i < planes; i++) {
  2408. CUDA_MEMCPY2D cpy = {
  2409. .dstMemoryType = CU_MEMORYTYPE_DEVICE,
  2410. .dstDevice = (CUdeviceptr)dst->data[i],
  2411. .dstPitch = dst->linesize[i],
  2412. .dstY = 0,
  2413. .srcMemoryType = CU_MEMORYTYPE_ARRAY,
  2414. .srcArray = dst_int->cu_array[i],
  2415. .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
  2416. : hwfc->width) * desc->comp[i].step,
  2417. .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
  2418. : hwfc->height,
  2419. };
  2420. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
  2421. if (ret < 0) {
  2422. err = AVERROR_EXTERNAL;
  2423. goto fail;
  2424. }
  2425. }
  2426. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2427. av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
  2428. return 0;
  2429. fail:
  2430. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  2431. vulkan_free_internal(dst_int);
  2432. dst_f->internal = NULL;
  2433. av_buffer_unref(&dst->buf[0]);
  2434. return err;
  2435. }
  2436. #endif
  2437. static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
  2438. const AVFrame *src)
  2439. {
  2440. int err = 0;
  2441. AVFrame tmp;
  2442. AVVkFrame *f = (AVVkFrame *)src->data[0];
  2443. AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
  2444. ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
  2445. const int planes = av_pix_fmt_count_planes(dst->format);
  2446. int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
  2447. if (dst->width > hwfc->width || dst->height > hwfc->height)
  2448. return AVERROR(EINVAL);
2449. /* For linear, host-visible images */
  2450. if (f->tiling == VK_IMAGE_TILING_LINEAR &&
  2451. f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
  2452. AVFrame *map = av_frame_alloc();
  2453. if (!map)
  2454. return AVERROR(ENOMEM);
  2455. map->format = dst->format;
  2456. err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
2457. if (err) {
2458. av_frame_free(&map);
return err;
}
  2459. err = av_frame_copy(dst, map);
  2460. av_frame_free(&map);
  2461. return err;
  2462. }
  2463. /* Create buffers */
  2464. for (int i = 0; i < planes; i++) {
  2465. int h = dst->height;
  2466. int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
  2467. tmp.linesize[i] = FFABS(dst->linesize[i]);
  2468. err = create_buf(dev_ctx, &buf[i], p_height,
  2469. &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
2470. VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
if (err)
goto end;
2471. }
  2472. /* Copy image to buffer */
  2473. if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
  2474. dst->width, dst->height, dst->format, 1)))
  2475. goto end;
  2476. /* Map, copy buffer to frame, unmap */
  2477. if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
  2478. goto end;
  2479. av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
  2480. tmp.linesize, dst->format, dst->width, dst->height);
  2481. err = unmap_buffers(dev_ctx, buf, planes, 0);
  2482. end:
  2483. for (int i = 0; i < planes; i++)
  2484. free_buf(dev_ctx, &buf[i]);
  2485. return err;
  2486. }
  2487. static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
  2488. const AVFrame *src)
  2489. {
  2490. av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
  2491. switch (dst->format) {
  2492. #if CONFIG_CUDA
  2493. case AV_PIX_FMT_CUDA:
  2494. if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
  2495. (p->extensions & EXT_EXTERNAL_FD_SEM))
  2496. return vulkan_transfer_data_to_cuda(hwfc, dst, src);
  2497. #endif
  2498. default:
  2499. if (dst->hw_frames_ctx)
  2500. return AVERROR(ENOSYS);
  2501. else
  2502. return vulkan_transfer_data_to_mem(hwfc, dst, src);
  2503. }
  2504. }
  2505. AVVkFrame *av_vk_frame_alloc(void)
  2506. {
  2507. return av_mallocz(sizeof(AVVkFrame));
  2508. }
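/* A minimal device creation sketch via the generic API (error handling
 * omitted); av_hwdevice_ctx_create() ends up in vulkan_device_create():
 *
 *     AVBufferRef *dev_ref = NULL;
 *     av_hwdevice_ctx_create(&dev_ref, AV_HWDEVICE_TYPE_VULKAN, NULL, NULL, 0);
 */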
  2509. const HWContextType ff_hwcontext_type_vulkan = {
  2510. .type = AV_HWDEVICE_TYPE_VULKAN,
  2511. .name = "Vulkan",
  2512. .device_hwctx_size = sizeof(AVVulkanDeviceContext),
  2513. .device_priv_size = sizeof(VulkanDevicePriv),
  2514. .frames_hwctx_size = sizeof(AVVulkanFramesContext),
  2515. .frames_priv_size = sizeof(VulkanFramesPriv),
  2516. .device_init = &vulkan_device_init,
  2517. .device_create = &vulkan_device_create,
  2518. .device_derive = &vulkan_device_derive,
  2519. .frames_get_constraints = &vulkan_frames_get_constraints,
  2520. .frames_init = vulkan_frames_init,
  2521. .frames_get_buffer = vulkan_get_buffer,
  2522. .frames_uninit = vulkan_frames_uninit,
  2523. .transfer_get_formats = vulkan_transfer_get_formats,
  2524. .transfer_data_to = vulkan_transfer_data_to,
  2525. .transfer_data_from = vulkan_transfer_data_from,
  2526. .map_to = vulkan_map_to,
  2527. .map_from = vulkan_map_from,
  2528. .pix_fmts = (const enum AVPixelFormat []) {
  2529. AV_PIX_FMT_VULKAN,
  2530. AV_PIX_FMT_NONE
  2531. },
  2532. };