You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1441 lines
48KB

  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "formats.h"
  19. #include "vulkan.h"
  20. #include "glslang.h"
  21. /* Generic macro for creating contexts which need to keep their addresses
  22. * if another context is created. */
  23. #define FN_CREATING(ctx, type, shortname, array, num) \
  24. static av_always_inline type *create_ ##shortname(ctx *dctx) \
  25. { \
  26. type **array, *sctx = av_mallocz(sizeof(*sctx)); \
  27. if (!sctx) \
  28. return NULL; \
  29. \
  30. array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
  31. if (!array) { \
  32. av_free(sctx); \
  33. return NULL; \
  34. } \
  35. \
  36. dctx->array = array; \
  37. dctx->array[dctx->num++] = sctx; \
  38. \
  39. return sctx; \
  40. }
  41. const VkComponentMapping ff_comp_identity_map = {
  42. .r = VK_COMPONENT_SWIZZLE_IDENTITY,
  43. .g = VK_COMPONENT_SWIZZLE_IDENTITY,
  44. .b = VK_COMPONENT_SWIZZLE_IDENTITY,
  45. .a = VK_COMPONENT_SWIZZLE_IDENTITY,
  46. };
  47. /* Converts return values to strings */
  48. const char *ff_vk_ret2str(VkResult res)
  49. {
  50. #define CASE(VAL) case VAL: return #VAL
  51. switch (res) {
  52. CASE(VK_SUCCESS);
  53. CASE(VK_NOT_READY);
  54. CASE(VK_TIMEOUT);
  55. CASE(VK_EVENT_SET);
  56. CASE(VK_EVENT_RESET);
  57. CASE(VK_INCOMPLETE);
  58. CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
  59. CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
  60. CASE(VK_ERROR_INITIALIZATION_FAILED);
  61. CASE(VK_ERROR_DEVICE_LOST);
  62. CASE(VK_ERROR_MEMORY_MAP_FAILED);
  63. CASE(VK_ERROR_LAYER_NOT_PRESENT);
  64. CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
  65. CASE(VK_ERROR_FEATURE_NOT_PRESENT);
  66. CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
  67. CASE(VK_ERROR_TOO_MANY_OBJECTS);
  68. CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
  69. CASE(VK_ERROR_FRAGMENTED_POOL);
  70. CASE(VK_ERROR_SURFACE_LOST_KHR);
  71. CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
  72. CASE(VK_SUBOPTIMAL_KHR);
  73. CASE(VK_ERROR_OUT_OF_DATE_KHR);
  74. CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
  75. CASE(VK_ERROR_VALIDATION_FAILED_EXT);
  76. CASE(VK_ERROR_INVALID_SHADER_NV);
  77. CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
  78. CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
  79. CASE(VK_ERROR_NOT_PERMITTED_EXT);
  80. default: return "Unknown error";
  81. }
  82. #undef CASE
  83. }
  84. static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
  85. VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
  86. VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
  87. {
  88. VkResult ret;
  89. int index = -1;
  90. VkPhysicalDeviceProperties props;
  91. VkPhysicalDeviceMemoryProperties mprops;
  92. VulkanFilterContext *s = avctx->priv;
  93. VkMemoryAllocateInfo alloc_info = {
  94. .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
  95. .pNext = alloc_extension,
  96. };
  97. vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
  98. vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
  99. /* Align if we need to */
  100. if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
  101. req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);
  102. alloc_info.allocationSize = req->size;
  103. /* The vulkan spec requires memory types to be sorted in the "optimal"
  104. * order, so the first matching type we find will be the best/fastest one */
  105. for (int i = 0; i < mprops.memoryTypeCount; i++) {
  106. /* The memory type must be supported by the requirements (bitfield) */
  107. if (!(req->memoryTypeBits & (1 << i)))
  108. continue;
  109. /* The memory type flags must include our properties */
  110. if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
  111. continue;
  112. /* Found a suitable memory type */
  113. index = i;
  114. break;
  115. }
  116. if (index < 0) {
  117. av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
  118. req_flags);
  119. return AVERROR(EINVAL);
  120. }
  121. alloc_info.memoryTypeIndex = index;
  122. ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
  123. s->hwctx->alloc, mem);
  124. if (ret != VK_SUCCESS) {
  125. av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
  126. ff_vk_ret2str(ret));
  127. return AVERROR(ENOMEM);
  128. }
  129. *mem_flags |= mprops.memoryTypes[index].propertyFlags;
  130. return 0;
  131. }
  132. int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
  133. VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
  134. {
  135. int err;
  136. VkResult ret;
  137. int use_ded_mem;
  138. VulkanFilterContext *s = avctx->priv;
  139. VkBufferCreateInfo buf_spawn = {
  140. .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
  141. .pNext = NULL,
  142. .usage = usage,
  143. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  144. .size = size, /* Gets FFALIGNED during alloc if host visible
  145. but should be ok */
  146. };
  147. VkBufferMemoryRequirementsInfo2 req_desc = {
  148. .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
  149. };
  150. VkMemoryDedicatedAllocateInfo ded_alloc = {
  151. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
  152. .pNext = NULL,
  153. };
  154. VkMemoryDedicatedRequirements ded_req = {
  155. .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
  156. };
  157. VkMemoryRequirements2 req = {
  158. .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
  159. .pNext = &ded_req,
  160. };
  161. ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
  162. if (ret != VK_SUCCESS) {
  163. av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
  164. ff_vk_ret2str(ret));
  165. return AVERROR_EXTERNAL;
  166. }
  167. req_desc.buffer = buf->buf;
  168. vkGetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
  169. /* In case the implementation prefers/requires dedicated allocation */
  170. use_ded_mem = ded_req.prefersDedicatedAllocation |
  171. ded_req.requiresDedicatedAllocation;
  172. if (use_ded_mem)
  173. ded_alloc.buffer = buf->buf;
  174. err = vk_alloc_mem(avctx, &req.memoryRequirements, flags,
  175. use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
  176. &buf->flags, &buf->mem);
  177. if (err)
  178. return err;
  179. ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
  180. if (ret != VK_SUCCESS) {
  181. av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
  182. ff_vk_ret2str(ret));
  183. return AVERROR_EXTERNAL;
  184. }
  185. return 0;
  186. }
  187. int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
  188. int nb_buffers, int invalidate)
  189. {
  190. VkResult ret;
  191. VulkanFilterContext *s = avctx->priv;
  192. VkMappedMemoryRange *inval_list = NULL;
  193. int inval_count = 0;
  194. for (int i = 0; i < nb_buffers; i++) {
  195. ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
  196. VK_WHOLE_SIZE, 0, (void **)&mem[i]);
  197. if (ret != VK_SUCCESS) {
  198. av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
  199. ff_vk_ret2str(ret));
  200. return AVERROR_EXTERNAL;
  201. }
  202. }
  203. if (!invalidate)
  204. return 0;
  205. for (int i = 0; i < nb_buffers; i++) {
  206. const VkMappedMemoryRange ival_buf = {
  207. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  208. .memory = buf[i].mem,
  209. .size = VK_WHOLE_SIZE,
  210. };
  211. if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  212. continue;
  213. inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
  214. (++inval_count)*sizeof(*inval_list));
  215. if (!inval_list)
  216. return AVERROR(ENOMEM);
  217. inval_list[inval_count - 1] = ival_buf;
  218. }
  219. if (inval_count) {
  220. ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
  221. inval_list);
  222. if (ret != VK_SUCCESS) {
  223. av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
  224. ff_vk_ret2str(ret));
  225. return AVERROR_EXTERNAL;
  226. }
  227. }
  228. return 0;
  229. }
  230. int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
  231. int flush)
  232. {
  233. int err = 0;
  234. VkResult ret;
  235. VulkanFilterContext *s = avctx->priv;
  236. VkMappedMemoryRange *flush_list = NULL;
  237. int flush_count = 0;
  238. if (flush) {
  239. for (int i = 0; i < nb_buffers; i++) {
  240. const VkMappedMemoryRange flush_buf = {
  241. .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
  242. .memory = buf[i].mem,
  243. .size = VK_WHOLE_SIZE,
  244. };
  245. if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
  246. continue;
  247. flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
  248. (++flush_count)*sizeof(*flush_list));
  249. if (!flush_list)
  250. return AVERROR(ENOMEM);
  251. flush_list[flush_count - 1] = flush_buf;
  252. }
  253. }
  254. if (flush_count) {
  255. ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
  256. flush_list);
  257. if (ret != VK_SUCCESS) {
  258. av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
  259. ff_vk_ret2str(ret));
  260. err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
  261. }
  262. }
  263. for (int i = 0; i < nb_buffers; i++)
  264. vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);
  265. return err;
  266. }
  267. void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
  268. {
  269. VulkanFilterContext *s = avctx->priv;
  270. if (!buf)
  271. return;
  272. if (buf->buf != VK_NULL_HANDLE)
  273. vkDestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
  274. if (buf->mem != VK_NULL_HANDLE)
  275. vkFreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
  276. }
  277. int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
  278. int offset, int size, VkShaderStageFlagBits stage)
  279. {
  280. VkPushConstantRange *pc;
  281. pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
  282. pl->push_consts_num + 1);
  283. if (!pl->push_consts)
  284. return AVERROR(ENOMEM);
  285. pc = &pl->push_consts[pl->push_consts_num++];
  286. memset(pc, 0, sizeof(*pc));
  287. pc->stageFlags = stage;
  288. pc->offset = offset;
  289. pc->size = size;
  290. return 0;
  291. }
  292. FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
  293. int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
  294. {
  295. VkResult ret;
  296. FFVkExecContext *e;
  297. VulkanFilterContext *s = avctx->priv;
  298. int queue_family = s->queue_family_idx;
  299. int nb_queues = s->queue_count;
  300. VkCommandPoolCreateInfo cqueue_create = {
  301. .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
  302. .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
  303. .queueFamilyIndex = queue_family,
  304. };
  305. VkCommandBufferAllocateInfo cbuf_create = {
  306. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
  307. .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
  308. .commandBufferCount = nb_queues,
  309. };
  310. e = create_exec_ctx(s);
  311. if (!e)
  312. return AVERROR(ENOMEM);
  313. e->queues = av_mallocz(nb_queues * sizeof(*e->queues));
  314. if (!e->queues)
  315. return AVERROR(ENOMEM);
  316. e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs));
  317. if (!e->bufs)
  318. return AVERROR(ENOMEM);
  319. /* Create command pool */
  320. ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
  321. s->hwctx->alloc, &e->pool);
  322. if (ret != VK_SUCCESS) {
  323. av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
  324. ff_vk_ret2str(ret));
  325. return AVERROR_EXTERNAL;
  326. }
  327. cbuf_create.commandPool = e->pool;
  328. /* Allocate command buffer */
  329. ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
  330. if (ret != VK_SUCCESS) {
  331. av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
  332. ff_vk_ret2str(ret));
  333. return AVERROR_EXTERNAL;
  334. }
  335. for (int i = 0; i < nb_queues; i++) {
  336. FFVkQueueCtx *q = &e->queues[i];
  337. vkGetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue);
  338. }
  339. *ctx = e;
  340. return 0;
  341. }
  342. void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
  343. {
  344. VulkanFilterContext *s = avctx->priv;
  345. FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
  346. for (int j = 0; j < q->nb_buf_deps; j++)
  347. av_buffer_unref(&q->buf_deps[j]);
  348. q->nb_buf_deps = 0;
  349. for (int j = 0; j < q->nb_frame_deps; j++)
  350. av_frame_free(&q->frame_deps[j]);
  351. q->nb_frame_deps = 0;
  352. e->sem_wait_cnt = 0;
  353. e->sem_sig_cnt = 0;
  354. }
  355. int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
  356. {
  357. VkResult ret;
  358. VulkanFilterContext *s = avctx->priv;
  359. FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
  360. VkCommandBufferBeginInfo cmd_start = {
  361. .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
  362. .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  363. };
  364. /* Create the fence and don't wait for it initially */
  365. if (!q->fence) {
  366. VkFenceCreateInfo fence_spawn = {
  367. .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
  368. };
  369. ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
  370. &q->fence);
  371. if (ret != VK_SUCCESS) {
  372. av_log(avctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
  373. ff_vk_ret2str(ret));
  374. return AVERROR_EXTERNAL;
  375. }
  376. } else {
  377. vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
  378. vkResetFences(s->hwctx->act_dev, 1, &q->fence);
  379. }
  380. /* Discard queue dependencies */
  381. ff_vk_discard_exec_deps(avctx, e);
  382. ret = vkBeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start);
  383. if (ret != VK_SUCCESS) {
  384. av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
  385. ff_vk_ret2str(ret));
  386. return AVERROR_EXTERNAL;
  387. }
  388. return 0;
  389. }
  390. VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e)
  391. {
  392. VulkanFilterContext *s = avctx->priv;
  393. return e->bufs[s->cur_queue_idx];
  394. }
  395. int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
  396. AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
  397. {
  398. AVFrame **dst;
  399. VulkanFilterContext *s = avctx->priv;
  400. AVVkFrame *f = (AVVkFrame *)frame->data[0];
  401. FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
  402. AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
  403. int planes = av_pix_fmt_count_planes(fc->sw_format);
  404. for (int i = 0; i < planes; i++) {
  405. e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
  406. (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
  407. if (!e->sem_wait) {
  408. ff_vk_discard_exec_deps(avctx, e);
  409. return AVERROR(ENOMEM);
  410. }
  411. e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
  412. (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
  413. if (!e->sem_wait_dst) {
  414. ff_vk_discard_exec_deps(avctx, e);
  415. return AVERROR(ENOMEM);
  416. }
  417. e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
  418. (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
  419. if (!e->sem_sig) {
  420. ff_vk_discard_exec_deps(avctx, e);
  421. return AVERROR(ENOMEM);
  422. }
  423. e->sem_wait[e->sem_wait_cnt] = f->sem[i];
  424. e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
  425. e->sem_wait_cnt++;
  426. e->sem_sig[e->sem_sig_cnt] = f->sem[i];
  427. e->sem_sig_cnt++;
  428. }
  429. dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
  430. (q->nb_frame_deps + 1) * sizeof(*dst));
  431. if (!dst) {
  432. ff_vk_discard_exec_deps(avctx, e);
  433. return AVERROR(ENOMEM);
  434. }
  435. q->frame_deps = dst;
  436. q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
  437. if (!q->frame_deps[q->nb_frame_deps]) {
  438. ff_vk_discard_exec_deps(avctx, e);
  439. return AVERROR(ENOMEM);
  440. }
  441. q->nb_frame_deps++;
  442. return 0;
  443. }
  444. int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
  445. {
  446. VkResult ret;
  447. VulkanFilterContext *s = avctx->priv;
  448. FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
  449. VkSubmitInfo s_info = {
  450. .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
  451. .commandBufferCount = 1,
  452. .pCommandBuffers = &e->bufs[s->cur_queue_idx],
  453. .pWaitSemaphores = e->sem_wait,
  454. .pWaitDstStageMask = e->sem_wait_dst,
  455. .waitSemaphoreCount = e->sem_wait_cnt,
  456. .pSignalSemaphores = e->sem_sig,
  457. .signalSemaphoreCount = e->sem_sig_cnt,
  458. };
  459. ret = vkEndCommandBuffer(e->bufs[s->cur_queue_idx]);
  460. if (ret != VK_SUCCESS) {
  461. av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
  462. ff_vk_ret2str(ret));
  463. return AVERROR_EXTERNAL;
  464. }
  465. ret = vkQueueSubmit(q->queue, 1, &s_info, q->fence);
  466. if (ret != VK_SUCCESS) {
  467. av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
  468. ff_vk_ret2str(ret));
  469. return AVERROR_EXTERNAL;
  470. }
  471. /* Rotate queues */
  472. s->cur_queue_idx = (s->cur_queue_idx + 1) % s->queue_count;
  473. return 0;
  474. }
  475. int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
  476. AVBufferRef **deps, int nb_deps)
  477. {
  478. AVBufferRef **dst;
  479. VulkanFilterContext *s = avctx->priv;
  480. FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
  481. if (!deps || !nb_deps)
  482. return 0;
  483. dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
  484. (q->nb_buf_deps + nb_deps) * sizeof(*dst));
  485. if (!dst)
  486. goto err;
  487. q->buf_deps = dst;
  488. for (int i = 0; i < nb_deps; i++) {
  489. q->buf_deps[q->nb_buf_deps] = deps[i];
  490. if (!q->buf_deps[q->nb_buf_deps])
  491. goto err;
  492. q->nb_buf_deps++;
  493. }
  494. return 0;
  495. err:
  496. ff_vk_discard_exec_deps(avctx, e);
  497. return AVERROR(ENOMEM);
  498. }
  499. int ff_vk_filter_query_formats(AVFilterContext *avctx)
  500. {
  501. static const enum AVPixelFormat pixel_formats[] = {
  502. AV_PIX_FMT_VULKAN, AV_PIX_FMT_NONE,
  503. };
  504. AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
  505. if (!pix_fmts)
  506. return AVERROR(ENOMEM);
  507. return ff_set_common_formats(avctx, pix_fmts);
  508. }
  509. static int vulkan_filter_set_device(AVFilterContext *avctx,
  510. AVBufferRef *device)
  511. {
  512. VulkanFilterContext *s = avctx->priv;
  513. av_buffer_unref(&s->device_ref);
  514. s->device_ref = av_buffer_ref(device);
  515. if (!s->device_ref)
  516. return AVERROR(ENOMEM);
  517. s->device = (AVHWDeviceContext*)s->device_ref->data;
  518. s->hwctx = s->device->hwctx;
  519. return 0;
  520. }
  521. static int vulkan_filter_set_frames(AVFilterContext *avctx,
  522. AVBufferRef *frames)
  523. {
  524. VulkanFilterContext *s = avctx->priv;
  525. av_buffer_unref(&s->frames_ref);
  526. s->frames_ref = av_buffer_ref(frames);
  527. if (!s->frames_ref)
  528. return AVERROR(ENOMEM);
  529. return 0;
  530. }
  531. int ff_vk_filter_config_input(AVFilterLink *inlink)
  532. {
  533. int err;
  534. AVFilterContext *avctx = inlink->dst;
  535. VulkanFilterContext *s = avctx->priv;
  536. AVHWFramesContext *input_frames;
  537. if (!inlink->hw_frames_ctx) {
  538. av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
  539. "hardware frames context on the input.\n");
  540. return AVERROR(EINVAL);
  541. }
  542. /* Extract the device and default output format from the first input. */
  543. if (avctx->inputs[0] != inlink)
  544. return 0;
  545. input_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
  546. if (input_frames->format != AV_PIX_FMT_VULKAN)
  547. return AVERROR(EINVAL);
  548. err = vulkan_filter_set_device(avctx, input_frames->device_ref);
  549. if (err < 0)
  550. return err;
  551. err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx);
  552. if (err < 0)
  553. return err;
  554. /* Default output parameters match input parameters. */
  555. s->input_format = input_frames->sw_format;
  556. if (s->output_format == AV_PIX_FMT_NONE)
  557. s->output_format = input_frames->sw_format;
  558. if (!s->output_width)
  559. s->output_width = inlink->w;
  560. if (!s->output_height)
  561. s->output_height = inlink->h;
  562. return 0;
  563. }
  564. int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
  565. {
  566. int err;
  567. AVFilterContext *avctx = outlink->src;
  568. VulkanFilterContext *s = avctx->priv;
  569. av_buffer_unref(&outlink->hw_frames_ctx);
  570. if (!s->device_ref) {
  571. if (!avctx->hw_device_ctx) {
  572. av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
  573. "Vulkan device.\n");
  574. return AVERROR(EINVAL);
  575. }
  576. err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
  577. if (err < 0)
  578. return err;
  579. }
  580. outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
  581. if (!outlink->hw_frames_ctx)
  582. return AVERROR(ENOMEM);
  583. outlink->w = s->output_width;
  584. outlink->h = s->output_height;
  585. return 0;
  586. }
  587. int ff_vk_filter_config_output(AVFilterLink *outlink)
  588. {
  589. int err;
  590. AVFilterContext *avctx = outlink->src;
  591. VulkanFilterContext *s = avctx->priv;
  592. AVBufferRef *output_frames_ref;
  593. AVHWFramesContext *output_frames;
  594. av_buffer_unref(&outlink->hw_frames_ctx);
  595. if (!s->device_ref) {
  596. if (!avctx->hw_device_ctx) {
  597. av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
  598. "Vulkan device.\n");
  599. return AVERROR(EINVAL);
  600. }
  601. err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
  602. if (err < 0)
  603. return err;
  604. }
  605. output_frames_ref = av_hwframe_ctx_alloc(s->device_ref);
  606. if (!output_frames_ref) {
  607. err = AVERROR(ENOMEM);
  608. goto fail;
  609. }
  610. output_frames = (AVHWFramesContext*)output_frames_ref->data;
  611. output_frames->format = AV_PIX_FMT_VULKAN;
  612. output_frames->sw_format = s->output_format;
  613. output_frames->width = s->output_width;
  614. output_frames->height = s->output_height;
  615. err = av_hwframe_ctx_init(output_frames_ref);
  616. if (err < 0) {
  617. av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
  618. "frames: %d.\n", err);
  619. goto fail;
  620. }
  621. outlink->hw_frames_ctx = output_frames_ref;
  622. outlink->w = s->output_width;
  623. outlink->h = s->output_height;
  624. return 0;
  625. fail:
  626. av_buffer_unref(&output_frames_ref);
  627. return err;
  628. }
  629. int ff_vk_filter_init(AVFilterContext *avctx)
  630. {
  631. VulkanFilterContext *s = avctx->priv;
  632. s->output_format = AV_PIX_FMT_NONE;
  633. if (glslang_init())
  634. return AVERROR_EXTERNAL;
  635. return 0;
  636. }
  637. FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num)
  638. VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
  639. VkFilter filt)
  640. {
  641. VkResult ret;
  642. VulkanFilterContext *s = avctx->priv;
  643. VkSamplerCreateInfo sampler_info = {
  644. .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
  645. .magFilter = filt,
  646. .minFilter = sampler_info.magFilter,
  647. .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
  648. VK_SAMPLER_MIPMAP_MODE_LINEAR,
  649. .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
  650. .addressModeV = sampler_info.addressModeU,
  651. .addressModeW = sampler_info.addressModeU,
  652. .anisotropyEnable = VK_FALSE,
  653. .compareOp = VK_COMPARE_OP_NEVER,
  654. .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
  655. .unnormalizedCoordinates = unnorm_coords,
  656. };
  657. VkSampler *sampler = create_sampler(s);
  658. if (!sampler)
  659. return NULL;
  660. ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
  661. s->hwctx->alloc, sampler);
  662. if (ret != VK_SUCCESS) {
  663. av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
  664. ff_vk_ret2str(ret));
  665. return NULL;
  666. }
  667. return sampler;
  668. }
  669. int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
  670. {
  671. if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA ||
  672. pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 ||
  673. pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 ||
  674. pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
  675. pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 ||
  676. pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0)
  677. return 1;
  678. return 0;
  679. }
  680. const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
  681. {
  682. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
  683. const int high = desc->comp[0].depth > 8;
  684. return high ? "rgba16f" : "rgba8";
  685. }
  686. typedef struct ImageViewCtx {
  687. VkImageView view;
  688. } ImageViewCtx;
  689. static void destroy_imageview(void *opaque, uint8_t *data)
  690. {
  691. VulkanFilterContext *s = opaque;
  692. ImageViewCtx *iv = (ImageViewCtx *)data;
  693. vkDestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
  694. av_free(iv);
  695. }
  696. int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
  697. VkImageView *v, VkImage img, VkFormat fmt,
  698. const VkComponentMapping map)
  699. {
  700. int err;
  701. AVBufferRef *buf;
  702. VulkanFilterContext *s = avctx->priv;
  703. VkImageViewCreateInfo imgview_spawn = {
  704. .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
  705. .pNext = NULL,
  706. .image = img,
  707. .viewType = VK_IMAGE_VIEW_TYPE_2D,
  708. .format = fmt,
  709. .components = map,
  710. .subresourceRange = {
  711. .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
  712. .baseMipLevel = 0,
  713. .levelCount = 1,
  714. .baseArrayLayer = 0,
  715. .layerCount = 1,
  716. },
  717. };
  718. ImageViewCtx *iv = av_mallocz(sizeof(*iv));
  719. VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
  720. s->hwctx->alloc, &iv->view);
  721. if (ret != VK_SUCCESS) {
  722. av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
  723. ff_vk_ret2str(ret));
  724. return AVERROR_EXTERNAL;
  725. }
  726. buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
  727. if (!buf) {
  728. destroy_imageview(s, (uint8_t *)iv);
  729. return AVERROR(ENOMEM);
  730. }
  731. /* Add to queue dependencies */
  732. err = ff_vk_add_dep_exec_ctx(avctx, e, &buf, 1);
  733. if (err) {
  734. av_buffer_unref(&buf);
  735. return err;
  736. }
  737. *v = iv->view;
  738. return 0;
  739. }
  740. FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num)
  741. SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
  742. const char *name, VkShaderStageFlags stage)
  743. {
  744. SPIRVShader *shd = create_shader(pl);
  745. if (!shd)
  746. return NULL;
  747. av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
  748. shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  749. shd->shader.stage = stage;
  750. shd->name = name;
  751. GLSLF(0, #version %i ,460);
  752. GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
  753. GLSLC(0, );
  754. return shd;
  755. }
  756. void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
  757. int local_size[3])
  758. {
  759. shd->local_size[0] = local_size[0];
  760. shd->local_size[1] = local_size[1];
  761. shd->local_size[2] = local_size[2];
  762. av_bprintf(&shd->src, "layout (local_size_x = %i, "
  763. "local_size_y = %i, local_size_z = %i) in;\n\n",
  764. shd->local_size[0], shd->local_size[1], shd->local_size[2]);
  765. }
  766. static void print_shader(AVFilterContext *avctx, SPIRVShader *shd, int prio)
  767. {
  768. int line = 0;
  769. const char *p = shd->src.str;
  770. const char *start = p;
  771. AVBPrint buf;
  772. av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
  773. for (int i = 0; i < strlen(p); i++) {
  774. if (p[i] == '\n') {
  775. av_bprintf(&buf, "%i\t", ++line);
  776. av_bprint_append_data(&buf, start, &p[i] - start + 1);
  777. start = &p[i + 1];
  778. }
  779. }
  780. av_log(avctx, prio, "Shader %s: \n%s", shd->name, buf.str);
  781. av_bprint_finalize(&buf, NULL);
  782. }
  783. int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
  784. const char *entrypoint)
  785. {
  786. VkResult ret;
  787. VulkanFilterContext *s = avctx->priv;
  788. VkShaderModuleCreateInfo shader_create;
  789. GLSlangResult *res;
  790. static const enum GLSlangStage emap[] = {
  791. [VK_SHADER_STAGE_VERTEX_BIT] = GLSLANG_VERTEX,
  792. [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_FRAGMENT,
  793. [VK_SHADER_STAGE_COMPUTE_BIT] = GLSLANG_COMPUTE,
  794. };
  795. shd->shader.pName = entrypoint;
  796. res = glslang_compile(shd->src.str, emap[shd->shader.stage]);
  797. if (!res)
  798. return AVERROR(ENOMEM);
  799. if (res->rval) {
  800. av_log(avctx, AV_LOG_ERROR, "Error compiling shader %s: %s!\n",
  801. shd->name, av_err2str(res->rval));
  802. print_shader(avctx, shd, AV_LOG_ERROR);
  803. if (res->error_msg)
  804. av_log(avctx, AV_LOG_ERROR, "%s", res->error_msg);
  805. av_free(res->error_msg);
  806. return res->rval;
  807. }
  808. print_shader(avctx, shd, AV_LOG_VERBOSE);
  809. shader_create.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
  810. shader_create.pNext = NULL;
  811. shader_create.codeSize = res->size;
  812. shader_create.flags = 0;
  813. shader_create.pCode = res->data;
  814. ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
  815. &shd->shader.module);
  816. /* Free the GLSlangResult struct */
  817. av_free(res->data);
  818. av_free(res);
  819. if (ret != VK_SUCCESS) {
  820. av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
  821. ff_vk_ret2str(ret));
  822. return AVERROR_EXTERNAL;
  823. }
  824. av_log(avctx, AV_LOG_VERBOSE, "Shader %s linked! Size: %zu bytes\n",
  825. shd->name, shader_create.codeSize);
  826. return 0;
  827. }
  828. static const struct descriptor_props {
  829. size_t struct_size; /* Size of the opaque which updates the descriptor */
  830. const char *type;
  831. int is_uniform;
  832. int mem_quali; /* Can use a memory qualifier */
  833. int dim_needed; /* Must indicate dimension */
  834. int buf_content; /* Must indicate buffer contents */
  835. } descriptor_props[] = {
  836. [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, },
  837. [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, },
  838. [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, },
  839. [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, },
  840. [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, },
  841. [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, },
  842. [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, },
  843. [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, },
  844. [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, },
  845. [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, },
  846. [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
  847. };
  848. int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
  849. SPIRVShader *shd, VulkanDescriptorSetBinding *desc,
  850. int num, int only_print_to_shader)
  851. {
  852. VkResult ret;
  853. VkDescriptorSetLayout *layout;
  854. VulkanFilterContext *s = avctx->priv;
  855. if (only_print_to_shader)
  856. goto print;
  857. pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
  858. pl->desc_layout_num + 1);
  859. if (!pl->desc_layout)
  860. return AVERROR(ENOMEM);
  861. layout = &pl->desc_layout[pl->desc_layout_num];
  862. memset(layout, 0, sizeof(*layout));
  863. { /* Create descriptor set layout descriptions */
  864. VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
  865. VkDescriptorSetLayoutBinding *desc_binding;
  866. desc_binding = av_mallocz(sizeof(*desc_binding)*num);
  867. if (!desc_binding)
  868. return AVERROR(ENOMEM);
  869. for (int i = 0; i < num; i++) {
  870. desc_binding[i].binding = i;
  871. desc_binding[i].descriptorType = desc[i].type;
  872. desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
  873. desc_binding[i].stageFlags = desc[i].stages;
  874. desc_binding[i].pImmutableSamplers = desc[i].samplers;
  875. }
  876. desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
  877. desc_create_layout.pBindings = desc_binding;
  878. desc_create_layout.bindingCount = num;
  879. ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
  880. s->hwctx->alloc, layout);
  881. av_free(desc_binding);
  882. if (ret != VK_SUCCESS) {
  883. av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
  884. "layout: %s\n", ff_vk_ret2str(ret));
  885. return AVERROR_EXTERNAL;
  886. }
  887. }
  888. { /* Pool each descriptor by type and update pool counts */
  889. for (int i = 0; i < num; i++) {
  890. int j;
  891. for (j = 0; j < pl->pool_size_desc_num; j++)
  892. if (pl->pool_size_desc[j].type == desc[i].type)
  893. break;
  894. if (j >= pl->pool_size_desc_num) {
  895. pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
  896. sizeof(*pl->pool_size_desc),
  897. ++pl->pool_size_desc_num);
  898. if (!pl->pool_size_desc)
  899. return AVERROR(ENOMEM);
  900. memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
  901. }
  902. pl->pool_size_desc[j].type = desc[i].type;
  903. pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
  904. }
  905. }
  906. { /* Create template creation struct */
  907. VkDescriptorUpdateTemplateCreateInfo *dt;
  908. VkDescriptorUpdateTemplateEntry *des_entries;
  909. /* Freed after descriptor set initialization */
  910. des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
  911. if (!des_entries)
  912. return AVERROR(ENOMEM);
  913. for (int i = 0; i < num; i++) {
  914. des_entries[i].dstBinding = i;
  915. des_entries[i].descriptorType = desc[i].type;
  916. des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
  917. des_entries[i].dstArrayElement = 0;
  918. des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
  919. des_entries[i].stride = descriptor_props[desc[i].type].struct_size;
  920. }
  921. pl->desc_template_info = av_realloc_array(pl->desc_template_info,
  922. sizeof(*pl->desc_template_info),
  923. pl->desc_layout_num + 1);
  924. if (!pl->desc_template_info)
  925. return AVERROR(ENOMEM);
  926. dt = &pl->desc_template_info[pl->desc_layout_num];
  927. memset(dt, 0, sizeof(*dt));
  928. dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
  929. dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
  930. dt->descriptorSetLayout = *layout;
  931. dt->pDescriptorUpdateEntries = des_entries;
  932. dt->descriptorUpdateEntryCount = num;
  933. }
  934. pl->desc_layout_num++;
  935. print:
  936. /* Write shader info */
  937. for (int i = 0; i < num; i++) {
  938. const struct descriptor_props *prop = &descriptor_props[desc[i].type];
  939. GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i);
  940. if (desc[i].mem_layout)
  941. GLSLA(", %s", desc[i].mem_layout);
  942. GLSLA(")");
  943. if (prop->is_uniform)
  944. GLSLA(" uniform");
  945. if (prop->mem_quali && desc[i].mem_quali)
  946. GLSLA(" %s", desc[i].mem_quali);
  947. if (prop->type)
  948. GLSLA(" %s", prop->type);
  949. if (prop->dim_needed)
  950. GLSLA("%iD", desc[i].dimensions);
  951. GLSLA(" %s", desc[i].name);
  952. if (prop->buf_content)
  953. GLSLA(" {\n %s\n}", desc[i].buf_content);
  954. else if (desc[i].elems > 0)
  955. GLSLA("[%i]", desc[i].elems);
  956. GLSLA(";\n");
  957. }
  958. GLSLA("\n");
  959. return 0;
  960. }
  961. void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
  962. int set_id)
  963. {
  964. VulkanFilterContext *s = avctx->priv;
  965. vkUpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
  966. pl->desc_set[s->cur_queue_idx * pl->desc_layout_num + set_id],
  967. pl->desc_template[set_id],
  968. s);
  969. }
  970. void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
  971. VkShaderStageFlagBits stage, int offset,
  972. size_t size, void *src)
  973. {
  974. VulkanFilterContext *s = avctx->priv;
  975. vkCmdPushConstants(e->bufs[s->cur_queue_idx], e->bound_pl->pipeline_layout,
  976. stage, offset, size, src);
  977. }
  978. int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
  979. {
  980. VkResult ret;
  981. VulkanFilterContext *s = avctx->priv;
  982. pl->descriptor_sets_num = pl->desc_layout_num * s->queue_count;
  983. { /* Init descriptor set pool */
  984. VkDescriptorPoolCreateInfo pool_create_info = {
  985. .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
  986. .poolSizeCount = pl->pool_size_desc_num,
  987. .pPoolSizes = pl->pool_size_desc,
  988. .maxSets = pl->descriptor_sets_num,
  989. };
  990. ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
  991. s->hwctx->alloc, &pl->desc_pool);
  992. av_freep(&pl->pool_size_desc);
  993. if (ret != VK_SUCCESS) {
  994. av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
  995. "pool: %s\n", ff_vk_ret2str(ret));
  996. return AVERROR_EXTERNAL;
  997. }
  998. }
  999. { /* Allocate descriptor sets */
  1000. VkDescriptorSetAllocateInfo alloc_info = {
  1001. .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
  1002. .descriptorPool = pl->desc_pool,
  1003. .descriptorSetCount = pl->descriptor_sets_num,
  1004. .pSetLayouts = pl->desc_layout,
  1005. };
  1006. pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set));
  1007. if (!pl->desc_set)
  1008. return AVERROR(ENOMEM);
  1009. ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
  1010. pl->desc_set);
  1011. if (ret != VK_SUCCESS) {
  1012. av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
  1013. ff_vk_ret2str(ret));
  1014. return AVERROR_EXTERNAL;
  1015. }
  1016. }
  1017. { /* Finally create the pipeline layout */
  1018. VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
  1019. .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
  1020. .setLayoutCount = pl->desc_layout_num,
  1021. .pSetLayouts = pl->desc_layout,
  1022. .pushConstantRangeCount = pl->push_consts_num,
  1023. .pPushConstantRanges = pl->push_consts,
  1024. };
  1025. ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
  1026. s->hwctx->alloc, &pl->pipeline_layout);
  1027. av_freep(&pl->push_consts);
  1028. pl->push_consts_num = 0;
  1029. if (ret != VK_SUCCESS) {
  1030. av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
  1031. ff_vk_ret2str(ret));
  1032. return AVERROR_EXTERNAL;
  1033. }
  1034. }
  1035. { /* Descriptor template (for tightly packed descriptors) */
  1036. VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
  1037. pl->desc_template = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_template));
  1038. if (!pl->desc_template)
  1039. return AVERROR(ENOMEM);
  1040. /* Create update templates for the descriptor sets */
  1041. for (int i = 0; i < pl->descriptor_sets_num; i++) {
  1042. desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num];
  1043. desc_template_info->pipelineLayout = pl->pipeline_layout;
  1044. ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev,
  1045. desc_template_info,
  1046. s->hwctx->alloc,
  1047. &pl->desc_template[i]);
  1048. av_free((void *)desc_template_info->pDescriptorUpdateEntries);
  1049. if (ret != VK_SUCCESS) {
  1050. av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
  1051. "template: %s\n", ff_vk_ret2str(ret));
  1052. return AVERROR_EXTERNAL;
  1053. }
  1054. }
  1055. av_freep(&pl->desc_template_info);
  1056. }
  1057. return 0;
  1058. }
  1059. FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num)
  1060. VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx)
  1061. {
  1062. return create_pipeline(avctx->priv);
  1063. }
  1064. int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
  1065. {
  1066. int i;
  1067. VkResult ret;
  1068. VulkanFilterContext *s = avctx->priv;
  1069. VkComputePipelineCreateInfo pipe = {
  1070. .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  1071. .layout = pl->pipeline_layout,
  1072. };
  1073. for (i = 0; i < pl->shaders_num; i++) {
  1074. if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
  1075. pipe.stage = pl->shaders[i]->shader;
  1076. break;
  1077. }
  1078. }
  1079. if (i == pl->shaders_num) {
  1080. av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
  1081. return AVERROR(EINVAL);
  1082. }
  1083. ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
  1084. s->hwctx->alloc, &pl->pipeline);
  1085. if (ret != VK_SUCCESS) {
  1086. av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
  1087. ff_vk_ret2str(ret));
  1088. return AVERROR_EXTERNAL;
  1089. }
  1090. pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
  1091. return 0;
  1092. }
  1093. void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
  1094. VulkanPipeline *pl)
  1095. {
  1096. VulkanFilterContext *s = avctx->priv;
  1097. vkCmdBindPipeline(e->bufs[s->cur_queue_idx], pl->bind_point, pl->pipeline);
  1098. vkCmdBindDescriptorSets(e->bufs[s->cur_queue_idx], pl->bind_point,
  1099. pl->pipeline_layout, 0, pl->descriptor_sets_num,
  1100. pl->desc_set, 0, 0);
  1101. e->bound_pl = pl;
  1102. }
  1103. static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
  1104. {
  1105. /* Make sure all queues have finished executing */
  1106. for (int i = 0; i < s->queue_count; i++) {
  1107. FFVkQueueCtx *q = &e->queues[i];
  1108. if (q->fence) {
  1109. vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
  1110. vkResetFences(s->hwctx->act_dev, 1, &q->fence);
  1111. }
  1112. /* Free the fence */
  1113. if (q->fence)
  1114. vkDestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
  1115. /* Free buffer dependencies */
  1116. for (int j = 0; j < q->nb_buf_deps; j++)
  1117. av_buffer_unref(&q->buf_deps[j]);
  1118. av_free(q->buf_deps);
  1119. /* Free frame dependencies */
  1120. for (int j = 0; j < q->nb_frame_deps; j++)
  1121. av_frame_free(&q->frame_deps[j]);
  1122. av_free(q->frame_deps);
  1123. }
  1124. if (e->bufs)
  1125. vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs);
  1126. if (e->pool)
  1127. vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
  1128. av_freep(&e->bufs);
  1129. av_freep(&e->queues);
  1130. av_freep(&e->sem_sig);
  1131. av_freep(&e->sem_wait);
  1132. av_freep(&e->sem_wait_dst);
  1133. av_free(e);
  1134. }
  1135. static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
  1136. {
  1137. for (int i = 0; i < pl->shaders_num; i++) {
  1138. SPIRVShader *shd = pl->shaders[i];
  1139. av_bprint_finalize(&shd->src, NULL);
  1140. vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
  1141. s->hwctx->alloc);
  1142. av_free(shd);
  1143. }
  1144. vkDestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
  1145. vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
  1146. s->hwctx->alloc);
  1147. for (int i = 0; i < pl->desc_layout_num; i++) {
  1148. if (pl->desc_template && pl->desc_template[i])
  1149. vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
  1150. s->hwctx->alloc);
  1151. if (pl->desc_layout && pl->desc_layout[i])
  1152. vkDestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
  1153. s->hwctx->alloc);
  1154. }
  1155. /* Also frees the descriptor sets */
  1156. if (pl->desc_pool)
  1157. vkDestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
  1158. s->hwctx->alloc);
  1159. av_freep(&pl->desc_set);
  1160. av_freep(&pl->shaders);
  1161. av_freep(&pl->desc_layout);
  1162. av_freep(&pl->desc_template);
  1163. av_freep(&pl->push_consts);
  1164. pl->push_consts_num = 0;
  1165. /* Only freed in case of failure */
  1166. av_freep(&pl->pool_size_desc);
  1167. if (pl->desc_template_info) {
  1168. for (int i = 0; i < pl->descriptor_sets_num; i++)
  1169. av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries);
  1170. av_freep(&pl->desc_template_info);
  1171. }
  1172. av_free(pl);
  1173. }
  1174. void ff_vk_filter_uninit(AVFilterContext *avctx)
  1175. {
  1176. VulkanFilterContext *s = avctx->priv;
  1177. glslang_uninit();
  1178. for (int i = 0; i < s->exec_ctx_num; i++)
  1179. free_exec_ctx(s, s->exec_ctx[i]);
  1180. av_freep(&s->exec_ctx);
  1181. for (int i = 0; i < s->samplers_num; i++) {
  1182. vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
  1183. av_free(s->samplers[i]);
  1184. }
  1185. av_freep(&s->samplers);
  1186. for (int i = 0; i < s->pipelines_num; i++)
  1187. free_pipeline(s, s->pipelines[i]);
  1188. av_freep(&s->pipelines);
  1189. av_freep(&s->scratch);
  1190. s->scratch_size = 0;
  1191. av_buffer_unref(&s->device_ref);
  1192. av_buffer_unref(&s->frames_ref);
  1193. }