/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define VK_NO_PROTOTYPES

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#ifdef _WIN32
#include "compat/w32dlfcn.h"
#else
#include <dlfcn.h>
#endif

#if CONFIG_LIBDRM
#include <unistd.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif

enum VulkanExtensions {
    EXT_EXTERNAL_DMABUF_MEMORY = 1 <<  0, /* VK_EXT_external_memory_dma_buf */
    EXT_DRM_MODIFIER_FLAGS     = 1 <<  1, /* VK_EXT_image_drm_format_modifier */
    EXT_EXTERNAL_FD_MEMORY     = 1 <<  2, /* VK_KHR_external_memory_fd */
    EXT_EXTERNAL_FD_SEM        = 1 <<  3, /* VK_KHR_external_semaphore_fd */
    EXT_EXTERNAL_HOST_MEMORY   = 1 <<  4, /* VK_EXT_external_memory_host */
    EXT_PUSH_DESCRIPTORS       = 1 <<  5, /* VK_KHR_push_descriptor */
    EXT_DEBUG_UTILS            = 1 <<  6, /* VK_EXT_debug_utils */

    EXT_NO_FLAG                = 1 << 31,
};
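
/* The values above form a bitmask: each extension that gets enabled sets its
 * bit in VulkanDevicePriv.extensions, and feature-dependent code paths gate
 * themselves on it, e.g. (an illustrative check, mirroring uses further down
 * in this file):
 *
 *     if (p->extensions & EXT_EXTERNAL_HOST_MEMORY)
 *         // safe to call GetMemoryHostPointerPropertiesEXT
 *
 * EXT_NO_FLAG marks entry points that require no extension at all. */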

#define FN_LIST(MACRO)                                                             \
    /* Instance */                                                                 \
    MACRO(0, 0, EXT_NO_FLAG,              EnumerateInstanceExtensionProperties)    \
    MACRO(0, 0, EXT_NO_FLAG,              CreateInstance)                          \
    MACRO(1, 0, EXT_NO_FLAG,              DestroyInstance)                         \
                                                                                   \
    /* Debug */                                                                    \
    MACRO(1, 0, EXT_NO_FLAG,              CreateDebugUtilsMessengerEXT)            \
    MACRO(1, 0, EXT_NO_FLAG,              DestroyDebugUtilsMessengerEXT)           \
                                                                                   \
    /* Device */                                                                   \
    MACRO(1, 0, EXT_NO_FLAG,              GetDeviceProcAddr)                       \
    MACRO(1, 0, EXT_NO_FLAG,              CreateDevice)                            \
    MACRO(1, 0, EXT_NO_FLAG,              GetPhysicalDeviceFeatures)               \
    MACRO(1, 0, EXT_NO_FLAG,              DestroyDevice)                           \
                                                                                   \
    MACRO(1, 0, EXT_NO_FLAG,              EnumeratePhysicalDevices)                \
    MACRO(1, 0, EXT_NO_FLAG,              EnumerateDeviceExtensionProperties)      \
                                                                                   \
    MACRO(1, 0, EXT_NO_FLAG,              GetPhysicalDeviceProperties2)            \
    MACRO(1, 0, EXT_NO_FLAG,              GetPhysicalDeviceMemoryProperties)       \
    MACRO(1, 0, EXT_NO_FLAG,              GetPhysicalDeviceFormatProperties2)      \
    MACRO(1, 0, EXT_NO_FLAG,              GetPhysicalDeviceImageFormatProperties2) \
    MACRO(1, 0, EXT_NO_FLAG,              GetPhysicalDeviceQueueFamilyProperties)  \
                                                                                   \
    /* Command pool */                                                             \
    MACRO(1, 1, EXT_NO_FLAG,              CreateCommandPool)                       \
    MACRO(1, 1, EXT_NO_FLAG,              DestroyCommandPool)                      \
                                                                                   \
    /* Command buffer */                                                           \
    MACRO(1, 1, EXT_NO_FLAG,              AllocateCommandBuffers)                  \
    MACRO(1, 1, EXT_NO_FLAG,              BeginCommandBuffer)                      \
    MACRO(1, 1, EXT_NO_FLAG,              EndCommandBuffer)                        \
    MACRO(1, 1, EXT_NO_FLAG,              FreeCommandBuffers)                      \
                                                                                   \
    /* Queue */                                                                    \
    MACRO(1, 1, EXT_NO_FLAG,              GetDeviceQueue)                          \
    MACRO(1, 1, EXT_NO_FLAG,              QueueSubmit)                             \
                                                                                   \
    /* Fences */                                                                   \
    MACRO(1, 1, EXT_NO_FLAG,              CreateFence)                             \
    MACRO(1, 1, EXT_NO_FLAG,              WaitForFences)                           \
    MACRO(1, 1, EXT_NO_FLAG,              ResetFences)                             \
    MACRO(1, 1, EXT_NO_FLAG,              DestroyFence)                            \
                                                                                   \
    /* Semaphores */                                                               \
    MACRO(1, 1, EXT_EXTERNAL_FD_SEM,      GetSemaphoreFdKHR)                       \
    MACRO(1, 1, EXT_NO_FLAG,              CreateSemaphore)                         \
    MACRO(1, 1, EXT_NO_FLAG,              DestroySemaphore)                        \
                                                                                   \
    /* Memory */                                                                   \
    MACRO(1, 1, EXT_EXTERNAL_FD_MEMORY,   GetMemoryFdKHR)                          \
    MACRO(1, 1, EXT_NO_FLAG,              GetMemoryFdPropertiesKHR)                \
    MACRO(1, 1, EXT_EXTERNAL_HOST_MEMORY, GetMemoryHostPointerPropertiesEXT)       \
    MACRO(1, 1, EXT_NO_FLAG,              AllocateMemory)                          \
    MACRO(1, 1, EXT_NO_FLAG,              MapMemory)                               \
    MACRO(1, 1, EXT_NO_FLAG,              FlushMappedMemoryRanges)                 \
    MACRO(1, 1, EXT_NO_FLAG,              InvalidateMappedMemoryRanges)            \
    MACRO(1, 1, EXT_NO_FLAG,              UnmapMemory)                             \
    MACRO(1, 1, EXT_NO_FLAG,              FreeMemory)                              \
                                                                                   \
    /* Commands */                                                                 \
    MACRO(1, 1, EXT_NO_FLAG,              CmdPipelineBarrier)                      \
    MACRO(1, 1, EXT_NO_FLAG,              CmdCopyBufferToImage)                    \
    MACRO(1, 1, EXT_NO_FLAG,              CmdCopyImageToBuffer)                    \
                                                                                   \
    /* Buffer */                                                                   \
    MACRO(1, 1, EXT_NO_FLAG,              GetBufferMemoryRequirements2)            \
    MACRO(1, 1, EXT_NO_FLAG,              CreateBuffer)                            \
    MACRO(1, 1, EXT_NO_FLAG,              BindBufferMemory)                        \
    MACRO(1, 1, EXT_NO_FLAG,              DestroyBuffer)                           \
                                                                                   \
    /* Image */                                                                    \
    MACRO(1, 1, EXT_DRM_MODIFIER_FLAGS,   GetImageDrmFormatModifierPropertiesEXT)  \
    MACRO(1, 1, EXT_NO_FLAG,              GetImageMemoryRequirements2)             \
    MACRO(1, 1, EXT_NO_FLAG,              CreateImage)                             \
    MACRO(1, 1, EXT_NO_FLAG,              BindImageMemory2)                        \
    MACRO(1, 1, EXT_NO_FLAG,              GetImageSubresourceLayout)               \
    MACRO(1, 1, EXT_NO_FLAG,              DestroyImage)

#define PFN_DEF(req_inst, req_dev, ext_flag, name) \
    PFN_vk##name name;

typedef struct VulkanFunctions {
    FN_LIST(PFN_DEF)
} VulkanFunctions;
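
/* For illustration, PFN_DEF turns each FN_LIST entry into a function-pointer
 * member, so e.g.
 *
 *     MACRO(1, 0, EXT_NO_FLAG, DestroyInstance)
 *
 * expands to
 *
 *     PFN_vkDestroyInstance DestroyInstance;
 *
 * giving VulkanFunctions one pointer per Vulkan entry point used here. */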

#define PFN_LOAD_INFO(req_inst, req_dev, ext_flag, name) \
    {                                                    \
        req_inst,                                        \
        req_dev,                                         \
        offsetof(VulkanFunctions, name),                 \
        ext_flag,                                        \
        { "vk"#name, "vk"#name"EXT", "vk"#name"KHR" }    \
    },

typedef struct VulkanFunctionsLoadInfo {
    int req_inst;
    int req_dev;
    size_t struct_offset;
    enum VulkanExtensions ext_flag;
    const char *names[3];
} VulkanFunctionsLoadInfo;

static const VulkanFunctionsLoadInfo vk_load_info[] = {
    FN_LIST(PFN_LOAD_INFO)
};
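
/* Likewise, PFN_LOAD_INFO turns the same FN_LIST entry into a load
 * descriptor; the three-name array lets the loader fall back from the core
 * name to the EXT- and KHR-suffixed variants. For example,
 * MACRO(1, 1, EXT_NO_FLAG, CreateSemaphore) expands to
 *
 *     { 1, 1, offsetof(VulkanFunctions, CreateSemaphore), EXT_NO_FLAG,
 *       { "vkCreateSemaphore", "vkCreateSemaphoreEXT", "vkCreateSemaphoreKHR" } },
 *
 * and in load_functions() the first name that resolves wins; redundant
 * suffix combinations are harmless. */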

typedef struct VulkanQueueCtx {
    VkFence fence;
    VkQueue queue;
    int was_synchronous;

    /* Buffer dependencies */
    AVBufferRef **buf_deps;
    int nb_buf_deps;
    int buf_deps_alloc_size;
} VulkanQueueCtx;

typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer *bufs;
    VulkanQueueCtx *queues;
    int nb_queues;
    int cur_queue_idx;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Vulkan library and loader functions */
    void *libvulkan;
    VulkanFunctions vkfn;

    /* Properties */
    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;

    /* Queues */
    uint32_t qfs[3];
    int num_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Extensions */
    enum VulkanExtensions extensions;

    /* Settings */
    int use_linear_images;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    /* Image conversions */
    VulkanExecCtx conv_ctx;

    /* Image transfers */
    VulkanExecCtx upload_ctx;
    VulkanExecCtx download_ctx;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#endif
} AVVkFrameInternal;

#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
    graph ?  hwctx->nb_graphics_queues :                            \
    comp  ? (hwctx->nb_comp_queues ?                                \
             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
             (hwctx->nb_comp_queues ?                               \
              hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
    0                                                               \
)
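
/* Worked example of the fallback chain above: asking for a transfer queue
 * count (tx = 1) yields nb_tx_queues if the device exposes a dedicated
 * transfer family, else nb_comp_queues, else nb_graphics_queues. This
 * mirrors how search_queue_families() below aliases missing families to the
 * next more capable one. */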

#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      |                 \
                             VK_IMAGE_USAGE_STORAGE_BIT      |                 \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT |                 \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)

#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        list = av_realloc_array(list, sizeof(*list), ++count);                 \
        if (!list) {                                                           \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
    } while(0)
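
/* Typical use, as in check_extensions() below (a sketch only; an int err and
 * a fail: label must exist in the calling scope):
 *
 *     const char **extension_names = NULL;
 *     int extensions_found = 0;
 *     ADD_VAL_TO_LIST(extension_names, extensions_found, "VK_KHR_push_descriptor");
 *
 * Each value is strdup'd, so the list owns its strings and both the strings
 * and the array must be freed on cleanup. */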

static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[4];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },

    { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },

    { AV_PIX_FMT_YUV420P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUV422P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUV444P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA420P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    /* There is no AV_PIX_FMT_YUVA420P12 */
    { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA422P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA444P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },

    /* Lower priority as there's an endianness-dependent overlap between these
     * and rgba/bgr0, and PACK32 formats are more limited */
    { AV_PIX_FMT_BGR32,  { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },

    { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },

    { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
    { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}
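
/* Example: av_vkfmt_from_pixfmt(AV_PIX_FMT_NV12) returns the array
 * { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM }, i.e. one VkFormat per plane
 * (an R8 luma plane and an interleaved R8G8 chroma plane); unused entries in
 * the 4-element array stay zero-initialized, which is VK_FORMAT_UNDEFINED. */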

static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
                               int linear)
{
    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
    VulkanDevicePriv *priv = dev_ctx->internal->priv;
    VulkanFunctions *vk = &priv->vkfn;
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}

static int load_libvulkan(AVHWDeviceContext *ctx)
{
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    static const char *lib_names[] = {
#if defined(_WIN32)
        "vulkan-1.dll",
#elif defined(__APPLE__)
        "libvulkan.dylib",
        "libvulkan.1.dylib",
        "libMoltenVK.dylib",
#else
        "libvulkan.so.1",
        "libvulkan.so",
#endif
    };

    for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) {
        p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL);
        if (p->libvulkan)
            break;
    }

    if (!p->libvulkan) {
        av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n");
        return AVERROR_UNKNOWN;
    }

    hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr");

    return 0;
}

static int load_functions(AVHWDeviceContext *ctx, int has_inst, int has_dev)
{
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;

    for (int i = 0; i < FF_ARRAY_ELEMS(vk_load_info); i++) {
        const VulkanFunctionsLoadInfo *load = &vk_load_info[i];
        PFN_vkVoidFunction fn;

        if (load->req_dev  && !has_dev)
            continue;
        if (load->req_inst && !has_inst)
            continue;

        for (int j = 0; j < FF_ARRAY_ELEMS(load->names); j++) {
            const char *name = load->names[j];

            if (load->req_dev)
                fn = vk->GetDeviceProcAddr(hwctx->act_dev, name);
            else if (load->req_inst)
                fn = hwctx->get_proc_addr(hwctx->inst, name);
            else
                fn = hwctx->get_proc_addr(NULL, name);

            if (fn)
                break;
        }

        if (!fn && ((p->extensions &~ EXT_NO_FLAG) & load->ext_flag)) {
            av_log(ctx, AV_LOG_ERROR, "Loader error, function \"%s\" indicated "
                   "as supported, but got NULL function pointer!\n", load->names[0]);
            return AVERROR_EXTERNAL;
        }

        *(PFN_vkVoidFunction *)((uint8_t *)vk + load->struct_offset) = fn;
    }

    return 0;
}
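
/* Loading therefore proceeds in stages keyed on (req_inst, req_dev):
 * create_instance() first calls load_functions(ctx, 0, 0) to resolve the
 * pre-instance entry points via get_proc_addr(NULL, ...), then
 * load_functions(ctx, 1, 0) once hwctx->inst exists; vulkan_device_init()
 * finally calls load_functions(ctx, 1, 1) so device-level functions are
 * resolved through vkGetDeviceProcAddr and bypass loader dispatch. */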

typedef struct VulkanOptExtension {
    const char *name;
    enum VulkanExtensions flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    /* For future use */
};

static const VulkanOptExtension optional_device_exts[] = {
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,               EXT_EXTERNAL_FD_MEMORY,     },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,          EXT_EXTERNAL_DMABUF_MEMORY, },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,        EXT_DRM_MODIFIER_FLAGS,     },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,            EXT_EXTERNAL_FD_SEM,        },
    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,             EXT_EXTERNAL_HOST_MEMORY,   },
    { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,                  EXT_PUSH_DESCRIPTORS,       },
    { VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,                 EXT_NO_FLAG,                },
};

/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                VkDebugUtilsMessageTypeFlagsEXT messageType,
                                const VkDebugUtilsMessengerCallbackDataEXT *data,
                                void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}

static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
        p->extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
            p->extensions |= EXT_DEBUG_UTILS;
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    if (extension_names)
        for (int i = 0; i < extensions_found; i++)
            av_free((void *)extension_names[i]);
    av_free(extension_names);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}
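
/* A sketch of how the user options above are supplied (hypothetical caller
 * code; the dictionary keys "instance_extensions"/"device_extensions" and
 * the "+" separator are what this function parses):
 *
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "device_extensions",
 *                 "VK_KHR_external_memory_fd+VK_EXT_external_memory_host", 0);
 *
 * Unknown names are warned about and skipped rather than failing creation. */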

/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
    VkApplicationInfo application_info = {
        .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName        = "libavutil",
        .apiVersion         = VK_API_VERSION_1_1,
        .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    if (!hwctx->get_proc_addr) {
        err = load_libvulkan(ctx);
        if (err < 0)
            return err;
    }

    err = load_functions(ctx, 0, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n");
        return err;
    }

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);
    if (err < 0)
        return err;

    if (debug_mode) {
        static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
        inst_props.ppEnabledLayerNames = layers;
        inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
    }

    /* Try to create the instance */
    ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < inst_props.enabledExtensionCount; i++)
            av_free((void *)inst_props.ppEnabledExtensionNames[i]);
        av_free((void *)inst_props.ppEnabledExtensionNames);
        return AVERROR_EXTERNAL;
    }

    err = load_functions(ctx, 1, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n");
        return err;
    }

    if (debug_mode) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };

        vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                         hwctx->alloc, &p->debug_ctx);
    }

    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;

    return 0;
}
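
/* Validation can be requested through the same options dictionary
 * (illustrative; "debug" is the key parsed at the top of create_instance()):
 *
 *     av_dict_set(&opts, "debug", "1", 0);
 *
 * which enables the VK_LAYER_KHRONOS_validation layer, requires
 * VK_EXT_debug_utils, and routes messenger output through vk_dbg_callback()
 * into av_log() at a matching log level. */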

typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    const char *name; /* Will use this second unless NULL */
    uint32_t pci_device; /* Will use this third unless 0x0 */
    uint32_t vendor_id; /* Last resort to find something deterministic */
    int index; /* Finally fall back to index */
} VulkanDeviceSelection;

static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}

/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_mallocz_array(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_mallocz_array(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1)
        hwctx->phys_dev = devices[choice];

    av_free(devices);
    av_free(prop);
    av_free(idp);

    return err;
}

static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    float *weights;
    VkQueueFamilyProperties *qs = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int graph_index = -1, comp_index = -1, tx_index = -1;
    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;

    /* First get the number of queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
    if (!qs)
        return AVERROR(ENOMEM);

    /* Finally retrieve the queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);

#define SEARCH_FLAGS(expr, out)                                                \
    for (int i = 0; i < num; i++) {                                            \
        const VkQueueFlagBits flags = qs[i].queueFlags;                        \
        if (expr) {                                                            \
            out = i;                                                           \
            break;                                                             \
        }                                                                      \
    }

    SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)

    SEARCH_FLAGS((flags &  VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
                 comp_index)

    SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
                 (i != comp_index), tx_index)

#undef SEARCH_FLAGS
#define ADD_QUEUE(fidx, graph, comp, tx)                                                 \
    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
           fidx, qs[fidx].queueCount, graph ? "graphics " : "",                          \
           comp ? "compute " : "", tx ? "transfers " : "");                              \
    av_log(ctx, AV_LOG_VERBOSE, "    QF %i flags: %s%s%s%s\n", fidx,                     \
           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "",         \
           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "",           \
           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "",        \
           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "");    \
    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx;                                \
    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount;                       \
    weights = av_malloc(qs[fidx].queueCount * sizeof(float));                            \
    pc[cd->queueCreateInfoCount].pQueuePriorities = weights;                             \
    if (!weights)                                                                        \
        goto fail;                                                                       \
    for (int i = 0; i < qs[fidx].queueCount; i++)                                        \
        weights[i] = 1.0f;                                                               \
    cd->queueCreateInfoCount++;

    ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
    hwctx->queue_family_index      = graph_index;
    hwctx->queue_family_comp_index = graph_index;
    hwctx->queue_family_tx_index   = graph_index;
    hwctx->nb_graphics_queues      = qs[graph_index].queueCount;

    if (comp_index != -1) {
        ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
        hwctx->queue_family_tx_index   = comp_index;
        hwctx->queue_family_comp_index = comp_index;
        hwctx->nb_comp_queues          = qs[comp_index].queueCount;
    }

    if (tx_index != -1) {
        ADD_QUEUE(tx_index, 0, 0, 1)
        hwctx->queue_family_tx_index = tx_index;
        hwctx->nb_tx_queues          = qs[tx_index].queueCount;
    }

#undef ADD_QUEUE
    av_free(qs);

    return 0;

fail:
    av_freep(&pc[0].pQueuePriorities);
    av_freep(&pc[1].pQueuePriorities);
    av_freep(&pc[2].pQueuePriorities);
    av_free(qs);

    return AVERROR(ENOMEM);
}

static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                           int queue_family_index, int num_queues)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex   = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = num_queues,
    };

    cmd->nb_queues = num_queues;

    /* Create command pool */
    ret = vk->CreateCommandPool(hwctx->act_dev, &cqueue_create,
                                hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
    if (!cmd->bufs)
        return AVERROR(ENOMEM);

    cbuf_create.commandPool = cmd->pool;

    /* Allocate command buffer */
    ret = vk->AllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        av_freep(&cmd->bufs);
        return AVERROR_EXTERNAL;
    }

    cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
    if (!cmd->queues)
        return AVERROR(ENOMEM);

    for (int i = 0; i < num_queues; i++) {
        VulkanQueueCtx *q = &cmd->queues[i];
        vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
        q->was_synchronous = 1;
    }

    return 0;
}
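
/* Note on the layout created above: one command buffer and one VulkanQueueCtx
 * per queue in the family, all sharing a single resettable command pool.
 * submit_exec_ctx() below advances cur_queue_idx round-robin, so up to
 * nb_queues submissions can be in flight before a fence wait is needed. */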

static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;

    if (cmd->queues) {
        for (int i = 0; i < cmd->nb_queues; i++) {
            VulkanQueueCtx *q = &cmd->queues[i];

            /* Make sure all queues have finished executing */
            if (q->fence && !q->was_synchronous) {
                vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
                vk->ResetFences(hwctx->act_dev, 1, &q->fence);
            }

            /* Free the fence */
            if (q->fence)
                vk->DestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);

            /* Free buffer dependencies */
            for (int j = 0; j < q->nb_buf_deps; j++)
                av_buffer_unref(&q->buf_deps[j]);
            av_free(q->buf_deps);
        }
    }

    if (cmd->bufs)
        vk->FreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs);
    if (cmd->pool)
        vk->DestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);

    av_freep(&cmd->queues);
    av_freep(&cmd->bufs);
    cmd->pool = NULL;
}

static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    return cmd->bufs[cmd->cur_queue_idx];
}

static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    for (int j = 0; j < q->nb_buf_deps; j++)
        av_buffer_unref(&q->buf_deps[j]);
    q->nb_buf_deps = 0;
}

static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vk->CreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
                              &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                   vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else if (!q->was_synchronous) {
        vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(hwctx->act_dev, 1, &q->fence);
    }

    /* Discard queue dependencies */
    unref_exec_ctx_deps(hwfc, cmd);

    ret = vk->BeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                                AVBufferRef * const *deps, int nb_deps)
{
    AVBufferRef **dst;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    if (!deps || !nb_deps)
        return 0;

    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst)
        goto err;

    q->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
        if (!q->buf_deps[q->nb_buf_deps])
            goto err;
        q->nb_buf_deps++;
    }

    return 0;

err:
    unref_exec_ctx_deps(hwfc, cmd);
    return AVERROR(ENOMEM);
}

static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                           VkSubmitInfo *s_info, int synchronous)
{
    VkResult ret;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;

    ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               vk_ret2str(ret));
        unref_exec_ctx_deps(hwfc, cmd);
        return AVERROR_EXTERNAL;
    }

    s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
    s_info->commandBufferCount = 1;

    ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence);
    if (ret != VK_SUCCESS) {
        unref_exec_ctx_deps(hwfc, cmd);
        return AVERROR_EXTERNAL;
    }

    q->was_synchronous = synchronous;

    if (synchronous) {
        AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
        vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(hwctx->act_dev, 1, &q->fence);
        unref_exec_ctx_deps(hwfc, cmd);
    } else { /* Rotate queues */
        cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
    }

    return 0;
}
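
/* Fence lifecycle, for reference: wait_start_exec_ctx() creates the fence
 * unsignalled (and only waits/resets it if the previous submission on that
 * slot was asynchronous), submit_exec_ctx() hands it to vkQueueSubmit and
 * either waits immediately (synchronous) or defers the wait until the
 * rotated queue slot is reused, keeping the AVBufferRef dependencies alive
 * in the meantime. */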
1216
1217 static void vulkan_device_free(AVHWDeviceContext *ctx)
1218 {
1219     VulkanDevicePriv *p = ctx->internal->priv;
1220     VulkanFunctions *vk = &p->vkfn;
1221     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1222
1223     vk->DestroyDevice(hwctx->act_dev, hwctx->alloc);
1224
1225     if (p->debug_ctx)
1226         vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
1227                                           hwctx->alloc);
1228
1229     vk->DestroyInstance(hwctx->inst, hwctx->alloc);
1230
1231     if (p->libvulkan)
1232         dlclose(p->libvulkan);
1233
1234     for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
1235         av_free((void *)hwctx->enabled_inst_extensions[i]);
1236     av_free((void *)hwctx->enabled_inst_extensions);
1237
1238     for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
1239         av_free((void *)hwctx->enabled_dev_extensions[i]);
1240     av_free((void *)hwctx->enabled_dev_extensions);
1241 }
1242
1243 static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
1244                                          VulkanDeviceSelection *dev_select,
1245                                          AVDictionary *opts, int flags)
1246 {
1247     int err = 0;
1248     VkResult ret;
1249     AVDictionaryEntry *opt_d;
1250     VulkanDevicePriv *p = ctx->internal->priv;
1251     VulkanFunctions *vk = &p->vkfn;
1252     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1253     VkPhysicalDeviceFeatures dev_features = { 0 };
1254     VkDeviceQueueCreateInfo queue_create_info[3] = {
1255         { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
1256         { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
1257         { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
1258     };
1259
1260     VkDeviceCreateInfo dev_info = {
1261         .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
1262         .pNext                = &hwctx->device_features,
1263         .pQueueCreateInfos    = queue_create_info,
1264         .queueCreateInfoCount = 0,
1265     };
1266
1267     hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1268     ctx->free = vulkan_device_free;
1269
1270     /* Create an instance if not given one */
1271     if ((err = create_instance(ctx, opts)))
1272         goto end;
1273
1274     /* Find a device (if not given one) */
1275     if ((err = find_device(ctx, dev_select)))
1276         goto end;
1277
1278     vk->GetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);
1279
1280     /* Try to keep in sync with libplacebo */
1281 #define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;
1282     COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
1283     COPY_FEATURE(hwctx->device_features, shaderStorageImageReadWithoutFormat)
1284     COPY_FEATURE(hwctx->device_features, shaderStorageImageWriteWithoutFormat)
1285     COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
1286     COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
1287     COPY_FEATURE(hwctx->device_features, shaderInt64)
1288 #undef COPY_FEATURE
1289
1290     /* Search queue family */
1291     if ((err = search_queue_families(ctx, &dev_info)))
1292         goto end;
1293
1294     if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
1295                                 &dev_info.enabledExtensionCount, 0))) {
1296         av_free((void *)queue_create_info[0].pQueuePriorities);
1297         av_free((void *)queue_create_info[1].pQueuePriorities);
1298         av_free((void *)queue_create_info[2].pQueuePriorities);
1299         goto end;
1300     }
1301
1302     ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
1303                            &hwctx->act_dev);
1304
1305     av_free((void *)queue_create_info[0].pQueuePriorities);
1306     av_free((void *)queue_create_info[1].pQueuePriorities);
1307     av_free((void *)queue_create_info[2].pQueuePriorities);
1308
1309     if (ret != VK_SUCCESS) {
1310         av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
1311                vk_ret2str(ret));
1312         for (int i = 0; i < dev_info.enabledExtensionCount; i++)
1313             av_free((void *)dev_info.ppEnabledExtensionNames[i]);
1314         av_free((void *)dev_info.ppEnabledExtensionNames);
1315         err = AVERROR_EXTERNAL;
1316         goto end;
1317     }
1318
1319     /* Tiled images are used by default; the "linear_images" option overrides this */
1320     opt_d = av_dict_get(opts, "linear_images", NULL, 0);
1321     if (opt_d)
1322         p->use_linear_images = strtol(opt_d->value, NULL, 10);
1323
1324     hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
1325     hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;
1326
1327 end:
1328     return err;
1329 }
1330
1331 static int vulkan_device_init(AVHWDeviceContext *ctx)
1332 {
1333     int err;
1334     uint32_t queue_num;
1335     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1336     VulkanDevicePriv *p = ctx->internal->priv;
1337     VulkanFunctions *vk = &p->vkfn;
1338
1339     /* Set device extension flags */
1340     for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
1341         for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
1342             if (!strcmp(hwctx->enabled_dev_extensions[i],
1343                         optional_device_exts[j].name)) {
1344                 av_log(ctx, AV_LOG_VERBOSE, "Using device extension %s\n",
1345                        hwctx->enabled_dev_extensions[i]);
1346                 p->extensions |= optional_device_exts[j].flag;
1347                 break;
1348             }
1349         }
1350     }
1351
1352     err = load_functions(ctx, 1, 0);
1353     if (err < 0) {
1354         av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n");
1355         return err;
1356     }
1357
1358     p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1359     p->props.pNext = &p->hprops;
1360     p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
1361
1362     vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props);
1363     av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
1364            p->props.properties.deviceName);
1365     av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
1366     av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
1367            p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
1368     av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %"SIZE_SPECIFIER"\n",
1369            p->props.properties.limits.minMemoryMapAlignment);
1370     if (p->extensions & EXT_EXTERNAL_HOST_MEMORY)
1371         av_log(ctx, AV_LOG_VERBOSE, "    minImportedHostPointerAlignment:    %"PRIu64"\n",
1372                p->hprops.minImportedHostPointerAlignment);
1373
1374     p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
1375
1376     vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
1377     if (!queue_num) {
1378         av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
1379         return AVERROR_EXTERNAL;
1380     }
1381
1382 #define CHECK_QUEUE(type, n)                                                         \
1383 if (n >= queue_num) {                                                                \
1384     av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %"PRIu32" queues)!\n", \
1385            type, n, queue_num);                                                      \
1386     return AVERROR(EINVAL);                                                          \
1387 }
1388
1389     CHECK_QUEUE("graphics", hwctx->queue_family_index)
1390     CHECK_QUEUE("upload",   hwctx->queue_family_tx_index)
1391     CHECK_QUEUE("compute",  hwctx->queue_family_comp_index)
1392
1393 #undef CHECK_QUEUE
1394
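         /* Collect the set of distinct queue families in use; images used by
          * more than one family get created with CONCURRENT sharing */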
1395     p->qfs[p->num_qfs++] = hwctx->queue_family_index;
1396     if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) &&
1397         (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index))
1398         p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index;
1399     if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) &&
1400         (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
1401         p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;
1402
1403     /* Get device capabilities */
1404     vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
1405
1406     return 0;
1407 }
1408
1409 static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
1410                                 AVDictionary *opts, int flags)
1411 {
1412     VulkanDeviceSelection dev_select = { 0 };
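         /* A device string that parses as a number selects by index,
          * anything else matches against the device name */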
1413     if (device && device[0]) {
1414         char *end = NULL;
1415         dev_select.index = strtol(device, &end, 10);
1416         if (end == device) {
1417             dev_select.index = 0;
1418             dev_select.name  = device;
1419         }
1420     }
1421
1422     return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
1423 }
1424
1425 static int vulkan_device_derive(AVHWDeviceContext *ctx,
1426                                 AVHWDeviceContext *src_ctx,
1427                                 AVDictionary *opts, int flags)
1428 {
1429     av_unused VulkanDeviceSelection dev_select = { 0 };
1430
1431     /* If there's only one device on the system, then even if it's not covered
1432      * by the following checks (e.g. non-PCIe ARM GPU), having an empty
1433      * dev_select will mean it'll get picked. */
1434     switch (src_ctx->type) {
1435 #if CONFIG_LIBDRM
1436 #if CONFIG_VAAPI
1437     case AV_HWDEVICE_TYPE_VAAPI: {
1438         AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
1439
1440         const char *vendor = vaQueryVendorString(src_hwctx->display);
1441         if (!vendor) {
1442             av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
1443             return AVERROR_EXTERNAL;
1444         }
1445
1446         if (strstr(vendor, "Intel"))
1447             dev_select.vendor_id = 0x8086;
1448         if (strstr(vendor, "AMD"))
1449             dev_select.vendor_id = 0x1002;
1450
1451         return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
1452     }
1453 #endif
1454     case AV_HWDEVICE_TYPE_DRM: {
1455         AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
1456
1457         drmDevice *drm_dev_info;
1458         int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
1459         if (err) {
1460             av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
1461             return AVERROR_EXTERNAL;
1462         }
1463
1464         if (drm_dev_info->bustype == DRM_BUS_PCI)
1465             dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
1466
1467         drmFreeDevice(&drm_dev_info);
1468
1469         return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
1470     }
1471 #endif
1472 #if CONFIG_CUDA
1473     case AV_HWDEVICE_TYPE_CUDA: {
1474         AVHWDeviceContext *cuda_cu = src_ctx;
1475         AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
1476         AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
1477         CudaFunctions *cu = cu_internal->cuda_dl;
1478
1479         int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
1480                                                cu_internal->cuda_device));
1481         if (ret < 0) {
1482             av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
1483             return AVERROR_EXTERNAL;
1484         }
1485
1486         dev_select.has_uuid = 1;
1487
1488         return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
1489     }
1490 #endif
1491     default:
1492         return AVERROR(ENOSYS);
1493     }
1494 }
1495
1496 static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
1497                                          const void *hwconfig,
1498                                          AVHWFramesConstraints *constraints)
1499 {
1500     int count = 0;
1501     VulkanDevicePriv *p = ctx->internal->priv;
1502
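         /* Two passes: count the supported formats first so the list can be
          * allocated in one go, then fill it in */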
1503     for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
1504         count += pixfmt_is_supported(ctx, i, p->use_linear_images);
1505
1506 #if CONFIG_CUDA
1507     if (p->dev_is_nvidia)
1508         count++;
1509 #endif
1510
1511     constraints->valid_sw_formats = av_malloc_array(count + 1,
1512                                                     sizeof(enum AVPixelFormat));
1513     if (!constraints->valid_sw_formats)
1514         return AVERROR(ENOMEM);
1515
1516     count = 0;
1517     for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
1518         if (pixfmt_is_supported(ctx, i, p->use_linear_images))
1519             constraints->valid_sw_formats[count++] = i;
1520
1521 #if CONFIG_CUDA
1522     if (p->dev_is_nvidia)
1523         constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
1524 #endif
1525     constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
1526
1527     constraints->min_width  = 0;
1528     constraints->min_height = 0;
1529     constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
1530     constraints->max_height = p->props.properties.limits.maxImageDimension2D;
1531
1532     constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
1533     if (!constraints->valid_hw_formats)
1534         return AVERROR(ENOMEM);
1535
1536     constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
1537     constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
1538
1539     return 0;
1540 }
1541
1542 static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
1543                      VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
1544                      VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
1545 {
1546     VkResult ret;
1547     int index = -1;
1548     VulkanDevicePriv *p = ctx->internal->priv;
1549     VulkanFunctions *vk = &p->vkfn;
1550     AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
1551     VkMemoryAllocateInfo alloc_info = {
1552         .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
1553         .pNext          = alloc_extension,
1554         .allocationSize = req->size,
1555     };
1556
1557     /* The Vulkan spec requires memory types to be sorted in the "optimal"
1558      * order, so the first matching type we find will be the best/fastest one */
1559     for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
1560         const VkMemoryType *type = &p->mprops.memoryTypes[i];
1561
1562         /* The memory type must be supported by the requirements (bitfield) */
1563         if (!(req->memoryTypeBits & (1 << i)))
1564             continue;
1565
1566         /* The memory type flags must include our properties */
1567         if ((type->propertyFlags & req_flags) != req_flags)
1568             continue;
1569
1570         /* The memory type must be large enough */
1571         if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
1572             continue;
1573
1574         /* Found a suitable memory type */
1575         index = i;
1576         break;
1577     }
1578
1579     if (index < 0) {
1580         av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
1581                req_flags);
1582         return AVERROR(EINVAL);
1583     }
1584
1585     alloc_info.memoryTypeIndex = index;
1586
1587     ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info,
1588                              dev_hwctx->alloc, mem);
1589     if (ret != VK_SUCCESS) {
1590         av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
1591                vk_ret2str(ret));
1592         return AVERROR(ENOMEM);
1593     }
1594
1595     *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
1596
1597     return 0;
1598 }
1599
1600 static void vulkan_free_internal(AVVkFrameInternal *internal)
1601 {
1602     if (!internal)
1603         return;
1604
1605 #if CONFIG_CUDA
1606     if (internal->cuda_fc_ref) {
1607         AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
1608         int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
1609         AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
1610         AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
1611         AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
1612         CudaFunctions *cu = cu_internal->cuda_dl;
1613
1614         for (int i = 0; i < planes; i++) {
1615             if (internal->cu_sem[i])
1616                 CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
1617             if (internal->cu_mma[i])
1618                 CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
1619             if (internal->ext_mem[i])
1620                 CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
1621         }
1622
1623         av_buffer_unref(&internal->cuda_fc_ref);
1624     }
1625 #endif
1626
1627     av_free(internal);
1628 }
1629
1630 static void vulkan_frame_free(void *opaque, uint8_t *data)
1631 {
1632     AVVkFrame *f = (AVVkFrame *)data;
1633     AVHWFramesContext *hwfc = opaque;
1634     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1635     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1636     VulkanFunctions *vk = &p->vkfn;
1637     int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1638
1639     vulkan_free_internal(f->internal);
1640
1641     for (int i = 0; i < planes; i++) {
1642         vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
1643         vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
1644         vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
1645     }
1646
1647     av_free(f);
1648 }
1649
1650 static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
1651                           void *alloc_pnext, size_t alloc_pnext_stride)
1652 {
1653     int err;
1654     VkResult ret;
1655     AVHWDeviceContext *ctx = hwfc->device_ctx;
1656     VulkanDevicePriv *p = ctx->internal->priv;
1657     VulkanFunctions *vk = &p->vkfn;
1658     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1659     VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
1660
1661     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1662
1663     for (int i = 0; i < planes; i++) {
1664         int use_ded_mem;
1665         VkImageMemoryRequirementsInfo2 req_desc = {
1666             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
1667             .image = f->img[i],
1668         };
1669         VkMemoryDedicatedAllocateInfo ded_alloc = {
1670             .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
1671             .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
1672         };
1673         VkMemoryDedicatedRequirements ded_req = {
1674             .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
1675         };
1676         VkMemoryRequirements2 req = {
1677             .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
1678             .pNext = &ded_req,
1679         };
1680
1681         vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
1682
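             /* Linear images are meant to be host-mapped, so pad their size
              * up to the map alignment */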
1683         if (f->tiling == VK_IMAGE_TILING_LINEAR)
1684             req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
1685                                                   p->props.properties.limits.minMemoryMapAlignment);
1686
1687         /* In case the implementation prefers/requires dedicated allocation */
1688         use_ded_mem = ded_req.prefersDedicatedAllocation |
1689                       ded_req.requiresDedicatedAllocation;
1690         if (use_ded_mem)
1691             ded_alloc.image = f->img[i];
1692
1693         /* Allocate memory */
1694         if ((err = alloc_mem(ctx, &req.memoryRequirements,
1695                              f->tiling == VK_IMAGE_TILING_LINEAR ?
1696                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
1697                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1698                              use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
1699                              &f->flags, &f->mem[i])))
1700             return err;
1701
1702         f->size[i] = req.memoryRequirements.size;
1703         bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1704         bind_info[i].image  = f->img[i];
1705         bind_info[i].memory = f->mem[i];
1706     }
1707
1708     /* Bind the allocated memory to the images */
1709     ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info);
1710     if (ret != VK_SUCCESS) {
1711         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
1712                vk_ret2str(ret));
1713         return AVERROR_EXTERNAL;
1714     }
1715
1716     return 0;
1717 }
1718
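     /* Layout/ownership transitions applied by prepare_frame(): WRITE makes
      * images transfer-dst for uploads, RO_SHADER makes them transfer-src,
      * and EXTERNAL_EXPORT releases them to VK_QUEUE_FAMILY_EXTERNAL_KHR so
      * other APIs can take over ownership. */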
1719 enum PrepMode {
1720     PREP_MODE_WRITE,
1721     PREP_MODE_RO_SHADER,
1722     PREP_MODE_EXTERNAL_EXPORT,
1723 };
1724
1725 static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
1726                          AVVkFrame *frame, enum PrepMode pmode)
1727 {
1728     int err;
1729     uint32_t dst_qf;
1730     VkImageLayout new_layout;
1731     VkAccessFlags new_access;
1732     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1733     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1734     VulkanFunctions *vk = &p->vkfn;
1735
1736     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
1737
1738     VkSubmitInfo s_info = {
1739         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
1740         .pSignalSemaphores    = frame->sem,
1741         .signalSemaphoreCount = planes,
1742     };
1743
1744     VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
1745     for (int i = 0; i < planes; i++)
1746         wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1747
1748     switch (pmode) {
1749     case PREP_MODE_WRITE:
1750         new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1751         new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
1752         dst_qf     = VK_QUEUE_FAMILY_IGNORED;
1753         break;
1754     case PREP_MODE_RO_SHADER:
1755         new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1756         new_access = VK_ACCESS_TRANSFER_READ_BIT;
1757         dst_qf     = VK_QUEUE_FAMILY_IGNORED;
1758         break;
1759     case PREP_MODE_EXTERNAL_EXPORT:
1760         new_layout = VK_IMAGE_LAYOUT_GENERAL;
1761         new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
1762         dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
1763         s_info.pWaitSemaphores = frame->sem;
1764         s_info.pWaitDstStageMask = wait_st;
1765         s_info.waitSemaphoreCount = planes;
1766         break;
1767     }
1768
1769     if ((err = wait_start_exec_ctx(hwfc, ectx)))
1770         return err;
1771
1772     /* Change the image layout to something more optimal for writes.
1773      * This also signals the newly created semaphore, making it usable
1774      * for synchronization */
1775     for (int i = 0; i < planes; i++) {
1776         img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1777         img_bar[i].srcAccessMask = 0x0;
1778         img_bar[i].dstAccessMask = new_access;
1779         img_bar[i].oldLayout = frame->layout[i];
1780         img_bar[i].newLayout = new_layout;
1781         img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1782         img_bar[i].dstQueueFamilyIndex = dst_qf;
1783         img_bar[i].image = frame->img[i];
1784         img_bar[i].subresourceRange.levelCount = 1;
1785         img_bar[i].subresourceRange.layerCount = 1;
1786         img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1787
1788         frame->layout[i] = img_bar[i].newLayout;
1789         frame->access[i] = img_bar[i].dstAccessMask;
1790     }
1791
1792     vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
1793                            VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1794                            VK_PIPELINE_STAGE_TRANSFER_BIT,
1795                            0, 0, NULL, 0, NULL, planes, img_bar);
1796
1797     return submit_exec_ctx(hwfc, ectx, &s_info, 0);
1798 }
1799
1800 static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
1801                                 int frame_w, int frame_h, int plane)
1802 {
1803     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
1804
1805     /* Currently always true unless gray + alpha support is added */
1806     if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
1807         !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
1808         *w = frame_w;
1809         *h = frame_h;
1810         return;
1811     }
1812
1813     *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
1814     *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
1815 }
1816
1817 static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
1818                         VkImageTiling tiling, VkImageUsageFlagBits usage,
1819                         void *create_pnext)
1820 {
1821     int err;
1822     VkResult ret;
1823     AVHWDeviceContext *ctx = hwfc->device_ctx;
1824     VulkanDevicePriv *p = ctx->internal->priv;
1825     VulkanFunctions *vk = &p->vkfn;
1826     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1827     enum AVPixelFormat format = hwfc->sw_format;
1828     const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
1829     const int planes = av_pix_fmt_count_planes(format);
1830
1831     VkExportSemaphoreCreateInfo ext_sem_info = {
1832         .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
1833         .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
1834     };
1835
1836     VkSemaphoreCreateInfo sem_spawn = {
1837         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
1838         .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
1839     };
1840
1841     AVVkFrame *f = av_vk_frame_alloc();
1842     if (!f) {
1843         av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
1844         return AVERROR(ENOMEM);
1845     }
1846
1847     /* Create the images */
1848     for (int i = 0; i < planes; i++) {
1849         VkImageCreateInfo create_info = {
1850             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1851             .pNext                 = create_pnext,
1852             .imageType             = VK_IMAGE_TYPE_2D,
1853             .format                = img_fmts[i],
1854             .extent.depth          = 1,
1855             .mipLevels             = 1,
1856             .arrayLayers           = 1,
1857             .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
1858             .tiling                = tiling,
1859             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
1860             .usage                 = usage,
1861             .samples               = VK_SAMPLE_COUNT_1_BIT,
1862             .pQueueFamilyIndices   = p->qfs,
1863             .queueFamilyIndexCount = p->num_qfs,
1864             .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
1865                                                       VK_SHARING_MODE_EXCLUSIVE,
1866         };
1867
1868         get_plane_wh(&create_info.extent.width, &create_info.extent.height,
1869                      format, hwfc->width, hwfc->height, i);
1870
1871         ret = vk->CreateImage(hwctx->act_dev, &create_info,
1872                               hwctx->alloc, &f->img[i]);
1873         if (ret != VK_SUCCESS) {
1874             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
1875                    vk_ret2str(ret));
1876             err = AVERROR(EINVAL);
1877             goto fail;
1878         }
1879
1880         /* Create semaphore */
1881         ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
1882                                   hwctx->alloc, &f->sem[i]);
1883         if (ret != VK_SUCCESS) {
1884             av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
1885                    vk_ret2str(ret));
1886             err = AVERROR_EXTERNAL;
                 goto fail;
1887         }
1888
1889         f->layout[i] = create_info.initialLayout;
1890         f->access[i] = 0x0;
1891     }
1892
1893     f->flags     = 0x0;
1894     f->tiling    = tiling;
1895
1896     *frame = f;
1897     return 0;
1898
1899 fail:
1900     vulkan_frame_free(hwfc, (uint8_t *)f);
1901     return err;
1902 }
1903
1904 /* Checks if an export handle type is supported, and if it is, ORs it into *iexp */
1905 static void try_export_flags(AVHWFramesContext *hwfc,
1906                              VkExternalMemoryHandleTypeFlags *comp_handle_types,
1907                              VkExternalMemoryHandleTypeFlagBits *iexp,
1908                              VkExternalMemoryHandleTypeFlagBits exp)
1909 {
1910     VkResult ret;
1911     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1912     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
1913     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1914     VulkanFunctions *vk = &p->vkfn;
1915     VkExternalImageFormatProperties eprops = {
1916         .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
1917     };
1918     VkImageFormatProperties2 props = {
1919         .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
1920         .pNext = &eprops,
1921     };
1922     VkPhysicalDeviceExternalImageFormatInfo enext = {
1923         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
1924         .handleType = exp,
1925     };
1926     VkPhysicalDeviceImageFormatInfo2 pinfo = {
1927         .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
1928         .pNext  = !exp ? NULL : &enext,
1929         .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
1930         .type   = VK_IMAGE_TYPE_2D,
1931         .tiling = hwctx->tiling,
1932         .usage  = hwctx->usage,
1933         .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
1934     };
1935
1936     ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
1937                                                       &pinfo, &props);
1938     if (ret == VK_SUCCESS) {
1939         *iexp |= exp;
1940         *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
1941     }
1942 }
1943
1944 static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
1945 {
1946     int err;
1947     AVVkFrame *f;
1948     AVBufferRef *avbuf = NULL;
1949     AVHWFramesContext *hwfc = opaque;
1950     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1951     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1952     VulkanFramesPriv *fp = hwfc->internal->priv;
1953     VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
1954     VkExternalMemoryHandleTypeFlags e = 0x0;
1955
1956     VkExternalMemoryImageCreateInfo eiinfo = {
1957         .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
1958         .pNext       = hwctx->create_pnext,
1959     };
1960
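         /* Probe which external memory handle types the implementation can
          * actually export for this image, and request all of them */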
1961     if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
1962         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1963                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
1964
1965     if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
1966         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1967                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1968
1969     for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
1970         eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
1971         eminfo[i].pNext       = hwctx->alloc_pnext[i];
1972         eminfo[i].handleTypes = e;
1973     }
1974
1975     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1976                        eiinfo.handleTypes ? &eiinfo : NULL);
1977     if (err)
1978         return NULL;
1979
1980     err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
1981     if (err)
1982         goto fail;
1983
1984     err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
1985     if (err)
1986         goto fail;
1987
1988     avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
1989                              vulkan_frame_free, hwfc, 0);
1990     if (!avbuf)
1991         goto fail;
1992
1993     return avbuf;
1994
1995 fail:
1996     vulkan_frame_free(hwfc, (uint8_t *)f);
1997     return NULL;
1998 }
1999
2000 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
2001 {
2002     VulkanFramesPriv *fp = hwfc->internal->priv;
2003
2004     free_exec_ctx(hwfc, &fp->conv_ctx);
2005     free_exec_ctx(hwfc, &fp->upload_ctx);
2006     free_exec_ctx(hwfc, &fp->download_ctx);
2007 }
2008
2009 static int vulkan_frames_init(AVHWFramesContext *hwfc)
2010 {
2011     int err;
2012     AVVkFrame *f;
2013     AVVulkanFramesContext *hwctx = hwfc->hwctx;
2014     VulkanFramesPriv *fp = hwfc->internal->priv;
2015     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
2016     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2017
2018     /* Default pool flags */
2019     hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
2020                     VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
2021
2022     if (!hwctx->usage)
2023         hwctx->usage = DEFAULT_USAGE_FLAGS;
2024
2025     err = create_exec_ctx(hwfc, &fp->conv_ctx,
2026                           dev_hwctx->queue_family_comp_index,
2027                           GET_QUEUE_COUNT(dev_hwctx, 0, 1, 0));
2028     if (err)
2029         return err;
2030
2031     err = create_exec_ctx(hwfc, &fp->upload_ctx,
2032                           dev_hwctx->queue_family_tx_index,
2033                           GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
2034     if (err)
2035         return err;
2036
2037     err = create_exec_ctx(hwfc, &fp->download_ctx,
2038                           dev_hwctx->queue_family_tx_index, 1);
2039     if (err)
2040         return err;
2041
2042     /* Test to see if allocation will fail */
2043     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
2044                        hwctx->create_pnext);
2045     if (err)
2046         return err;
2047
2048     vulkan_frame_free(hwfc, (uint8_t *)f);
2049
2050     /* If user did not specify a pool, hwfc->pool will be set to the internal one
2051      * in hwcontext.c just after this gets called */
2052     if (!hwfc->pool) {
2053         hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
2054                                                              hwfc, vulkan_pool_alloc,
2055                                                              NULL);
2056         if (!hwfc->internal->pool_internal)
2057             return AVERROR(ENOMEM);
2058     }
2059
2060     return 0;
2061 }
2062
2063 static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
2064 {
2065     frame->buf[0] = av_buffer_pool_get(hwfc->pool);
2066     if (!frame->buf[0])
2067         return AVERROR(ENOMEM);
2068
2069     frame->data[0] = frame->buf[0]->data;
2070     frame->format  = AV_PIX_FMT_VULKAN;
2071     frame->width   = hwfc->width;
2072     frame->height  = hwfc->height;
2073
2074     return 0;
2075 }
2076
2077 static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
2078                                        enum AVHWFrameTransferDirection dir,
2079                                        enum AVPixelFormat **formats)
2080 {
2081     enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
2082     if (!fmts)
2083         return AVERROR(ENOMEM);
2084
2085     fmts[0] = hwfc->sw_format;
2086     fmts[1] = AV_PIX_FMT_NONE;
2087
2088     *formats = fmts;
2089     return 0;
2090 }
2091
2092 typedef struct VulkanMapping {
2093     AVVkFrame *frame;
2094     int flags;
2095 } VulkanMapping;
2096
2097 static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
2098 {
2099     VulkanMapping *map = hwmap->priv;
2100     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
2101     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2102     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2103     VulkanFunctions *vk = &p->vkfn;
2104
2105     /* Check if buffer needs flushing */
2106     if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
2107         !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
2108         VkResult ret;
2109         VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
2110
2111         for (int i = 0; i < planes; i++) {
2112             flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
2113             flush_ranges[i].memory = map->frame->mem[i];
2114             flush_ranges[i].size   = VK_WHOLE_SIZE;
2115         }
2116
2117         ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, planes,
2118                                           flush_ranges);
2119         if (ret != VK_SUCCESS) {
2120             av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
2121                    vk_ret2str(ret));
2122         }
2123     }
2124
2125     for (int i = 0; i < planes; i++)
2126         vk->UnmapMemory(hwctx->act_dev, map->frame->mem[i]);
2127
2128     av_free(map);
2129 }
2130
2131 static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
2132                                    const AVFrame *src, int flags)
2133 {
2134     VkResult ret;
2135     int err, mapped_mem_count = 0;
2136     AVVkFrame *f = (AVVkFrame *)src->data[0];
2137     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
2138     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2139     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2140     VulkanFunctions *vk = &p->vkfn;
2141
2142     VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
2143     if (!map)
2144         return AVERROR(ENOMEM);
2145
2146     if (src->format != AV_PIX_FMT_VULKAN) {
2147         av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
2148                av_get_pix_fmt_name(src->format));
2149         err = AVERROR(EINVAL);
2150         goto fail;
2151     }
2152
2153     if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
2154         !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
2155         av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, the memory must be "
2156                "host-visible and the image tiling linear!\n");
2157         err = AVERROR(EINVAL);
2158         goto fail;
2159     }
2160
2161     dst->width  = src->width;
2162     dst->height = src->height;
2163
2164     for (int i = 0; i < planes; i++) {
2165         ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0,
2166                             VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
2167         if (ret != VK_SUCCESS) {
2168             av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
2169                 vk_ret2str(ret));
2170             err = AVERROR_EXTERNAL;
2171             goto fail;
2172         }
2173         mapped_mem_count++;
2174     }
2175
2176     /* Check if the memory contents matter */
2177     if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
2178         !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
2179         VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
2180         for (int i = 0; i < planes; i++) {
2181             map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
2182             map_mem_ranges[i].size = VK_WHOLE_SIZE;
2183             map_mem_ranges[i].memory = f->mem[i];
2184         }
2185
2186         ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes,
2187                                                map_mem_ranges);
2188         if (ret != VK_SUCCESS) {
2189             av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
2190                    vk_ret2str(ret));
2191             err = AVERROR_EXTERNAL;
2192             goto fail;
2193         }
2194     }
2195
2196     for (int i = 0; i < planes; i++) {
2197         VkImageSubresource sub = {
2198             .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2199         };
2200         VkSubresourceLayout layout;
2201         vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
2202         dst->linesize[i] = layout.rowPitch;
2203     }
2204
2205     map->frame = f;
2206     map->flags = flags;
2207
2208     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
2209                                 &vulkan_unmap_frame, map);
2210     if (err < 0)
2211         goto fail;
2212
2213     return 0;
2214
2215 fail:
2216     for (int i = 0; i < mapped_mem_count; i++)
2217         vk->UnmapMemory(hwctx->act_dev, f->mem[i]);
2218
2219     av_free(map);
2220     return err;
2221 }
2222
2223 #if CONFIG_LIBDRM
2224 static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
2225 {
2226     VulkanMapping *map = hwmap->priv;
2227     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
2228     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2229     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2230     VulkanFunctions *vk = &p->vkfn;
2231
2232     for (int i = 0; i < planes; i++) {
2233         vk->DestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
2234         vk->FreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
2235         vk->DestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
2236     }
2237
2238     av_freep(&map->frame);
         av_free(map);
2239 }
2240
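     /* DRM fourcc to Vulkan format mapping; fourccs differing only in
      * component order (e.g. GR88/RG88) share a VkFormat, as swizzles are
      * not expressed at this level. */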
2241 static const struct {
2242     uint32_t drm_fourcc;
2243     VkFormat vk_format;
2244 } vulkan_drm_format_map[] = {
2245     { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
2246     { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
2247     { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
2248     { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
2249     { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
2250     { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
2251     { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
2252     { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
2253     { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
2254     { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
2255 };
2256
2257 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
2258 {
2259     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
2260         if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
2261             return vulkan_drm_format_map[i].vk_format;
2262     return VK_FORMAT_UNDEFINED;
2263 }
2264
2265 static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
2266                                           const AVFrame *src)
2267 {
2268     int err = 0;
2269     VkResult ret;
2270     AVVkFrame *f;
2271     int bind_counts = 0;
2272     AVHWDeviceContext *ctx = hwfc->device_ctx;
2273     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2274     VulkanDevicePriv *p = ctx->internal->priv;
2275     VulkanFunctions *vk = &p->vkfn;
2276     VulkanFramesPriv *fp = hwfc->internal->priv;
2277     AVVulkanFramesContext *frames_hwctx = hwfc->hwctx;
2278     const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
2279     const int has_modifiers = !!(p->extensions & EXT_DRM_MODIFIER_FLAGS);
2280     VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
2281     VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
2282     VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
2283     VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
2284
2285     for (int i = 0; i < desc->nb_layers; i++) {
2286         if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
2287             av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
2288                    desc->layers[i].format);
2289             return AVERROR(EINVAL);
2290         }
2291     }
2292
2293     if (!(f = av_vk_frame_alloc())) {
2294         av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
2295         /* Nothing allocated yet, so do not jump to fail (f is NULL) */
2296         return AVERROR(ENOMEM);
2297     }
2298
2299     f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
2300                 desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
2301                 VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
2302
2303     for (int i = 0; i < desc->nb_layers; i++) {
2304         const int planes = desc->layers[i].nb_planes;
2305         VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
2306             .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
2307             .drmFormatModifier = desc->objects[0].format_modifier,
2308             .drmFormatModifierPlaneCount = planes,
2309             .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
2310         };
2311
2312         VkExternalMemoryImageCreateInfo einfo = {
2313             .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2314             .pNext       = has_modifiers ? &drm_info : NULL,
2315             .handleTypes = htype,
2316         };
2317
2318         VkSemaphoreCreateInfo sem_spawn = {
2319             .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2320         };
2321
2322         VkImageCreateInfo create_info = {
2323             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2324             .pNext                 = &einfo,
2325             .imageType             = VK_IMAGE_TYPE_2D,
2326             .format                = drm_to_vulkan_fmt(desc->layers[i].format),
2327             .extent.depth          = 1,
2328             .mipLevels             = 1,
2329             .arrayLayers           = 1,
2330             .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
2331             .tiling                = f->tiling,
2332             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
2333             .usage                 = frames_hwctx->usage,
2334             .samples               = VK_SAMPLE_COUNT_1_BIT,
2335             .pQueueFamilyIndices   = p->qfs,
2336             .queueFamilyIndexCount = p->num_qfs,
2337             .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2338                                                       VK_SHARING_MODE_EXCLUSIVE,
2339         };
2340
2341         get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2342                      hwfc->sw_format, src->width, src->height, i);
2343
2344         for (int j = 0; j < planes; j++) {
2345             plane_data[j].offset     = desc->layers[i].planes[j].offset;
2346             plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
2347             plane_data[j].size       = 0; /* The specs say so for all 3 */
2348             plane_data[j].arrayPitch = 0;
2349             plane_data[j].depthPitch = 0;
2350         }
2351
2352         /* Create image */
2353         ret = vk->CreateImage(hwctx->act_dev, &create_info,
2354                               hwctx->alloc, &f->img[i]);
2355         if (ret != VK_SUCCESS) {
2356             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
2357                    vk_ret2str(ret));
2358             err = AVERROR(EINVAL);
2359             goto fail;
2360         }
2361
2362         ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
2363                                   hwctx->alloc, &f->sem[i]);
2364         if (ret != VK_SUCCESS) {
2365             av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
2366                    vk_ret2str(ret));
2367             err = AVERROR_EXTERNAL;
                 goto fail;
2368         }
2369
2370         /* Ideally we'd import a foreign semaphore into the one we just
2371          * created via vkImportSemaphoreFdKHR, but unfortunately neither DRM
2372          * nor VAAPI gives us anything we could import and sync with, so
2373          * instead we just signal the semaphore we created ourselves. */
2374
2375         f->layout[i] = create_info.initialLayout;
2376         f->access[i] = 0x0;
2377     }
2378
2379     for (int i = 0; i < desc->nb_objects; i++) {
2380         int use_ded_mem = 0;
2381         VkMemoryFdPropertiesKHR fdmp = {
2382             .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
2383         };
2384         VkMemoryRequirements req = {
2385             .size = desc->objects[i].size,
2386         };
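             /* A successful import transfers ownership of the fd to the
              * implementation, so pass a dup() to keep ours valid */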
2387         VkImportMemoryFdInfoKHR idesc = {
2388             .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
2389             .handleType = htype,
2390             .fd         = dup(desc->objects[i].fd),
2391         };
2392         VkMemoryDedicatedAllocateInfo ded_alloc = {
2393             .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
2394             .pNext = &idesc,
2395         };
2396
2397         ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
2398                                            idesc.fd, &fdmp);
2399         if (ret != VK_SUCCESS) {
2400             av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
2401                    vk_ret2str(ret));
2402             err = AVERROR_EXTERNAL;
2403             close(idesc.fd);
2404             goto fail;
2405         }
2406
2407         req.memoryTypeBits = fdmp.memoryTypeBits;
2408
2409         /* Dedicated allocation only makes sense if there's a one-to-one mapping
2410          * between images and the memory backing them, so only check in this
2411          * case. */
2412         if (desc->nb_layers == desc->nb_objects) {
2413             VkImageMemoryRequirementsInfo2 req_desc = {
2414                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
2415                 .image = f->img[i],
2416             };
2417             VkMemoryDedicatedRequirements ded_req = {
2418                 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
2419             };
2420             VkMemoryRequirements2 req2 = {
2421                 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2422                 .pNext = &ded_req,
2423             };
2424
2425             vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);
2426
2427             use_ded_mem = ded_req.prefersDedicatedAllocation |
2428                           ded_req.requiresDedicatedAllocation;
2429             if (use_ded_mem)
2430                 ded_alloc.image = f->img[i];
2431         }
2432
2433         err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
2434                         use_ded_mem ? &ded_alloc : ded_alloc.pNext,
2435                         &f->flags, &f->mem[i]);
2436         if (err) {
2437             close(idesc.fd);
2438             goto fail;
2439         }
2440
2441         f->size[i] = desc->objects[i].size;
2442     }
2443
2444     for (int i = 0; i < desc->nb_layers; i++) {
2445         const int planes = desc->layers[i].nb_planes;
2446         const int signal_p = has_modifiers && (planes > 1);
2447         for (int j = 0; j < planes; j++) {
2448             VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2449                                            j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
2450                                                     VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
2451
2452             plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
2453             plane_info[bind_counts].planeAspect = aspect;
2454
2455             bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
2456             bind_info[bind_counts].pNext  = signal_p ? &plane_info[bind_counts] : NULL;
2457             bind_info[bind_counts].image  = f->img[i];
2458             bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
2459             bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
2460             bind_counts++;
2461         }
2462     }
2463
2464     /* Bind the allocated memory to the images */
2465     ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
2466     if (ret != VK_SUCCESS) {
2467         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
2468                vk_ret2str(ret));
2469         err = AVERROR_EXTERNAL;
             goto fail;
2470     }
2471
2472     /* NOTE: This becomes completely unnecessary once we can import
2473      * semaphores from DRM. Until then, we have to signal the semaphores
2474      * ourselves, reusing the exec context also used for uploads/downloads. */
2475     err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_RO_SHADER);
2476     if (err)
2477         goto fail;
2478
2479     *frame = f;
2480
2481     return 0;
2482
2483 fail:
2484     for (int i = 0; i < desc->nb_layers; i++) {
2485         vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
2486         vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
2487     }
2488     for (int i = 0; i < desc->nb_objects; i++)
2489         vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
2490
2491     av_free(f);
2492
2493     return err;
2494 }
2495
2496 static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
2497                                const AVFrame *src, int flags)
2498 {
2499     int err = 0;
2500     AVVkFrame *f;
2501     VulkanMapping *map = NULL;
2502
2503     if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src)))
2504         return err;
2505
2506     /* The unmapping function will free this */
2507     dst->data[0] = (uint8_t *)f;
2508     dst->width   = src->width;
2509     dst->height  = src->height;
2510
2511     map = av_mallocz(sizeof(VulkanMapping));
2512     if (!map) {
2513         err = AVERROR(ENOMEM);
             goto fail;
         }
2514
2515     map->frame = f;
2516     map->flags = flags;
2517
2518     err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
2519                                 &vulkan_unmap_from, map);
2520     if (err < 0)
2521         goto fail;
2522
2523     av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
2524
2525     return 0;
2526
2527 fail:
2528     vulkan_frame_free(hwfc, (uint8_t *)f);
2529     av_free(map);
2530     return err;
2531 }
2532
2533 #if CONFIG_VAAPI
2534 static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
2535                                  AVFrame *dst, const AVFrame *src,
2536                                  int flags)
2537 {
2538     int err;
2539     AVFrame *tmp = av_frame_alloc();
2540     AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2541     AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
2542     VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
2543
2544     if (!tmp)
2545         return AVERROR(ENOMEM);
2546
2547     /* We have to sync since, as the earlier comment said, there are no semaphores to import */
2548     vaSyncSurface(vaapi_ctx->display, surface_id);
2549
2550     tmp->format = AV_PIX_FMT_DRM_PRIME;
2551
2552     err = av_hwframe_map(tmp, src, flags);
2553     if (err < 0)
2554         goto fail;
2555
2556     err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
2557     if (err < 0)
2558         goto fail;
2559
2560     err = ff_hwframe_map_replace(dst, src);
2561
2562 fail:
2563     av_frame_free(&tmp);
2564     return err;
2565 }
2566 #endif
2567 #endif
2568
2569 #if CONFIG_CUDA
2570 static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
2571                                  AVBufferRef *cuda_hwfc,
2572                                  const AVFrame *frame)
2573 {
2574     int err;
2575     VkResult ret;
2576     AVVkFrame *dst_f;
2577     AVVkFrameInternal *dst_int;
2578     AVHWDeviceContext *ctx = hwfc->device_ctx;
2579     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2580     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2581     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2582     VulkanDevicePriv *p = ctx->internal->priv;
2583     VulkanFunctions *vk = &p->vkfn;
2584
2585     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
2586     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2587     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2588     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2589     CudaFunctions *cu = cu_internal->cuda_dl;
2590     CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
2591                                                      CU_AD_FORMAT_UNSIGNED_INT8;
2592
2593     dst_f = (AVVkFrame *)frame->data[0];
2594
2595     dst_int = dst_f->internal;
2596     if (!dst_int || !dst_int->cuda_fc_ref) {
2597         if (!dst_f->internal)
2598             dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
2599
2600         if (!dst_int) {
2601             err = AVERROR(ENOMEM);
2602             goto fail;
2603         }
2604
2605         dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
2606         if (!dst_int->cuda_fc_ref) {
2607             err = AVERROR(ENOMEM);
2608             goto fail;
2609         }
2610
2611         for (int i = 0; i < planes; i++) {
2612             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
2613                 .offset = 0,
2614                 .arrayDesc = {
2615                     .Depth = 0,
2616                     .Format = cufmt,
2617                     .NumChannels = 1 + ((planes == 2) && i),
2618                     .Flags = 0,
2619                 },
2620                 .numLevels = 1,
2621             };
2622             CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
2623                 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
2624                 .size = dst_f->size[i],
2625             };
2626             VkMemoryGetFdInfoKHR export_info = {
2627                 .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2628                 .memory     = dst_f->mem[i],
2629                 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
2630             };
2631             VkSemaphoreGetFdInfoKHR sem_export = {
2632                 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
2633                 .semaphore = dst_f->sem[i],
2634                 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2635             };
2636             CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
2637                 .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
2638             };
2639
2640             int p_w, p_h;
2641             get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
2642
2643             tex_desc.arrayDesc.Width = p_w;
2644             tex_desc.arrayDesc.Height = p_h;
2645
2646             ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
2647                                      &ext_desc.handle.fd);
2648             if (ret != VK_SUCCESS) {
2649                 av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
2650                 err = AVERROR_EXTERNAL;
2651                 goto fail;
2652             }
2653
2654             ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
2655             if (ret < 0) {
2656                 err = AVERROR_EXTERNAL;
2657                 goto fail;
2658             }
2659
2660             ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
2661                                                                        dst_int->ext_mem[i],
2662                                                                        &tex_desc));
2663             if (ret < 0) {
2664                 err = AVERROR_EXTERNAL;
2665                 goto fail;
2666             }
2667
2668             ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
2669                                                         dst_int->cu_mma[i], 0));
2670             if (ret < 0) {
2671                 err = AVERROR_EXTERNAL;
2672                 goto fail;
2673             }
2674
2675             ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
2676                                         &ext_sem_desc.handle.fd);
2677             if (ret != VK_SUCCESS) {
2678                 av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
2679                        vk_ret2str(ret));
2680                 err = AVERROR_EXTERNAL;
2681                 goto fail;
2682             }
2683
2684             ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
2685                                                          &ext_sem_desc));
2686             if (ret < 0) {
2687                 err = AVERROR_EXTERNAL;
2688                 goto fail;
2689             }
2690         }
2691     }
2692
2693     return 0;
2694
2695 fail:
2696     return err;
2697 }
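/*
 * Summary of the export chain above (descriptive only): per plane, the
 * Vulkan memory and semaphore are exported as opaque FDs via
 * vkGetMemoryFdKHR()/vkGetSemaphoreFdKHR(), imported into CUDA via
 * cuImportExternalMemory()/cuImportExternalSemaphore(), and the imported
 * memory is viewed as a one-level mipmapped array whose level 0 is the
 * CUarray used for the actual copies. On successful import, CUDA takes
 * ownership of the FDs, so they are not closed here.
 */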
2698
2699 static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
2700                                           AVFrame *dst, const AVFrame *src)
2701 {
2702     int err;
2703     VkResult ret;
2704     CUcontext dummy;
2705     AVVkFrame *dst_f;
2706     AVVkFrameInternal *dst_int;
2707     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2708     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2709
2710     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2711     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2712     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2713     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2714     CudaFunctions *cu = cu_internal->cuda_dl;
2715     CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
2716     CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
2717
2718     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
2719     if (ret < 0)
2720         return AVERROR_EXTERNAL;
2721
2722     dst_f = (AVVkFrame *)dst->data[0];
2723
2724     err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
2725     if (err < 0) {
2726         CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2727         return err;
2728     }
2729
2730     dst_int = dst_f->internal;
2731
2732     ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
2733                                                      planes, cuda_dev->stream));
2734     if (ret < 0) {
2735         err = AVERROR_EXTERNAL;
2736         goto fail;
2737     }
2738
2739     for (int i = 0; i < planes; i++) {
2740         CUDA_MEMCPY2D cpy = {
2741             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
2742             .srcDevice     = (CUdeviceptr)src->data[i],
2743             .srcPitch      = src->linesize[i],
2744             .srcY          = 0,
2745
2746             .dstMemoryType = CU_MEMORYTYPE_ARRAY,
2747             .dstArray      = dst_int->cu_array[i],
2748         };
2749
2750         int p_w, p_h;
2751         get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
2752
2753         cpy.WidthInBytes = p_w * desc->comp[i].step;
2754         cpy.Height = p_h;
2755
2756         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
2757         if (ret < 0) {
2758             err = AVERROR_EXTERNAL;
2759             goto fail;
2760         }
2761     }
2762
2763     ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
2764                                                        planes, cuda_dev->stream));
2765     if (ret < 0) {
2766         err = AVERROR_EXTERNAL;
2767         goto fail;
2768     }
2769
2770     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2771
2772     av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
2773
2774     return 0;
2775
2776 fail:
2777     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2778     vulkan_free_internal(dst_int);
2779     dst_f->internal = NULL;
2780     av_buffer_unref(&dst->buf[0]);
2781     return err;
2782 }
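/*
 * A minimal caller-side sketch (illustrative; `cuda_frame` and
 * `vk_frames_ref` are assumed to exist, error handling elided). This path is
 * reached through the generic transfer API:
 *
 *     AVFrame *vk_frame = av_frame_alloc();
 *     av_hwframe_get_buffer(vk_frames_ref, vk_frame, 0);
 *     av_hwframe_transfer_data(vk_frame, cuda_frame, 0);
 */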
2783 #endif
2784
2785 static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
2786                          const AVFrame *src, int flags)
2787 {
2788     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2789
2790     switch (src->format) {
2791 #if CONFIG_LIBDRM
2792 #if CONFIG_VAAPI
2793     case AV_PIX_FMT_VAAPI:
2794         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2795             return vulkan_map_from_vaapi(hwfc, dst, src, flags);
2796 #endif
2797     case AV_PIX_FMT_DRM_PRIME:
2798         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2799             return vulkan_map_from_drm(hwfc, dst, src, flags);
2800 #endif
2801     default:
2802         return AVERROR(ENOSYS);
2803     }
2804 }
2805
2806 #if CONFIG_LIBDRM
2807 typedef struct VulkanDRMMapping {
2808     AVDRMFrameDescriptor drm_desc;
2809     AVVkFrame *source;
2810 } VulkanDRMMapping;
2811
2812 static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
2813 {
2814     AVDRMFrameDescriptor *drm_desc = hwmap->priv;
2815
2816     for (int i = 0; i < drm_desc->nb_objects; i++)
2817         close(drm_desc->objects[i].fd);
2818
2819     av_free(drm_desc);
2820 }
2821
2822 static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
2823 {
2824     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
2825         if (vulkan_drm_format_map[i].vk_format == vkfmt)
2826             return vulkan_drm_format_map[i].drm_fourcc;
2827     return DRM_FORMAT_INVALID;
2828 }
2829
2830 static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
2831                              const AVFrame *src, int flags)
2832 {
2833     int err = 0;
2834     VkResult ret;
2835     AVVkFrame *f = (AVVkFrame *)src->data[0];
2836     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2837     VulkanFunctions *vk = &p->vkfn;
2838     VulkanFramesPriv *fp = hwfc->internal->priv;
2839     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
2840     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2841     VkImageDrmFormatModifierPropertiesEXT drm_mod = {
2842         .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
2843     };
2844
2845     AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
2846     if (!drm_desc)
2847         return AVERROR(ENOMEM);
2848
2849     err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT);
2850     if (err < 0)
2851         goto end;
2852
2853     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
2854     if (err < 0)
2855         goto end;
2856
2857     if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
2858         ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
2859                                                          &drm_mod);
2860         if (ret != VK_SUCCESS) {
2861             av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
2862             err = AVERROR_EXTERNAL;
2863             goto end;
2864         }
2865     }
2866
2867     for (int i = 0; (i < planes) && (f->mem[i]); i++) {
2868         VkMemoryGetFdInfoKHR export_info = {
2869             .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2870             .memory     = f->mem[i],
2871             .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2872         };
2873
2874         ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
2875                                  &drm_desc->objects[i].fd);
2876         if (ret != VK_SUCCESS) {
2877             av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
2878             err = AVERROR_EXTERNAL;
2879             goto end;
2880         }
2881
2882         drm_desc->nb_objects++;
2883         drm_desc->objects[i].size = f->size[i];
2884         drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
2885     }
2886
2887     drm_desc->nb_layers = planes;
2888     for (int i = 0; i < drm_desc->nb_layers; i++) {
2889         VkSubresourceLayout layout;
2890         VkImageSubresource sub = {
2891             .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
2892                           VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2893                           VK_IMAGE_ASPECT_COLOR_BIT,
2894         };
2895         VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
2896
2897         drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
2898         drm_desc->layers[i].nb_planes = 1;
2899
2900         if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
2901             av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
2902             err = AVERROR_PATCHWELCOME;
2903             goto end;
2904         }
2905
2906         drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
2907
2908         if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
2909             continue;
2910
2911         vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
2912         drm_desc->layers[i].planes[0].offset       = layout.offset;
2913         drm_desc->layers[i].planes[0].pitch        = layout.rowPitch;
2914     }
2915
2916     dst->width   = src->width;
2917     dst->height  = src->height;
2918     dst->data[0] = (uint8_t *)drm_desc;
2919
2920     av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
2921
2922     return 0;
2923
2924 end:
2925     av_free(drm_desc);
2926     return err;
2927 }
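/*
 * A minimal usage sketch (illustrative; `vk_frame` is assumed to be a frame
 * from this context, error handling elided). Export to DRM goes through the
 * generic mapping API, which dispatches here via vulkan_map_from() below:
 *
 *     AVFrame *drm_frame = av_frame_alloc();
 *     drm_frame->format = AV_PIX_FMT_DRM_PRIME;
 *     av_hwframe_map(drm_frame, vk_frame, AV_HWFRAME_MAP_READ);
 */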
2928
2929 #if CONFIG_VAAPI
2930 static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
2931                                const AVFrame *src, int flags)
2932 {
2933     int err;
2934     AVFrame *tmp = av_frame_alloc();
2935     if (!tmp)
2936         return AVERROR(ENOMEM);
2937
2938     tmp->format = AV_PIX_FMT_DRM_PRIME;
2939
2940     err = vulkan_map_to_drm(hwfc, tmp, src, flags);
2941     if (err < 0)
2942         goto fail;
2943
2944     err = av_hwframe_map(dst, tmp, flags);
2945     if (err < 0)
2946         goto fail;
2947
2948     err = ff_hwframe_map_replace(dst, src);
2949
2950 fail:
2951     av_frame_free(&tmp);
2952     return err;
2953 }
2954 #endif
2955 #endif
2956
2957 static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
2958                            const AVFrame *src, int flags)
2959 {
2960     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2961
2962     switch (dst->format) {
2963 #if CONFIG_LIBDRM
2964     case AV_PIX_FMT_DRM_PRIME:
2965         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2966             return vulkan_map_to_drm(hwfc, dst, src, flags);
2967 #if CONFIG_VAAPI
2968     case AV_PIX_FMT_VAAPI:
2969         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2970             return vulkan_map_to_vaapi(hwfc, dst, src, flags);
2971 #endif
2972 #endif
2973     default:
2974         return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
2975     }
2976 }
2977
2978 typedef struct ImageBuffer {
2979     VkBuffer buf;
2980     VkDeviceMemory mem;
2981     VkMemoryPropertyFlagBits flags;
2982     int mapped_mem;
2983 } ImageBuffer;
2984
2985 static void free_buf(void *opaque, uint8_t *data)
2986 {
2987     AVHWDeviceContext *ctx = opaque;
2988     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2989     VulkanDevicePriv *p = ctx->internal->priv;
2990     VulkanFunctions *vk = &p->vkfn;
2991     ImageBuffer *vkbuf = (ImageBuffer *)data;
2992
2993     if (vkbuf->buf)
2994         vk->DestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc);
2995     if (vkbuf->mem)
2996         vk->FreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc);
2997
2998     av_free(data);
2999 }
3000
3001 static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
3002 {
3003     size_t size;
3004     *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
3005     size = height*(*stride);
3006     size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment);
3007     return size;
3008 }
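/*
 * Worked example with assumed limits: for a 1080-line plane with an incoming
 * stride of 1920 and optimalBufferCopyRowPitchAlignment = 256, the stride is
 * padded to FFALIGN(1920, 256) = 2048, giving 2048 * 1080 = 2211840 bytes,
 * which is then rounded up to minMemoryMapAlignment.
 */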
3009
3010 static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
3011                       VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
3012                       size_t size, uint32_t req_memory_bits, int host_mapped,
3013                       void *create_pnext, void *alloc_pnext)
3014 {
3015     int err;
3016     VkResult ret;
3017     int use_ded_mem;
3018     AVVulkanDeviceContext *hwctx = ctx->hwctx;
3019     VulkanDevicePriv *p = ctx->internal->priv;
3020     VulkanFunctions *vk = &p->vkfn;
3021
3022     VkBufferCreateInfo buf_spawn = {
3023         .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
3024         .pNext       = create_pnext,
3025         .usage       = usage,
3026         .size        = size,
3027         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
3028     };
3029
3030     VkBufferMemoryRequirementsInfo2 req_desc = {
3031         .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
3032     };
3033     VkMemoryDedicatedAllocateInfo ded_alloc = {
3034         .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
3035         .pNext = alloc_pnext,
3036     };
3037     VkMemoryDedicatedRequirements ded_req = {
3038         .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
3039     };
3040     VkMemoryRequirements2 req = {
3041         .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
3042         .pNext = &ded_req,
3043     };
3044
3045     ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf));
3046     if (!vkbuf)
3047         return AVERROR(ENOMEM);
3048
3049     vkbuf->mapped_mem = host_mapped;
3050
3051     ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkbuf->buf);
3052     if (ret != VK_SUCCESS) {
3053         av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
3054                vk_ret2str(ret));
3055         err = AVERROR_EXTERNAL;
3056         goto fail;
3057     }
3058
3059     req_desc.buffer = vkbuf->buf;
3060
3061     vk->GetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
3062
3063     /* In case the implementation prefers/requires dedicated allocation */
3064     use_ded_mem = ded_req.prefersDedicatedAllocation |
3065                   ded_req.requiresDedicatedAllocation;
3066     if (use_ded_mem)
3067         ded_alloc.buffer = vkbuf->buf;
3068
3069     /* Additional requirements imposed on us */
3070     if (req_memory_bits)
3071         req.memoryRequirements.memoryTypeBits &= req_memory_bits;
3072
3073     err = alloc_mem(ctx, &req.memoryRequirements, flags,
3074                     use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
3075                     &vkbuf->flags, &vkbuf->mem);
3076     if (err)
3077         goto fail;
3078
3079     ret = vk->BindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0);
3080     if (ret != VK_SUCCESS) {
3081         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
3082                vk_ret2str(ret));
3083         err = AVERROR_EXTERNAL;
3084         goto fail;
3085     }
3086
3087     *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0);
3088     if (!(*buf)) {
3089         err = AVERROR(ENOMEM);
3090         goto fail;
3091     }
3092
3093     return 0;
3094
3095 fail:
3096     free_buf(ctx, (uint8_t *)vkbuf);
3097     return err;
3098 }
3099
3100 /* Skips mapping of host-mapped buffers but still invalidates them */
3101 static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[],
3102                        int nb_buffers, int invalidate)
3103 {
3104     VkResult ret;
3105     AVVulkanDeviceContext *hwctx = ctx->hwctx;
3106     VulkanDevicePriv *p = ctx->internal->priv;
3107     VulkanFunctions *vk = &p->vkfn;
3108     VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
3109     int invalidate_count = 0;
3110
3111     for (int i = 0; i < nb_buffers; i++) {
3112         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
3113         if (vkbuf->mapped_mem)
3114             continue;
3115
3116         ret = vk->MapMemory(hwctx->act_dev, vkbuf->mem, 0,
3117                             VK_WHOLE_SIZE, 0, (void **)&mem[i]);
3118         if (ret != VK_SUCCESS) {
3119             av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
3120                    vk_ret2str(ret));
3121             return AVERROR_EXTERNAL;
3122         }
3123     }
3124
3125     if (!invalidate)
3126         return 0;
3127
3128     for (int i = 0; i < nb_buffers; i++) {
3129         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
3130         const VkMappedMemoryRange ival_buf = {
3131             .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
3132             .memory = vkbuf->mem,
3133             .size   = VK_WHOLE_SIZE,
3134         };
3135
3136         /* For host-imported memory, Vulkan says to use platform-defined
3137          * sync methods, but doesn't actually forbid calling flush or
3138          * invalidate on the original host pointers. It does explicitly
3139          * allow doing so on host-mapped pointers which are then mapped
3140          * again using vkMapMemory, but known implementations return the
3141          * original pointers when mapped again. */
3142         if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3143             continue;
3144
3145         invalidate_ctx[invalidate_count++] = ival_buf;
3146     }
3147
3148     if (invalidate_count) {
3149         ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
3150                                                invalidate_ctx);
3151         if (ret != VK_SUCCESS)
3152             av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
3153                    vk_ret2str(ret));
3154     }
3155
3156     return 0;
3157 }
3158
3159 static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
3160                          int nb_buffers, int flush)
3161 {
3162     int err = 0;
3163     VkResult ret;
3164     AVVulkanDeviceContext *hwctx = ctx->hwctx;
3165     VulkanDevicePriv *p = ctx->internal->priv;
3166     VulkanFunctions *vk = &p->vkfn;
3167     VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
3168     int flush_count = 0;
3169
3170     if (flush) {
3171         for (int i = 0; i < nb_buffers; i++) {
3172             ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
3173             const VkMappedMemoryRange flush_buf = {
3174                 .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
3175                 .memory = vkbuf->mem,
3176                 .size   = VK_WHOLE_SIZE,
3177             };
3178
3179             if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3180                 continue;
3181
3182             flush_ctx[flush_count++] = flush_buf;
3183         }
3184     }
3185
3186     if (flush_count) {
3187         ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
3188         if (ret != VK_SUCCESS) {
3189             av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
3190                     vk_ret2str(ret));
3191             err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
3192         }
3193     }
3194
3195     for (int i = 0; i < nb_buffers; i++) {
3196         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
3197         if (vkbuf->mapped_mem)
3198             continue;
3199
3200         vk->UnmapMemory(hwctx->act_dev, vkbuf->mem);
3201     }
3202
3203     return err;
3204 }
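/*
 * map_buffers()/unmap_buffers() pair up as map -> CPU access -> unmap. A
 * minimal sketch of the intended pattern (as used by vulkan_transfer_data()
 * below, error handling elided):
 *
 *     uint8_t *mem[AV_NUM_DATA_POINTERS];
 *     map_buffers(dev_ctx, bufs, mem, planes, 0); // no invalidate: CPU writes
 *     // ... write into mem[i] ...
 *     unmap_buffers(dev_ctx, bufs, planes, 1);    // flush the CPU writes
 *
 * Invalidating on map is only needed when reading back data the GPU wrote;
 * both flush and invalidate are skipped for host-coherent memory.
 */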
3205
3206 static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
3207                               AVBufferRef **bufs, size_t *buf_offsets,
3208                               const int *buf_stride, int w,
3209                               int h, enum AVPixelFormat pix_fmt, int to_buf)
3210 {
3211     int err;
3212     AVVkFrame *frame = (AVVkFrame *)f->data[0];
3213     VulkanFramesPriv *fp = hwfc->internal->priv;
3214     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
3215     VulkanFunctions *vk = &p->vkfn;
3216
3217     int bar_num = 0;
3218     VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
3219
3220     const int planes = av_pix_fmt_count_planes(pix_fmt);
3221     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
3222
3223     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
3224     VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx;
3225     VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx);
3226
3227     VkSubmitInfo s_info = {
3228         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
3229         .pSignalSemaphores    = frame->sem,
3230         .pWaitSemaphores      = frame->sem,
3231         .pWaitDstStageMask    = sem_wait_dst,
3232         .signalSemaphoreCount = planes,
3233         .waitSemaphoreCount   = planes,
3234     };
3235
3236     if ((err = wait_start_exec_ctx(hwfc, ectx)))
3237         return err;
3238
3239     /* Change the image layout to something more optimal for transfers */
3240     for (int i = 0; i < planes; i++) {
3241         VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
3242                                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
3243         VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
3244                                             VK_ACCESS_TRANSFER_WRITE_BIT;
3245
3246         sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
3247
3248         /* If the layout matches and we have read access, skip the barrier */
3249         if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
3250             continue;
3251
3252         img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
3253         img_bar[bar_num].srcAccessMask = 0x0;
3254         img_bar[bar_num].dstAccessMask = new_access;
3255         img_bar[bar_num].oldLayout = frame->layout[i];
3256         img_bar[bar_num].newLayout = new_layout;
3257         img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
3258         img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
3259         img_bar[bar_num].image = frame->img[i];
3260         img_bar[bar_num].subresourceRange.levelCount = 1;
3261         img_bar[bar_num].subresourceRange.layerCount = 1;
3262         img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
3263
3264         frame->layout[i] = img_bar[bar_num].newLayout;
3265         frame->access[i] = img_bar[bar_num].dstAccessMask;
3266
3267         bar_num++;
3268     }
3269
3270     if (bar_num)
3271         vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
3272                                VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
3273                                0, NULL, 0, NULL, bar_num, img_bar);
3274
3275     /* Schedule a copy for each plane */
3276     for (int i = 0; i < planes; i++) {
3277         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
3278         VkBufferImageCopy buf_reg = {
3279             .bufferOffset = buf_offsets[i],
3280             .bufferRowLength = buf_stride[i] / desc->comp[i].step,
3281             .imageSubresource.layerCount = 1,
3282             .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
3283             .imageOffset = { 0, 0, 0, },
3284         };
3285
3286         int p_w, p_h;
3287         get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);
3288
3289         buf_reg.bufferImageHeight = p_h;
3290         buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
3291
3292         if (to_buf)
3293             vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
3294                                      vkbuf->buf, 1, &buf_reg);
3295         else
3296             vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
3297                                      frame->layout[i], 1, &buf_reg);
3298     }
3299
3300     /* When uploading, do this asynchronously if the source is refcounted by
3301      * keeping the buffers as a submission dependency.
3302      * The hwcontext is guaranteed not to be freed until all frames are freed
3303      * in the frames_uninit function.
3304      * When downloading to a buffer, do this synchronously and wait for the
3305      * queue submission to finish executing */
3306     if (!to_buf) {
3307         int ref;
3308         for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
3309             if (!f->buf[ref])
3310                 break;
3311             if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
3312                 return err;
3313         }
3314         if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
3315             return err;
3316         return submit_exec_ctx(hwfc, ectx, &s_info, !ref);
3317     } else {
3318         return submit_exec_ctx(hwfc, ectx, &s_info,    1);
3319     }
3320 }
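/*
 * Summary of the command sequence recorded above (descriptive only): a single
 * pipeline barrier transitions each plane to TRANSFER_SRC/DST_OPTIMAL where
 * needed, one vkCmdCopyImageToBuffer()/vkCmdCopyBufferToImage() is recorded
 * per plane, and the submission waits on and re-signals the per-plane
 * semaphores. Note that bufferRowLength is in texels, hence the division of
 * the byte stride by the component step.
 */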
3321
3322 static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
3323                                 const AVFrame *swf, int from)
3324 {
3325     int err = 0;
3326     VkResult ret;
3327     AVVkFrame *f = (AVVkFrame *)vkf->data[0];
3328     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
3329     AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
3330     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
3331     VulkanFunctions *vk = &p->vkfn;
3332
3333     AVFrame tmp;
3334     AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
3335     size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };
3336
3337     int p_w, p_h;
3338     const int planes = av_pix_fmt_count_planes(swf->format);
3339
3340     int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
3341     const int map_host = !!(p->extensions & EXT_EXTERNAL_HOST_MEMORY);
3342
3343     if (swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format)) {
3344         av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
3345         return AVERROR(EINVAL);
3346     }
3347
3348     if (swf->width > hwfc->width || swf->height > hwfc->height)
3349         return AVERROR(EINVAL);
3350
3351     /* For linear, host-visible images */
3352     if (f->tiling == VK_IMAGE_TILING_LINEAR &&
3353         f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
3354         AVFrame *map = av_frame_alloc();
3355         if (!map)
3356             return AVERROR(ENOMEM);
3357         map->format = swf->format;
3358
3359         err = vulkan_map_frame_to_mem(hwfc, map, vkf, from ?
3360                                       AV_HWFRAME_MAP_READ : AV_HWFRAME_MAP_WRITE);
3361         if (err) {
3362             av_frame_free(&map);
3363             return err;
3364         }
3362
3363         err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf);
3364         av_frame_free(&map);
3365         return err;
3366     }
3367
3368     /* Create buffers */
3369     for (int i = 0; i < planes; i++) {
3370         size_t req_size;
3371
3372         VkExternalMemoryBufferCreateInfo create_desc = {
3373             .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
3374             .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3375         };
3376
3377         VkImportMemoryHostPointerInfoEXT import_desc = {
3378             .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
3379             .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3380         };
3381
3382         VkMemoryHostPointerPropertiesEXT p_props = {
3383             .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
3384         };
3385
3386         get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
3387
3388         tmp.linesize[i] = FFABS(swf->linesize[i]);
3389
3390         /* Do not host-map planes with a negative stride */
3391         if (map_host && swf->linesize[i] > 0) {
3392             size_t offs;
3393             offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
3394             import_desc.pHostPointer = swf->data[i] - offs;
3395
3396             /* We have to compensate for the few extra bytes of padding we
3397              * completely ignore at the start */
3398             req_size = FFALIGN(offs + tmp.linesize[i] * p_h,
3399                                p->hprops.minImportedHostPointerAlignment);
3400
3401             ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
3402                                                         import_desc.handleType,
3403                                                         import_desc.pHostPointer,
3404                                                         &p_props);
3405
3406             if (ret == VK_SUCCESS) {
3407                 host_mapped[i] = 1;
3408                 buf_offsets[i] = offs;
3409             }
3410         }
3411
3412         if (!host_mapped[i])
3413             req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);
3414
3415         err = create_buf(dev_ctx, &bufs[i],
3416                          from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
3417                                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
3418                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
3419                          req_size, p_props.memoryTypeBits, host_mapped[i],
3420                          host_mapped[i] ? &create_desc : NULL,
3421                          host_mapped[i] ? &import_desc : NULL);
3422         if (err)
3423             goto end;
3424     }
3425
3426     if (!from) {
3427         /* Map, copy software frame to buffer, unmap */
3428         if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
3429             goto end;
3430
3431         for (int i = 0; i < planes; i++) {
3432             if (host_mapped[i])
3433                 continue;
3434
3435             get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
3436
3437             av_image_copy_plane(tmp.data[i], tmp.linesize[i],
3438                                 (const uint8_t *)swf->data[i], swf->linesize[i],
3439                                 FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
3440                                 p_h);
3441         }
3442
3443         if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
3444             goto end;
3445     }
3446
3447     /* Copy buffers into/from image */
3448     err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
3449                              swf->width, swf->height, swf->format, from);
3450     if (err)
3451         goto end;
3452
3451     if (from) {
3452         /* Map, copy buffer to software frame, unmap */
3453         if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
3454             goto end;
3455
3456         for (int i = 0; i < planes; i++) {
3457             if (host_mapped[i])
3458                 continue;
3459
3460             get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
3461
3462             av_image_copy_plane(swf->data[i], swf->linesize[i],
3463                                 (const uint8_t *)tmp.data[i], tmp.linesize[i],
3464                                 FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
3465                                 p_h);
3466         }
3467
3468         if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
3469             goto end;
3470     }
3471
3472 end:
3473     for (int i = 0; i < planes; i++)
3474         av_buffer_unref(&bufs[i]);
3475
3476     return err;
3477 }
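/*
 * A minimal usage sketch (illustrative, error handling elided): both
 * directions are reached through the generic transfer API, with `sw_frame`
 * a software AVFrame and `vk_frame` a frame from this context:
 *
 *     av_hwframe_transfer_data(vk_frame, sw_frame, 0); // upload   (from = 0)
 *     av_hwframe_transfer_data(sw_frame, vk_frame, 0); // download (from = 1)
 */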
3478
3479 static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
3480                                    const AVFrame *src)
3481 {
3482     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
3483
3484     switch (src->format) {
3485 #if CONFIG_CUDA
3486     case AV_PIX_FMT_CUDA:
3487         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
3488             (p->extensions & EXT_EXTERNAL_FD_SEM))
3489             return vulkan_transfer_data_from_cuda(hwfc, dst, src);
3490 #endif
3491     default:
3492         if (src->hw_frames_ctx)
3493             return AVERROR(ENOSYS);
3494         else
3495             return vulkan_transfer_data(hwfc, dst, src, 0);
3496     }
3497 }
3498
3499 #if CONFIG_CUDA
3500 static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
3501                                         const AVFrame *src)
3502 {
3503     int err;
3504     VkResult ret;
3505     CUcontext dummy;
3506     AVVkFrame *dst_f;
3507     AVVkFrameInternal *dst_int;
3508     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
3509     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
3510
3511     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
3512     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
3513     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
3514     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
3515     CudaFunctions *cu = cu_internal->cuda_dl;
3516     CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
3517     CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
3518
3519     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
3520     if (ret < 0)
3521         return AVERROR_EXTERNAL;
3522
3523     dst_f = (AVVkFrame *)src->data[0];
3524
3525     err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
3526     if (err < 0) {
3527         CHECK_CU(cu->cuCtxPopCurrent(&dummy));
3528         return err;
3529     }
3530
3531     dst_int = dst_f->internal;
3532
3533     ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
3534                                                      planes, cuda_dev->stream));
3535     if (ret < 0) {
3536         err = AVERROR_EXTERNAL;
3537         goto fail;
3538     }
3539
3540     for (int i = 0; i < planes; i++) {
3541         CUDA_MEMCPY2D cpy = {
3542             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
3543             .dstDevice     = (CUdeviceptr)dst->data[i],
3544             .dstPitch      = dst->linesize[i],
3545             .dstY          = 0,
3546
3547             .srcMemoryType = CU_MEMORYTYPE_ARRAY,
3548             .srcArray      = dst_int->cu_array[i],
3549         };
3550
3551         int w, h;
3552         get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);
3553
3554         cpy.WidthInBytes = w * desc->comp[i].step;
3555         cpy.Height = h;
3556
3557         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
3558         if (ret < 0) {
3559             err = AVERROR_EXTERNAL;
3560             goto fail;
3561         }
3562     }
3563
3564     ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
3565                                                        planes, cuda_dev->stream));
3566     if (ret < 0) {
3567         err = AVERROR_EXTERNAL;
3568         goto fail;
3569     }
3570
3571     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
3572
3573     av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
3574
3575     return 0;
3576
3577 fail:
3578     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
3579     vulkan_free_internal(dst_int);
3580     dst_f->internal = NULL;
3581     av_buffer_unref(&dst->buf[0]);
3582     return err;
3583 }
3584 #endif
3585
3586 static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
3587                                      const AVFrame *src)
3588 {
3589     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
3590
3591     switch (dst->format) {
3592 #if CONFIG_CUDA
3593     case AV_PIX_FMT_CUDA:
3594         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
3595             (p->extensions & EXT_EXTERNAL_FD_SEM))
3596             return vulkan_transfer_data_to_cuda(hwfc, dst, src);
3597 #endif
3598     default:
3599         if (dst->hw_frames_ctx)
3600             return AVERROR(ENOSYS);
3601         else
3602             return vulkan_transfer_data(hwfc, src, dst, 1);
3603     }
3604 }
3605
3606 static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
3607                                    AVHWFramesContext *src_fc, int flags)
3608 {
3609     return vulkan_frames_init(dst_fc);
3610 }
3611
3612 AVVkFrame *av_vk_frame_alloc(void)
3613 {
3614     return av_mallocz(sizeof(AVVkFrame));
3615 }
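/*
 * Descriptive note: the returned AVVkFrame is zero-initialized and is
 * intended for callers that import their own VkImages into a frames context;
 * it should be freed with av_free(). See hwcontext_vulkan.h for the API
 * contract.
 */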
3616
3617 const HWContextType ff_hwcontext_type_vulkan = {
3618     .type                   = AV_HWDEVICE_TYPE_VULKAN,
3619     .name                   = "Vulkan",
3620
3621     .device_hwctx_size      = sizeof(AVVulkanDeviceContext),
3622     .device_priv_size       = sizeof(VulkanDevicePriv),
3623     .frames_hwctx_size      = sizeof(AVVulkanFramesContext),
3624     .frames_priv_size       = sizeof(VulkanFramesPriv),
3625
3626     .device_init            = &vulkan_device_init,
3627     .device_create          = &vulkan_device_create,
3628     .device_derive          = &vulkan_device_derive,
3629
3630     .frames_get_constraints = &vulkan_frames_get_constraints,
3631     .frames_init            = vulkan_frames_init,
3632     .frames_get_buffer      = vulkan_get_buffer,
3633     .frames_uninit          = vulkan_frames_uninit,
3634
3635     .transfer_get_formats   = vulkan_transfer_get_formats,
3636     .transfer_data_to       = vulkan_transfer_data_to,
3637     .transfer_data_from     = vulkan_transfer_data_from,
3638
3639     .map_to                 = vulkan_map_to,
3640     .map_from               = vulkan_map_from,
3641     .frames_derive_to       = &vulkan_frames_derive_to,
3642
3643     .pix_fmts = (const enum AVPixelFormat []) {
3644         AV_PIX_FMT_VULKAN,
3645         AV_PIX_FMT_NONE
3646     },
3647 };