hwcontext_vulkan: don't error on unavailable user-specified extensions

/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#if CONFIG_LIBDRM
#include <unistd.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif

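/* Command pool, command buffer, queue and fence used for one-off internal
 * submissions */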
typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer buf;
    VkQueue queue;
    VkFence fence;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Properties */
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;

    /* Queues */
    uint32_t qfs[3];
    int num_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Image uploading */
    VulkanExecCtx cmd;

    /* Extensions */
    uint64_t extensions;

    /* Settings */
    int use_linear_images;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    VulkanExecCtx cmd;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#endif
} AVVkFrameInternal;

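/* Declares and resolves an instance-level function pointer named pfn_<name> */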
#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name)           \
                                              vkGetInstanceProcAddr(inst, #name)

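/* Usage flags every image format must support to be considered usable here */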
#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      |                 \
                             VK_IMAGE_USAGE_STORAGE_BIT      |                 \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT |                 \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)

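/* Grows a string list by one entry (duplicating val); sets err and jumps
 * to "fail" on allocation failure */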
#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        list = av_realloc_array(list, sizeof(*list), ++count);                 \
        if (!list) {                                                           \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
    } while(0)

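/* Plane-by-plane mapping from FFmpeg software pixel formats to Vulkan formats */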
static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[3];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },

    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_ABGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },

    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};

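/* Returns the per-plane Vulkan formats for a pixel format, or NULL if there
 * is no mapping. For example, AV_PIX_FMT_NV12 maps to { VK_FORMAT_R8_UNORM,
 * VK_FORMAT_R8G8_UNORM }: a luma plane and an interleaved chroma plane. */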
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}

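/* Checks that every plane's format supports DEFAULT_USAGE_FLAGS under the
 * requested (linear or optimal) tiling */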
static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
                               int linear)
{
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}

enum VulkanExtensions {
    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL <<  0, /* VK_EXT_external_memory_dma_buf */
    EXT_DRM_MODIFIER_FLAGS     = 1ULL <<  1, /* VK_EXT_image_drm_format_modifier */
    EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  2, /* VK_KHR_external_memory_fd */
    EXT_EXTERNAL_FD_SEM        = 1ULL <<  3, /* VK_KHR_external_semaphore_fd */

    EXT_NO_FLAG                = 1ULL << 63,
};

typedef struct VulkanOptExtension {
    const char *name;
    uint64_t flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    { VK_KHR_SURFACE_EXTENSION_NAME, EXT_NO_FLAG },
};

static const VulkanOptExtension optional_device_exts[] = {
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,               EXT_EXTERNAL_FD_MEMORY,     },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,          EXT_EXTERNAL_DMABUF_MEMORY, },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,        EXT_DRM_MODIFIER_FLAGS,     },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,            EXT_EXTERNAL_FD_SEM,        },
};

/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

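/* Validation layer callback: forwards messenger output to av_log at the
 * matching log level */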
static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                VkDebugUtilsMessageTypeFlagsEXT messageType,
                                const VkDebugUtilsMessengerCallbackDataEXT *data,
                                void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}

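/* Builds the list of instance (dev == 0) or device (dev == 1) extensions to
 * enable: every supported optional extension, the debug extension if
 * requested, and any user-specified "+"-separated extensions that are
 * actually available (unavailable ones are skipped with a warning). */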
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
        p->extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    if (extension_names)
        for (int i = 0; i < extensions_found; i++)
            av_free((void *)extension_names[i]);
    av_free(extension_names);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}

/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
    VkApplicationInfo application_info = {
        .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName        = "libavutil",
        .apiVersion         = VK_API_VERSION_1_1,
        .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);
    if (err < 0)
        return err;

    if (debug_mode) {
        static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
        inst_props.ppEnabledLayerNames = layers;
        inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
    }

    /* Try to create the instance */
    ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < inst_props.enabledExtensionCount; i++)
            av_free((void *)inst_props.ppEnabledExtensionNames[i]);
        av_free((void *)inst_props.ppEnabledExtensionNames);
        return AVERROR_EXTERNAL;
    }

    if (debug_mode) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };
        VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);

        pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                           hwctx->alloc, &p->debug_ctx);
    }

    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;

    return 0;
}

typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    const char *name; /* Will use this second unless NULL */
    uint32_t pci_device; /* Will use this third unless 0x0 */
    uint32_t vendor_id; /* Last resort to find something deterministic */
    int index; /* Finally fall back to index */
} VulkanDeviceSelection;

static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}

/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_mallocz_array(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_mallocz_array(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            if (!memcmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1) {
        p->dev_is_nvidia = (prop[choice].properties.vendorID == 0x10de);
        hwctx->phys_dev = devices[choice];
    }
    av_free(devices);
    av_free(prop);
    av_free(idp);

    return err;
}

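/* Picks (ideally distinct) queue families for graphics, compute and transfer,
 * falling back to the graphics family, and fills the VkDeviceQueueCreateInfo
 * array pointed to by cd */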
static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    float *weights;
    VkQueueFamilyProperties *qs = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int graph_index = -1, comp_index = -1, tx_index = -1;
    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;

    /* First get the number of queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
    if (!qs)
        return AVERROR(ENOMEM);

    /* Finally retrieve the queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);

#define SEARCH_FLAGS(expr, out)                                                \
    for (int i = 0; i < num; i++) {                                            \
        const VkQueueFlagBits flags = qs[i].queueFlags;                        \
        if (expr) {                                                            \
            out = i;                                                           \
            break;                                                             \
        }                                                                      \
    }

    SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)

    SEARCH_FLAGS((flags &  VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
                 comp_index)

    SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
                 (i != comp_index), tx_index)

#undef SEARCH_FLAGS
#define ADD_QUEUE(fidx, graph, comp, tx)                                                 \
    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
           fidx, qs[fidx].queueCount, graph ? "graphics " : "",                          \
           comp ? "compute " : "", tx ? "transfers " : "");                              \
    av_log(ctx, AV_LOG_VERBOSE, "    QF %i flags: %s%s%s%s\n", fidx,                     \
           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "",         \
           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "",           \
           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "",        \
           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "");    \
    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx;                                \
    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount;                       \
    weights = av_malloc(qs[fidx].queueCount * sizeof(float));                            \
    pc[cd->queueCreateInfoCount].pQueuePriorities = weights;                             \
    if (!weights)                                                                        \
        goto fail;                                                                       \
    for (int i = 0; i < qs[fidx].queueCount; i++)                                        \
        weights[i] = 1.0f;                                                               \
    cd->queueCreateInfoCount++;

    ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
    hwctx->queue_family_index      = graph_index;
    hwctx->queue_family_comp_index = graph_index;
    hwctx->queue_family_tx_index   = graph_index;

    if (comp_index != -1) {
        ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
        hwctx->queue_family_tx_index   = comp_index;
        hwctx->queue_family_comp_index = comp_index;
    }

    if (tx_index != -1) {
        ADD_QUEUE(tx_index, 0, 0, 1)
        hwctx->queue_family_tx_index = tx_index;
    }

#undef ADD_QUEUE
    av_free(qs);

    return 0;

fail:
    av_freep(&pc[0].pQueuePriorities);
    av_freep(&pc[1].pQueuePriorities);
    av_freep(&pc[2].pQueuePriorities);
    av_free(qs);

    return AVERROR(ENOMEM);
}

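/* Creates the command pool, a single primary command buffer and a fence
 * used for internal one-shot submissions on the given queue family */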
static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
                           int queue_family_index)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex   = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };

    VkFenceCreateInfo fence_spawn = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
    };

    ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
                        hwctx->alloc, &cmd->fence);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                              hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = cmd->pool;

    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
                     &cmd->queue);

    return 0;
}

static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
{
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    if (cmd->fence)
        vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
    if (cmd->buf)
        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
    if (cmd->pool)
        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
}

static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    free_exec_ctx(ctx, &p->cmd);

    vkDestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx) {
        VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
        pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                            hwctx->alloc);
    }

    vkDestroyInstance(hwctx->inst, hwctx->alloc);

    for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
        av_free((void *)hwctx->enabled_inst_extensions[i]);
    av_free((void *)hwctx->enabled_inst_extensions);

    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
        av_free((void *)hwctx->enabled_dev_extensions[i]);
    av_free((void *)hwctx->enabled_dev_extensions);
}

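/* Common device creation path: creates the instance, picks a physical
 * device, selects queue families and creates the logical device */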
static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VkDeviceQueueCreateInfo queue_create_info[3] = {
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
    };

    VkDeviceCreateInfo dev_info = {
        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pQueueCreateInfos    = queue_create_info,
        .queueCreateInfoCount = 0,
    };

    ctx->free = vulkan_device_free;

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts)))
        goto end;

    /* Find a device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyOffsetAlignment:   %"PRIu64"\n",
           p->props.limits.optimalBufferCopyOffsetAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
           p->props.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %zu\n",
           p->props.limits.minMemoryMapAlignment);

    /* Search queue family */
    if ((err = search_queue_families(ctx, &dev_info)))
        goto end;

    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, 0))) {
        av_free((void *)queue_create_info[0].pQueuePriorities);
        av_free((void *)queue_create_info[1].pQueuePriorities);
        av_free((void *)queue_create_info[2].pQueuePriorities);
        goto end;
    }

    ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                         &hwctx->act_dev);

    av_free((void *)queue_create_info[0].pQueuePriorities);
    av_free((void *)queue_create_info[1].pQueuePriorities);
    av_free((void *)queue_create_info[2].pQueuePriorities);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Optimally tiled images are used by default; the "linear_images" option
     * switches to linear tiling */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

end:
    return err;
}

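/* Post-creation initialization: derives the extension flags from the enabled
 * extension list, validates the queue family indices and creates the
 * internal execution context */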
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err;
    uint32_t queue_num;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
    if (!queue_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

#define CHECK_QUEUE(type, n)                                                         \
if (n >= queue_num) {                                                                \
    av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
           type, n, queue_num);                                                      \
    return AVERROR(EINVAL);                                                          \
}

    CHECK_QUEUE("graphics", hwctx->queue_family_index)
    CHECK_QUEUE("upload",   hwctx->queue_family_tx_index)
    CHECK_QUEUE("compute",  hwctx->queue_family_comp_index)

#undef CHECK_QUEUE

    p->qfs[p->num_qfs++] = hwctx->queue_family_index;
    if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index;
    if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;

    /* Create exec context - if there's something invalid this will error out */
    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
    if (err)
        return err;

    /* Get device capabilities */
    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    return 0;
}

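/* The device string is either a decimal index or (part of) a device name */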
static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}

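/* Derives a Vulkan device from an existing VAAPI, DRM or CUDA device by
 * matching the vendor ID, PCI device ID or device UUID, respectively */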
static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch (src_ctx->type) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;

        const char *vendor = vaQueryVendorString(src_hwctx->display);
        if (!vendor) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
            return AVERROR_EXTERNAL;
        }

        if (strstr(vendor, "Intel"))
            dev_select.vendor_id = 0x8086;
        if (strstr(vendor, "AMD"))
            dev_select.vendor_id = 0x1002;

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
    case AV_HWDEVICE_TYPE_DRM: {
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        drmDevice *drm_dev_info;
        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

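/* Reports the software formats and image dimension limits supported by
 * this device */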
static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        count += pixfmt_is_supported(hwctx, i, p->use_linear_images);

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        count++;
#endif

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
            constraints->valid_sw_formats[count++] = i;

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 0;
    constraints->min_height = 0;
    constraints->max_width  = p->props.limits.maxImageDimension2D;
    constraints->max_height = p->props.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}

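/* Allocates device memory of the first type that satisfies both the
 * requirements bitmask and the requested property flags */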
static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
    VkMemoryAllocateInfo alloc_info = {
        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext           = alloc_extension,
    };

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
                           dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}

static void vulkan_free_internal(AVVkFrameInternal *internal)
{
    if (!internal)
        return;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    av_free(internal);
}

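/* Buffer free callback: destroys each plane's image, memory and semaphore */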
static void vulkan_frame_free(void *opaque, uint8_t *data)
{
    AVVkFrame *f = (AVVkFrame *)data;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    vulkan_free_internal(f->internal);

    for (int i = 0; i < planes; i++) {
        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

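/* Allocates memory for every plane's image (dedicated if the implementation
 * prefers or requires it) and binds it all in one vkBindImageMemory2 call */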
static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    for (int i = 0; i < planes; i++) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[i];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[i])))
            return err;

        f->size[i] = req.memoryRequirements.size;
        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[i].image  = f->img[i];
        bind_info[i].memory = f->mem[i];
    }

    /* Bind the allocated memory to the images */
    ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

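/* Layout/access targets for frame preparation: transfer-write for uploads,
 * transfer-read for read-only use, and GENERAL with an external queue
 * family when exporting to other APIs */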
enum PrepMode {
    PREP_MODE_WRITE,
    PREP_MODE_RO_SHADER,
    PREP_MODE_EXTERNAL_EXPORT,
};

static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    VkResult ret;
    uint32_t dst_qf;
    VkImageLayout new_layout;
    VkAccessFlags new_access;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount   = 1,
        .pCommandBuffers      = &ectx->buf,

        .pSignalSemaphores    = frame->sem,
        .signalSemaphoreCount = planes,
    };

    VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
    for (int i = 0; i < planes; i++)
        wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

    switch (pmode) {
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_RO_SHADER:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_READ_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_EXTERNAL_EXPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        s_info.pWaitSemaphores = frame->sem;
        s_info.pWaitDstStageMask = wait_st;
        s_info.waitSemaphoreCount = planes;
        break;
    }

    ret = vkBeginCommandBuffer(ectx->buf, &cmd_start);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    /* Change the image layout to the target layout for the given mode.
     * This also signals the newly created semaphore, making it usable
     * for synchronization */
    for (int i = 0; i < planes; i++) {
        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[i].srcAccessMask = 0x0;
        img_bar[i].dstAccessMask = new_access;
        img_bar[i].oldLayout = frame->layout[i];
        img_bar[i].newLayout = new_layout;
        img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[i].dstQueueFamilyIndex = dst_qf;
        img_bar[i].image = frame->img[i];
        img_bar[i].subresourceRange.levelCount = 1;
        img_bar[i].subresourceRange.layerCount = 1;
        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[i].newLayout;
        frame->access[i] = img_bar[i].dstAccessMask;
    }

    vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                         0, NULL, 0, NULL, planes, img_bar);

    ret = vkEndCommandBuffer(ectx->buf);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence);
    if (ret != VK_SUCCESS) {
        return AVERROR_EXTERNAL;
    } else {
        vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &ectx->fence);
    }

    return 0;
}

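/* Creates one VkImage and semaphore per plane, with each image sized to
 * its (possibly chroma-subsampled) plane dimensions and shared across all
 * used queue families */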
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    enum AVPixelFormat format = hwfc->sw_format;
    const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
    const int planes = av_pix_fmt_count_planes(format);

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
    };

    AVVkFrame *f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }

    /* Create the images */
    for (int i = 0; i < planes; i++) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
        int w = hwfc->width;
        int h = hwfc->height;
        const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
        const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;

        VkImageCreateInfo image_create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = create_pnext,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = img_fmts[i],
            .extent.width          = p_w,
            .extent.height         = p_h,
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = 1,
            .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
            .tiling                = tiling,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage                 = usage,
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->qfs,
            .queueFamilyIndexCount = p->num_qfs,
            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                      VK_SHARING_MODE_EXCLUSIVE,
        };

        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
                            hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Create semaphore */
        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
                                hwctx->alloc, &f->sem[i]);
1407         if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }
1412
1413         f->layout[i] = image_create_info.initialLayout;
1414         f->access[i] = 0x0;
1415     }
1416
1417     f->flags     = 0x0;
1418     f->tiling    = tiling;
1419
1420     *frame = f;
1421     return 0;
1422
1423 fail:
1424     vulkan_frame_free(hwfc, (uint8_t *)f);
1425     return err;
1426 }
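
/* A sketch of the typical call, as made from vulkan_frames_init() below:
 *     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
 *                        hwctx->create_pnext);
 * One VkImage and one VkSemaphore are created per plane, so a two-plane
 * format such as NV12 fills f->img[0] (full-size luma) and f->img[1]
 * (subsampled chroma) with differently-sized images. */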
1427
/* Checks whether an external memory handle type is supported for the current
 * frame parameters; if it is, ORs it into *iexp and adds its compatible
 * handle types to *comp_handle_types */
1429 static void try_export_flags(AVHWFramesContext *hwfc,
1430                              VkExternalMemoryHandleTypeFlags *comp_handle_types,
1431                              VkExternalMemoryHandleTypeFlagBits *iexp,
1432                              VkExternalMemoryHandleTypeFlagBits exp)
1433 {
1434     VkResult ret;
1435     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1436     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
1437     VkExternalImageFormatProperties eprops = {
1438         .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
1439     };
1440     VkImageFormatProperties2 props = {
1441         .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
1442         .pNext = &eprops,
1443     };
1444     VkPhysicalDeviceExternalImageFormatInfo enext = {
1445         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
1446         .handleType = exp,
1447     };
1448     VkPhysicalDeviceImageFormatInfo2 pinfo = {
1449         .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
1450         .pNext  = !exp ? NULL : &enext,
1451         .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
1452         .type   = VK_IMAGE_TYPE_2D,
1453         .tiling = hwctx->tiling,
1454         .usage  = hwctx->usage,
1455         .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
1456     };
1457
1458     ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
1459                                                     &pinfo, &props);
1460     if (ret == VK_SUCCESS) {
1461         *iexp |= exp;
1462         *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
1463     }
1464 }
1465
1466 static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
1467 {
1468     int err;
1469     AVVkFrame *f;
1470     AVBufferRef *avbuf = NULL;
1471     AVHWFramesContext *hwfc = opaque;
1472     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1473     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1474     VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
1475     VkExternalMemoryHandleTypeFlags e = 0x0;
1476
1477     VkExternalMemoryImageCreateInfo eiinfo = {
1478         .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
1479         .pNext       = hwctx->create_pnext,
1480     };
1481
1482     if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
1483         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1484                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
1485
1486     if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
1487         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1488                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1489
1490     for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
1491         eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
1492         eminfo[i].pNext       = hwctx->alloc_pnext[i];
1493         eminfo[i].handleTypes = e;
1494     }
1495
1496     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1497                        eiinfo.handleTypes ? &eiinfo : NULL);
1498     if (err)
1499         return NULL;
1500
1501     err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
1502     if (err)
1503         goto fail;
1504
1505     err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_WRITE);
1506     if (err)
1507         goto fail;
1508
1509     avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
1510                              vulkan_frame_free, hwfc, 0);
1511     if (!avbuf)
1512         goto fail;
1513
1514     return avbuf;
1515
1516 fail:
1517     vulkan_frame_free(hwfc, (uint8_t *)f);
1518     return NULL;
1519 }
1520
1521 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
1522 {
1523     VulkanFramesPriv *fp = hwfc->internal->priv;
1524
1525     free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1526 }
1527
1528 static int vulkan_frames_init(AVHWFramesContext *hwfc)
1529 {
1530     int err;
1531     AVVkFrame *f;
1532     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1533     VulkanFramesPriv *fp = hwfc->internal->priv;
1534     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
1535     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1536
1537     if (hwfc->pool)
1538         return 0;
1539
1540     /* Default pool flags */
1541     hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
1542                     VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
1543
1544     hwctx->usage |= DEFAULT_USAGE_FLAGS;
1545
1546     err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
1547                           dev_hwctx->queue_family_tx_index);
1548     if (err)
1549         return err;
1550
1551     /* Test to see if allocation will fail */
1552     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1553                        hwctx->create_pnext);
1554     if (err) {
        free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1556         return err;
1557     }
1558
1559     vulkan_frame_free(hwfc, (uint8_t *)f);
1560
1561     hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
1562                                                          hwfc, vulkan_pool_alloc,
1563                                                          NULL);
1564     if (!hwfc->internal->pool_internal) {
        free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1566         return AVERROR(ENOMEM);
1567     }
1568
1569     return 0;
1570 }
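
/* For reference, a minimal client-side setup that ends up in
 * vulkan_frames_init() above (illustrative sketch, error checks omitted):
 *     AVBufferRef *frames_ref = av_hwframe_ctx_alloc(device_ref);
 *     AVHWFramesContext *fc   = (AVHWFramesContext *)frames_ref->data;
 *     fc->format    = AV_PIX_FMT_VULKAN;
 *     fc->sw_format = AV_PIX_FMT_NV12;
 *     fc->width     = 1280;
 *     fc->height    = 720;
 *     av_hwframe_ctx_init(frames_ref);
 * Leaving hwctx->tiling and hwctx->usage at zero picks the defaults above. */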
1571
1572 static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
1573 {
1574     frame->buf[0] = av_buffer_pool_get(hwfc->pool);
1575     if (!frame->buf[0])
1576         return AVERROR(ENOMEM);
1577
1578     frame->data[0] = frame->buf[0]->data;
1579     frame->format  = AV_PIX_FMT_VULKAN;
1580     frame->width   = hwfc->width;
1581     frame->height  = hwfc->height;
1582
1583     return 0;
1584 }
1585
1586 static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
1587                                        enum AVHWFrameTransferDirection dir,
1588                                        enum AVPixelFormat **formats)
1589 {
1590     enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
1591     if (!fmts)
1592         return AVERROR(ENOMEM);
1593
1594     fmts[0] = hwfc->sw_format;
1595     fmts[1] = AV_PIX_FMT_NONE;
1596
1597     *formats = fmts;
1598     return 0;
1599 }
1600
1601 typedef struct VulkanMapping {
1602     AVVkFrame *frame;
1603     int flags;
1604 } VulkanMapping;
1605
1606 static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
1607 {
1608     VulkanMapping *map = hwmap->priv;
1609     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1610     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1611
1612     /* Check if buffer needs flushing */
1613     if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
1614         !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1615         VkResult ret;
1616         VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
1617
1618         for (int i = 0; i < planes; i++) {
1619             flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1620             flush_ranges[i].memory = map->frame->mem[i];
1621             flush_ranges[i].size   = VK_WHOLE_SIZE;
1622         }
1623
1624         ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
1625                                         flush_ranges);
1626         if (ret != VK_SUCCESS) {
1627             av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
1628                    vk_ret2str(ret));
1629         }
1630     }
1631
1632     for (int i = 0; i < planes; i++)
1633         vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
1634
1635     av_free(map);
1636 }
1637
1638 static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
1639                                    const AVFrame *src, int flags)
1640 {
1641     VkResult ret;
1642     int err, mapped_mem_count = 0;
1643     AVVkFrame *f = (AVVkFrame *)src->data[0];
1644     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1645     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1646
1647     VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
1648     if (!map)
        return AVERROR(ENOMEM);
1650
1651     if (src->format != AV_PIX_FMT_VULKAN) {
1652         av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
1653                av_get_pix_fmt_name(src->format));
1654         err = AVERROR(EINVAL);
1655         goto fail;
1656     }
1657
    if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
        (f->tiling != VK_IMAGE_TILING_LINEAR)) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, the memory must be "
               "host-visible and the image linearly tiled!\n");
1662         err = AVERROR(EINVAL);
1663         goto fail;
1664     }
1665
1666     dst->width  = src->width;
1667     dst->height = src->height;
1668
1669     for (int i = 0; i < planes; i++) {
1670         ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
1671                           VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
1672         if (ret != VK_SUCCESS) {
1673             av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
1674                 vk_ret2str(ret));
1675             err = AVERROR_EXTERNAL;
1676             goto fail;
1677         }
1678         mapped_mem_count++;
1679     }
1680
1681     /* Check if the memory contents matter */
1682     if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
1683         !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1684         VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
1685         for (int i = 0; i < planes; i++) {
1686             map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1687             map_mem_ranges[i].size = VK_WHOLE_SIZE;
1688             map_mem_ranges[i].memory = f->mem[i];
1689         }
1690
1691         ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
1692                                              map_mem_ranges);
1693         if (ret != VK_SUCCESS) {
1694             av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
1695                    vk_ret2str(ret));
1696             err = AVERROR_EXTERNAL;
1697             goto fail;
1698         }
1699     }
1700
1701     for (int i = 0; i < planes; i++) {
1702         VkImageSubresource sub = {
1703             .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1704         };
1705         VkSubresourceLayout layout;
1706         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
1707         dst->linesize[i] = layout.rowPitch;
1708     }
1709
1710     map->frame = f;
1711     map->flags = flags;
1712
1713     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
1714                                 &vulkan_unmap_frame, map);
1715     if (err < 0)
1716         goto fail;
1717
1718     return 0;
1719
1720 fail:
1721     for (int i = 0; i < mapped_mem_count; i++)
1722         vkUnmapMemory(hwctx->act_dev, f->mem[i]);
1723
1724     av_free(map);
1725     return err;
1726 }
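
/* Mapping is normally reached through the public API (illustrative sketch):
 *     AVFrame *sw = av_frame_alloc();
 *     sw->format  = hwfc->sw_format;
 *     err = av_hwframe_map(sw, vk_frame, AV_HWFRAME_MAP_READ);
 * which dispatches here via vulkan_map_from() further below, provided the
 * frame's memory is host-visible and the image linearly tiled. */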
1727
1728 #if CONFIG_LIBDRM
1729 static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
1730 {
1731     VulkanMapping *map = hwmap->priv;
1732     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1733     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1734
1735     for (int i = 0; i < planes; i++) {
1736         vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
1737         vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
1738         vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
1739     }
1740
    av_freep(&map->frame);
    av_free(map);
1742 }
1743
1744 static const struct {
1745     uint32_t drm_fourcc;
1746     VkFormat vk_format;
1747 } vulkan_drm_format_map[] = {
1748     { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
1749     { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
1750     { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
1751     { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
1752     { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
1753     { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
1754     { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
1755     { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
1756     { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
1757     { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
1758 };
1759
1760 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
1761 {
1762     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
1763         if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
1764             return vulkan_drm_format_map[i].vk_format;
1765     return VK_FORMAT_UNDEFINED;
1766 }
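
/* Worked example: an NV12 surface exported with separate layers would
 * typically arrive as
 *     layer 0: DRM_FORMAT_R8   -> VK_FORMAT_R8_UNORM   (luma)
 *     layer 1: DRM_FORMAT_GR88 -> VK_FORMAT_R8G8_UNORM (chroma pairs)
 * which is why only single-plane fourccs need to appear in the map above.
 * (Illustrative; the exact layering depends on the exporting driver.) */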
1767
1768 static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
1769                                           AVDRMFrameDescriptor *desc)
1770 {
1771     int err = 0;
1772     VkResult ret;
1773     AVVkFrame *f;
1774     int bind_counts = 0;
1775     AVHWDeviceContext *ctx = hwfc->device_ctx;
1776     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1777     VulkanDevicePriv *p = ctx->internal->priv;
1778     const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
1779     const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
1780     VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
1781     VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
1782     VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
1783     VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
1784
1785     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
1786
1787     for (int i = 0; i < desc->nb_layers; i++) {
1788         if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
1789             av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
1790                    desc->layers[i].format);
1791             return AVERROR(EINVAL);
1792         }
1793     }
1794
1795     if (!(f = av_vk_frame_alloc())) {
1796         av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
1797         err = AVERROR(ENOMEM);
1798         goto fail;
1799     }
1800
1801     for (int i = 0; i < desc->nb_objects; i++) {
1802         VkMemoryFdPropertiesKHR fdmp = {
1803             .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
1804         };
1805         VkMemoryRequirements req = {
1806             .size = desc->objects[i].size,
1807         };
1808         VkImportMemoryFdInfoKHR idesc = {
1809             .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
1810             .handleType = htype,
1811             .fd         = dup(desc->objects[i].fd),
1812         };
1813
1814         ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
1815                                              idesc.fd, &fdmp);
1816         if (ret != VK_SUCCESS) {
1817             av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
1818                    vk_ret2str(ret));
1819             err = AVERROR_EXTERNAL;
1820             close(idesc.fd);
1821             goto fail;
1822         }
1823
1824         req.memoryTypeBits = fdmp.memoryTypeBits;
1825
1826         err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1827                         &idesc, &f->flags, &f->mem[i]);
1828         if (err) {
1829             close(idesc.fd);
1830             return err;
1831         }
1832
1833         f->size[i] = desc->objects[i].size;
1834     }
1835
1836     f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
1837                 desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
1838                 VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
1839
1840     for (int i = 0; i < desc->nb_layers; i++) {
1841         const int planes = desc->layers[i].nb_planes;
1842         const int signal_p = has_modifiers && (planes > 1);
1843
1844         VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
1845             .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
1846             .drmFormatModifier = desc->objects[0].format_modifier,
1847             .drmFormatModifierPlaneCount = planes,
1848             .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
1849         };
1850
1851         VkExternalMemoryImageCreateInfo einfo = {
1852             .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
1853             .pNext       = has_modifiers ? &drm_info : NULL,
1854             .handleTypes = htype,
1855         };
1856
1857         VkSemaphoreCreateInfo sem_spawn = {
1858             .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
1859         };
1860
1861         const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
1862         const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
1863
1864         VkImageCreateInfo image_create_info = {
1865             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1866             .pNext                 = &einfo,
1867             .imageType             = VK_IMAGE_TYPE_2D,
1868             .format                = drm_to_vulkan_fmt(desc->layers[i].format),
1869             .extent.width          = p_w,
1870             .extent.height         = p_h,
1871             .extent.depth          = 1,
1872             .mipLevels             = 1,
1873             .arrayLayers           = 1,
1874             .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
1875             .tiling                = f->tiling,
1876             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
1877             .usage                 = DEFAULT_USAGE_FLAGS,
1878             .samples               = VK_SAMPLE_COUNT_1_BIT,
1879             .pQueueFamilyIndices   = p->qfs,
1880             .queueFamilyIndexCount = p->num_qfs,
1881             .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
1882                                                       VK_SHARING_MODE_EXCLUSIVE,
1883         };
1884
1885         for (int j = 0; j < planes; j++) {
1886             plane_data[j].offset     = desc->layers[i].planes[j].offset;
1887             plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
1888             plane_data[j].size       = 0; /* The specs say so for all 3 */
1889             plane_data[j].arrayPitch = 0;
1890             plane_data[j].depthPitch = 0;
1891         }
1892
1893         /* Create image */
1894         ret = vkCreateImage(hwctx->act_dev, &image_create_info,
1895                             hwctx->alloc, &f->img[i]);
1896         if (ret != VK_SUCCESS) {
1897             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
1898                    vk_ret2str(ret));
1899             err = AVERROR(EINVAL);
1900             goto fail;
1901         }
1902
1903         ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
1904                                 hwctx->alloc, &f->sem[i]);
1905         if (ret != VK_SUCCESS) {
1906             av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
1907                    vk_ret2str(ret));
1908             return AVERROR_EXTERNAL;
1909         }
1910
1911         /* We'd import a semaphore onto the one we created using
1912          * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
1913          * offer us anything we could import and sync with, so instead
1914          * just signal the semaphore we created. */
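        /* For reference, had a sync FD been available the import would look
         * roughly like this (hypothetical sketch; sync_fd is assumed):
         *     VkImportSemaphoreFdInfoKHR imp = {
         *         .sType      = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
         *         .semaphore  = f->sem[i],
         *         .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
         *         .flags      = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
         *         .fd         = sync_fd,
         *     };
         *     VK_LOAD_PFN(hwctx->inst, vkImportSemaphoreFdKHR);
         *     ret = pfn_vkImportSemaphoreFdKHR(hwctx->act_dev, &imp);
         */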
1915
1916         f->layout[i] = image_create_info.initialLayout;
1917         f->access[i] = 0x0;
1918
1919         for (int j = 0; j < planes; j++) {
1920             VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
1921                                            j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
1922                                                     VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
1923
1924             plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
1925             plane_info[bind_counts].planeAspect = aspect;
1926
1927             bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1928             bind_info[bind_counts].pNext  = signal_p ? &plane_info[bind_counts] : NULL;
1929             bind_info[bind_counts].image  = f->img[i];
1930             bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
1931             bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
1932             bind_counts++;
1933         }
1934     }
1935
1936     /* Bind the allocated memory to the images */
1937     ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
1938     if (ret != VK_SUCCESS) {
1939         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
1940                vk_ret2str(ret));
1941         return AVERROR_EXTERNAL;
1942     }
1943
    /* NOTE: This becomes unnecessary once we can import semaphores from DRM.
     * Until then, we have to signal the semaphores we created ourselves.
     * We're reusing the exec context that's also used for uploads/downloads. */
1947     err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_RO_SHADER);
1948     if (err)
1949         goto fail;
1950
1951     *frame = f;
1952
1953     return 0;
1954
1955 fail:
1956     for (int i = 0; i < desc->nb_layers; i++) {
1957         vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
1958         vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
1959     }
1960     for (int i = 0; i < desc->nb_objects; i++)
1961         vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
1962
1963     av_free(f);
1964
1965     return err;
1966 }
1967
1968 static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
1969                                const AVFrame *src, int flags)
1970 {
1971     int err = 0;
1972     AVVkFrame *f;
1973     VulkanMapping *map = NULL;
1974
1975     err = vulkan_map_from_drm_frame_desc(hwfc, &f,
1976                                          (AVDRMFrameDescriptor *)src->data[0]);
1977     if (err)
1978         return err;
1979
1980     /* The unmapping function will free this */
1981     dst->data[0] = (uint8_t *)f;
1982     dst->width   = src->width;
1983     dst->height  = src->height;
1984
1985     map = av_mallocz(sizeof(VulkanMapping));
    if (!map) {
        err = AVERROR(ENOMEM);
        goto fail;
    }
1988
1989     map->frame = f;
1990     map->flags = flags;
1991
1992     err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
1993                                 &vulkan_unmap_from, map);
1994     if (err < 0)
1995         goto fail;
1996
1997     av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
1998
1999     return 0;
2000
2001 fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
2003     av_free(map);
2004     return err;
2005 }
2006
2007 #if CONFIG_VAAPI
2008 static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
2009                                  AVFrame *dst, const AVFrame *src,
2010                                  int flags)
2011 {
2012     int err;
2013     AVFrame *tmp = av_frame_alloc();
2014     AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2015     AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
2016     VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
2017
2018     if (!tmp)
2019         return AVERROR(ENOMEM);
2020
2021     /* We have to sync since like the previous comment said, no semaphores */
2022     vaSyncSurface(vaapi_ctx->display, surface_id);
2023
2024     tmp->format = AV_PIX_FMT_DRM_PRIME;
2025
2026     err = av_hwframe_map(tmp, src, flags);
2027     if (err < 0)
2028         goto fail;
2029
2030     err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
2031     if (err < 0)
2032         goto fail;
2033
2034     err = ff_hwframe_map_replace(dst, src);
2035
2036 fail:
2037     av_frame_free(&tmp);
2038     return err;
2039 }
2040 #endif
2041 #endif
2042
2043 #if CONFIG_CUDA
2044 static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
2045                                  AVBufferRef *cuda_hwfc,
2046                                  const AVFrame *frame)
2047 {
2048     int err;
2049     VkResult ret;
2050     AVVkFrame *dst_f;
2051     AVVkFrameInternal *dst_int;
2052     AVHWDeviceContext *ctx = hwfc->device_ctx;
2053     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2054     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2055     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2056     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
2057     VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
2058
2059     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
2060     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2061     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2062     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2063     CudaFunctions *cu = cu_internal->cuda_dl;
2064     CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
2065                                                      CU_AD_FORMAT_UNSIGNED_INT8;
2066
2067     dst_f = (AVVkFrame *)frame->data[0];
2068
2069     dst_int = dst_f->internal;
2070     if (!dst_int || !dst_int->cuda_fc_ref) {
2071         if (!dst_f->internal)
2072             dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
2073
2074         if (!dst_int) {
2075             err = AVERROR(ENOMEM);
2076             goto fail;
2077         }
2078
2079         dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
2080         if (!dst_int->cuda_fc_ref) {
2081             err = AVERROR(ENOMEM);
2082             goto fail;
2083         }
2084
2085         for (int i = 0; i < planes; i++) {
2086             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
2087                 .offset = 0,
2088                 .arrayDesc = {
2089                     .Width  = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2090                                     : hwfc->width,
2091                     .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2092                                     : hwfc->height,
2093                     .Depth = 0,
2094                     .Format = cufmt,
2095                     .NumChannels = 1 + ((planes == 2) && i),
2096                     .Flags = 0,
2097                 },
2098                 .numLevels = 1,
2099             };
2100             CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
2101                 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
2102                 .size = dst_f->size[i],
2103             };
2104             VkMemoryGetFdInfoKHR export_info = {
2105                 .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2106                 .memory     = dst_f->mem[i],
2107                 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
2108             };
2109             VkSemaphoreGetFdInfoKHR sem_export = {
2110                 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
2111                 .semaphore = dst_f->sem[i],
2112                 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2113             };
2114             CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
2115                 .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
2116             };
2117
2118             ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
2119                                        &ext_desc.handle.fd);
2120             if (ret != VK_SUCCESS) {
2121                 av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
2122                 err = AVERROR_EXTERNAL;
2123                 goto fail;
2124             }
2125
2126             ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
2127             if (ret < 0) {
2128                 err = AVERROR_EXTERNAL;
2129                 goto fail;
2130             }
2131
2132             ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
2133                                                                        dst_int->ext_mem[i],
2134                                                                        &tex_desc));
2135             if (ret < 0) {
2136                 err = AVERROR_EXTERNAL;
2137                 goto fail;
2138             }
2139
2140             ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
2141                                                         dst_int->cu_mma[i], 0));
2142             if (ret < 0) {
2143                 err = AVERROR_EXTERNAL;
2144                 goto fail;
2145             }
2146
2147             ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
2148                                           &ext_sem_desc.handle.fd);
2149             if (ret != VK_SUCCESS) {
2150                 av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
2151                        vk_ret2str(ret));
2152                 err = AVERROR_EXTERNAL;
2153                 goto fail;
2154             }
2155
2156             ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
2157                                                          &ext_sem_desc));
2158             if (ret < 0) {
2159                 err = AVERROR_EXTERNAL;
2160                 goto fail;
2161             }
2162         }
2163     }
2164
2165     return 0;
2166
2167 fail:
2168     return err;
2169 }
2170
2171 static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
2172                                           AVFrame *dst, const AVFrame *src)
2173 {
2174     int err;
2175     VkResult ret;
2176     CUcontext dummy;
2177     AVVkFrame *dst_f;
2178     AVVkFrameInternal *dst_int;
2179     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2180     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2181
2182     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2183     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2184     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2185     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2186     CudaFunctions *cu = cu_internal->cuda_dl;
2187     CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
2188     CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
2189
2190     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
2191     if (ret < 0) {
2192         err = AVERROR_EXTERNAL;
2193         goto fail;
2194     }
2195
2196     dst_f = (AVVkFrame *)dst->data[0];
2197
    err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
    if (err < 0)
        goto fail;

2202     dst_int = dst_f->internal;
2203
2204     ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
2205                                                      planes, cuda_dev->stream));
2206     if (ret < 0) {
2207         err = AVERROR_EXTERNAL;
2208         goto fail;
2209     }
2210
2211     for (int i = 0; i < planes; i++) {
2212         CUDA_MEMCPY2D cpy = {
2213             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
2214             .srcDevice     = (CUdeviceptr)src->data[i],
2215             .srcPitch      = src->linesize[i],
2216             .srcY          = 0,
2217
2218             .dstMemoryType = CU_MEMORYTYPE_ARRAY,
2219             .dstArray      = dst_int->cu_array[i],
2220             .WidthInBytes  = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2221                                     : hwfc->width) * desc->comp[i].step,
2222             .Height        = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2223                                    : hwfc->height,
2224         };
2225
2226         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
2227         if (ret < 0) {
2228             err = AVERROR_EXTERNAL;
2229             goto fail;
2230         }
2231     }
2232
2233     ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
2234                                                        planes, cuda_dev->stream));
2235     if (ret < 0) {
2236         err = AVERROR_EXTERNAL;
2237         goto fail;
2238     }
2239
2240     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2241
    av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
2243
2244     return 0;
2245
2246 fail:
2247     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2248     vulkan_free_internal(dst_int);
2249     dst_f->internal = NULL;
2250     av_buffer_unref(&dst->buf[0]);
2251     return err;
2252 }
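
/* This path is reached via av_hwframe_transfer_data(vk_frame, cuda_frame, 0)
 * when both FD memory and FD semaphore extensions are enabled (see
 * vulkan_transfer_data_to() below). Note that vulkan_export_to_cuda() caches
 * its imports behind the dst_int->cuda_fc_ref guard, so only the first
 * transfer of a given frame pays the import cost. */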
2253 #endif
2254
2255 static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
2256                          const AVFrame *src, int flags)
2257 {
2258     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2259
2260     switch (src->format) {
2261 #if CONFIG_LIBDRM
2262 #if CONFIG_VAAPI
2263     case AV_PIX_FMT_VAAPI:
2264         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2265             return vulkan_map_from_vaapi(hwfc, dst, src, flags);
2266 #endif
2267     case AV_PIX_FMT_DRM_PRIME:
2268         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2269             return vulkan_map_from_drm(hwfc, dst, src, flags);
2270 #endif
2271     default:
2272         return AVERROR(ENOSYS);
2273     }
2274 }
2275
2276 #if CONFIG_LIBDRM
2277 typedef struct VulkanDRMMapping {
2278     AVDRMFrameDescriptor drm_desc;
2279     AVVkFrame *source;
2280 } VulkanDRMMapping;
2281
2282 static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
2283 {
2284     AVDRMFrameDescriptor *drm_desc = hwmap->priv;
2285
2286     for (int i = 0; i < drm_desc->nb_objects; i++)
2287         close(drm_desc->objects[i].fd);
2288
2289     av_free(drm_desc);
2290 }
2291
2292 static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
2293 {
2294     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
2295         if (vulkan_drm_format_map[i].vk_format == vkfmt)
2296             return vulkan_drm_format_map[i].drm_fourcc;
2297     return DRM_FORMAT_INVALID;
2298 }
2299
2300 static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
2301                              const AVFrame *src, int flags)
2302 {
2303     int err = 0;
2304     VkResult ret;
2305     AVVkFrame *f = (AVVkFrame *)src->data[0];
2306     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2307     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
2308     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2309     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
2310     VkImageDrmFormatModifierPropertiesEXT drm_mod = {
2311         .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
2312     };
2313
2314     AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
2315     if (!drm_desc)
2316         return AVERROR(ENOMEM);
2317
2318     err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_EXTERNAL_EXPORT);
2319     if (err < 0)
2320         goto end;
2321
2322     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
2323     if (err < 0)
2324         goto end;
2325
2326     if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
2327         VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
2328         ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
2329                                                            &drm_mod);
2330         if (ret != VK_SUCCESS) {
2331             av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
2332             err = AVERROR_EXTERNAL;
2333             goto end;
2334         }
2335     }
2336
2337     for (int i = 0; (i < planes) && (f->mem[i]); i++) {
2338         VkMemoryGetFdInfoKHR export_info = {
2339             .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2340             .memory     = f->mem[i],
2341             .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2342         };
2343
2344         ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
2345                                    &drm_desc->objects[i].fd);
2346         if (ret != VK_SUCCESS) {
2347             av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
2348             err = AVERROR_EXTERNAL;
2349             goto end;
2350         }
2351
2352         drm_desc->nb_objects++;
2353         drm_desc->objects[i].size = f->size[i];
2354         drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
2355     }
2356
2357     drm_desc->nb_layers = planes;
2358     for (int i = 0; i < drm_desc->nb_layers; i++) {
2359         VkSubresourceLayout layout;
2360         VkImageSubresource sub = {
2361             .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
2362                           VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2363                           VK_IMAGE_ASPECT_COLOR_BIT,
2364         };
2365         VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
2366
2367         drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
2368         drm_desc->layers[i].nb_planes = 1;
2369
2370         if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
2371             av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
2372             err = AVERROR_PATCHWELCOME;
2373             goto end;
2374         }
2375
2376         drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
2377
2378         if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
2379             continue;
2380
2381         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
2382         drm_desc->layers[i].planes[0].offset       = layout.offset;
2383         drm_desc->layers[i].planes[0].pitch        = layout.rowPitch;
2384     }
2385
2386     dst->width   = src->width;
2387     dst->height  = src->height;
2388     dst->data[0] = (uint8_t *)drm_desc;
2389
2390     av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
2391
2392     return 0;
2393
2394 end:
2395     av_free(drm_desc);
2396     return err;
2397 }
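
/* Export sketch via the public API (illustrative):
 *     AVFrame *drm = av_frame_alloc();
 *     drm->format  = AV_PIX_FMT_DRM_PRIME;
 *     err = av_hwframe_map(drm, vk_frame, AV_HWFRAME_MAP_READ);
 * after which drm->data[0] points at the AVDRMFrameDescriptor filled in
 * above; the exported FDs are closed again when the mapping is unmapped. */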
2398
2399 #if CONFIG_VAAPI
2400 static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
2401                                const AVFrame *src, int flags)
2402 {
2403     int err;
2404     AVFrame *tmp = av_frame_alloc();
2405     if (!tmp)
2406         return AVERROR(ENOMEM);
2407
2408     tmp->format = AV_PIX_FMT_DRM_PRIME;
2409
2410     err = vulkan_map_to_drm(hwfc, tmp, src, flags);
2411     if (err < 0)
2412         goto fail;
2413
2414     err = av_hwframe_map(dst, tmp, flags);
2415     if (err < 0)
2416         goto fail;
2417
2418     err = ff_hwframe_map_replace(dst, src);
2419
2420 fail:
2421     av_frame_free(&tmp);
2422     return err;
2423 }
2424 #endif
2425 #endif
2426
2427 static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
2428                            const AVFrame *src, int flags)
2429 {
2430     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2431
2432     switch (dst->format) {
2433 #if CONFIG_LIBDRM
2434     case AV_PIX_FMT_DRM_PRIME:
2435         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2436             return vulkan_map_to_drm(hwfc, dst, src, flags);
2437 #if CONFIG_VAAPI
2438     case AV_PIX_FMT_VAAPI:
2439         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2440             return vulkan_map_to_vaapi(hwfc, dst, src, flags);
2441 #endif
2442 #endif
2443     default:
2444         return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
2445     }
2446 }
2447
2448 typedef struct ImageBuffer {
2449     VkBuffer buf;
2450     VkDeviceMemory mem;
2451     VkMemoryPropertyFlagBits flags;
2452 } ImageBuffer;
2453
2454 static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
2455 {
2456     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2457     if (!buf)
2458         return;
2459
2460     vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
2461     vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
2462 }
2463
2464 static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height,
2465                       int *stride, VkBufferUsageFlags usage,
2466                       VkMemoryPropertyFlagBits flags, void *create_pnext,
2467                       void *alloc_pnext)
2468 {
2469     int err;
2470     VkResult ret;
2471     VkMemoryRequirements req;
2472     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2473     VulkanDevicePriv *p = ctx->internal->priv;
2474
2475     VkBufferCreateInfo buf_spawn = {
2476         .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2477         .pNext       = create_pnext,
2478         .usage       = usage,
2479         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2480     };
2481
2482     *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
2483     buf_spawn.size = height*(*stride);
2484
2485     ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
2486     if (ret != VK_SUCCESS) {
2487         av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
2488                vk_ret2str(ret));
2489         return AVERROR_EXTERNAL;
2490     }
2491
2492     vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
2493
2494     err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
2495     if (err)
2496         return err;
2497
2498     ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
2499     if (ret != VK_SUCCESS) {
2500         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
2501                vk_ret2str(ret));
2502         free_buf(ctx, buf);
2503         return AVERROR_EXTERNAL;
2504     }
2505
2506     return 0;
2507 }
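
/* Example of the stride rounding above: with an
 * optimalBufferCopyRowPitchAlignment of 128, a 1920-byte luma row keeps its
 * pitch (1920 == 15 * 128) while a 1000-byte row gets padded to 1024.
 * (Illustrative values; the limit is reported per-device.) */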
2508
2509 static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
2510                        int nb_buffers, int invalidate)
2511 {
2512     VkResult ret;
2513     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2514     VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
2515     int invalidate_count = 0;
2516
2517     for (int i = 0; i < nb_buffers; i++) {
2518         ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
2519                           VK_WHOLE_SIZE, 0, (void **)&mem[i]);
2520         if (ret != VK_SUCCESS) {
2521             av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
2522                    vk_ret2str(ret));
2523             return AVERROR_EXTERNAL;
2524         }
2525     }
2526
2527     if (!invalidate)
2528         return 0;
2529
2530     for (int i = 0; i < nb_buffers; i++) {
2531         const VkMappedMemoryRange ival_buf = {
2532             .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
2533             .memory = buf[i].mem,
2534             .size   = VK_WHOLE_SIZE,
2535         };
2536         if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
2537             continue;
2538         invalidate_ctx[invalidate_count++] = ival_buf;
2539     }
2540
2541     if (invalidate_count) {
2542         ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
2543                                              invalidate_ctx);
2544         if (ret != VK_SUCCESS)
2545             av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
2546                    vk_ret2str(ret));
2547     }
2548
2549     return 0;
2550 }
2551
2552 static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
2553                          int nb_buffers, int flush)
2554 {
2555     int err = 0;
2556     VkResult ret;
2557     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2558     VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
2559     int flush_count = 0;
2560
2561     if (flush) {
2562         for (int i = 0; i < nb_buffers; i++) {
2563             const VkMappedMemoryRange flush_buf = {
2564                 .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
2565                 .memory = buf[i].mem,
2566                 .size   = VK_WHOLE_SIZE,
2567             };
2568             if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
2569                 continue;
2570             flush_ctx[flush_count++] = flush_buf;
2571         }
2572     }
2573
2574     if (flush_count) {
2575         ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
2576         if (ret != VK_SUCCESS) {
2577             av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
2578                     vk_ret2str(ret));
2579             err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
2580         }
2581     }
2582
2583     for (int i = 0; i < nb_buffers; i++)
2584         vkUnmapMemory(hwctx->act_dev, buf[i].mem);
2585
2586     return err;
2587 }
2588
2589 static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
2590                               ImageBuffer *buffer, const int *buf_stride, int w,
2591                               int h, enum AVPixelFormat pix_fmt, int to_buf)
2592 {
2593     VkResult ret;
2594     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2595     VulkanDevicePriv *s = ctx->internal->priv;
2596
2597     int bar_num = 0;
2598     VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
2599
2600     const int planes = av_pix_fmt_count_planes(pix_fmt);
2601     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
2602
2603     VkCommandBufferBeginInfo cmd_start = {
2604         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
2605         .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
2606     };
2607
2608     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
2609
2610     VkSubmitInfo s_info = {
2611         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
2612         .commandBufferCount   = 1,
2613         .pCommandBuffers      = &s->cmd.buf,
2614         .pSignalSemaphores    = frame->sem,
2615         .pWaitSemaphores      = frame->sem,
2616         .pWaitDstStageMask    = sem_wait_dst,
2617         .signalSemaphoreCount = planes,
2618         .waitSemaphoreCount   = planes,
2619     };
2620
2621     ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
2622     if (ret != VK_SUCCESS) {
2623         av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
2624                vk_ret2str(ret));
2625         return AVERROR_EXTERNAL;
2626     }
2627
2628     /* Change the image layout to something more optimal for transfers */
2629     for (int i = 0; i < planes; i++) {
2630         VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
2631                                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
2632         VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
2633                                             VK_ACCESS_TRANSFER_WRITE_BIT;
2634
2635         sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
2636
2637         /* If the layout matches and we have read access skip the barrier */
2638         if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
2639             continue;
2640
2641         img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
2642         img_bar[bar_num].srcAccessMask = 0x0;
2643         img_bar[bar_num].dstAccessMask = new_access;
2644         img_bar[bar_num].oldLayout = frame->layout[i];
2645         img_bar[bar_num].newLayout = new_layout;
2646         img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2647         img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2648         img_bar[bar_num].image = frame->img[i];
2649         img_bar[bar_num].subresourceRange.levelCount = 1;
2650         img_bar[bar_num].subresourceRange.layerCount = 1;
2651         img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
2652
2653         frame->layout[i] = img_bar[bar_num].newLayout;
2654         frame->access[i] = img_bar[bar_num].dstAccessMask;
2655
2656         bar_num++;
2657     }
2658
2659     if (bar_num)
2660         vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
2661                              VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
2662                              0, NULL, 0, NULL, bar_num, img_bar);
2663
2664     /* Schedule a copy for each plane */
2665     for (int i = 0; i < planes; i++) {
2666         const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
2667         const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
2668         VkBufferImageCopy buf_reg = {
2669             .bufferOffset = 0,
            /* Buffer stride isn't in bytes but in samples: the implementation
             * uses the image's VkFormat to know how many bytes per sample
             * the buffer has, so we have to convert by dividing. Stupid.
             * Won't work with YUVA or other planar formats with alpha. */
2674             .bufferRowLength = buf_stride[i] / desc->comp[i].step,
2675             .bufferImageHeight = p_h,
2676             .imageSubresource.layerCount = 1,
2677             .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2678             .imageOffset = { 0, 0, 0, },
2679             .imageExtent = { p_w, p_h, 1, },
2680         };
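        /* Worked example of the division above: NV12's chroma plane is
         * VK_FORMAT_R8G8_UNORM, so desc->comp[i].step is 2 bytes per sample
         * and a 2048-byte buffer stride yields bufferRowLength = 1024 texels.
         * (Illustrative numbers.) */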
2681
2682         if (to_buf)
2683             vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
2684                                    buffer[i].buf, 1, &buf_reg);
2685         else
2686             vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i],
2687                                    frame->layout[i], 1, &buf_reg);
2688     }
2689
2690     ret = vkEndCommandBuffer(s->cmd.buf);
2691     if (ret != VK_SUCCESS) {
2692         av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
2693                vk_ret2str(ret));
2694         return AVERROR_EXTERNAL;
2695     }
2696
    /* Submit, then block on the fence until the transfer finishes; the
     * signalled semaphores additionally order any later GPU use of the image */
2699     ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence);
2700     if (ret != VK_SUCCESS) {
2701         av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
2702                vk_ret2str(ret));
2703         return AVERROR_EXTERNAL;
    }

    vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX);
    vkResetFences(hwctx->act_dev, 1, &s->cmd.fence);
2708
2709     return 0;
2710 }
2711
2712 /* Technically we can use VK_EXT_external_memory_host to upload and download,
2713  * however the alignment requirements make this unfeasible as both the pointer
2714  * and the size of each plane need to be aligned to the minimum alignment
2715  * requirement, which on all current implementations (anv, radv) is 4096.
2716  * If the requirement gets relaxed (unlikely) this can easily be implemented. */
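/* (Concretely: uploading a mallocked yuv420p frame that way would need all
 * three plane pointers aligned to 4096 bytes and every plane size rounded up
 * to a 4096-byte multiple, which ordinary AVFrame allocations do not
 * guarantee; an illustrative consequence of the numbers above.) */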
2717 static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
2718                                          const AVFrame *src)
2719 {
2720     int err = 0;
2721     AVFrame tmp;
2722     AVVkFrame *f = (AVVkFrame *)dst->data[0];
2723     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
2724     ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
2725     const int planes = av_pix_fmt_count_planes(src->format);
2726     int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
2727
    if (src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format)) {
2729         av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
2730         return AVERROR(EINVAL);
2731     }
2732
2733     if (src->width > hwfc->width || src->height > hwfc->height)
2734         return AVERROR(EINVAL);
2735
    /* For linear, host visible images */
2737     if (f->tiling == VK_IMAGE_TILING_LINEAR &&
2738         f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
2739         AVFrame *map = av_frame_alloc();
2740         if (!map)
2741             return AVERROR(ENOMEM);
2742         map->format = src->format;
2743
2744         err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
        if (err) {
            av_frame_free(&map);
            goto end;
        }
2747
2748         err = av_frame_copy(map, src);
2749         av_frame_free(&map);
2750         goto end;
2751     }
2752
2753     /* Create buffers */
2754     for (int i = 0; i < planes; i++) {
2755         int h = src->height;
2756         int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
2757
2758         tmp.linesize[i] = FFABS(src->linesize[i]);
2759         err = create_buf(dev_ctx, &buf[i], p_height,
2760                          &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
2761                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
2762         if (err)
2763             goto end;
2764     }
2765
2766     /* Map, copy image to buffer, unmap */
2767     if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0)))
2768         goto end;
2769
2770     av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
2771                   src->linesize, src->format, src->width, src->height);
2772
2773     if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
2774         goto end;
2775
2776     /* Copy buffers to image */
2777     err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
2778                              src->width, src->height, src->format, 0);
2779
2780 end:
2781     for (int i = 0; i < planes; i++)
2782         free_buf(dev_ctx, &buf[i]);
2783
2784     return err;
2785 }
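
/* Upload sketch via the public API (illustrative, error checks omitted):
 *     AVFrame *hw = av_frame_alloc();
 *     av_hwframe_get_buffer(frames_ref, hw, 0);
 *     err = av_hwframe_transfer_data(hw, sw_frame, 0);
 * which arrives here through vulkan_transfer_data_to() below. */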
2786
2787 static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
                                   const AVFrame *src)
2789 {
2790     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2791
2792     switch (src->format) {
2793 #if CONFIG_CUDA
2794     case AV_PIX_FMT_CUDA:
2795         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
2796             (p->extensions & EXT_EXTERNAL_FD_SEM))
2797             return vulkan_transfer_data_from_cuda(hwfc, dst, src);
2798 #endif
2799     default:
2800         if (src->hw_frames_ctx)
2801             return AVERROR(ENOSYS);
2802         else
2803             return vulkan_transfer_data_from_mem(hwfc, dst, src);
2804     }
2805 }
2806
2807 #if CONFIG_CUDA
2808 static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
                                        const AVFrame *src)
2810 {
2811     int err;
2812     VkResult ret;
2813     CUcontext dummy;
2814     AVVkFrame *dst_f;
2815     AVVkFrameInternal *dst_int;
2816     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2817     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2818
2819     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
2820     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2821     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2822     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2823     CudaFunctions *cu = cu_internal->cuda_dl;
2824
2825     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
2826     if (ret < 0) {
2827         err = AVERROR_EXTERNAL;
2828         goto fail;
2829     }
2830
2831     dst_f = (AVVkFrame *)src->data[0];
2832
2833     err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
2834     if (err < 0) {
2835         goto fail;
2836     }
2837
2838     dst_int = dst_f->internal;
2839
2840     for (int i = 0; i < planes; i++) {
2841         CUDA_MEMCPY2D cpy = {
2842             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
2843             .dstDevice     = (CUdeviceptr)dst->data[i],
2844             .dstPitch      = dst->linesize[i],
2845             .dstY          = 0,
2846
2847             .srcMemoryType = CU_MEMORYTYPE_ARRAY,
2848             .srcArray      = dst_int->cu_array[i],
2849             .WidthInBytes  = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2850                                     : hwfc->width) * desc->comp[i].step,
2851             .Height        = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2852                                    : hwfc->height,
2853         };
2854
2855         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
2856         if (ret < 0) {
2857             err = AVERROR_EXTERNAL;
2858             goto fail;
2859         }
2860     }
2861
2862     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2863
2864     av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n");
2865
2866     return 0;
2867
2868 fail:
2869     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2870     vulkan_free_internal(dst_int);
2871     dst_f->internal = NULL;
2872     av_buffer_unref(&dst->buf[0]);
2873     return err;
2874 }
2875 #endif
2876
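/* Downloads a Vulkan frame to system memory: linear, host-visible images
 * are mapped and copied directly, others go through per-plane host-visible
 * staging buffers. */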
static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
                                       const AVFrame *src)
{
    int err = 0;
    AVFrame tmp;
    AVVkFrame *f = (AVVkFrame *)src->data[0];
    AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
    ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
    const int planes = av_pix_fmt_count_planes(dst->format);
    int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;

    if (dst->width > hwfc->width || dst->height > hwfc->height)
        return AVERROR(EINVAL);

    /* For linear, host visible images */
    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
        f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
        /* Map the source image and let av_frame_copy() do the work */
        AVFrame *map = av_frame_alloc();
        if (!map)
            return AVERROR(ENOMEM);
        map->format = dst->format;

        err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
        if (err) {
            av_frame_free(&map);
            return err;
        }

        err = av_frame_copy(dst, map);
        av_frame_free(&map);
        return err;
    }

    /* Create host-visible staging buffers, one per plane */
    for (int i = 0; i < planes; i++) {
        int h = dst->height;
        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;

        tmp.linesize[i] = FFABS(dst->linesize[i]);
        err = create_buf(dev_ctx, &buf[i], p_height,
                         &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
        if (err)
            goto end;
    }

    /* Copy image to buffer */
    if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
                                  dst->width, dst->height, dst->format, 1)))
        goto end;

    /* Map, copy buffer to frame, unmap */
    if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
        goto end;

    av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
                  tmp.linesize, dst->format, dst->width, dst->height);

    err = unmap_buffers(dev_ctx, buf, planes, 0);

end:
    for (int i = 0; i < planes; i++)
        free_buf(dev_ctx, &buf[i]);

    return err;
}

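/* Downloads from a Vulkan frame: CUDA destinations take the direct interop
 * path when the required external memory/semaphore extensions are
 * available, everything else goes through system memory. */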
static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
                                     const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;

    switch (dst->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
        if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
            (p->extensions & EXT_EXTERNAL_FD_SEM))
            return vulkan_transfer_data_to_cuda(hwfc, dst, src);
        /* Fall through to the generic path if the required external
         * memory/semaphore extensions are unavailable */
#endif
    default:
        if (dst->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_data_to_mem(hwfc, dst, src);
    }
}

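/* Allocates a zero-initialized AVVkFrame; the caller frees it with av_free() */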
AVVkFrame *av_vk_frame_alloc(void)
{
    return av_mallocz(sizeof(AVVkFrame));
}

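/* Backend description registered with the generic hwcontext layer */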
const HWContextType ff_hwcontext_type_vulkan = {
    .type                   = AV_HWDEVICE_TYPE_VULKAN,
    .name                   = "Vulkan",

    .device_hwctx_size      = sizeof(AVVulkanDeviceContext),
    .device_priv_size       = sizeof(VulkanDevicePriv),
    .frames_hwctx_size      = sizeof(AVVulkanFramesContext),
    .frames_priv_size       = sizeof(VulkanFramesPriv),

    .device_init            = &vulkan_device_init,
    .device_create          = &vulkan_device_create,
    .device_derive          = &vulkan_device_derive,

    .frames_get_constraints = &vulkan_frames_get_constraints,
    .frames_init            = &vulkan_frames_init,
    .frames_get_buffer      = &vulkan_get_buffer,
    .frames_uninit          = &vulkan_frames_uninit,

    .transfer_get_formats   = &vulkan_transfer_get_formats,
    .transfer_data_to       = &vulkan_transfer_data_to,
    .transfer_data_from     = &vulkan_transfer_data_from,

    .map_to                 = &vulkan_map_to,
    .map_from               = &vulkan_map_from,

    .pix_fmts = (const enum AVPixelFormat []) {
        AV_PIX_FMT_VULKAN,
        AV_PIX_FMT_NONE
    },
};