hwcontext_vulkan: expose enabled device and instance extensions
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include <inttypes.h>
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#if CONFIG_LIBDRM
#include <unistd.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif

typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer buf;
    VkQueue queue;
    VkFence fence;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Properties */
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Image uploading */
    VulkanExecCtx cmd;

    /* Extensions */
    uint64_t extensions;

    /* Settings */
    int use_linear_images;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    VulkanExecCtx cmd;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into CUDA is really expensive, so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem;
#endif
} AVVkFrameInternal;

#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name)           \
                                              vkGetInstanceProcAddr(inst, #name)

#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      |                 \
                             VK_IMAGE_USAGE_STORAGE_BIT      |                 \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT |                 \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)

#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        list = av_realloc_array(list, sizeof(*list), ++count);                 \
        if (!list) {                                                           \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
    } while(0)
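
/* Callers of ADD_VAL_TO_LIST must have an int "err" and a "fail:" cleanup
 * label in scope; the macro grows the list by one entry and duplicates "val"
 * into it, jumping to "fail" on allocation failure. */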

static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[3];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },

    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_ABGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },

    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}
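
/* Illustrative usage (a sketch, not part of this file): a caller needing the
 * per-plane Vulkan formats for a given sw_format might do:
 *
 *     const VkFormat *fmts = av_vkfmt_from_pixfmt(AV_PIX_FMT_NV12);
 *     if (!fmts)
 *         return AVERROR(ENOSYS); // no Vulkan mapping for this format
 *     // fmts[0] == VK_FORMAT_R8_UNORM (luma), fmts[1] == VK_FORMAT_R8G8_UNORM (chroma)
 */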

static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
                               int linear)
{
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}

enum VulkanExtensions {
    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL <<  0, /* VK_EXT_external_memory_dma_buf */
    EXT_DRM_MODIFIER_FLAGS     = 1ULL <<  1, /* VK_EXT_image_drm_format_modifier */
    EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  2, /* VK_KHR_external_memory_fd */
    EXT_EXTERNAL_FD_SEM        = 1ULL <<  3, /* VK_KHR_external_semaphore_fd */

    EXT_OPTIONAL               = 1ULL << 62,
    EXT_REQUIRED               = 1ULL << 63,
};

typedef struct VulkanOptExtension {
    const char *name;
    uint64_t flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    { VK_KHR_SURFACE_EXTENSION_NAME, EXT_OPTIONAL },
};

static const VulkanOptExtension optional_device_exts[] = {
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,               EXT_EXTERNAL_FD_MEMORY,     },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,          EXT_EXTERNAL_DMABUF_MEMORY, },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,        EXT_DRM_MODIFIER_FLAGS,     },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,            EXT_EXTERNAL_FD_SEM,        },
};
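
/* A sketch of how these bits are consumed (this mirrors real checks later in
 * this file, e.g. in vulkan_pool_alloc):
 *
 *     VulkanDevicePriv *p = ctx->internal->priv;
 *     if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
 *         ; // the DMA-BUF export path may be taken
 */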

/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                VkDebugUtilsMessageTypeFlagsEXT messageType,
                                const VkDebugUtilsMessengerCallbackDataEXT *data,
                                void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}

static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        int req = optional_exts[i].flag & EXT_REQUIRED;
        tstr = optional_exts[i].name;

        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found) {
            int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE;
            av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr);
            if (req) {
                err = AVERROR(EINVAL);
                goto fail;
            }
            continue;
        }
        if (!req)
            p->extensions |= optional_exts[i].flag;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);

        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_ERROR, "%s extension \"%s\" not found!\n",
                       mod, token);
                err = AVERROR(EINVAL);
                goto fail;
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    if (extension_names)
        for (int i = 0; i < extensions_found; i++)
            av_free((void *)extension_names[i]);
    av_free(extension_names);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}
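
/* A minimal sketch (not part of this file) of how a user supplies extra
 * extensions through the dictionary parsed above; names are '+'-separated,
 * and any name the implementation does not support is a hard error:
 *
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "instance_extensions",
 *                 "VK_KHR_surface+VK_KHR_get_surface_capabilities2", 0);
 *     av_dict_set(&opts, "device_extensions", "VK_KHR_swapchain", 0);
 */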

/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
    VkApplicationInfo application_info = {
        .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName        = "libavutil",
        .apiVersion         = VK_API_VERSION_1_1,
        .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);
    if (err < 0)
        return err;

    if (debug_mode) {
        static const char *layers[] = { "VK_LAYER_LUNARG_standard_validation" };
        inst_props.ppEnabledLayerNames = layers;
        inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
    }

    /* Try to create the instance */
    ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < inst_props.enabledExtensionCount; i++)
            av_free((void *)inst_props.ppEnabledExtensionNames[i]);
        av_free((void *)inst_props.ppEnabledExtensionNames);
        return AVERROR_EXTERNAL;
    }

    if (debug_mode) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };
        VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);

        pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                           hwctx->alloc, &p->debug_ctx);
    }

    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;

    return 0;
}
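
/* Illustrative only: the validation layer and debug messenger above are
 * enabled by passing a truthy "debug" entry in the device options, e.g.:
 *
 *     av_dict_set(&opts, "debug", "1", 0);
 */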

typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    const char *name; /* Will use this second unless NULL */
    uint32_t pci_device; /* Will use this third unless 0x0 */
    uint32_t vendor_id; /* Last resort to find something deterministic */
    int index; /* Finally fall back to index */
} VulkanDeviceSelection;

static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}

/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_mallocz_array(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_mallocz_array(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            if (!memcmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1) {
        p->dev_is_nvidia = (prop[choice].properties.vendorID == 0x10de);
        hwctx->phys_dev = devices[choice];
    }
    av_free(devices);
    av_free(prop);
    av_free(idp);

    return err;
}

static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    VkQueueFamilyProperties *qs = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int graph_index = -1, comp_index = -1, tx_index = -1;
    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;

    /* First get the number of queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
    if (!qs)
        return AVERROR(ENOMEM);

    /* Finally retrieve the queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);

#define SEARCH_FLAGS(expr, out)                                                \
    for (int i = 0; i < num; i++) {                                            \
        const VkQueueFlagBits flags = qs[i].queueFlags;                        \
        if (expr) {                                                            \
            out = i;                                                           \
            break;                                                             \
        }                                                                      \
    }

    SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)

    SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
                 comp_index)

    SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
                 (i != comp_index), tx_index)

#undef SEARCH_FLAGS
#define QF_FLAGS(flags)                                                        \
    ((flags) & VK_QUEUE_GRAPHICS_BIT      ) ? "(graphics) " : "",              \
    ((flags) & VK_QUEUE_COMPUTE_BIT       ) ? "(compute) "  : "",              \
    ((flags) & VK_QUEUE_TRANSFER_BIT      ) ? "(transfer) " : "",              \
    ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) "   : ""

    if (graph_index < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to find a queue family with graphics support!\n");
        av_free(qs);
        return AVERROR_EXTERNAL;
    }

    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, "
           "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags));

    hwctx->queue_family_index      = graph_index;
    hwctx->queue_family_tx_index   = graph_index;
    hwctx->queue_family_comp_index = graph_index;

    pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index;

    if (comp_index != -1) {
        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, "
               "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags));
        hwctx->queue_family_tx_index                    = comp_index;
        hwctx->queue_family_comp_index                  = comp_index;
        pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index;
    }

    if (tx_index != -1) {
        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, "
               "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags));
        hwctx->queue_family_tx_index                    = tx_index;
        pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index;
    }

#undef QF_FLAGS

    av_free(qs);

    return 0;
}

static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
                           int queue_family_index)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex   = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };

    VkFenceCreateInfo fence_spawn = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
    };

    ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
                        hwctx->alloc, &cmd->fence);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                              hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = cmd->pool;

    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
                     &cmd->queue);

    return 0;
}

static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
{
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    if (cmd->fence)
        vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
    if (cmd->buf)
        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
    if (cmd->pool)
        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
}

static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    free_exec_ctx(ctx, &p->cmd);

    vkDestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx) {
        VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
        pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                            hwctx->alloc);
    }

    vkDestroyInstance(hwctx->inst, hwctx->alloc);

    for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
        av_free((void *)hwctx->enabled_inst_extensions[i]);
    av_free((void *)hwctx->enabled_inst_extensions);

    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
        av_free((void *)hwctx->enabled_dev_extensions[i]);
    av_free((void *)hwctx->enabled_dev_extensions);
}

static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VkDeviceQueueCreateInfo queue_create_info[3] = {
        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
            .pQueuePriorities = (float []){ 1.0f },
            .queueCount       = 1, },
        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
            .pQueuePriorities = (float []){ 1.0f },
            .queueCount       = 1, },
        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
            .pQueuePriorities = (float []){ 1.0f },
            .queueCount       = 1, },
    };

    VkDeviceCreateInfo dev_info = {
        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pQueueCreateInfos    = queue_create_info,
        .queueCreateInfoCount = 0,
    };

    ctx->free = vulkan_device_free;

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts)))
        goto end;

    /* Find a device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyOffsetAlignment:   %"PRIu64"\n",
           p->props.limits.optimalBufferCopyOffsetAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
           p->props.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %zu\n",
           p->props.limits.minMemoryMapAlignment);

    /* Search queue family */
    if ((err = search_queue_families(ctx, &dev_info)))
        goto end;

    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, 0)))
        goto end;

    ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                         &hwctx->act_dev);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Images use optimal (tiled) layout by default; "linear_images" overrides this */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

end:
    return err;
}

static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err;
    uint32_t queue_num;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
    if (!queue_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

#define CHECK_QUEUE(type, n)                                                         \
if (n >= queue_num) {                                                                \
    av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
           type, n, queue_num);                                                      \
    return AVERROR(EINVAL);                                                          \
}

    CHECK_QUEUE("graphics", hwctx->queue_family_index)
    CHECK_QUEUE("upload",   hwctx->queue_family_tx_index)
    CHECK_QUEUE("compute",  hwctx->queue_family_comp_index)

#undef CHECK_QUEUE

    /* Create exec context - if there's something invalid this will error out */
    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
    if (err)
        return err;

    /* Get device capabilities */
    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    return 0;
}

static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}
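
/* Illustrative only: this callback is reached through the generic hwdevice
 * API; the device string is a decimal index or a device-name substring, as
 * parsed above:
 *
 *     AVBufferRef *dev_ref = NULL;
 *     int err = av_hwdevice_ctx_create(&dev_ref, AV_HWDEVICE_TYPE_VULKAN,
 *                                      "0", NULL, 0);
 */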

static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch (src_ctx->type) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;

        const char *vendor = vaQueryVendorString(src_hwctx->display);
        if (!vendor) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
            return AVERROR_EXTERNAL;
        }

        if (strstr(vendor, "Intel"))
            dev_select.vendor_id = 0x8086;
        if (strstr(vendor, "AMD"))
            dev_select.vendor_id = 0x1002;

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
    case AV_HWDEVICE_TYPE_DRM: {
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        drmDevice *drm_dev_info;
        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        count += pixfmt_is_supported(hwctx, i, p->use_linear_images);

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        count++;
#endif

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
            constraints->valid_sw_formats[count++] = i;

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 0;
    constraints->min_height = 0;
    constraints->max_width  = p->props.limits.maxImageDimension2D;
    constraints->max_height = p->props.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}
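
/* Illustrative only: callers reach the constraints above through the public
 * helper, e.g.:
 *
 *     AVHWFramesConstraints *c = av_hwdevice_get_hwframe_constraints(dev_ref, NULL);
 *     if (c) {
 *         // c->valid_sw_formats is an AV_PIX_FMT_NONE-terminated list
 *         av_hwframe_constraints_free(&c);
 *     }
 */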

static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
    VkMemoryAllocateInfo alloc_info = {
        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext           = alloc_extension,
    };

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The Vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
                           dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}

static void vulkan_free_internal(AVVkFrameInternal *internal)
{
    if (!internal)
        return;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        if (internal->cu_sem)
            CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem));

        for (int i = 0; i < planes; i++) {
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    av_free(internal);
}

static void vulkan_frame_free(void *opaque, uint8_t *data)
{
    AVVkFrame *f = (AVVkFrame *)data;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    vulkan_free_internal(f->internal);

    for (int i = 0; i < planes; i++) {
        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
    }

    vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);

    av_free(f);
}

static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    for (int i = 0; i < planes; i++) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[i];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[i])))
            return err;

        f->size[i] = req.memoryRequirements.size;
        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[i].image  = f->img[i];
        bind_info[i].memory = f->mem[i];
    }

    /* Bind the allocated memory to the images */
    ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

enum PrepMode {
    PREP_MODE_WRITE,
    PREP_MODE_RO_SHADER,
};
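
/* Transitions a frame's images into a known layout and signals the frame's
 * semaphore so it can be waited upon: PREP_MODE_WRITE readies the images as
 * transfer destinations, PREP_MODE_RO_SHADER as transfer sources. */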

static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    VkResult ret;
    VkImageLayout new_layout;
    VkAccessFlags new_access;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount   = 1,
        .pCommandBuffers      = &ectx->buf,

        .pSignalSemaphores    = &frame->sem,
        .signalSemaphoreCount = 1,
    };

    switch (pmode) {
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        break;
    case PREP_MODE_RO_SHADER:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_READ_BIT;
        break;
    }

    ret = vkBeginCommandBuffer(ectx->buf, &cmd_start);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    /* Change the image layout to something more optimal for writes.
     * This also signals the newly created semaphore, making it usable
     * for synchronization */
    for (int i = 0; i < planes; i++) {
        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[i].srcAccessMask = 0x0;
        img_bar[i].dstAccessMask = new_access;
        img_bar[i].oldLayout = frame->layout[i];
        img_bar[i].newLayout = new_layout;
        img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[i].image = frame->img[i];
        img_bar[i].subresourceRange.levelCount = 1;
        img_bar[i].subresourceRange.layerCount = 1;
        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[i].newLayout;
        frame->access[i] = img_bar[i].dstAccessMask;
    }

    vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                         0, NULL, 0, NULL, planes, img_bar);

    ret = vkEndCommandBuffer(ectx->buf);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence);
    if (ret != VK_SUCCESS) {
        return AVERROR_EXTERNAL;
    } else {
        vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &ectx->fence);
    }

    return 0;
}

static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    enum AVPixelFormat format = hwfc->sw_format;
    const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
    const int planes = av_pix_fmt_count_planes(format);

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
    };

    AVVkFrame *f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }

    /* Create the images */
    for (int i = 0; i < planes; i++) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
        int w = hwfc->width;
        int h = hwfc->height;
        const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
        const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;

        VkImageCreateInfo image_create_info = {
            .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext         = create_pnext,
            .imageType     = VK_IMAGE_TYPE_2D,
            .format        = img_fmts[i],
            .extent.width  = p_w,
            .extent.height = p_h,
            .extent.depth  = 1,
            .mipLevels     = 1,
            .arrayLayers   = 1,
            .flags         = VK_IMAGE_CREATE_ALIAS_BIT,
            .tiling        = tiling,
            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage         = usage,
            .sharingMode   = VK_SHARING_MODE_EXCLUSIVE,
            .samples       = VK_SAMPLE_COUNT_1_BIT,
        };

        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
                            hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        f->layout[i] = image_create_info.initialLayout;
        f->access[i] = 0x0;
    }

    /* Create semaphore */
    ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
                            hwctx->alloc, &f->sem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
               vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    f->flags     = 0x0;
    f->tiling    = tiling;

    *frame = f;
    return 0;

fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
    return err;
}

/* Checks whether the given external memory handle type is supported for the
 * frame's format/usage, and if so ORs it into *iexp and adds its compatible
 * handle types to *comp_handle_types */
static void try_export_flags(AVHWFramesContext *hwfc,
                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
                             VkExternalMemoryHandleTypeFlagBits *iexp,
                             VkExternalMemoryHandleTypeFlagBits exp)
{
    VkResult ret;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
    VkExternalImageFormatProperties eprops = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };
    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
        .pNext = &eprops,
    };
    VkPhysicalDeviceExternalImageFormatInfo enext = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
        .handleType = exp,
    };
    VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .pNext  = !exp ? NULL : &enext,
        .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
        .type   = VK_IMAGE_TYPE_2D,
        .tiling = hwctx->tiling,
        .usage  = hwctx->usage,
        .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
    };

    ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
                                                    &pinfo, &props);
    if (ret == VK_SUCCESS) {
        *iexp |= exp;
        *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
    }
}
1435
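/* Buffer pool allocation callback: creates an AVVkFrame with exportable
 * memory where the device extensions allow it, binds and transitions it,
 * and wraps it in an AVBufferRef whose free callback releases all of the
 * frame's Vulkan resources. */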
1436 static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
1437 {
1438     int err;
1439     AVVkFrame *f;
1440     AVBufferRef *avbuf = NULL;
1441     AVHWFramesContext *hwfc = opaque;
1442     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1443     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1444     VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
1445     VkExternalMemoryHandleTypeFlags e = 0x0;
1446
1447     VkExternalMemoryImageCreateInfo eiinfo = {
1448         .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
1449         .pNext       = hwctx->create_pnext,
1450     };
1451
1452     if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
1453         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1454                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
1455
1456     if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
1457         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1458                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1459
1460     for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
1461         eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
1462         eminfo[i].pNext       = hwctx->alloc_pnext[i];
1463         eminfo[i].handleTypes = e;
1464     }
1465
1466     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1467                        eiinfo.handleTypes ? &eiinfo : NULL);
1468     if (err)
1469         return NULL;
1470
1471     err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
1472     if (err)
1473         goto fail;
1474
1475     err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_WRITE);
1476     if (err)
1477         goto fail;
1478
1479     avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
1480                              vulkan_frame_free, hwfc, 0);
1481     if (!avbuf)
1482         goto fail;
1483
1484     return avbuf;
1485
1486 fail:
1487     vulkan_frame_free(hwfc, (uint8_t *)f);
1488     return NULL;
1489 }
1490
1491 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
1492 {
1493     VulkanFramesPriv *fp = hwfc->internal->priv;
1494
1495     free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1496 }
1497
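/* Initializes the frames context: picks default tiling and usage flags,
 * creates the transfer exec context, does a test allocation to catch bad
 * parameters early, and sets up the internal pool if the user gave none. */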
1498 static int vulkan_frames_init(AVHWFramesContext *hwfc)
1499 {
1500     int err;
1501     AVVkFrame *f;
1502     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1503     VulkanFramesPriv *fp = hwfc->internal->priv;
1504     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
1505     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1506
1507     if (hwfc->pool)
1508         return 0;
1509
1510     /* Default pool flags */
1511     hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
1512                     VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
1513
1514     hwctx->usage |= DEFAULT_USAGE_FLAGS;
1515
1516     err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
1517                           dev_hwctx->queue_family_tx_index);
1518     if (err)
1519         return err;
1520
1521     /* Test to see if allocation will fail */
1522     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1523                        hwctx->create_pnext);
1524     if (err) {
        free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1526         return err;
1527     }
1528
1529     vulkan_frame_free(hwfc, (uint8_t *)f);
1530
1531     hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
1532                                                          hwfc, vulkan_pool_alloc,
1533                                                          NULL);
1534     if (!hwfc->internal->pool_internal) {
        free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1536         return AVERROR(ENOMEM);
1537     }
1538
1539     return 0;
1540 }
1541
1542 static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
1543 {
1544     frame->buf[0] = av_buffer_pool_get(hwfc->pool);
1545     if (!frame->buf[0])
1546         return AVERROR(ENOMEM);
1547
1548     frame->data[0] = frame->buf[0]->data;
1549     frame->format  = AV_PIX_FMT_VULKAN;
1550     frame->width   = hwfc->width;
1551     frame->height  = hwfc->height;
1552
1553     return 0;
1554 }
1555
1556 static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
1557                                        enum AVHWFrameTransferDirection dir,
1558                                        enum AVPixelFormat **formats)
1559 {
1560     enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
1561     if (!fmts)
1562         return AVERROR(ENOMEM);
1563
1564     fmts[0] = hwfc->sw_format;
1565     fmts[1] = AV_PIX_FMT_NONE;
1566
1567     *formats = fmts;
1568     return 0;
1569 }
1570
1571 typedef struct VulkanMapping {
1572     AVVkFrame *frame;
1573     int flags;
1574 } VulkanMapping;
1575
1576 static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
1577 {
1578     VulkanMapping *map = hwmap->priv;
1579     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1580     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1581
1582     /* Check if buffer needs flushing */
1583     if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
1584         !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1585         VkResult ret;
1586         VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
1587
1588         for (int i = 0; i < planes; i++) {
1589             flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1590             flush_ranges[i].memory = map->frame->mem[i];
1591             flush_ranges[i].size   = VK_WHOLE_SIZE;
1592         }
1593
1594         ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
1595                                         flush_ranges);
1596         if (ret != VK_SUCCESS) {
1597             av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
1598                    vk_ret2str(ret));
1599         }
1600     }
1601
1602     for (int i = 0; i < planes; i++)
1603         vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
1604
1605     av_free(map);
1606 }
1607
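/* Maps a Vulkan frame to host memory. This requires the image to be both
 * linearly tiled and allocated from host-visible memory; non-coherent
 * memory is invalidated here and flushed on unmap if it was written to. */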
1608 static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
1609                                    const AVFrame *src, int flags)
1610 {
1611     VkResult ret;
1612     int err, mapped_mem_count = 0;
1613     AVVkFrame *f = (AVVkFrame *)src->data[0];
1614     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1615     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1616
1617     VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
1618     if (!map)
        return AVERROR(ENOMEM);
1620
1621     if (src->format != AV_PIX_FMT_VULKAN) {
1622         av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
1623                av_get_pix_fmt_name(src->format));
1624         err = AVERROR(EINVAL);
1625         goto fail;
1626     }
1627
1628     if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
1629         !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, it must be "
               "host-visible and linearly tiled!\n");
1632         err = AVERROR(EINVAL);
1633         goto fail;
1634     }
1635
1636     dst->width  = src->width;
1637     dst->height = src->height;
1638
1639     for (int i = 0; i < planes; i++) {
1640         ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
1641                           VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
1642         if (ret != VK_SUCCESS) {
1643             av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
1644                 vk_ret2str(ret));
1645             err = AVERROR_EXTERNAL;
1646             goto fail;
1647         }
1648         mapped_mem_count++;
1649     }
1650
1651     /* Check if the memory contents matter */
1652     if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
1653         !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1654         VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
1655         for (int i = 0; i < planes; i++) {
1656             map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1657             map_mem_ranges[i].size = VK_WHOLE_SIZE;
1658             map_mem_ranges[i].memory = f->mem[i];
1659         }
1660
1661         ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
1662                                              map_mem_ranges);
1663         if (ret != VK_SUCCESS) {
1664             av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
1665                    vk_ret2str(ret));
1666             err = AVERROR_EXTERNAL;
1667             goto fail;
1668         }
1669     }
1670
1671     for (int i = 0; i < planes; i++) {
1672         VkImageSubresource sub = {
1673             .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1674         };
1675         VkSubresourceLayout layout;
1676         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
1677         dst->linesize[i] = layout.rowPitch;
1678     }
1679
1680     map->frame = f;
1681     map->flags = flags;
1682
1683     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
1684                                 &vulkan_unmap_frame, map);
1685     if (err < 0)
1686         goto fail;
1687
1688     return 0;
1689
1690 fail:
1691     for (int i = 0; i < mapped_mem_count; i++)
1692         vkUnmapMemory(hwctx->act_dev, f->mem[i]);
1693
1694     av_free(map);
1695     return err;
1696 }
1697
1698 #if CONFIG_LIBDRM
1699 static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
1700 {
1701     VulkanMapping *map = hwmap->priv;
1702     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1703     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1704
1705     for (int i = 0; i < planes; i++) {
1706         vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
1707         vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
1708     }
1709
1710     vkDestroySemaphore(hwctx->act_dev, map->frame->sem, hwctx->alloc);
1711
1712     av_freep(&map->frame);
1713 }
1714
1715 static const struct {
1716     uint32_t drm_fourcc;
1717     VkFormat vk_format;
1718 } vulkan_drm_format_map[] = {
1719     { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
1720     { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
1721     { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
1722     { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
1723     { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
1724     { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
1725     { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
1726     { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
1727     { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
1728     { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
1729 };
1730
1731 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
1732 {
1733     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
1734         if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
1735             return vulkan_drm_format_map[i].vk_format;
1736     return VK_FORMAT_UNDEFINED;
1737 }
1738
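/* Imports a DRM frame descriptor as an AVVkFrame: every DMABUF object is
 * imported as device memory, one image is created per layer (with explicit
 * DRM format modifiers when supported) and memory is bound per plane. */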
1739 static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
1740                                           AVDRMFrameDescriptor *desc)
1741 {
1742     int err = 0;
1743     VkResult ret;
1744     AVVkFrame *f;
1745     int bind_counts = 0;
1746     AVHWDeviceContext *ctx = hwfc->device_ctx;
1747     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1748     VulkanDevicePriv *p = ctx->internal->priv;
1749     const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
1750     const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
1751     VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
1752     VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
1753     VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
1754     VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
1755     VkSemaphoreCreateInfo sem_spawn = {
1756         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
1757     };
1758
1759     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
1760
1761     for (int i = 0; i < desc->nb_layers; i++) {
1762         if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
1763             av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
1764                    desc->layers[i].format);
1765             return AVERROR(EINVAL);
1766         }
1767     }
1768
1769     if (!(f = av_vk_frame_alloc())) {
1770         av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
1771         err = AVERROR(ENOMEM);
1772         goto fail;
1773     }
1774
1775     for (int i = 0; i < desc->nb_objects; i++) {
1776         VkMemoryFdPropertiesKHR fdmp = {
1777             .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
1778         };
1779         VkMemoryRequirements req = {
1780             .size = desc->objects[i].size,
1781         };
1782         VkImportMemoryFdInfoKHR idesc = {
1783             .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
1784             .handleType = htype,
1785             .fd         = dup(desc->objects[i].fd),
1786         };
1787
1788         ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
1789                                              idesc.fd, &fdmp);
1790         if (ret != VK_SUCCESS) {
1791             av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
1792                    vk_ret2str(ret));
1793             err = AVERROR_EXTERNAL;
1794             close(idesc.fd);
1795             goto fail;
1796         }
1797
1798         req.memoryTypeBits = fdmp.memoryTypeBits;
1799
1800         err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1801                         &idesc, &f->flags, &f->mem[i]);
        if (err) {
            close(idesc.fd);
            goto fail; /* release everything imported so far */
        }
1806
1807         f->size[i] = desc->objects[i].size;
1808     }
1809
1810     f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
1811                 desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
1812                 VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
1813
1814     for (int i = 0; i < desc->nb_layers; i++) {
1815         const int planes = desc->layers[i].nb_planes;
1816         const int signal_p = has_modifiers && (planes > 1);
1817
1818         VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
1819             .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
1820             .drmFormatModifier = desc->objects[0].format_modifier,
1821             .drmFormatModifierPlaneCount = planes,
1822             .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
1823         };
1824
1825         VkExternalMemoryImageCreateInfo einfo = {
1826             .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
1827             .pNext       = has_modifiers ? &drm_info : NULL,
1828             .handleTypes = htype,
1829         };
1830
1831         const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
1832         const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
1833
1834         VkImageCreateInfo image_create_info = {
1835             .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1836             .pNext         = &einfo,
1837             .imageType     = VK_IMAGE_TYPE_2D,
1838             .format        = drm_to_vulkan_fmt(desc->layers[i].format),
1839             .extent.width  = p_w,
1840             .extent.height = p_h,
1841             .extent.depth  = 1,
1842             .mipLevels     = 1,
1843             .arrayLayers   = 1,
1844             .flags         = VK_IMAGE_CREATE_ALIAS_BIT |
1845                              (signal_p ? VK_IMAGE_CREATE_DISJOINT_BIT : 0x0),
1846             .tiling        = f->tiling,
1847             .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
1848             .usage         = DEFAULT_USAGE_FLAGS,
1849             .sharingMode   = VK_SHARING_MODE_EXCLUSIVE,
1850             .samples       = VK_SAMPLE_COUNT_1_BIT,
1851         };
1852
1853         for (int j = 0; j < planes; j++) {
1854             plane_data[j].offset     = desc->layers[i].planes[j].offset;
1855             plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
1856             plane_data[j].size       = 0; /* The specs say so for all 3 */
1857             plane_data[j].arrayPitch = 0;
1858             plane_data[j].depthPitch = 0;
1859         }
1860
1861         /* Create image */
1862         ret = vkCreateImage(hwctx->act_dev, &image_create_info,
1863                             hwctx->alloc, &f->img[i]);
1864         if (ret != VK_SUCCESS) {
1865             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
1866                    vk_ret2str(ret));
1867             err = AVERROR(EINVAL);
1868             goto fail;
1869         }
1870
1871         f->layout[i] = image_create_info.initialLayout;
1872         f->access[i] = 0x0;
1873
1874         for (int j = 0; j < planes; j++) {
1875             VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
1876                                            j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
1877                                                     VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
1878
1879             plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
1880             plane_info[bind_counts].planeAspect = aspect;
1881
1882             bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1883             bind_info[bind_counts].pNext  = signal_p ? &plane_info[bind_counts] : NULL;
1884             bind_info[bind_counts].image  = f->img[i];
1885             bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
1886             bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
1887             bind_counts++;
1888         }
1889     }
1890
1891     ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
1892                             hwctx->alloc, &f->sem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
               vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }
1898
1899     /* We'd import a semaphore onto the one we created using
1900      * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
1901      * offer us anything we could import and sync with, so instead
1902      * just signal the semaphore we created. */
1903
1904     /* Bind the allocated memory to the images */
1905     ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }
1911
    /* NOTE: This will be unnecessary once we can import semaphores from DRM.
     * Until then, we have to signal the semaphores ourselves.
     * We're reusing the exec context that's also used for uploads/downloads. */
1915     err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_RO_SHADER);
1916     if (err)
1917         goto fail;
1918
1919     *frame = f;
1920
1921     return 0;
1922
1923 fail:
1924     for (int i = 0; i < desc->nb_layers; i++)
1925         vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
1926     for (int i = 0; i < desc->nb_objects; i++)
1927         vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
1928     vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
1929
1930     av_free(f);
1931
1932     return err;
1933 }
1934
1935 static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
1936                                const AVFrame *src, int flags)
1937 {
1938     int err = 0;
1939     AVVkFrame *f;
1940     VulkanMapping *map = NULL;
1941
1942     err = vulkan_map_from_drm_frame_desc(hwfc, &f,
1943                                          (AVDRMFrameDescriptor *)src->data[0]);
1944     if (err)
1945         return err;
1946
1947     /* The unmapping function will free this */
1948     dst->data[0] = (uint8_t *)f;
1949     dst->width   = src->width;
1950     dst->height  = src->height;
1951
    map = av_mallocz(sizeof(VulkanMapping));
    if (!map) {
        err = AVERROR(ENOMEM);
        goto fail;
    }
1955
1956     map->frame = f;
1957     map->flags = flags;
1958
1959     err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
1960                                 &vulkan_unmap_from, map);
1961     if (err < 0)
1962         goto fail;
1963
1964     av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
1965
1966     return 0;
1967
1968 fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
1970     av_free(map);
1971     return err;
1972 }
1973
1974 #if CONFIG_VAAPI
1975 static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
1976                                  AVFrame *dst, const AVFrame *src,
1977                                  int flags)
1978 {
1979     int err;
1980     AVFrame *tmp = av_frame_alloc();
1981     AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
1982     AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
1983     VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
1984
1985     if (!tmp)
1986         return AVERROR(ENOMEM);
1987
    /* We have to sync manually; as noted above, there are no semaphores to import */
1989     vaSyncSurface(vaapi_ctx->display, surface_id);
1990
1991     tmp->format = AV_PIX_FMT_DRM_PRIME;
1992
1993     err = av_hwframe_map(tmp, src, flags);
1994     if (err < 0)
1995         goto fail;
1996
1997     err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
1998     if (err < 0)
1999         goto fail;
2000
2001     err = ff_hwframe_map_replace(dst, src);
2002
2003 fail:
2004     av_frame_free(&tmp);
2005     return err;
2006 }
2007 #endif
2008 #endif
2009
2010 #if CONFIG_CUDA
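/* Exports an AVVkFrame to CUDA: each plane's memory is exported as an
 * opaque FD and imported as an external memory object/mipmapped array, and
 * the frame's semaphore is exported for synchronization. The result is
 * cached in the frame's internal struct, so the import happens only once. */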
2011 static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
2012                                  AVBufferRef *cuda_hwfc,
2013                                  const AVFrame *frame)
2014 {
2015     int err;
2016     VkResult ret;
2017     AVVkFrame *dst_f;
2018     AVVkFrameInternal *dst_int;
2019     AVHWDeviceContext *ctx = hwfc->device_ctx;
2020     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2021     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2022     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2023     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
2024     VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
2025
2026     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
2027     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2028     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2029     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2030     CudaFunctions *cu = cu_internal->cuda_dl;
2031     CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
2032                                                      CU_AD_FORMAT_UNSIGNED_INT8;
2033
2034     dst_f = (AVVkFrame *)frame->data[0];
2035
2036     dst_int = dst_f->internal;
2037     if (!dst_int || !dst_int->cuda_fc_ref) {
2038         VkSemaphoreGetFdInfoKHR sem_export = {
2039             .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
2040             .semaphore = dst_f->sem,
2041             .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2042         };
2043         CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
2044             .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
2045         };
2046
2047         if (!dst_f->internal)
2048             dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
2049
2050         if (!dst_int) {
2051             err = AVERROR(ENOMEM);
2052             goto fail;
2053         }
2054
2055         dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
2056         if (!dst_int->cuda_fc_ref) {
2057             err = AVERROR(ENOMEM);
2058             goto fail;
2059         }
2060
2061         for (int i = 0; i < planes; i++) {
2062             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
2063                 .offset = 0,
2064                 .arrayDesc = {
2065                     .Width  = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2066                                     : hwfc->width,
2067                     .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2068                                     : hwfc->height,
2069                     .Depth = 0,
2070                     .Format = cufmt,
2071                     .NumChannels = 1 + ((planes == 2) && i),
2072                     .Flags = 0,
2073                 },
2074                 .numLevels = 1,
2075             };
2076             CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
2077                 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
2078                 .size = dst_f->size[i],
2079             };
2080             VkMemoryGetFdInfoKHR export_info = {
2081                 .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2082                 .memory     = dst_f->mem[i],
2083                 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
2084             };
2085
2086             ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
2087                                        &ext_desc.handle.fd);
2088             if (ret != VK_SUCCESS) {
                av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
2090                 err = AVERROR_EXTERNAL;
2091                 goto fail;
2092             }
2093
2094             ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
2095             if (ret < 0) {
2096                 err = AVERROR_EXTERNAL;
2097                 goto fail;
2098             }
2099
2100             ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
2101                                                                        dst_int->ext_mem[i],
2102                                                                        &tex_desc));
2103             if (ret < 0) {
2104                 err = AVERROR_EXTERNAL;
2105                 goto fail;
2106             }
2107
2108             ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
2109                                                         dst_int->cu_mma[i], 0));
2110             if (ret < 0) {
2111                 err = AVERROR_EXTERNAL;
2112                 goto fail;
2113             }
2114         }
2115
2116         ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
2117                                       &ext_sem_desc.handle.fd);
2118         if (ret != VK_SUCCESS) {
2119             av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
2120                    vk_ret2str(ret));
2121             err = AVERROR_EXTERNAL;
2122             goto fail;
2123         }
2124
2125         ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem,
2126                                                      &ext_sem_desc));
2127         if (ret < 0) {
2128             err = AVERROR_EXTERNAL;
2129             goto fail;
2130         }
2131     }
2132
2133     return 0;
2134
2135 fail:
2136     return err;
2137 }
2138
2139 static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
2140                                           AVFrame *dst, const AVFrame *src)
2141 {
2142     int err;
2143     VkResult ret;
2144     CUcontext dummy;
2145     AVVkFrame *dst_f;
2146     AVVkFrameInternal *dst_int;
2147     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2148     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2149
2150     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2151     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2152     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2153     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2154     CudaFunctions *cu = cu_internal->cuda_dl;
2155     CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par = { 0 };
2156     CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par = { 0 };
2157
    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (ret < 0)
        return AVERROR_EXTERNAL; /* nothing to clean up yet */

    dst_f = (AVVkFrame *)dst->data[0];

    err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
    if (err < 0)
        goto fail;

    dst_int = dst_f->internal;
2171
2172     ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&dst_int->cu_sem, &s_w_par,
2173                                                      1, cuda_dev->stream));
2174     if (ret < 0) {
2175         err = AVERROR_EXTERNAL;
2176         goto fail;
2177     }
2178
2179     for (int i = 0; i < planes; i++) {
2180         CUDA_MEMCPY2D cpy = {
2181             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
2182             .srcDevice     = (CUdeviceptr)src->data[i],
2183             .srcPitch      = src->linesize[i],
2184             .srcY          = 0,
2185
2186             .dstMemoryType = CU_MEMORYTYPE_ARRAY,
2187             .dstArray      = dst_int->cu_array[i],
2188             .WidthInBytes  = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2189                                     : hwfc->width) * desc->comp[i].step,
2190             .Height        = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2191                                    : hwfc->height,
2192         };
2193
2194         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
2195         if (ret < 0) {
2196             err = AVERROR_EXTERNAL;
2197             goto fail;
2198         }
2199     }
2200
2201     ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&dst_int->cu_sem, &s_s_par,
2202                                                        1, cuda_dev->stream));
2203     if (ret < 0) {
2204         err = AVERROR_EXTERNAL;
2205         goto fail;
2206     }
2207
2208     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2209
    av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
2211
2212     return 0;
2213
2214 fail:
2215     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f->internal);
    dst_f->internal = NULL;
2218     av_buffer_unref(&dst->buf[0]);
2219     return err;
2220 }
2221 #endif
2222
2223 static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
2224                          const AVFrame *src, int flags)
2225 {
2226     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2227
2228     switch (src->format) {
2229 #if CONFIG_LIBDRM
2230 #if CONFIG_VAAPI
2231     case AV_PIX_FMT_VAAPI:
2232         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2233             return vulkan_map_from_vaapi(hwfc, dst, src, flags);
2234 #endif
2235     case AV_PIX_FMT_DRM_PRIME:
2236         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2237             return vulkan_map_from_drm(hwfc, dst, src, flags);
2238 #endif
2239     default:
2240         return AVERROR(ENOSYS);
2241     }
2242 }
2243
2244 #if CONFIG_LIBDRM
2245 typedef struct VulkanDRMMapping {
2246     AVDRMFrameDescriptor drm_desc;
2247     AVVkFrame *source;
2248 } VulkanDRMMapping;
2249
2250 static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
2251 {
2252     AVDRMFrameDescriptor *drm_desc = hwmap->priv;
2253
2254     for (int i = 0; i < drm_desc->nb_objects; i++)
2255         close(drm_desc->objects[i].fd);
2256
2257     av_free(drm_desc);
2258 }
2259
2260 static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
2261 {
2262     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
2263         if (vulkan_drm_format_map[i].vk_format == vkfmt)
2264             return vulkan_drm_format_map[i].drm_fourcc;
2265     return DRM_FORMAT_INVALID;
2266 }
2267
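/* Exports an AVVkFrame as a DRM frame descriptor: plane memory is exported
 * as DMABUF FDs and the layer offsets/pitches are queried from the image
 * subresource layout where the tiling permits it. */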
2268 static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
2269                              const AVFrame *src, int flags)
2270 {
2271     int err = 0;
2272     VkResult ret;
2273     AVVkFrame *f = (AVVkFrame *)src->data[0];
2274     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2275     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
2276     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2277     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
2278     VkImageDrmFormatModifierPropertiesEXT drm_mod = {
2279         .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
2280     };
2281
2282     AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
2283     if (!drm_desc)
2284         return AVERROR(ENOMEM);
2285
2286     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
2287     if (err < 0)
2288         goto end;
2289
2290     if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
2291         VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
2292         ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
2293                                                            &drm_mod);
2294         if (ret != VK_SUCCESS) {
2295             av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
2296             err = AVERROR_EXTERNAL;
2297             goto end;
2298         }
2299     }
2300
2301     for (int i = 0; (i < planes) && (f->mem[i]); i++) {
2302         VkMemoryGetFdInfoKHR export_info = {
2303             .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2304             .memory     = f->mem[i],
2305             .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2306         };
2307
2308         ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
2309                                    &drm_desc->objects[i].fd);
2310         if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
2312             err = AVERROR_EXTERNAL;
2313             goto end;
2314         }
2315
2316         drm_desc->nb_objects++;
2317         drm_desc->objects[i].size = f->size[i];
2318         drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
2319     }
2320
2321     drm_desc->nb_layers = planes;
2322     for (int i = 0; i < drm_desc->nb_layers; i++) {
2323         VkSubresourceLayout layout;
2324         VkImageSubresource sub = {
2325             .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
2326                           VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2327                           VK_IMAGE_ASPECT_COLOR_BIT,
2328         };
2329         VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
2330
2331         drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
2332         drm_desc->layers[i].nb_planes = 1;
2333
2334         if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
2335             av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
2336             err = AVERROR_PATCHWELCOME;
2337             goto end;
2338         }
2339
2340         drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
2341
        /* Querying the subresource layout is only valid for linear or
         * DRM-modifier tiled images */
        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
            continue;
2344
2345         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
2346         drm_desc->layers[i].planes[0].offset       = layout.offset;
2347         drm_desc->layers[i].planes[0].pitch        = layout.rowPitch;
2348     }
2349
2350     dst->width   = src->width;
2351     dst->height  = src->height;
2352     dst->data[0] = (uint8_t *)drm_desc;
2353
2354     av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
2355
2356     return 0;
2357
2358 end:
2359     av_free(drm_desc);
2360     return err;
2361 }
2362
2363 #if CONFIG_VAAPI
2364 static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
2365                                const AVFrame *src, int flags)
2366 {
2367     int err;
2368     AVFrame *tmp = av_frame_alloc();
2369     if (!tmp)
2370         return AVERROR(ENOMEM);
2371
2372     tmp->format = AV_PIX_FMT_DRM_PRIME;
2373
2374     err = vulkan_map_to_drm(hwfc, tmp, src, flags);
2375     if (err < 0)
2376         goto fail;
2377
2378     err = av_hwframe_map(dst, tmp, flags);
2379     if (err < 0)
2380         goto fail;
2381
2382     err = ff_hwframe_map_replace(dst, src);
2383
2384 fail:
2385     av_frame_free(&tmp);
2386     return err;
2387 }
2388 #endif
2389 #endif
2390
2391 static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
2392                            const AVFrame *src, int flags)
2393 {
2394     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2395
2396     switch (dst->format) {
2397 #if CONFIG_LIBDRM
2398     case AV_PIX_FMT_DRM_PRIME:
2399         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2400             return vulkan_map_to_drm(hwfc, dst, src, flags);
2401 #if CONFIG_VAAPI
2402     case AV_PIX_FMT_VAAPI:
2403         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2404             return vulkan_map_to_vaapi(hwfc, dst, src, flags);
2405 #endif
2406 #endif
2407     default:
2408         return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
2409     }
2410 }
2411
2412 typedef struct ImageBuffer {
2413     VkBuffer buf;
2414     VkDeviceMemory mem;
2415     VkMemoryPropertyFlagBits flags;
2416 } ImageBuffer;
2417
2418 static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
2419 {
2420     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2421     if (!buf)
2422         return;
2423
2424     vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
2425     vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
2426 }
2427
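/* Creates a host staging buffer for uploads/downloads; the stride is
 * aligned to optimalBufferCopyRowPitchAlignment so the buffer can be used
 * directly as a transfer source/destination. */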
2428 static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height,
2429                       int *stride, VkBufferUsageFlags usage,
2430                       VkMemoryPropertyFlagBits flags, void *create_pnext,
2431                       void *alloc_pnext)
2432 {
2433     int err;
2434     VkResult ret;
2435     VkMemoryRequirements req;
2436     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2437     VulkanDevicePriv *p = ctx->internal->priv;
2438
2439     VkBufferCreateInfo buf_spawn = {
2440         .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2441         .pNext       = create_pnext,
2442         .usage       = usage,
2443         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2444     };
2445
2446     *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
2447     buf_spawn.size = height*(*stride);
2448
    ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &buf->buf);
2450     if (ret != VK_SUCCESS) {
2451         av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
2452                vk_ret2str(ret));
2453         return AVERROR_EXTERNAL;
2454     }
2455
2456     vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
2457
2458     err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
2459     if (err)
2460         return err;
2461
2462     ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
2463     if (ret != VK_SUCCESS) {
2464         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
2465                vk_ret2str(ret));
2466         free_buf(ctx, buf);
2467         return AVERROR_EXTERNAL;
2468     }
2469
2470     return 0;
2471 }
2472
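/* Maps staging buffers to host memory; when requested, non-coherent memory
 * is invalidated so the host sees the device's writes. */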
2473 static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
2474                        int nb_buffers, int invalidate)
2475 {
2476     VkResult ret;
2477     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2478     VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
2479     int invalidate_count = 0;
2480
2481     for (int i = 0; i < nb_buffers; i++) {
2482         ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
2483                           VK_WHOLE_SIZE, 0, (void **)&mem[i]);
2484         if (ret != VK_SUCCESS) {
2485             av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
2486                    vk_ret2str(ret));
2487             return AVERROR_EXTERNAL;
2488         }
2489     }
2490
2491     if (!invalidate)
2492         return 0;
2493
2494     for (int i = 0; i < nb_buffers; i++) {
2495         const VkMappedMemoryRange ival_buf = {
2496             .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
2497             .memory = buf[i].mem,
2498             .size   = VK_WHOLE_SIZE,
2499         };
2500         if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
2501             continue;
2502         invalidate_ctx[invalidate_count++] = ival_buf;
2503     }
2504
2505     if (invalidate_count) {
2506         ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
2507                                              invalidate_ctx);
2508         if (ret != VK_SUCCESS)
2509             av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
2510                    vk_ret2str(ret));
2511     }
2512
2513     return 0;
2514 }
2515
2516 static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
2517                          int nb_buffers, int flush)
2518 {
2519     int err = 0;
2520     VkResult ret;
2521     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2522     VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
2523     int flush_count = 0;
2524
2525     if (flush) {
2526         for (int i = 0; i < nb_buffers; i++) {
2527             const VkMappedMemoryRange flush_buf = {
2528                 .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
2529                 .memory = buf[i].mem,
2530                 .size   = VK_WHOLE_SIZE,
2531             };
2532             if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
2533                 continue;
2534             flush_ctx[flush_count++] = flush_buf;
2535         }
2536     }
2537
2538     if (flush_count) {
2539         ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
2540         if (ret != VK_SUCCESS) {
2541             av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
2542                     vk_ret2str(ret));
2543             err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
2544         }
2545     }
2546
2547     for (int i = 0; i < nb_buffers; i++)
2548         vkUnmapMemory(hwctx->act_dev, buf[i].mem);
2549
2550     return err;
2551 }
2552
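/* Records and submits the actual copies: transitions every plane to a
 * transfer-friendly layout via a pipeline barrier where needed, schedules
 * one copy per plane, then submits, waiting on and signalling the frame's
 * semaphore, and blocks on the fence until the queue has finished. */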
2553 static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
2554                               ImageBuffer *buffer, const int *buf_stride, int w,
2555                               int h, enum AVPixelFormat pix_fmt, int to_buf)
2556 {
2557     VkResult ret;
2558     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2559     VulkanDevicePriv *s = ctx->internal->priv;
2560
2561     int bar_num = 0;
2562     VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
2563
2564     const int planes = av_pix_fmt_count_planes(pix_fmt);
2565     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
2566
2567     VkCommandBufferBeginInfo cmd_start = {
2568         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
2569         .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
2570     };
2571
2572     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
2573
2574     VkSubmitInfo s_info = {
2575         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
2576         .commandBufferCount   = 1,
2577         .pCommandBuffers      = &s->cmd.buf,
2578         .pSignalSemaphores    = &frame->sem,
2579         .pWaitSemaphores      = &frame->sem,
2580         .pWaitDstStageMask    = sem_wait_dst,
2581         .signalSemaphoreCount = 1,
2582         .waitSemaphoreCount   = 1,
2583     };
2584
2585     ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
2586     if (ret != VK_SUCCESS) {
2587         av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
2588                vk_ret2str(ret));
2589         return AVERROR_EXTERNAL;
2590     }
2591
2592     /* Change the image layout to something more optimal for transfers */
2593     for (int i = 0; i < planes; i++) {
2594         VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
2595                                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
2596         VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
2597                                             VK_ACCESS_TRANSFER_WRITE_BIT;
2598
2599         sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
2600
2601         /* If the layout matches and we have read access skip the barrier */
2602         if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
2603             continue;
2604
2605         img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
2606         img_bar[bar_num].srcAccessMask = 0x0;
2607         img_bar[bar_num].dstAccessMask = new_access;
2608         img_bar[bar_num].oldLayout = frame->layout[i];
2609         img_bar[bar_num].newLayout = new_layout;
2610         img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2611         img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2612         img_bar[bar_num].image = frame->img[i];
2613         img_bar[bar_num].subresourceRange.levelCount = 1;
2614         img_bar[bar_num].subresourceRange.layerCount = 1;
2615         img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
2616
2617         frame->layout[i] = img_bar[bar_num].newLayout;
2618         frame->access[i] = img_bar[bar_num].dstAccessMask;
2619
2620         bar_num++;
2621     }
2622
2623     if (bar_num)
2624         vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
2625                              VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
2626                              0, NULL, 0, NULL, bar_num, img_bar);
2627
2628     /* Schedule a copy for each plane */
2629     for (int i = 0; i < planes; i++) {
2630         const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
2631         const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
2632         VkBufferImageCopy buf_reg = {
2633             .bufferOffset = 0,
            /* Buffer stride isn't in bytes, it's in samples; the implementation
             * uses the image's VkFormat to know how many bytes per sample
             * the buffer has, so we have to convert by dividing.
             * This won't work with YUVA or other planar formats with alpha. */
2638             .bufferRowLength = buf_stride[i] / desc->comp[i].step,
2639             .bufferImageHeight = p_h,
2640             .imageSubresource.layerCount = 1,
2641             .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2642             .imageOffset = { 0, 0, 0, },
2643             .imageExtent = { p_w, p_h, 1, },
2644         };
2645
2646         if (to_buf)
2647             vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
2648                                    buffer[i].buf, 1, &buf_reg);
2649         else
2650             vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i],
2651                                    frame->layout[i], 1, &buf_reg);
2652     }
2653
2654     ret = vkEndCommandBuffer(s->cmd.buf);
2655     if (ret != VK_SUCCESS) {
2656         av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
2657                vk_ret2str(ret));
2658         return AVERROR_EXTERNAL;
2659     }
2660
    /* Submit and wait for the transfer to finish. Downloads need the wait so
     * the data is valid on return; for uploads the semaphore alone would
     * suffice, but we wait anyway since the command buffer is reused. */
2663     ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence);
2664     if (ret != VK_SUCCESS) {
2665         av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
2666                vk_ret2str(ret));
2667         return AVERROR_EXTERNAL;
2668     } else {
2669         vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX);
2670         vkResetFences(hwctx->act_dev, 1, &s->cmd.fence);
2671     }
2672
2673     return 0;
2674 }
2675
2676 /* Technically we can use VK_EXT_external_memory_host to upload and download,
2677  * however the alignment requirements make this unfeasible as both the pointer
2678  * and the size of each plane need to be aligned to the minimum alignment
2679  * requirement, which on all current implementations (anv, radv) is 4096.
2680  * If the requirement gets relaxed (unlikely) this can easily be implemented. */
2681 static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
2682                                          const AVFrame *src)
2683 {
2684     int err = 0;
2685     AVFrame tmp;
2686     AVVkFrame *f = (AVVkFrame *)dst->data[0];
2687     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
2688     ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
2689     const int planes = av_pix_fmt_count_planes(src->format);
2690     int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
2691
    if (src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format)) {
2693         av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
2694         return AVERROR(EINVAL);
2695     }
2696
2697     if (src->width > hwfc->width || src->height > hwfc->height)
2698         return AVERROR(EINVAL);
2699
    /* For linear, host visible images */
2701     if (f->tiling == VK_IMAGE_TILING_LINEAR &&
2702         f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
2703         AVFrame *map = av_frame_alloc();
2704         if (!map)
2705             return AVERROR(ENOMEM);
2706         map->format = src->format;
2707
        err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
        if (err) {
            av_frame_free(&map);
            goto end;
        }
2711
2712         err = av_frame_copy(map, src);
2713         av_frame_free(&map);
2714         goto end;
2715     }
2716
2717     /* Create buffers */
2718     for (int i = 0; i < planes; i++) {
2719         int h = src->height;
2720         int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
2721
2722         tmp.linesize[i] = FFABS(src->linesize[i]);
2723         err = create_buf(dev_ctx, &buf[i], p_height,
2724                          &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
2725                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
2726         if (err)
2727             goto end;
2728     }
2729
2730     /* Map, copy image to buffer, unmap */
2731     if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0)))
2732         goto end;
2733
2734     av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
2735                   src->linesize, src->format, src->width, src->height);
2736
2737     if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
2738         goto end;
2739
2740     /* Copy buffers to image */
2741     err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
2742                              src->width, src->height, src->format, 0);
2743
2744 end:
2745     for (int i = 0; i < planes; i++)
2746         free_buf(dev_ctx, &buf[i]);
2747
2748     return err;
2749 }
2750
2751 static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
                                   const AVFrame *src)
2753 {
2754     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2755
2756     switch (src->format) {
2757 #if CONFIG_CUDA
2758     case AV_PIX_FMT_CUDA:
2759         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
2760             (p->extensions & EXT_EXTERNAL_FD_SEM))
2761             return vulkan_transfer_data_from_cuda(hwfc, dst, src);
2762 #endif
2763     default:
2764         if (src->hw_frames_ctx)
2765             return AVERROR(ENOSYS);
2766         else
2767             return vulkan_transfer_data_from_mem(hwfc, dst, src);
2768     }
2769 }
2770
2771 #if CONFIG_CUDA
2772 static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
                                        const AVFrame *src)
2774 {
2775     int err;
2776     VkResult ret;
2777     CUcontext dummy;
2778     AVVkFrame *dst_f;
2779     AVVkFrameInternal *dst_int;
2780     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2781     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2782
2783     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
2784     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2785     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2786     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2787     CudaFunctions *cu = cu_internal->cuda_dl;
2788
    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (ret < 0)
        return AVERROR_EXTERNAL; /* nothing to clean up yet */
2794
2795     dst_f = (AVVkFrame *)src->data[0];
2796
2797     err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
2798     if (err < 0) {
2799         goto fail;
2800     }
2801
2802     dst_int = dst_f->internal;
2803
2804     for (int i = 0; i < planes; i++) {
2805         CUDA_MEMCPY2D cpy = {
2806             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
2807             .dstDevice     = (CUdeviceptr)dst->data[i],
2808             .dstPitch      = dst->linesize[i],
2809             .dstY          = 0,
2810
2811             .srcMemoryType = CU_MEMORYTYPE_ARRAY,
2812             .srcArray      = dst_int->cu_array[i],
2813             .WidthInBytes  = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2814                                     : hwfc->width) * desc->comp[i].step,
2815             .Height        = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2816                                    : hwfc->height,
2817         };
2818
2819         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
2820         if (ret < 0) {
2821             err = AVERROR_EXTERNAL;
2822             goto fail;
2823         }
2824     }
2825
2826     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2827
    av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
2829
2830     return 0;
2831
2832 fail:
2833     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f->internal);
    dst_f->internal = NULL;
2836     av_buffer_unref(&dst->buf[0]);
2837     return err;
2838 }
2839 #endif
2840
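/* Downloads a Vulkan frame to system memory: linear host-visible images
 * are mapped and copied directly, anything else goes through staging
 * buffers and transfer_image_buf(). */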
2841 static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
2842                                        const AVFrame *src)
2843 {
2844     int err = 0;
2845     AVFrame tmp;
2846     AVVkFrame *f = (AVVkFrame *)src->data[0];
2847     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
2848     ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
2849     const int planes = av_pix_fmt_count_planes(dst->format);
2850     int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
2851
2852     if (dst->width > hwfc->width || dst->height > hwfc->height)
2853         return AVERROR(EINVAL);
2854
    /* For linear, host visible images */
2856     if (f->tiling == VK_IMAGE_TILING_LINEAR &&
2857         f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
2858         AVFrame *map = av_frame_alloc();
2859         if (!map)
2860             return AVERROR(ENOMEM);
2861         map->format = dst->format;
2862
        err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
        if (err) {
            av_frame_free(&map);
            return err;
        }
2866
2867         err = av_frame_copy(dst, map);
2868         av_frame_free(&map);
2869         return err;
2870     }
2871
2872     /* Create buffers */
    for (int i = 0; i < planes; i++) {
        int h = dst->height;
        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;

        tmp.linesize[i] = FFABS(dst->linesize[i]);
        err = create_buf(dev_ctx, &buf[i], p_height,
                         &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
        if (err)
            goto end;
    }
2882
2883     /* Copy image to buffer */
2884     if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
2885                                   dst->width, dst->height, dst->format, 1)))
2886         goto end;
2887
2888     /* Map, copy buffer to frame, unmap */
2889     if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
2890         goto end;
2891
2892     av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
2893                   tmp.linesize, dst->format, dst->width, dst->height);
2894
2895     err = unmap_buffers(dev_ctx, buf, planes, 0);
2896
2897 end:
2898     for (int i = 0; i < planes; i++)
2899         free_buf(dev_ctx, &buf[i]);
2900
2901     return err;
2902 }
2903
2904 static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
2905                                      const AVFrame *src)
2906 {
2907     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2908
2909     switch (dst->format) {
2910 #if CONFIG_CUDA
2911     case AV_PIX_FMT_CUDA:
2912         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
2913             (p->extensions & EXT_EXTERNAL_FD_SEM))
2914             return vulkan_transfer_data_to_cuda(hwfc, dst, src);
2915 #endif
2916     default:
2917         if (dst->hw_frames_ctx)
2918             return AVERROR(ENOSYS);
2919         else
2920             return vulkan_transfer_data_to_mem(hwfc, dst, src);
2921     }
2922 }
2923
2924 AVVkFrame *av_vk_frame_alloc(void)
2925 {
2926     return av_mallocz(sizeof(AVVkFrame));
2927 }
2928
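/* Minimal usage sketch through the public hwcontext API (error handling
 * omitted; the sw_format and dimensions are just example values):
 *
 *     AVBufferRef *dev = NULL;
 *     av_hwdevice_ctx_create(&dev, AV_HWDEVICE_TYPE_VULKAN, NULL, NULL, 0);
 *
 *     AVBufferRef *frames = av_hwframe_ctx_alloc(dev);
 *     AVHWFramesContext *fc = (AVHWFramesContext *)frames->data;
 *     fc->format    = AV_PIX_FMT_VULKAN;
 *     fc->sw_format = AV_PIX_FMT_NV12;
 *     fc->width     = 1920;
 *     fc->height    = 1080;
 *     av_hwframe_ctx_init(frames);
 */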
2929 const HWContextType ff_hwcontext_type_vulkan = {
2930     .type                   = AV_HWDEVICE_TYPE_VULKAN,
2931     .name                   = "Vulkan",
2932
2933     .device_hwctx_size      = sizeof(AVVulkanDeviceContext),
2934     .device_priv_size       = sizeof(VulkanDevicePriv),
2935     .frames_hwctx_size      = sizeof(AVVulkanFramesContext),
2936     .frames_priv_size       = sizeof(VulkanFramesPriv),
2937
2938     .device_init            = &vulkan_device_init,
2939     .device_create          = &vulkan_device_create,
2940     .device_derive          = &vulkan_device_derive,
2941
2942     .frames_get_constraints = &vulkan_frames_get_constraints,
2943     .frames_init            = vulkan_frames_init,
2944     .frames_get_buffer      = vulkan_get_buffer,
2945     .frames_uninit          = vulkan_frames_uninit,
2946
2947     .transfer_get_formats   = vulkan_transfer_get_formats,
2948     .transfer_data_to       = vulkan_transfer_data_to,
2949     .transfer_data_from     = vulkan_transfer_data_from,
2950
2951     .map_to                 = vulkan_map_to,
2952     .map_from               = vulkan_map_from,
2953
2954     .pix_fmts = (const enum AVPixelFormat []) {
2955         AV_PIX_FMT_VULKAN,
2956         AV_PIX_FMT_NONE
2957     },
2958 };