hwcontext_vulkan: use the maximum number of queues for each family
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"

#include <inttypes.h>

#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#if CONFIG_LIBDRM
#include <unistd.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif

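/* Holds everything needed to record and submit one-off commands: a command
 * pool, a command buffer, the target queue and a fence to wait on */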
typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer buf;
    VkQueue queue;
    VkFence fence;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Properties */
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;

    /* Queues */
    uint32_t qfs[3];
    int num_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Image uploading */
    VulkanExecCtx cmd;

    /* Extensions */
    uint64_t extensions;

    /* Settings */
    int use_linear_images;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    VulkanExecCtx cmd;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#endif
} AVVkFrameInternal;

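/* Resolves an instance-level Vulkan function pointer at the point of use */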
#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name)           \
                                              vkGetInstanceProcAddr(inst, #name)

#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      |                 \
                             VK_IMAGE_USAGE_STORAGE_BIT      |                 \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT |                 \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)

#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        list = av_realloc_array(list, sizeof(*list), ++count);                 \
        if (!list) {                                                           \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
    } while(0)

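/* Maps an AVPixelFormat to the VkFormat(s) used to back each of its planes */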
static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[3];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },

    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_ABGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },

    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}

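/* Checks that every plane's format supports the default usage flags under the
 * given tiling mode */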
static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
                               int linear)
{
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}

enum VulkanExtensions {
    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL <<  0, /* VK_EXT_external_memory_dma_buf */
    EXT_DRM_MODIFIER_FLAGS     = 1ULL <<  1, /* VK_EXT_image_drm_format_modifier */
    EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  2, /* VK_KHR_external_memory_fd */
    EXT_EXTERNAL_FD_SEM        = 1ULL <<  3, /* VK_KHR_external_semaphore_fd */

    EXT_NO_FLAG                = 1ULL << 63,
};

typedef struct VulkanOptExtension {
    const char *name;
    uint64_t flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    { VK_KHR_SURFACE_EXTENSION_NAME, EXT_NO_FLAG },
};

static const VulkanOptExtension optional_device_exts[] = {
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,               EXT_EXTERNAL_FD_MEMORY,     },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,          EXT_EXTERNAL_DMABUF_MEMORY, },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,        EXT_DRM_MODIFIER_FLAGS,     },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,            EXT_EXTERNAL_FD_SEM,        },
};

/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

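/* Forwards messages from the validation layers to av_log, mapping Vulkan
 * severities to FFmpeg log levels */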
static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                VkDebugUtilsMessageTypeFlagsEXT messageType,
                                const VkDebugUtilsMessengerCallbackDataEXT *data,
                                void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}

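/* Builds the list of extensions to enable: supported optional extensions, the
 * debug extension if requested, and any user-specified ones (all validated
 * against what the instance or device actually supports) */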
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
        p->extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_ERROR, "%s extension \"%s\" not found!\n",
                       mod, token);
                err = AVERROR(EINVAL);
                goto fail;
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    if (extension_names)
        for (int i = 0; i < extensions_found; i++)
            av_free((void *)extension_names[i]);
    av_free(extension_names);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}

/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
    VkApplicationInfo application_info = {
        .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName        = "libavutil",
        .apiVersion         = VK_API_VERSION_1_1,
        .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);
    if (err < 0)
        return err;

    if (debug_mode) {
        static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
        inst_props.ppEnabledLayerNames = layers;
        inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
    }

    /* Try to create the instance */
    ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < inst_props.enabledExtensionCount; i++)
            av_free((void *)inst_props.ppEnabledExtensionNames[i]);
        av_free((void *)inst_props.ppEnabledExtensionNames);
        return AVERROR_EXTERNAL;
    }

    if (debug_mode) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };
        VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);

        pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                           hwctx->alloc, &p->debug_ctx);
    }

    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;

    return 0;
}

474
475 typedef struct VulkanDeviceSelection {
476     uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
477     int has_uuid;
478     const char *name; /* Will use this second unless NULL */
479     uint32_t pci_device; /* Will use this third unless 0x0 */
480     uint32_t vendor_id; /* Last resort to find something deterministic */
481     int index; /* Finally fall back to index */
482 } VulkanDeviceSelection;
483
484 static const char *vk_dev_type(enum VkPhysicalDeviceType type)
485 {
486     switch (type) {
487     case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
488     case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
489     case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
490     case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
491     default:                                     return "unknown";
492     }
493 }
494
495 /* Finds a device */
496 static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
497 {
498     int err = 0, choice = -1;
499     uint32_t num;
500     VkResult ret;
501     VkPhysicalDevice *devices = NULL;
502     VkPhysicalDeviceIDProperties *idp = NULL;
503     VkPhysicalDeviceProperties2 *prop = NULL;
504     VulkanDevicePriv *p = ctx->internal->priv;
505     AVVulkanDeviceContext *hwctx = ctx->hwctx;
506
507     ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
508     if (ret != VK_SUCCESS || !num) {
509         av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
510         return AVERROR(ENODEV);
511     }
512
513     devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
514     if (!devices)
515         return AVERROR(ENOMEM);
516
517     ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
518     if (ret != VK_SUCCESS) {
519         av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
520                vk_ret2str(ret));
521         err = AVERROR(ENODEV);
522         goto end;
523     }
524
525     prop = av_mallocz_array(num, sizeof(*prop));
526     if (!prop) {
527         err = AVERROR(ENOMEM);
528         goto end;
529     }
530
531     idp = av_mallocz_array(num, sizeof(*idp));
532     if (!idp) {
533         err = AVERROR(ENOMEM);
534         goto end;
535     }
536
537     av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
538     for (int i = 0; i < num; i++) {
539         idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
540         prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
541         prop[i].pNext = &idp[i];
542
543         vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
544         av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
545                prop[i].properties.deviceName,
546                vk_dev_type(prop[i].properties.deviceType),
547                prop[i].properties.deviceID);
548     }
549
550     if (select->has_uuid) {
551         for (int i = 0; i < num; i++) {
            if (!memcmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1) {
        p->dev_is_nvidia = (prop[choice].properties.vendorID == 0x10de);
        hwctx->phys_dev = devices[choice];
    }
    av_free(devices);
    av_free(prop);
    av_free(idp);

    return err;
}

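/* Selects the queue families to create: a graphics family always, plus
 * dedicated compute and transfer families when available. Every queue in each
 * chosen family is requested, all at equal priority */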
static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    float *weights;
    VkQueueFamilyProperties *qs = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int graph_index = -1, comp_index = -1, tx_index = -1;
    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;

    /* First get the number of queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
    if (!qs)
        return AVERROR(ENOMEM);

    /* Finally retrieve the queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);

#define SEARCH_FLAGS(expr, out)                                                \
    for (int i = 0; i < num; i++) {                                            \
        const VkQueueFlagBits flags = qs[i].queueFlags;                        \
        if (expr) {                                                            \
            out = i;                                                           \
            break;                                                             \
        }                                                                      \
    }

    SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)

    SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
                 comp_index)

    SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
                 (i != comp_index), tx_index)

#undef SEARCH_FLAGS
#define ADD_QUEUE(fidx, graph, comp, tx)                                                 \
    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
           fidx, qs[fidx].queueCount, graph ? "graphics " : "",                          \
           comp ? "compute " : "", tx ? "transfers " : "");                              \
    av_log(ctx, AV_LOG_VERBOSE, "    QF %i flags: %s%s%s%s\n", fidx,                     \
           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "",         \
           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "",           \
           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "",        \
           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "");    \
    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx;                                \
    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount;                       \
    weights = av_malloc(qs[fidx].queueCount * sizeof(float));                            \
    pc[cd->queueCreateInfoCount].pQueuePriorities = weights;                             \
    if (!weights)                                                                        \
        goto fail;                                                                       \
    for (int i = 0; i < qs[fidx].queueCount; i++)                                        \
        weights[i] = 1.0f;                                                               \
    cd->queueCreateInfoCount++;

    ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
    hwctx->queue_family_index      = graph_index;
    hwctx->queue_family_comp_index = graph_index;
    hwctx->queue_family_tx_index   = graph_index;

    if (comp_index != -1) {
        ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
        hwctx->queue_family_tx_index   = comp_index;
        hwctx->queue_family_comp_index = comp_index;
    }

    if (tx_index != -1) {
        ADD_QUEUE(tx_index, 0, 0, 1)
        hwctx->queue_family_tx_index = tx_index;
    }

#undef ADD_QUEUE
    av_free(qs);

    return 0;

fail:
    av_freep(&pc[0].pQueuePriorities);
    av_freep(&pc[1].pQueuePriorities);
    av_freep(&pc[2].pQueuePriorities);
    av_free(qs);

    return AVERROR(ENOMEM);
}

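/* Creates the command pool, command buffer, fence and queue handle used to
 * submit one-off commands on the given queue family */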
static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
                           int queue_family_index)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex   = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };

    VkFenceCreateInfo fence_spawn = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
    };

    ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
                        hwctx->alloc, &cmd->fence);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                              hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = cmd->pool;

    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
                     &cmd->queue);

    return 0;
}

static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
{
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    if (cmd->fence)
        vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
    if (cmd->buf)
        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
    if (cmd->pool)
        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
}

static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    free_exec_ctx(ctx, &p->cmd);

    vkDestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx) {
        VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
        pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                            hwctx->alloc);
    }

    vkDestroyInstance(hwctx->inst, hwctx->alloc);

    for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
        av_free((void *)hwctx->enabled_inst_extensions[i]);
    av_free((void *)hwctx->enabled_inst_extensions);

    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
        av_free((void *)hwctx->enabled_dev_extensions[i]);
    av_free((void *)hwctx->enabled_dev_extensions);
}

static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VkDeviceQueueCreateInfo queue_create_info[3] = {
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
    };

    VkDeviceCreateInfo dev_info = {
        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pQueueCreateInfos    = queue_create_info,
        .queueCreateInfoCount = 0,
    };

    ctx->free = vulkan_device_free;

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts)))
        goto end;

    /* Find a device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyOffsetAlignment:   %"PRIu64"\n",
           p->props.limits.optimalBufferCopyOffsetAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
           p->props.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %zu\n",
           p->props.limits.minMemoryMapAlignment);

    /* Search queue family */
    if ((err = search_queue_families(ctx, &dev_info)))
        goto end;

    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, 0))) {
        av_free((void *)queue_create_info[0].pQueuePriorities);
        av_free((void *)queue_create_info[1].pQueuePriorities);
        av_free((void *)queue_create_info[2].pQueuePriorities);
        goto end;
    }

    ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                         &hwctx->act_dev);

    av_free((void *)queue_create_info[0].pQueuePriorities);
    av_free((void *)queue_create_info[1].pQueuePriorities);
    av_free((void *)queue_create_info[2].pQueuePriorities);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Tiled images setting, use them by default */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

end:
    return err;
}

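/* Post-creation init: records which optional extensions were enabled,
 * validates the queue family indices and creates the internal transfer
 * execution context */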
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err;
    uint32_t queue_num;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
    if (!queue_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

#define CHECK_QUEUE(type, n)                                                        \
if (n >= queue_num) {                                                               \
    av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %"PRIu32" "    \
           "queue families)!\n", type, n, queue_num);                               \
    return AVERROR(EINVAL);                                                         \
}

    CHECK_QUEUE("graphics", hwctx->queue_family_index)
    CHECK_QUEUE("upload",   hwctx->queue_family_tx_index)
    CHECK_QUEUE("compute",  hwctx->queue_family_comp_index)

#undef CHECK_QUEUE

    p->qfs[p->num_qfs++] = hwctx->queue_family_index;
    if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index;
    if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;

    /* Create exec context - if there's something invalid this will error out */
    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
    if (err)
        return err;

    /* Get device capabilities */
    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    return 0;
}

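/* The device string is either a zero-based index into the GPU list or a
 * substring of a device name */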
static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}

static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch (src_ctx->type) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;

        const char *vendor = vaQueryVendorString(src_hwctx->display);
        if (!vendor) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
            return AVERROR_EXTERNAL;
        }

        if (strstr(vendor, "Intel"))
            dev_select.vendor_id = 0x8086;
        if (strstr(vendor, "AMD"))
            dev_select.vendor_id = 0x1002;

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
    case AV_HWDEVICE_TYPE_DRM: {
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        drmDevice *drm_dev_info;
        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

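/* Reports which software formats can back Vulkan frames, along with the
 * device's maximum 2D image dimensions */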
static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        count += pixfmt_is_supported(hwctx, i, p->use_linear_images);

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        count++;
#endif

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
            constraints->valid_sw_formats[count++] = i;

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 0;
    constraints->min_height = 0;
    constraints->max_width  = p->props.limits.maxImageDimension2D;
    constraints->max_height = p->props.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}

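/* Allocates device memory from the first memory type that matches both the
 * requirements bitmask and the requested property flags */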
static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
    VkMemoryAllocateInfo alloc_info = {
        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext           = alloc_extension,
    };

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
                           dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}

static void vulkan_free_internal(AVVkFrameInternal *internal)
{
    if (!internal)
        return;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    av_free(internal);
}

static void vulkan_frame_free(void *opaque, uint8_t *data)
{
    AVVkFrame *f = (AVVkFrame *)data;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    vulkan_free_internal(f->internal);

    for (int i = 0; i < planes; i++) {
        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

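/* Allocates backing memory for every plane (dedicated when the implementation
 * prefers or requires it) and binds all of it in one vkBindImageMemory2 call */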
static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    for (int i = 0; i < planes; i++) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[i];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[i])))
            return err;

        f->size[i] = req.memoryRequirements.size;
        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[i].image  = f->img[i];
        bind_info[i].memory = f->mem[i];
    }

    /* Bind the allocated memory to the images */
    ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

enum PrepMode {
    PREP_MODE_WRITE,
    PREP_MODE_RO_SHADER,
    PREP_MODE_EXTERNAL_EXPORT,
};

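/* Records and submits an image barrier that transitions every plane to the
 * layout and access flags required by the given mode, signalling the frame's
 * semaphores in the process */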
static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    VkResult ret;
    uint32_t dst_qf;
    VkImageLayout new_layout;
    VkAccessFlags new_access;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount   = 1,
        .pCommandBuffers      = &ectx->buf,

        .pSignalSemaphores    = frame->sem,
        .signalSemaphoreCount = planes,
    };

    VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
    for (int i = 0; i < planes; i++)
        wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

    switch (pmode) {
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_RO_SHADER:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_READ_BIT;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_EXTERNAL_EXPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        s_info.pWaitSemaphores = frame->sem;
        s_info.pWaitDstStageMask = wait_st;
        s_info.waitSemaphoreCount = planes;
        break;
    }

    ret = vkBeginCommandBuffer(ectx->buf, &cmd_start);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    /* Change the image layout to something more optimal for writes.
     * This also signals the newly created semaphore, making it usable
     * for synchronization */
    for (int i = 0; i < planes; i++) {
        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[i].srcAccessMask = 0x0;
        img_bar[i].dstAccessMask = new_access;
        img_bar[i].oldLayout = frame->layout[i];
        img_bar[i].newLayout = new_layout;
        img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[i].dstQueueFamilyIndex = dst_qf;
        img_bar[i].image = frame->img[i];
        img_bar[i].subresourceRange.levelCount = 1;
        img_bar[i].subresourceRange.layerCount = 1;
        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[i].newLayout;
        frame->access[i] = img_bar[i].dstAccessMask;
    }

    vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                         0, NULL, 0, NULL, planes, img_bar);

    ret = vkEndCommandBuffer(ectx->buf);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence);
    if (ret != VK_SUCCESS) {
        return AVERROR_EXTERNAL;
    } else {
        vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &ectx->fence);
    }

    return 0;
}

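/* Creates the per-plane images and semaphores of a frame; backing memory is
 * allocated and bound separately by alloc_bind_mem() */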
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    enum AVPixelFormat format = hwfc->sw_format;
    const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
    const int planes = av_pix_fmt_count_planes(format);

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
    };

    AVVkFrame *f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }

    /* Create the images */
    for (int i = 0; i < planes; i++) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
        int w = hwfc->width;
        int h = hwfc->height;
        const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
        const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;

        VkImageCreateInfo image_create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = create_pnext,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = img_fmts[i],
            .extent.width          = p_w,
            .extent.height         = p_h,
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = 1,
            .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
            .tiling                = tiling,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage                 = usage,
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->qfs,
            .queueFamilyIndexCount = p->num_qfs,
            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                      VK_SHARING_MODE_EXCLUSIVE,
        };

        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
                            hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Create semaphore */
        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
                                hwctx->alloc, &f->sem[i]);
1409         if (ret != VK_SUCCESS) {
1410             av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
1411                    vk_ret2str(ret));
1412             err = AVERROR_EXTERNAL;
                 goto fail;
1413         }
1414
1415         f->layout[i] = image_create_info.initialLayout;
1416         f->access[i] = 0x0;
1417     }
1418
1419     f->flags     = 0x0;
1420     f->tiling    = tiling;
1421
1422     *frame = f;
1423     return 0;
1424
1425 fail:
1426     vulkan_frame_free(hwfc, (uint8_t *)f);
1427     return err;
1428 }
1429
1430 /* Checks if an export flag is supported, and if it is, ORs it into *iexp */
1431 static void try_export_flags(AVHWFramesContext *hwfc,
1432                              VkExternalMemoryHandleTypeFlags *comp_handle_types,
1433                              VkExternalMemoryHandleTypeFlagBits *iexp,
1434                              VkExternalMemoryHandleTypeFlagBits exp)
1435 {
1436     VkResult ret;
1437     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1438     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
1439     VkExternalImageFormatProperties eprops = {
1440         .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
1441     };
1442     VkImageFormatProperties2 props = {
1443         .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
1444         .pNext = &eprops,
1445     };
1446     VkPhysicalDeviceExternalImageFormatInfo enext = {
1447         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
1448         .handleType = exp,
1449     };
1450     VkPhysicalDeviceImageFormatInfo2 pinfo = {
1451         .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
1452         .pNext  = !exp ? NULL : &enext,
1453         .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
1454         .type   = VK_IMAGE_TYPE_2D,
1455         .tiling = hwctx->tiling,
1456         .usage  = hwctx->usage,
1457         .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
1458     };
1459
1460     ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
1461                                                     &pinfo, &props);
1462     if (ret == VK_SUCCESS) {
1463         *iexp |= exp;
1464         *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
1465     }
1466 }
1467
1468 static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
1469 {
1470     int err;
1471     AVVkFrame *f;
1472     AVBufferRef *avbuf = NULL;
1473     AVHWFramesContext *hwfc = opaque;
1474     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1475     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1476     VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
1477     VkExternalMemoryHandleTypeFlags e = 0x0;
1478
1479     VkExternalMemoryImageCreateInfo eiinfo = {
1480         .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
1481         .pNext       = hwctx->create_pnext,
1482     };
1483
1484     if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
1485         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1486                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
1487
1488     if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
1489         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1490                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1491
1492     for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
1493         eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
1494         eminfo[i].pNext       = hwctx->alloc_pnext[i];
1495         eminfo[i].handleTypes = e;
1496     }
1497
1498     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1499                        eiinfo.handleTypes ? &eiinfo : NULL);
1500     if (err)
1501         return NULL;
1502
1503     err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
1504     if (err)
1505         goto fail;
1506
1507     err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_WRITE);
1508     if (err)
1509         goto fail;
1510
1511     avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
1512                              vulkan_frame_free, hwfc, 0);
1513     if (!avbuf)
1514         goto fail;
1515
1516     return avbuf;
1517
1518 fail:
1519     vulkan_frame_free(hwfc, (uint8_t *)f);
1520     return NULL;
1521 }
1522
1523 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
1524 {
1525     VulkanFramesPriv *fp = hwfc->internal->priv;
1526
1527     free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1528 }
1529
1530 static int vulkan_frames_init(AVHWFramesContext *hwfc)
1531 {
1532     int err;
1533     AVVkFrame *f;
1534     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1535     VulkanFramesPriv *fp = hwfc->internal->priv;
1536     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
1537     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1538
1539     if (hwfc->pool)
1540         return 0;
1541
1542     /* Default pool flags */
1543     hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
1544                     VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
1545
1546     hwctx->usage |= DEFAULT_USAGE_FLAGS;
1547
1548     err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
1549                           dev_hwctx->queue_family_tx_index);
1550     if (err)
1551         return err;
1552
1553     /* Test to see if allocation will fail */
1554     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1555                        hwctx->create_pnext);
1556     if (err) {
1557         free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1558         return err;
1559     }
1560
1561     vulkan_frame_free(hwfc, (uint8_t *)f);
1562
1563     hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
1564                                                          hwfc, vulkan_pool_alloc,
1565                                                          NULL);
1566     if (!hwfc->internal->pool_internal) {
1567         free_exec_ctx(hwfc->device_ctx, &fp->cmd);
1568         return AVERROR(ENOMEM);
1569     }
1570
1571     return 0;
1572 }
1573
1574 static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
1575 {
1576     frame->buf[0] = av_buffer_pool_get(hwfc->pool);
1577     if (!frame->buf[0])
1578         return AVERROR(ENOMEM);
1579
1580     frame->data[0] = frame->buf[0]->data;
1581     frame->format  = AV_PIX_FMT_VULKAN;
1582     frame->width   = hwfc->width;
1583     frame->height  = hwfc->height;
1584
1585     return 0;
1586 }
1587
1588 static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
1589                                        enum AVHWFrameTransferDirection dir,
1590                                        enum AVPixelFormat **formats)
1591 {
1592     enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
1593     if (!fmts)
1594         return AVERROR(ENOMEM);
1595
1596     fmts[0] = hwfc->sw_format;
1597     fmts[1] = AV_PIX_FMT_NONE;
1598
1599     *formats = fmts;
1600     return 0;
1601 }
1602
1603 typedef struct VulkanMapping {
1604     AVVkFrame *frame;
1605     int flags;
1606 } VulkanMapping;
1607
1608 static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
1609 {
1610     VulkanMapping *map = hwmap->priv;
1611     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1612     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1613
1614     /* If the mapping was written to, flush non-coherent memory so the device sees the writes */
1615     if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
1616         !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1617         VkResult ret;
1618         VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
1619
1620         for (int i = 0; i < planes; i++) {
1621             flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1622             flush_ranges[i].memory = map->frame->mem[i];
1623             flush_ranges[i].size   = VK_WHOLE_SIZE;
1624         }
1625
1626         ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
1627                                         flush_ranges);
1628         if (ret != VK_SUCCESS) {
1629             av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
1630                    vk_ret2str(ret));
1631         }
1632     }
1633
1634     for (int i = 0; i < planes; i++)
1635         vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
1636
1637     av_free(map);
1638 }
1639
1640 static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
1641                                    const AVFrame *src, int flags)
1642 {
1643     VkResult ret;
1644     int err, mapped_mem_count = 0;
1645     AVVkFrame *f = (AVVkFrame *)src->data[0];
1646     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1647     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1648
1649     VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
1650     if (!map)
1651         return AVERROR(ENOMEM);
1652
1653     if (src->format != AV_PIX_FMT_VULKAN) {
1654         av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
1655                av_get_pix_fmt_name(src->format));
1656         err = AVERROR(EINVAL);
1657         goto fail;
1658     }
1659
1660     if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
1661         !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
1662         av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
1663                "and linear!\n");
1664         err = AVERROR(EINVAL);
1665         goto fail;
1666     }
1667
1668     dst->width  = src->width;
1669     dst->height = src->height;
1670
1671     for (int i = 0; i < planes; i++) {
1672         ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
1673                           VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
1674         if (ret != VK_SUCCESS) {
1675             av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
1676                 vk_ret2str(ret));
1677             err = AVERROR_EXTERNAL;
1678             goto fail;
1679         }
1680         mapped_mem_count++;
1681     }
1682
1683     /* If the contents will be read, invalidate non-coherent memory so the mapping is up to date */
1684     if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
1685         !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1686         VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
1687         for (int i = 0; i < planes; i++) {
1688             map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1689             map_mem_ranges[i].size = VK_WHOLE_SIZE;
1690             map_mem_ranges[i].memory = f->mem[i];
1691         }
1692
1693         ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
1694                                              map_mem_ranges);
1695         if (ret != VK_SUCCESS) {
1696             av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
1697                    vk_ret2str(ret));
1698             err = AVERROR_EXTERNAL;
1699             goto fail;
1700         }
1701     }
1702
1703     for (int i = 0; i < planes; i++) {
1704         VkImageSubresource sub = {
1705             .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1706         };
1707         VkSubresourceLayout layout;
1708         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
1709         dst->linesize[i] = layout.rowPitch;
1710     }
1711
1712     map->frame = f;
1713     map->flags = flags;
1714
1715     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
1716                                 &vulkan_unmap_frame, map);
1717     if (err < 0)
1718         goto fail;
1719
1720     return 0;
1721
1722 fail:
1723     for (int i = 0; i < mapped_mem_count; i++)
1724         vkUnmapMemory(hwctx->act_dev, f->mem[i]);
1725
1726     av_free(map);
1727     return err;
1728 }
1729
1730 #if CONFIG_LIBDRM
1731 static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
1732 {
1733     VulkanMapping *map = hwmap->priv;
1734     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1735     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1736
1737     for (int i = 0; i < planes; i++) {
1738         vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
1739         vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
1740         vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
1741     }
1742
1743     av_freep(&map->frame);
     av_free(map);
1744 }
1745
1746 static const struct {
1747     uint32_t drm_fourcc;
1748     VkFormat vk_format;
1749 } vulkan_drm_format_map[] = {
1750     { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
1751     { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
1752     { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
1753     { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
1754     { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
1755     { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
1756     { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
1757     { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
1758     { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
1759     { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
1760 };
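     /* Note: multi-planar software formats are described one layer per plane;
      * an NV12 DMA-BUF, for instance, typically arrives as DRM_FORMAT_R8 (luma)
      * plus DRM_FORMAT_GR88 (interleaved chroma), each layer mapped through the
      * table above. */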
1761
1762 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
1763 {
1764     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
1765         if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
1766             return vulkan_drm_format_map[i].vk_format;
1767     return VK_FORMAT_UNDEFINED;
1768 }
1769
1770 static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
1771                                           AVDRMFrameDescriptor *desc)
1772 {
1773     int err = 0;
1774     VkResult ret;
1775     AVVkFrame *f;
1776     int bind_counts = 0;
1777     AVHWDeviceContext *ctx = hwfc->device_ctx;
1778     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1779     VulkanDevicePriv *p = ctx->internal->priv;
1780     const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
1781     const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
1782     VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
1783     VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
1784     VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
1785     VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
1786
1787     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
1788
1789     for (int i = 0; i < desc->nb_layers; i++) {
1790         if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
1791             av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
1792                    desc->layers[i].format);
1793             return AVERROR(EINVAL);
1794         }
1795     }
1796
1797     if (!(f = av_vk_frame_alloc())) {
1798         av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
1799         err = AVERROR(ENOMEM);
1800         goto fail;
1801     }
1802
1803     for (int i = 0; i < desc->nb_objects; i++) {
1804         VkMemoryFdPropertiesKHR fdmp = {
1805             .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
1806         };
1807         VkMemoryRequirements req = {
1808             .size = desc->objects[i].size,
1809         };
1810         VkImportMemoryFdInfoKHR idesc = {
1811             .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
1812             .handleType = htype,
1813             .fd         = dup(desc->objects[i].fd),
1814         };
1815
1816         ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
1817                                              idesc.fd, &fdmp);
1818         if (ret != VK_SUCCESS) {
1819             av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
1820                    vk_ret2str(ret));
1821             err = AVERROR_EXTERNAL;
1822             close(idesc.fd);
1823             goto fail;
1824         }
1825
1826         req.memoryTypeBits = fdmp.memoryTypeBits;
1827
1828         err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1829                         &idesc, &f->flags, &f->mem[i]);
1830         if (err) {
1831             close(idesc.fd);
1832             return err;
1833         }
1834
1835         f->size[i] = desc->objects[i].size;
1836     }
1837
1838     f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
1839                 desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
1840                 VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
1841
1842     for (int i = 0; i < desc->nb_layers; i++) {
1843         const int planes = desc->layers[i].nb_planes;
1844         const int signal_p = has_modifiers && (planes > 1);
1845
1846         VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
1847             .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
1848             .drmFormatModifier = desc->objects[0].format_modifier,
1849             .drmFormatModifierPlaneCount = planes,
1850             .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
1851         };
1852
1853         VkExternalMemoryImageCreateInfo einfo = {
1854             .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
1855             .pNext       = has_modifiers ? &drm_info : NULL,
1856             .handleTypes = htype,
1857         };
1858
1859         VkSemaphoreCreateInfo sem_spawn = {
1860             .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
1861         };
1862
1863         const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
1864         const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
1865
1866         VkImageCreateInfo image_create_info = {
1867             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1868             .pNext                 = &einfo,
1869             .imageType             = VK_IMAGE_TYPE_2D,
1870             .format                = drm_to_vulkan_fmt(desc->layers[i].format),
1871             .extent.width          = p_w,
1872             .extent.height         = p_h,
1873             .extent.depth          = 1,
1874             .mipLevels             = 1,
1875             .arrayLayers           = 1,
1876             .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
1877             .tiling                = f->tiling,
1878             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
1879             .usage                 = DEFAULT_USAGE_FLAGS,
1880             .samples               = VK_SAMPLE_COUNT_1_BIT,
1881             .pQueueFamilyIndices   = p->qfs,
1882             .queueFamilyIndexCount = p->num_qfs,
1883             .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
1884                                                       VK_SHARING_MODE_EXCLUSIVE,
1885         };
1886
1887         for (int j = 0; j < planes; j++) {
1888             plane_data[j].offset     = desc->layers[i].planes[j].offset;
1889             plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
1890             plane_data[j].size       = 0; /* The specs say so for all 3 */
1891             plane_data[j].arrayPitch = 0;
1892             plane_data[j].depthPitch = 0;
1893         }
1894
1895         /* Create image */
1896         ret = vkCreateImage(hwctx->act_dev, &image_create_info,
1897                             hwctx->alloc, &f->img[i]);
1898         if (ret != VK_SUCCESS) {
1899             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
1900                    vk_ret2str(ret));
1901             err = AVERROR(EINVAL);
1902             goto fail;
1903         }
1904
1905         ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
1906                                 hwctx->alloc, &f->sem[i]);
1907         if (ret != VK_SUCCESS) {
1908             av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
1909                    vk_ret2str(ret));
1910             err = AVERROR_EXTERNAL;
                 goto fail;
1911         }
1912
1913         /* We could import a semaphore onto the one we created using
1914          * vkImportSemaphoreFdKHR, but unfortunately neither DRM nor VAAPI
1915          * offers us anything we could import and sync with, so instead we
1916          * just signal the semaphore we created (illustrative sketch below). */
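             /* Purely illustrative sketch of what such an import could look
              * like if DRM ever exposed a sync FD ("sync_fd" below is
              * hypothetical; the entry point comes from
              * VK_KHR_external_semaphore_fd):
              *
              *     VK_LOAD_PFN(hwctx->inst, vkImportSemaphoreFdKHR);
              *     VkImportSemaphoreFdInfoKHR import = {
              *         .sType      = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
              *         .semaphore  = f->sem[i],
              *         .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
              *         .flags      = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
              *         .fd         = sync_fd,
              *     };
              *     ret = pfn_vkImportSemaphoreFdKHR(hwctx->act_dev, &import);
              */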
1917
1918         f->layout[i] = image_create_info.initialLayout;
1919         f->access[i] = 0x0;
1920
1921         for (int j = 0; j < planes; j++) {
1922             VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
1923                                            j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
1924                                                     VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
1925
1926             plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
1927             plane_info[bind_counts].planeAspect = aspect;
1928
1929             bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1930             bind_info[bind_counts].pNext  = signal_p ? &plane_info[bind_counts] : NULL;
1931             bind_info[bind_counts].image  = f->img[i];
1932             bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
1933             bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
1934             bind_counts++;
1935         }
1936     }
1937
1938     /* Bind the allocated memory to the images */
1939     ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
1940     if (ret != VK_SUCCESS) {
1941         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
1942                vk_ret2str(ret));
1943         return AVERROR_EXTERNAL;
1944     }
1945
1946     /* NOTE: This will become unnecessary once we can import semaphores from
1947      * DRM. Until then we have to signal the semaphores ourselves.
1948      * We're reusing the exec context that's also used for uploads/downloads. */
1949     err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_RO_SHADER);
1950     if (err)
1951         goto fail;
1952
1953     *frame = f;
1954
1955     return 0;
1956
1957 fail:
1958     for (int i = 0; i < desc->nb_layers; i++) {
1959         vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
1960         vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
1961     }
1962     for (int i = 0; i < desc->nb_objects; i++)
1963         vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
1964
1965     av_free(f);
1966
1967     return err;
1968 }
1969
1970 static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
1971                                const AVFrame *src, int flags)
1972 {
1973     int err = 0;
1974     AVVkFrame *f;
1975     VulkanMapping *map = NULL;
1976
1977     err = vulkan_map_from_drm_frame_desc(hwfc, &f,
1978                                          (AVDRMFrameDescriptor *)src->data[0]);
1979     if (err)
1980         return err;
1981
1982     /* The unmapping function will free this */
1983     dst->data[0] = (uint8_t *)f;
1984     dst->width   = src->width;
1985     dst->height  = src->height;
1986
1987     map = av_mallocz(sizeof(VulkanMapping));
1988     if (!map) {
1989         err = AVERROR(ENOMEM);
             goto fail;
         }
1990
1991     map->frame = f;
1992     map->flags = flags;
1993
1994     err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
1995                                 &vulkan_unmap_from, map);
1996     if (err < 0)
1997         goto fail;
1998
1999     av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
2000
2001     return 0;
2002
2003 fail:
2004     vulkan_frame_free(hwfc, (uint8_t *)f);
2005     av_free(map);
2006     return err;
2007 }
2008
2009 #if CONFIG_VAAPI
2010 static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
2011                                  AVFrame *dst, const AVFrame *src,
2012                                  int flags)
2013 {
2014     int err;
2015     AVFrame *tmp = av_frame_alloc();
2016     AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2017     AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
2018     VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
2019
2020     if (!tmp)
2021         return AVERROR(ENOMEM);
2022
2023     /* We have to sync since, as the previous comment said, there are no semaphores to import */
2024     vaSyncSurface(vaapi_ctx->display, surface_id);
2025
2026     tmp->format = AV_PIX_FMT_DRM_PRIME;
2027
2028     err = av_hwframe_map(tmp, src, flags);
2029     if (err < 0)
2030         goto fail;
2031
2032     err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
2033     if (err < 0)
2034         goto fail;
2035
2036     err = ff_hwframe_map_replace(dst, src);
2037
2038 fail:
2039     av_frame_free(&tmp);
2040     return err;
2041 }
2042 #endif
2043 #endif
2044
2045 #if CONFIG_CUDA
2046 static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
2047                                  AVBufferRef *cuda_hwfc,
2048                                  const AVFrame *frame)
2049 {
2050     int err;
2051     VkResult ret;
2052     AVVkFrame *dst_f;
2053     AVVkFrameInternal *dst_int;
2054     AVHWDeviceContext *ctx = hwfc->device_ctx;
2055     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2056     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2057     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2058     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
2059     VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
2060
2061     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
2062     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2063     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2064     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2065     CudaFunctions *cu = cu_internal->cuda_dl;
2066     CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
2067                                                      CU_AD_FORMAT_UNSIGNED_INT8;
2068
2069     dst_f = (AVVkFrame *)frame->data[0];
2070
2071     dst_int = dst_f->internal;
2072     if (!dst_int || !dst_int->cuda_fc_ref) {
2073         if (!dst_f->internal)
2074             dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
2075
2076         if (!dst_int) {
2077             err = AVERROR(ENOMEM);
2078             goto fail;
2079         }
2080
2081         dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
2082         if (!dst_int->cuda_fc_ref) {
2083             err = AVERROR(ENOMEM);
2084             goto fail;
2085         }
2086
2087         for (int i = 0; i < planes; i++) {
2088             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
2089                 .offset = 0,
2090                 .arrayDesc = {
2091                     .Width  = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2092                                     : hwfc->width,
2093                     .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2094                                     : hwfc->height,
2095                     .Depth = 0,
2096                     .Format = cufmt,
2097                     .NumChannels = 1 + ((planes == 2) && i),
2098                     .Flags = 0,
2099                 },
2100                 .numLevels = 1,
2101             };
2102             CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
2103                 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
2104                 .size = dst_f->size[i],
2105             };
2106             VkMemoryGetFdInfoKHR export_info = {
2107                 .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2108                 .memory     = dst_f->mem[i],
2109                 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
2110             };
2111             VkSemaphoreGetFdInfoKHR sem_export = {
2112                 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
2113                 .semaphore = dst_f->sem[i],
2114                 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2115             };
2116             CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
2117                 .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
2118             };
2119
2120             ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
2121                                        &ext_desc.handle.fd);
2122             if (ret != VK_SUCCESS) {
2123                 av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
2124                 err = AVERROR_EXTERNAL;
2125                 goto fail;
2126             }
2127
2128             ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
2129             if (ret < 0) {
2130                 err = AVERROR_EXTERNAL;
2131                 goto fail;
2132             }
2133
2134             ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
2135                                                                        dst_int->ext_mem[i],
2136                                                                        &tex_desc));
2137             if (ret < 0) {
2138                 err = AVERROR_EXTERNAL;
2139                 goto fail;
2140             }
2141
2142             ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
2143                                                         dst_int->cu_mma[i], 0));
2144             if (ret < 0) {
2145                 err = AVERROR_EXTERNAL;
2146                 goto fail;
2147             }
2148
2149             ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
2150                                           &ext_sem_desc.handle.fd);
2151             if (ret != VK_SUCCESS) {
2152                 av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
2153                        vk_ret2str(ret));
2154                 err = AVERROR_EXTERNAL;
2155                 goto fail;
2156             }
2157
2158             ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
2159                                                          &ext_sem_desc));
2160             if (ret < 0) {
2161                 err = AVERROR_EXTERNAL;
2162                 goto fail;
2163             }
2164         }
2165     }
2166
2167     return 0;
2168
2169 fail:
2170     return err;
2171 }
2172
2173 static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
2174                                           AVFrame *dst, const AVFrame *src)
2175 {
2176     int err;
2177     VkResult ret;
2178     CUcontext dummy;
2179     AVVkFrame *dst_f;
2180     AVVkFrameInternal *dst_int;
2181     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2182     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2183
2184     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2185     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2186     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2187     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2188     CudaFunctions *cu = cu_internal->cuda_dl;
2189     CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
2190     CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
2191
2192     dst_f = (AVVkFrame *)dst->data[0];
2193
2194     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
2195     if (ret < 0) {
2196         err = AVERROR_EXTERNAL;
2197         goto fail;
2198     }
2199
2200     err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
2201     if (err < 0)
2202         goto fail;
2203
2204     dst_int = dst_f->internal;
2205
2206     ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
2207                                                      planes, cuda_dev->stream));
2208     if (ret < 0) {
2209         err = AVERROR_EXTERNAL;
2210         goto fail;
2211     }
2212
2213     for (int i = 0; i < planes; i++) {
2214         CUDA_MEMCPY2D cpy = {
2215             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
2216             .srcDevice     = (CUdeviceptr)src->data[i],
2217             .srcPitch      = src->linesize[i],
2218             .srcY          = 0,
2219
2220             .dstMemoryType = CU_MEMORYTYPE_ARRAY,
2221             .dstArray      = dst_int->cu_array[i],
2222             .WidthInBytes  = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2223                                     : hwfc->width) * desc->comp[i].step,
2224             .Height        = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2225                                    : hwfc->height,
2226         };
2227
2228         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
2229         if (ret < 0) {
2230             err = AVERROR_EXTERNAL;
2231             goto fail;
2232         }
2233     }
2234
2235     ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
2236                                                        planes, cuda_dev->stream));
2237     if (ret < 0) {
2238         err = AVERROR_EXTERNAL;
2239         goto fail;
2240     }
2241
2242     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2243
2244     av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
2245
2246     return 0;
2247
2248 fail:
2249     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2250     vulkan_free_internal(dst_f->internal);
2251     dst_f->internal = NULL;
2252     av_buffer_unref(&dst->buf[0]);
2253     return err;
2254 }
2255 #endif
2256
2257 static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
2258                          const AVFrame *src, int flags)
2259 {
2260     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2261
2262     switch (src->format) {
2263 #if CONFIG_LIBDRM
2264 #if CONFIG_VAAPI
2265     case AV_PIX_FMT_VAAPI:
2266         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2267             return vulkan_map_from_vaapi(hwfc, dst, src, flags);
2268 #endif
2269     case AV_PIX_FMT_DRM_PRIME:
2270         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2271             return vulkan_map_from_drm(hwfc, dst, src, flags);
2272 #endif
2273     default:
2274         return AVERROR(ENOSYS);
2275     }
2276 }
2277
2278 #if CONFIG_LIBDRM
2279 typedef struct VulkanDRMMapping {
2280     AVDRMFrameDescriptor drm_desc;
2281     AVVkFrame *source;
2282 } VulkanDRMMapping;
2283
2284 static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
2285 {
2286     AVDRMFrameDescriptor *drm_desc = hwmap->priv;
2287
2288     for (int i = 0; i < drm_desc->nb_objects; i++)
2289         close(drm_desc->objects[i].fd);
2290
2291     av_free(drm_desc);
2292 }
2293
2294 static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
2295 {
2296     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
2297         if (vulkan_drm_format_map[i].vk_format == vkfmt)
2298             return vulkan_drm_format_map[i].drm_fourcc;
2299     return DRM_FORMAT_INVALID;
2300 }
2301
2302 static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
2303                              const AVFrame *src, int flags)
2304 {
2305     int err = 0;
2306     VkResult ret;
2307     AVVkFrame *f = (AVVkFrame *)src->data[0];
2308     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2309     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
2310     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2311     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
2312     VkImageDrmFormatModifierPropertiesEXT drm_mod = {
2313         .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
2314     };
2315
2316     AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
2317     if (!drm_desc)
2318         return AVERROR(ENOMEM);
2319
2320     err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_EXTERNAL_EXPORT);
2321     if (err < 0)
2322         goto end;
2323
2324     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
2325     if (err < 0)
2326         goto end;
2327
2328     if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
2329         VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
2330         ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
2331                                                            &drm_mod);
2332         if (ret != VK_SUCCESS) {
2333             av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
2334             err = AVERROR_EXTERNAL;
2335             goto end;
2336         }
2337     }
2338
2339     for (int i = 0; (i < planes) && (f->mem[i]); i++) {
2340         VkMemoryGetFdInfoKHR export_info = {
2341             .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2342             .memory     = f->mem[i],
2343             .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2344         };
2345
2346         ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
2347                                    &drm_desc->objects[i].fd);
2348         if (ret != VK_SUCCESS) {
2349             av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as an FD!\n");
2350             err = AVERROR_EXTERNAL;
2351             goto end;
2352         }
2353
2354         drm_desc->nb_objects++;
2355         drm_desc->objects[i].size = f->size[i];
2356         drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
2357     }
2358
2359     drm_desc->nb_layers = planes;
2360     for (int i = 0; i < drm_desc->nb_layers; i++) {
2361         VkSubresourceLayout layout;
2362         VkImageSubresource sub = {
2363             .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
2364                           VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2365                           VK_IMAGE_ASPECT_COLOR_BIT,
2366         };
2367         VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
2368
2369         drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
2370         drm_desc->layers[i].nb_planes = 1;
2371
2372         if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
2373             av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
2374             err = AVERROR_PATCHWELCOME;
2375             goto end;
2376         }
2377
2378         drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
2379
2380         if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
2381             continue;
2382
2383         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
2384         drm_desc->layers[i].planes[0].offset       = layout.offset;
2385         drm_desc->layers[i].planes[0].pitch        = layout.rowPitch;
2386     }
2387
2388     dst->width   = src->width;
2389     dst->height  = src->height;
2390     dst->data[0] = (uint8_t *)drm_desc;
2391
2392     av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
2393
2394     return 0;
2395
2396 end:
2397     av_free(drm_desc);
2398     return err;
2399 }
2400
2401 #if CONFIG_VAAPI
2402 static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
2403                                const AVFrame *src, int flags)
2404 {
2405     int err;
2406     AVFrame *tmp = av_frame_alloc();
2407     if (!tmp)
2408         return AVERROR(ENOMEM);
2409
2410     tmp->format = AV_PIX_FMT_DRM_PRIME;
2411
2412     err = vulkan_map_to_drm(hwfc, tmp, src, flags);
2413     if (err < 0)
2414         goto fail;
2415
2416     err = av_hwframe_map(dst, tmp, flags);
2417     if (err < 0)
2418         goto fail;
2419
2420     err = ff_hwframe_map_replace(dst, src);
2421
2422 fail:
2423     av_frame_free(&tmp);
2424     return err;
2425 }
2426 #endif
2427 #endif
2428
2429 static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
2430                            const AVFrame *src, int flags)
2431 {
2432     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2433
2434     switch (dst->format) {
2435 #if CONFIG_LIBDRM
2436     case AV_PIX_FMT_DRM_PRIME:
2437         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2438             return vulkan_map_to_drm(hwfc, dst, src, flags);
2439 #if CONFIG_VAAPI
2440     case AV_PIX_FMT_VAAPI:
2441         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2442             return vulkan_map_to_vaapi(hwfc, dst, src, flags);
2443 #endif
2444 #endif
2445     default:
2446         return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
2447     }
2448 }
2449
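     /* Host-visible staging buffer, one per plane, used by the upload and
      * download paths below. */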
2450 typedef struct ImageBuffer {
2451     VkBuffer buf;
2452     VkDeviceMemory mem;
2453     VkMemoryPropertyFlagBits flags;
2454 } ImageBuffer;
2455
2456 static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
2457 {
2458     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2459     if (!buf)
2460         return;
2461
2462     vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
2463     vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
2464 }
2465
2466 static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height,
2467                       int *stride, VkBufferUsageFlags usage,
2468                       VkMemoryPropertyFlagBits flags, void *create_pnext,
2469                       void *alloc_pnext)
2470 {
2471     int err;
2472     VkResult ret;
2473     VkMemoryRequirements req;
2474     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2475     VulkanDevicePriv *p = ctx->internal->priv;
2476
2477     VkBufferCreateInfo buf_spawn = {
2478         .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2479         .pNext       = create_pnext,
2480         .usage       = usage,
2481         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2482     };
2483
2484     *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
2485     buf_spawn.size = height*(*stride);
2486
2487     ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
2488     if (ret != VK_SUCCESS) {
2489         av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
2490                vk_ret2str(ret));
2491         return AVERROR_EXTERNAL;
2492     }
2493
2494     vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
2495
2496     err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
2497     if (err)
2498         return err;
2499
2500     ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
2501     if (ret != VK_SUCCESS) {
2502         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
2503                vk_ret2str(ret));
2504         free_buf(ctx, buf);
2505         return AVERROR_EXTERNAL;
2506     }
2507
2508     return 0;
2509 }
2510
2511 static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
2512                        int nb_buffers, int invalidate)
2513 {
2514     VkResult ret;
2515     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2516     VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
2517     int invalidate_count = 0;
2518
2519     for (int i = 0; i < nb_buffers; i++) {
2520         ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
2521                           VK_WHOLE_SIZE, 0, (void **)&mem[i]);
2522         if (ret != VK_SUCCESS) {
2523             av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
2524                    vk_ret2str(ret));
2525             return AVERROR_EXTERNAL;
2526         }
2527     }
2528
2529     if (!invalidate)
2530         return 0;
2531
2532     for (int i = 0; i < nb_buffers; i++) {
2533         const VkMappedMemoryRange ival_buf = {
2534             .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
2535             .memory = buf[i].mem,
2536             .size   = VK_WHOLE_SIZE,
2537         };
2538         if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
2539             continue;
2540         invalidate_ctx[invalidate_count++] = ival_buf;
2541     }
2542
2543     if (invalidate_count) {
2544         ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
2545                                              invalidate_ctx);
2546         if (ret != VK_SUCCESS)
2547             av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
2548                    vk_ret2str(ret));
2549     }
2550
2551     return 0;
2552 }
2553
2554 static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
2555                          int nb_buffers, int flush)
2556 {
2557     int err = 0;
2558     VkResult ret;
2559     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2560     VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
2561     int flush_count = 0;
2562
2563     if (flush) {
2564         for (int i = 0; i < nb_buffers; i++) {
2565             const VkMappedMemoryRange flush_buf = {
2566                 .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
2567                 .memory = buf[i].mem,
2568                 .size   = VK_WHOLE_SIZE,
2569             };
2570             if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
2571                 continue;
2572             flush_ctx[flush_count++] = flush_buf;
2573         }
2574     }
2575
2576     if (flush_count) {
2577         ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
2578         if (ret != VK_SUCCESS) {
2579             av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
2580                     vk_ret2str(ret));
2581             err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
2582         }
2583     }
2584
2585     for (int i = 0; i < nb_buffers; i++)
2586         vkUnmapMemory(hwctx->act_dev, buf[i].mem);
2587
2588     return err;
2589 }
2590
2591 static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
2592                               ImageBuffer *buffer, const int *buf_stride, int w,
2593                               int h, enum AVPixelFormat pix_fmt, int to_buf)
2594 {
2595     VkResult ret;
2596     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2597     VulkanDevicePriv *s = ctx->internal->priv;
2598
2599     int bar_num = 0;
2600     VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
2601
2602     const int planes = av_pix_fmt_count_planes(pix_fmt);
2603     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
2604
2605     VkCommandBufferBeginInfo cmd_start = {
2606         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
2607         .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
2608     };
2609
2610     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
2611
2612     VkSubmitInfo s_info = {
2613         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
2614         .commandBufferCount   = 1,
2615         .pCommandBuffers      = &s->cmd.buf,
2616         .pSignalSemaphores    = frame->sem,
2617         .pWaitSemaphores      = frame->sem,
2618         .pWaitDstStageMask    = sem_wait_dst,
2619         .signalSemaphoreCount = planes,
2620         .waitSemaphoreCount   = planes,
2621     };
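     /* The submit below waits on and re-signals every plane's semaphore, which
      * serializes this transfer against earlier and later users of the image. */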
2622
2623     ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
2624     if (ret != VK_SUCCESS) {
2625         av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
2626                vk_ret2str(ret));
2627         return AVERROR_EXTERNAL;
2628     }
2629
2630     /* Change the image layout to something more optimal for transfers */
2631     for (int i = 0; i < planes; i++) {
2632         VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
2633                                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
2634         VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
2635                                             VK_ACCESS_TRANSFER_WRITE_BIT;
2636
2637         sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
2638
2639         /* If the layout matches and we have read access skip the barrier */
2640         if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
2641             continue;
2642
2643         img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
2644         img_bar[bar_num].srcAccessMask = 0x0;
2645         img_bar[bar_num].dstAccessMask = new_access;
2646         img_bar[bar_num].oldLayout = frame->layout[i];
2647         img_bar[bar_num].newLayout = new_layout;
2648         img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2649         img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2650         img_bar[bar_num].image = frame->img[i];
2651         img_bar[bar_num].subresourceRange.levelCount = 1;
2652         img_bar[bar_num].subresourceRange.layerCount = 1;
2653         img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
2654
2655         frame->layout[i] = img_bar[bar_num].newLayout;
2656         frame->access[i] = img_bar[bar_num].dstAccessMask;
2657
2658         bar_num++;
2659     }
2660
2661     if (bar_num)
2662         vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
2663                              VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
2664                              0, NULL, 0, NULL, bar_num, img_bar);
2665
2666     /* Schedule a copy for each plane */
2667     for (int i = 0; i < planes; i++) {
2668         const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
2669         const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
2670         VkBufferImageCopy buf_reg = {
2671             .bufferOffset = 0,
2672             /* Buffer stride isn't in bytes, it's in samples; the implementation
2673              * uses the image's VkFormat to know how many bytes per sample the
2674              * buffer has, so we have to convert by dividing. Stupid. This won't
2675              * work with YUVA or other planar formats with alpha (see below). */
2676             .bufferRowLength = buf_stride[i] / desc->comp[i].step,
2677             .bufferImageHeight = p_h,
2678             .imageSubresource.layerCount = 1,
2679             .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2680             .imageOffset = { 0, 0, 0, },
2681             .imageExtent = { p_w, p_h, 1, },
2682         };
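         /* Worked example with made-up numbers: an 8-bit plane padded to a
          * 256-byte stride has step == 1, so bufferRowLength = 256 / 1 = 256
          * samples; a 16-bit plane (step == 2) with the same 256-byte stride
          * gives bufferRowLength = 256 / 2 = 128 samples. */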
2683
2684         if (to_buf)
2685             vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
2686                                    buffer[i].buf, 1, &buf_reg);
2687         else
2688             vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i],
2689                                    frame->layout[i], 1, &buf_reg);
2690     }
2691
2692     ret = vkEndCommandBuffer(s->cmd.buf);
2693     if (ret != VK_SUCCESS) {
2694         av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
2695                vk_ret2str(ret));
2696         return AVERROR_EXTERNAL;
2697     }
2698
2699     /* Wait on the fence so the staging buffers can safely be reused or freed
2700      * by the caller; the semaphores handle synchronization of the image itself */
2701     ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence);
2702     if (ret != VK_SUCCESS) {
2703         av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
2704                vk_ret2str(ret));
2705         return AVERROR_EXTERNAL;
2706     } else {
2707         vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX);
2708         vkResetFences(hwctx->act_dev, 1, &s->cmd.fence);
2709     }
2710
2711     return 0;
2712 }
2713
2714 /* Technically we can use VK_EXT_external_memory_host to upload and download,
2715  * however the alignment requirements make this infeasible, as both the pointer
2716  * and the size of each plane need to be aligned to the minimum alignment
2717  * requirement, which on all current implementations (anv, radv) is 4096.
2718  * If the requirement gets relaxed (unlikely) this can easily be implemented. */
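 /* For example (illustrative numbers): a 1920x1080 8-bit luma plane occupies
  * 1920 * 1080 = 2073600 bytes, which is not a multiple of 4096
  * (506 * 4096 = 2072576, 507 * 4096 = 2076672), and av_malloc()ed buffers are
  * only aligned to 32 or 64 bytes, so in practice neither the size nor the
  * pointer requirement would be met without copying anyway. */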
2719 static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
2720                                          const AVFrame *src)
2721 {
2722     int err = 0;
2723     AVFrame tmp;
2724     AVVkFrame *f = (AVVkFrame *)dst->data[0];
2725     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
2726     ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
2727     const int planes = av_pix_fmt_count_planes(src->format);
2728     int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
2729
2730     if (src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format)) {
2731         av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
2732         return AVERROR(EINVAL);
2733     }
2734
2735     if (src->width > hwfc->width || src->height > hwfc->height)
2736         return AVERROR(EINVAL);
2737
2738     /* For linear, host-visible images */
2739     if (f->tiling == VK_IMAGE_TILING_LINEAR &&
2740         f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
2741         AVFrame *map = av_frame_alloc();
2742         if (!map)
2743             return AVERROR(ENOMEM);
2744         map->format = src->format;
2745
2746         err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
2747         if (err) {
2748             av_frame_free(&map);
                 goto end;
             }
2749
2750         err = av_frame_copy(map, src);
2751         av_frame_free(&map);
2752         goto end;
2753     }
2754
2755     /* Create buffers */
2756     for (int i = 0; i < planes; i++) {
2757         int h = src->height;
2758         int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
2759
2760         tmp.linesize[i] = FFABS(src->linesize[i]);
2761         err = create_buf(dev_ctx, &buf[i], p_height,
2762                          &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
2763                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
2764         if (err)
2765             goto end;
2766     }
2767
2768     /* Map, copy image to buffer, unmap */
2769     if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0)))
2770         goto end;
2771
2772     av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
2773                   src->linesize, src->format, src->width, src->height);
2774
2775     if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
2776         goto end;
2777
2778     /* Copy buffers to image */
2779     err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
2780                              src->width, src->height, src->format, 0);
2781
2782 end:
2783     for (int i = 0; i < planes; i++)
2784         free_buf(dev_ctx, &buf[i]);
2785
2786     return err;
2787 }
2788
2789 static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
2790                                         const AVFrame *src)
2791 {
2792     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2793
2794     switch (src->format) {
2795 #if CONFIG_CUDA
2796     case AV_PIX_FMT_CUDA:
2797         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
2798             (p->extensions & EXT_EXTERNAL_FD_SEM))
2799             return vulkan_transfer_data_from_cuda(hwfc, dst, src);
2800 #endif
2801     default:
2802         if (src->hw_frames_ctx)
2803             return AVERROR(ENOSYS);
2804         else
2805             return vulkan_transfer_data_from_mem(hwfc, dst, src);
2806     }
2807 }
2808
2809 #if CONFIG_CUDA
2810 static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
2811                                       const AVFrame *src)
2812 {
2813     int err;
2814     VkResult ret;
2815     CUcontext dummy;
2816     AVVkFrame *dst_f;
2817     AVVkFrameInternal *dst_int;
2818     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2819     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2820
2821     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
2822     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2823     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2824     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2825     CudaFunctions *cu = cu_internal->cuda_dl;
2826
2827     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
2828     if (ret < 0) {
2829         err = AVERROR_EXTERNAL;
2830         goto fail;
2831     }
2832
2833     dst_f = (AVVkFrame *)src->data[0];
2834
2835     err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
2836     if (err < 0) {
2837         goto fail;
2838     }
2839
2840     dst_int = dst_f->internal;
2841
2842     for (int i = 0; i < planes; i++) {
2843         CUDA_MEMCPY2D cpy = {
2844             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
2845             .dstDevice     = (CUdeviceptr)dst->data[i],
2846             .dstPitch      = dst->linesize[i],
2847             .dstY          = 0,
2848
2849             .srcMemoryType = CU_MEMORYTYPE_ARRAY,
2850             .srcArray      = dst_int->cu_array[i],
2851             .WidthInBytes  = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
2852                                     : hwfc->width) * desc->comp[i].step,
2853             .Height        = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
2854                                    : hwfc->height,
2855         };
2856
2857         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
2858         if (ret < 0) {
2859             err = AVERROR_EXTERNAL;
2860             goto fail;
2861         }
2862     }
2863
2864     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2865
2866     av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");
2867
2868     return 0;
2869
2870 fail:
2871     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2872     vulkan_free_internal(dst_int);
2873     if (dst_f) /* may be unset if pushing the CUDA context failed */
             dst_f->internal = NULL;
2874     av_buffer_unref(&dst->buf[0]);
2875     return err;
2876 }
2877 #endif
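
     /*
      * Editor's illustrative sketch, not part of the original file and kept
      * under #if 0: copying a Vulkan frame into a CUDA frame through the
      * public API. It assumes "cuda_frame" was allocated from a CUDA frames
      * context on the same GPU, and that the device exposes the external
      * memory/semaphore fd extensions checked in the dispatcher below.
      */
     #if 0
     static int vulkan_to_cuda_sketch(AVFrame *cuda_frame, const AVFrame *vk_frame)
     {
         /* Both frames carry hw_frames_ctx, so av_hwframe_transfer_data()
          * invokes transfer_data_from on the Vulkan side, which lands in
          * vulkan_transfer_data_to_cuda() above */
         return av_hwframe_transfer_data(cuda_frame, vk_frame, 0);
     }
     #endif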
2878
2879 static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
2880                                        const AVFrame *src)
2881 {
2882     int err = 0;
2883     AVFrame tmp;
2884     AVVkFrame *f = (AVVkFrame *)src->data[0];
2885     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
2886     ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } };
2887     const int planes = av_pix_fmt_count_planes(dst->format);
2888     int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
2889
2890     if (dst->width > hwfc->width || dst->height > hwfc->height)
2891         return AVERROR(EINVAL);
2892
2893     /* For linear, host visible images */
2894     if (f->tiling == VK_IMAGE_TILING_LINEAR &&
2895         f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
2896         AVFrame *map = av_frame_alloc();
2897         if (!map)
2898             return AVERROR(ENOMEM);
2899         map->format = dst->format;
2900
2901         err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
2902         if (err)
2903             return err;
2904
2905         err = av_frame_copy(dst, map);
2906         av_frame_free(&map);
2907         return err;
2908     }
2909
2910     /* Create buffers */
2911     for (int i = 0; i < planes; i++) {
2912         int h = dst->height;
2913         int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
2914
2915         tmp.linesize[i] = FFABS(dst->linesize[i]);
2916         err = create_buf(dev_ctx, &buf[i], p_height,
2917                          &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
2918                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
             if (err)
                 goto end;
2919     }
2920
2921     /* Copy image to buffer */
2922     if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize,
2923                                   dst->width, dst->height, dst->format, 1)))
2924         goto end;
2925
2926     /* Map, copy buffer to frame, unmap */
2927     if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1)))
2928         goto end;
2929
2930     av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
2931                   tmp.linesize, dst->format, dst->width, dst->height);
2932
2933     err = unmap_buffers(dev_ctx, buf, planes, 0);
2934
2935 end:
2936     for (int i = 0; i < planes; i++)
2937         free_buf(dev_ctx, &buf[i]);
2938
2939     return err;
2940 }
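
     /*
      * Editor's illustrative sketch, not part of the original file and kept
      * under #if 0: reading a Vulkan frame back to system memory. The helper
      * name is an assumption; the API calls are real. Linear, host-visible
      * images take the map-and-copy shortcut above, everything else goes
      * through staging buffers.
      */
     #if 0
     static int download_example_sketch(AVFrame *sw_frame, const AVFrame *vk_frame)
     {
         enum AVPixelFormat *fmts = NULL;
         int err;

         /* Ask the hwcontext which software formats readback supports */
         err = av_hwframe_transfer_get_formats(vk_frame->hw_frames_ctx,
                                               AV_HWFRAME_TRANSFER_DIRECTION_FROM,
                                               &fmts, 0);
         if (err < 0)
             return err;
         sw_frame->format = fmts[0];
         av_freep(&fmts);

         /* Dispatches through vulkan_transfer_data_from() into
          * vulkan_transfer_data_to_mem() above */
         return av_hwframe_transfer_data(sw_frame, vk_frame, 0);
     }
     #endif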
2941
2942 static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
2943                                      const AVFrame *src)
2944 {
2945     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2946
2947     switch (dst->format) {
2948 #if CONFIG_CUDA
2949     case AV_PIX_FMT_CUDA:
2950         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
2951             (p->extensions & EXT_EXTERNAL_FD_SEM))
2952             return vulkan_transfer_data_to_cuda(hwfc, dst, src);
             /* Fall through to the system-memory path below if the required
              * external memory/semaphore extensions are unavailable */
2953 #endif
2954     default:
2955         if (dst->hw_frames_ctx)
2956             return AVERROR(ENOSYS);
2957         else
2958             return vulkan_transfer_data_to_mem(hwfc, dst, src);
2959     }
2960 }
2961
2962 AVVkFrame *av_vk_frame_alloc(void)
2963 {
2964     return av_mallocz(sizeof(AVVkFrame));
2965 }
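
     /*
      * Editor's illustrative sketch, not part of the original file and kept
      * under #if 0: wrapping an externally created VkImage in an AVVkFrame.
      * The handles and the layout argument are assumed to come from the
      * caller's own Vulkan code; only plane 0 is filled here, and a real
      * user must also provide the per-plane semaphores the transfer code
      * synchronizes on.
      */
     #if 0
     static AVVkFrame *wrap_vk_image_sketch(VkImage img, VkDeviceMemory mem,
                                            VkImageLayout layout)
     {
         AVVkFrame *f = av_vk_frame_alloc(); /* zeroed, so f->internal == NULL */
         if (!f)
             return NULL;
         f->img[0]    = img;
         f->mem[0]    = mem;
         f->tiling    = VK_IMAGE_TILING_OPTIMAL;
         f->layout[0] = layout;
         return f;
     }
     #endif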
2966
2967 const HWContextType ff_hwcontext_type_vulkan = {
2968     .type                   = AV_HWDEVICE_TYPE_VULKAN,
2969     .name                   = "Vulkan",
2970
2971     .device_hwctx_size      = sizeof(AVVulkanDeviceContext),
2972     .device_priv_size       = sizeof(VulkanDevicePriv),
2973     .frames_hwctx_size      = sizeof(AVVulkanFramesContext),
2974     .frames_priv_size       = sizeof(VulkanFramesPriv),
2975
2976     .device_init            = &vulkan_device_init,
2977     .device_create          = &vulkan_device_create,
2978     .device_derive          = &vulkan_device_derive,
2979
2980     .frames_get_constraints = &vulkan_frames_get_constraints,
2981     .frames_init            = &vulkan_frames_init,
2982     .frames_get_buffer      = &vulkan_get_buffer,
2983     .frames_uninit          = &vulkan_frames_uninit,
2984
2985     .transfer_get_formats   = &vulkan_transfer_get_formats,
2986     .transfer_data_to       = &vulkan_transfer_data_to,
2987     .transfer_data_from     = &vulkan_transfer_data_from,
2988
2989     .map_to                 = &vulkan_map_to,
2990     .map_from               = &vulkan_map_from,
2991
2992     .pix_fmts = (const enum AVPixelFormat []) {
2993         AV_PIX_FMT_VULKAN,
2994         AV_PIX_FMT_NONE
2995     },
2996 };
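
     /*
      * Editor's illustrative sketch, not part of the original file and kept
      * under #if 0: the table above is what av_hwdevice_ctx_create()
      * dispatches through for AV_HWDEVICE_TYPE_VULKAN. Passing NULL as the
      * device string lets vulkan_device_create() pick a default GPU.
      */
     #if 0
     static int create_device_sketch(void)
     {
         AVBufferRef *dev = NULL;
         AVVulkanDeviceContext *hwctx;
         int err;

         err = av_hwdevice_ctx_create(&dev, AV_HWDEVICE_TYPE_VULKAN,
                                      NULL, NULL, 0);
         if (err < 0)
             return err;

         /* The Vulkan handles live in the public AVVulkanDeviceContext */
         hwctx = ((AVHWDeviceContext *)dev->data)->hwctx;
         av_log(NULL, AV_LOG_INFO, "instance %p\n", (void *)hwctx->inst);

         av_buffer_unref(&dev);
         return 0;
     }
     #endif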