/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#if CONFIG_LIBDRM
#include <unistd.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif

typedef struct VulkanQueueCtx {
    VkFence fence;
    VkQueue queue;
    int was_synchronous;

    /* Buffer dependencies */
    AVBufferRef **buf_deps;
    int nb_buf_deps;
    int buf_deps_alloc_size;
} VulkanQueueCtx;

typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer *bufs;
    VulkanQueueCtx *queues;
    int nb_queues;
    int cur_queue_idx;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Properties */
    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;

    /* Queues */
    uint32_t qfs[3];
    int num_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Extensions */
    uint64_t extensions;

    /* Settings */
    int use_linear_images;

    /* Nvidia */
    int dev_is_nvidia;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    /* Image conversions */
    VulkanExecCtx conv_ctx;

    /* Image transfers */
    VulkanExecCtx upload_ctx;
    VulkanExecCtx download_ctx;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#endif
} AVVkFrameInternal;

#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
    graph ?  hwctx->nb_graphics_queues :                            \
    comp  ? (hwctx->nb_comp_queues ?                                \
             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
             (hwctx->nb_comp_queues ?                               \
              hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \
    0                                                               \
)
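
/* E.g. on a device that exposes 16 graphics queues but no dedicated compute
 * or transfer queue family, GET_QUEUE_COUNT(hwctx, 0, 0, 1) falls back
 * through tx -> comp -> graphics and evaluates to 16. */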

#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name)           \
                                              vkGetInstanceProcAddr(inst, #name)

#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      |                 \
                             VK_IMAGE_USAGE_STORAGE_BIT      |                 \
                             VK_IMAGE_USAGE_TRANSFER_SRC_BIT |                 \
                             VK_IMAGE_USAGE_TRANSFER_DST_BIT)

#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        list = av_realloc_array(list, sizeof(*list), ++count);                 \
        if (!list) {                                                           \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
    } while(0)
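
/* Appends a strdup'd copy of val to a heap-allocated string list. The caller
 * must provide an "err" variable and a "fail" label that frees the list, as
 * check_extensions() below does:
 *
 *     ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
 */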

static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[4];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },

    { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },

    { AV_PIX_FMT_YUV420P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUV422P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUV444P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA420P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    /* There is no AV_PIX_FMT_YUVA420P12 */
    { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA422P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA444P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_BGR32,  { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },

    { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },

    { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
    { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};
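
/* Note that each entry maps a pixel format to one single-plane VkFormat per
 * plane rather than to a multi-planar Vulkan format, e.g. NV12 becomes an
 * R8 luma image plus an R8G8 interleaved-chroma image. */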

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}
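
/* A minimal usage sketch; the returned array holds one format per plane, so
 * callers pair it with av_pix_fmt_count_planes():
 *
 *     const VkFormat *fmts = av_vkfmt_from_pixfmt(AV_PIX_FMT_NV12);
 *     int nb_planes = av_pix_fmt_count_planes(AV_PIX_FMT_NV12);
 *     // fmts[0] == VK_FORMAT_R8_UNORM, fmts[1] == VK_FORMAT_R8G8_UNORM
 */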

static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
                               int linear)
{
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}

enum VulkanExtensions {
    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL <<  0, /* VK_EXT_external_memory_dma_buf */
    EXT_DRM_MODIFIER_FLAGS     = 1ULL <<  1, /* VK_EXT_image_drm_format_modifier */
    EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  2, /* VK_KHR_external_memory_fd */
    EXT_EXTERNAL_FD_SEM        = 1ULL <<  3, /* VK_KHR_external_semaphore_fd */
    EXT_EXTERNAL_HOST_MEMORY   = 1ULL <<  4, /* VK_EXT_external_memory_host */

    EXT_NO_FLAG                = 1ULL << 63,
};
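
/* Enabled extensions are accumulated into VulkanDevicePriv.extensions, so a
 * capability check is a single bitmask test, e.g.:
 *
 *     if (p->extensions & EXT_EXTERNAL_HOST_MEMORY)
 *         // host pointers can be imported directly
 */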

typedef struct VulkanOptExtension {
    const char *name;
    uint64_t flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    /* For future use */
};

static const VulkanOptExtension optional_device_exts[] = {
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,               EXT_EXTERNAL_FD_MEMORY,     },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,          EXT_EXTERNAL_DMABUF_MEMORY, },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,        EXT_DRM_MODIFIER_FLAGS,     },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,            EXT_EXTERNAL_FD_SEM,        },
    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,             EXT_EXTERNAL_HOST_MEMORY,   },
};

/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                VkDebugUtilsMessageTypeFlagsEXT messageType,
                                const VkDebugUtilsMessengerCallbackDataEXT *data,
                                void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}

static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext = NULL;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                             &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
        p->extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    if (extension_names)
        for (int i = 0; i < extensions_found; i++)
            av_free((void *)extension_names[i]);
    av_free(extension_names);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}
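
/* User-supplied extension lists are "+"-separated, so a caller could pass an
 * options dictionary containing, e.g.:
 *     device_extensions=VK_KHR_external_memory_fd+VK_KHR_external_semaphore_fd
 * Unknown user-requested extensions are skipped with a warning rather than
 * failing creation outright. */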

/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
{
    int err = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
    VkApplicationInfo application_info = {
        .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName        = "libavutil",
        .apiVersion         = VK_API_VERSION_1_1,
        .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                              LIBAVUTIL_VERSION_MINOR,
                                              LIBAVUTIL_VERSION_MICRO),
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);
    if (err < 0)
        return err;

    if (debug_mode) {
        static const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
        inst_props.ppEnabledLayerNames = layers;
        inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
    }

    /* Try to create the instance */
    ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < inst_props.enabledExtensionCount; i++)
            av_free((void *)inst_props.ppEnabledExtensionNames[i]);
        av_free((void *)inst_props.ppEnabledExtensionNames);
        return AVERROR_EXTERNAL;
    }

    if (debug_mode) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };
        VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);

        pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                           hwctx->alloc, &p->debug_ctx);
    }

    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;

    return 0;
}
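
/* Instance-level debugging is driven by the "debug" option; a minimal sketch
 * (error handling omitted, "ref" is a caller-owned AVBufferRef pointer):
 *
 *     AVDictionary *opts = NULL;
 *     av_dict_set(&opts, "debug", "1", 0);
 *     av_hwdevice_ctx_create(&ref, AV_HWDEVICE_TYPE_VULKAN, NULL, opts, 0);
 *
 * This enables VK_LAYER_KHRONOS_validation and routes messenger output
 * through vk_dbg_callback() into the av_log() levels above. */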

typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    const char *name; /* Will use this second unless NULL */
    uint32_t pci_device; /* Will use this third unless 0x0 */
    uint32_t vendor_id; /* Last resort to find something deterministic */
    int index; /* Finally fall back to index */
} VulkanDeviceSelection;

static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}

/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_mallocz_array(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_mallocz_array(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vkGetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1)
        hwctx->phys_dev = devices[choice];

    av_free(devices);
    av_free(prop);
    av_free(idp);

    return err;
}

static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    float *weights;
    VkQueueFamilyProperties *qs = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int graph_index = -1, comp_index = -1, tx_index = -1;
    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;

    /* First get the number of queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
    if (!qs)
        return AVERROR(ENOMEM);

    /* Finally retrieve the queue families */
    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);

#define SEARCH_FLAGS(expr, out)                                                \
    for (int i = 0; i < num; i++) {                                            \
        const VkQueueFlagBits flags = qs[i].queueFlags;                        \
        if (expr) {                                                            \
            out = i;                                                           \
            break;                                                             \
        }                                                                      \
    }

    SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)

    SEARCH_FLAGS((flags &  VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
                 comp_index)

    SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
                 (i != comp_index), tx_index)

#undef SEARCH_FLAGS
#define ADD_QUEUE(fidx, graph, comp, tx)                                                 \
    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
           fidx, qs[fidx].queueCount, graph ? "graphics " : "",                          \
           comp ? "compute " : "", tx ? "transfers " : "");                              \
    av_log(ctx, AV_LOG_VERBOSE, "    QF %i flags: %s%s%s%s\n", fidx,                     \
           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "",         \
           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "",           \
           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "",        \
           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "");    \
    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx;                                \
    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount;                       \
    weights = av_malloc(qs[fidx].queueCount * sizeof(float));                            \
    pc[cd->queueCreateInfoCount].pQueuePriorities = weights;                             \
    if (!weights)                                                                        \
        goto fail;                                                                       \
    for (int i = 0; i < qs[fidx].queueCount; i++)                                        \
        weights[i] = 1.0f;                                                               \
    cd->queueCreateInfoCount++;

    ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
    hwctx->queue_family_index      = graph_index;
    hwctx->queue_family_comp_index = graph_index;
    hwctx->queue_family_tx_index   = graph_index;
    hwctx->nb_graphics_queues      = qs[graph_index].queueCount;

    if (comp_index != -1) {
        ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
        hwctx->queue_family_tx_index   = comp_index;
        hwctx->queue_family_comp_index = comp_index;
        hwctx->nb_comp_queues          = qs[comp_index].queueCount;
    }

    if (tx_index != -1) {
        ADD_QUEUE(tx_index, 0, 0, 1)
        hwctx->queue_family_tx_index = tx_index;
        hwctx->nb_tx_queues          = qs[tx_index].queueCount;
    }

#undef ADD_QUEUE
    av_free(qs);

    return 0;

fail:
    av_freep(&pc[0].pQueuePriorities);
    av_freep(&pc[1].pQueuePriorities);
    av_freep(&pc[2].pQueuePriorities);
    av_free(qs);

    return AVERROR(ENOMEM);
}
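
/* The result is one to three VkDeviceQueueCreateInfo entries: dedicated
 * compute/transfer families are used when present, otherwise the graphics
 * family serves those roles too, mirroring the fallback chain in
 * GET_QUEUE_COUNT above. */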

static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                           int queue_family_index, int num_queues)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex   = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = num_queues,
    };

    cmd->nb_queues = num_queues;

    /* Create command pool */
    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                              hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
    if (!cmd->bufs)
        return AVERROR(ENOMEM);

    cbuf_create.commandPool = cmd->pool;

    /* Allocate command buffer */
    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        av_freep(&cmd->bufs);
        return AVERROR_EXTERNAL;
    }

    cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
    if (!cmd->queues)
        return AVERROR(ENOMEM);

    for (int i = 0; i < num_queues; i++) {
        VulkanQueueCtx *q = &cmd->queues[i];
        vkGetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
        q->was_synchronous = 1;
    }

    return 0;
}
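
/* One command buffer and one VulkanQueueCtx fence/queue pair is set up per
 * queue in the family; submit_exec_ctx() below rotates through them so a new
 * submission only ever has to wait on the fence of the slot it reuses. */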

static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;

    if (cmd->queues) {
        for (int i = 0; i < cmd->nb_queues; i++) {
            VulkanQueueCtx *q = &cmd->queues[i];

            /* Make sure all queues have finished executing */
            if (q->fence && !q->was_synchronous) {
                vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
                vkResetFences(hwctx->act_dev, 1, &q->fence);
            }

            /* Free the fence */
            if (q->fence)
                vkDestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);

            /* Free buffer dependencies */
            for (int j = 0; j < q->nb_buf_deps; j++)
                av_buffer_unref(&q->buf_deps[j]);
            av_free(q->buf_deps);
        }
    }

    if (cmd->bufs)
        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs);
    if (cmd->pool)
        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);

    av_freep(&cmd->queues);
    av_freep(&cmd->bufs);
    cmd->pool = NULL;
}

static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    return cmd->bufs[cmd->cur_queue_idx];
}

static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    for (int j = 0; j < q->nb_buf_deps; j++)
        av_buffer_unref(&q->buf_deps[j]);
    q->nb_buf_deps = 0;
}

static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vkCreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
                            &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                   vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else if (!q->was_synchronous) {
        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &q->fence);
    }

    /* Discard queue dependencies */
    unref_exec_ctx_deps(hwfc, cmd);

    ret = vkBeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                                AVBufferRef * const *deps, int nb_deps)
{
    AVBufferRef **dst;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    if (!deps || !nb_deps)
        return 0;

    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst)
        goto err;

    q->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
        if (!q->buf_deps[q->nb_buf_deps])
            goto err;
        q->nb_buf_deps++;
    }

    return 0;

err:
    unref_exec_ctx_deps(hwfc, cmd);
    return AVERROR(ENOMEM);
}

static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                           VkSubmitInfo *s_info, int synchronous)
{
    VkResult ret;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    ret = vkEndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               vk_ret2str(ret));
        unref_exec_ctx_deps(hwfc, cmd);
        return AVERROR_EXTERNAL;
    }

    s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
    s_info->commandBufferCount = 1;

    ret = vkQueueSubmit(q->queue, 1, s_info, q->fence);
    if (ret != VK_SUCCESS) {
        unref_exec_ctx_deps(hwfc, cmd);
        return AVERROR_EXTERNAL;
    }

    q->was_synchronous = synchronous;

    if (synchronous) {
        AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &q->fence);
        unref_exec_ctx_deps(hwfc, cmd);
    } else { /* Rotate queues */
        cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
    }

    return 0;
}
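
/* For asynchronous submissions (synchronous == 0), any AVBufferRefs added
 * via add_buf_dep_exec_ctx() stay referenced until the fence of this queue
 * slot is next waited on, keeping the frame memory alive while the GPU may
 * still be using it. */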

static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    vkDestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx) {
        VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
        pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                            hwctx->alloc);
    }

    vkDestroyInstance(hwctx->inst, hwctx->alloc);

    for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++)
        av_free((void *)hwctx->enabled_inst_extensions[i]);
    av_free((void *)hwctx->enabled_inst_extensions);

    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++)
        av_free((void *)hwctx->enabled_dev_extensions[i]);
    av_free((void *)hwctx->enabled_dev_extensions);
}

static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VkPhysicalDeviceFeatures dev_features = { 0 };
    VkDeviceQueueCreateInfo queue_create_info[3] = {
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
    };

    VkDeviceCreateInfo dev_info = {
        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pNext                = &hwctx->device_features,
        .pQueueCreateInfos    = queue_create_info,
        .queueCreateInfoCount = 0,
    };

    hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    ctx->free = vulkan_device_free;

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts)))
        goto end;

    /* Find a device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    vkGetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);
#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;
    COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
    COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
    COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
    COPY_FEATURE(hwctx->device_features, shaderInt64)
#undef COPY_FEATURE

    /* Search queue family */
    if ((err = search_queue_families(ctx, &dev_info)))
        goto end;

    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, 0))) {
        av_free((void *)queue_create_info[0].pQueuePriorities);
        av_free((void *)queue_create_info[1].pQueuePriorities);
        av_free((void *)queue_create_info[2].pQueuePriorities);
        goto end;
    }

    ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                         &hwctx->act_dev);

    av_free((void *)queue_create_info[0].pQueuePriorities);
    av_free((void *)queue_create_info[1].pQueuePriorities);
    av_free((void *)queue_create_info[2].pQueuePriorities);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Tiled images setting, use them by default */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

end:
    return err;
}

static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    uint32_t queue_num;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                av_log(ctx, AV_LOG_VERBOSE, "Using device extension %s\n",
                       hwctx->enabled_dev_extensions[i]);
                p->extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    p->props.pNext = &p->hprops;
    p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;

    vkGetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
           p->props.properties.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %li\n",
           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %li\n",
           p->props.properties.limits.minMemoryMapAlignment);
    if (p->extensions & EXT_EXTERNAL_HOST_MEMORY)
        av_log(ctx, AV_LOG_VERBOSE, "    minImportedHostPointerAlignment:    %li\n",
               p->hprops.minImportedHostPointerAlignment);

    p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);

    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
    if (!queue_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

#define CHECK_QUEUE(type, n)                                                         \
if (n >= queue_num) {                                                                \
    av_log(ctx, AV_LOG_ERROR, "Invalid %s queue index %i (device has %i queues)!\n", \
           type, n, queue_num);                                                      \
    return AVERROR(EINVAL);                                                          \
}

    CHECK_QUEUE("graphics", hwctx->queue_family_index)
    CHECK_QUEUE("upload",   hwctx->queue_family_tx_index)
    CHECK_QUEUE("compute",  hwctx->queue_family_comp_index)

#undef CHECK_QUEUE

    p->qfs[p->num_qfs++] = hwctx->queue_family_index;
    if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index;
    if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) &&
        (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
        p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;

    /* Get device capabilities */
    vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    return 0;
}

static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}
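
/* The device string is first parsed as a decimal index and falls back to a
 * name substring match in find_device(), so both "0" and e.g. "GeForce" are
 * accepted. */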

static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx,
                                AVDictionary *opts, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. a non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch(src_ctx->type) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;

        const char *vendor = vaQueryVendorString(src_hwctx->display);
        if (!vendor) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
            return AVERROR_EXTERNAL;
        }

        if (strstr(vendor, "Intel"))
            dev_select.vendor_id = 0x8086;
        if (strstr(vendor, "AMD"))
            dev_select.vendor_id = 0x1002;

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
    case AV_HWDEVICE_TYPE_DRM: {
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        drmDevice *drm_dev_info;
        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        count += pixfmt_is_supported(hwctx, i, p->use_linear_images);

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        count++;
#endif

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        if (pixfmt_is_supported(hwctx, i, p->use_linear_images))
            constraints->valid_sw_formats[count++] = i;

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 0;
    constraints->min_height = 0;
    constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
    constraints->max_height = p->props.properties.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}

static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->internal->priv;
    AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
    VkMemoryAllocateInfo alloc_info = {
        .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext          = alloc_extension,
        .allocationSize = req->size,
    };

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        const VkMemoryType *type = &p->mprops.memoryTypes[i];

        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((type->propertyFlags & req_flags) != req_flags)
            continue;

        /* The memory type must be large enough */
        if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
                           dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
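
/* E.g. images meant to be mapped on the CPU are allocated with req_flags ==
 * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; since *mem_flags is OR'd with the full
 * property flags of the chosen type, callers can afterwards tell whether the
 * allocation also happens to be host-coherent. */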

static void vulkan_free_internal(AVVkFrameInternal *internal)
{
    if (!internal)
        return;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    av_free(internal);
}

static void vulkan_frame_free(void *opaque, uint8_t *data)
{
    AVVkFrame *f = (AVVkFrame *)data;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    vulkan_free_internal(f->internal);

    for (int i = 0; i < planes; i++) {
        vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    for (int i = 0; i < planes; i++) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        if (f->tiling == VK_IMAGE_TILING_LINEAR)
            req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                  p->props.properties.limits.minMemoryMapAlignment);

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[i];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1446                              use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
1447                              &f->flags, &f->mem[i])))
1448             return err;
1449
1450         f->size[i] = req.memoryRequirements.size;
1451         bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1452         bind_info[i].image  = f->img[i];
1453         bind_info[i].memory = f->mem[i];
1454     }
1455
1456     /* Bind the allocated memory to the images */
1457     ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info);
1458     if (ret != VK_SUCCESS) {
1459         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
1460                vk_ret2str(ret));
1461         return AVERROR_EXTERNAL;
1462     }
1463
1464     return 0;
1465 }
1466
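/* What prepare_frame() should transition a frame's images to:
 * PREP_MODE_WRITE           - TRANSFER_DST layout, ready to be uploaded into
 * PREP_MODE_RO_SHADER       - TRANSFER_SRC layout, for frames that will only
 *                             ever be read from (e.g. DMA-BUF imports)
 * PREP_MODE_EXTERNAL_EXPORT - GENERAL layout, with ownership transferred to
 *                             the external queue family for API interop */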
1467 enum PrepMode {
1468     PREP_MODE_WRITE,
1469     PREP_MODE_RO_SHADER,
1470     PREP_MODE_EXTERNAL_EXPORT,
1471 };
1472
1473 static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
1474                          AVVkFrame *frame, enum PrepMode pmode)
1475 {
1476     int err;
1477     uint32_t dst_qf;
1478     VkImageLayout new_layout;
1479     VkAccessFlags new_access;
1480     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1481
    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { { 0 } };
1483
1484     VkSubmitInfo s_info = {
1485         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
1486         .pSignalSemaphores    = frame->sem,
1487         .signalSemaphoreCount = planes,
1488     };
1489
1490     VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
1491     for (int i = 0; i < planes; i++)
1492         wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1493
1494     switch (pmode) {
1495     case PREP_MODE_WRITE:
1496         new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1497         new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
1498         dst_qf     = VK_QUEUE_FAMILY_IGNORED;
1499         break;
1500     case PREP_MODE_RO_SHADER:
1501         new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1502         new_access = VK_ACCESS_TRANSFER_READ_BIT;
1503         dst_qf     = VK_QUEUE_FAMILY_IGNORED;
1504         break;
1505     case PREP_MODE_EXTERNAL_EXPORT:
1506         new_layout = VK_IMAGE_LAYOUT_GENERAL;
1507         new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
1508         dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
1509         s_info.pWaitSemaphores = frame->sem;
1510         s_info.pWaitDstStageMask = wait_st;
1511         s_info.waitSemaphoreCount = planes;
1512         break;
1513     }
1514
1515     if ((err = wait_start_exec_ctx(hwfc, ectx)))
1516         return err;
1517
    /* Transition the image layouts to whatever the prep mode requests.
     * Submitting the barrier also signals the newly created semaphores,
     * making them usable for synchronization */
1521     for (int i = 0; i < planes; i++) {
1522         img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1523         img_bar[i].srcAccessMask = 0x0;
1524         img_bar[i].dstAccessMask = new_access;
1525         img_bar[i].oldLayout = frame->layout[i];
1526         img_bar[i].newLayout = new_layout;
1527         img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1528         img_bar[i].dstQueueFamilyIndex = dst_qf;
1529         img_bar[i].image = frame->img[i];
1530         img_bar[i].subresourceRange.levelCount = 1;
1531         img_bar[i].subresourceRange.layerCount = 1;
1532         img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1533
1534         frame->layout[i] = img_bar[i].newLayout;
1535         frame->access[i] = img_bar[i].dstAccessMask;
1536     }
1537
1538     vkCmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
1539                          VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1540                          VK_PIPELINE_STAGE_TRANSFER_BIT,
1541                          0, 0, NULL, 0, NULL, planes, img_bar);
1542
1543     return submit_exec_ctx(hwfc, ectx, &s_info, 0);
1544 }
1545
1546 static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
1547                                 int frame_w, int frame_h, int plane)
1548 {
1549     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
1550
1551     /* Currently always true unless gray + alpha support is added */
1552     if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
1553         !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
1554         *w = frame_w;
1555         *h = frame_h;
1556         return;
1557     }
1558
1559     *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
1560     *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
1561 }
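/* Worked example: NV12 has log2_chroma_w/h == 1, so at 1920x1080 plane 0
 * gets 1920x1080 and plane 1 gets 960x540; AV_CEIL_RSHIFT rounds odd sizes
 * up, so a 1919x1079 frame also gets a 960x540 chroma plane. */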
1562
1563 static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
1564                         VkImageTiling tiling, VkImageUsageFlagBits usage,
1565                         void *create_pnext)
1566 {
1567     int err;
1568     VkResult ret;
1569     AVHWDeviceContext *ctx = hwfc->device_ctx;
1570     VulkanDevicePriv *p = ctx->internal->priv;
1571     AVVulkanDeviceContext *hwctx = ctx->hwctx;
1572     enum AVPixelFormat format = hwfc->sw_format;
1573     const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
1574     const int planes = av_pix_fmt_count_planes(format);
1575
1576     VkExportSemaphoreCreateInfo ext_sem_info = {
1577         .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
1578         .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
1579     };
1580
1581     VkSemaphoreCreateInfo sem_spawn = {
1582         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
1583         .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
1584     };
1585
1586     AVVkFrame *f = av_vk_frame_alloc();
1587     if (!f) {
1588         av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
1589         return AVERROR(ENOMEM);
1590     }
1591
1592     /* Create the images */
1593     for (int i = 0; i < planes; i++) {
1594         VkImageCreateInfo create_info = {
1595             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1596             .pNext                 = create_pnext,
1597             .imageType             = VK_IMAGE_TYPE_2D,
1598             .format                = img_fmts[i],
1599             .extent.depth          = 1,
1600             .mipLevels             = 1,
1601             .arrayLayers           = 1,
1602             .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
1603             .tiling                = tiling,
1604             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
1605             .usage                 = usage,
1606             .samples               = VK_SAMPLE_COUNT_1_BIT,
1607             .pQueueFamilyIndices   = p->qfs,
1608             .queueFamilyIndexCount = p->num_qfs,
1609             .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
1610                                                       VK_SHARING_MODE_EXCLUSIVE,
1611         };
1612
        /* get_plane_wh() takes int pointers, while VkExtent3D stores uint32_t */
        int p_w, p_h;
        get_plane_wh(&p_w, &p_h, format, hwfc->width, hwfc->height, i);
        create_info.extent.width  = p_w;
        create_info.extent.height = p_h;
1615
1616         ret = vkCreateImage(hwctx->act_dev, &create_info,
1617                             hwctx->alloc, &f->img[i]);
1618         if (ret != VK_SUCCESS) {
1619             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
1620                    vk_ret2str(ret));
1621             err = AVERROR(EINVAL);
1622             goto fail;
1623         }
1624
1625         /* Create semaphore */
1626         ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
1627                                 hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }
1633
1634         f->layout[i] = create_info.initialLayout;
1635         f->access[i] = 0x0;
1636     }
1637
1638     f->flags     = 0x0;
1639     f->tiling    = tiling;
1640
1641     *frame = f;
1642     return 0;
1643
1644 fail:
1645     vulkan_frame_free(hwfc, (uint8_t *)f);
1646     return err;
1647 }
1648
1649 /* Checks if an export flag is enabled, and if it is ORs it with *iexp */
1650 static void try_export_flags(AVHWFramesContext *hwfc,
1651                              VkExternalMemoryHandleTypeFlags *comp_handle_types,
1652                              VkExternalMemoryHandleTypeFlagBits *iexp,
1653                              VkExternalMemoryHandleTypeFlagBits exp)
1654 {
1655     VkResult ret;
1656     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1657     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
1658     VkExternalImageFormatProperties eprops = {
1659         .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
1660     };
1661     VkImageFormatProperties2 props = {
1662         .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
1663         .pNext = &eprops,
1664     };
1665     VkPhysicalDeviceExternalImageFormatInfo enext = {
1666         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
1667         .handleType = exp,
1668     };
1669     VkPhysicalDeviceImageFormatInfo2 pinfo = {
1670         .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
1671         .pNext  = !exp ? NULL : &enext,
1672         .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
1673         .type   = VK_IMAGE_TYPE_2D,
1674         .tiling = hwctx->tiling,
1675         .usage  = hwctx->usage,
1676         .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
1677     };
1678
1679     ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
1680                                                     &pinfo, &props);
1681     if (ret == VK_SUCCESS) {
1682         *iexp |= exp;
1683         *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
1684     }
1685 }
1686
1687 static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
1688 {
1689     int err;
1690     AVVkFrame *f;
1691     AVBufferRef *avbuf = NULL;
1692     AVHWFramesContext *hwfc = opaque;
1693     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1694     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1695     VulkanFramesPriv *fp = hwfc->internal->priv;
1696     VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
1697     VkExternalMemoryHandleTypeFlags e = 0x0;
1698
1699     VkExternalMemoryImageCreateInfo eiinfo = {
1700         .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
1701         .pNext       = hwctx->create_pnext,
1702     };
1703
1704     if (p->extensions & EXT_EXTERNAL_FD_MEMORY)
1705         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1706                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
1707
1708     if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
1709         try_export_flags(hwfc, &eiinfo.handleTypes, &e,
1710                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1711
1712     for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
1713         eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
1714         eminfo[i].pNext       = hwctx->alloc_pnext[i];
1715         eminfo[i].handleTypes = e;
1716     }
1717
1718     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1719                        eiinfo.handleTypes ? &eiinfo : NULL);
1720     if (err)
1721         return NULL;
1722
1723     err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
1724     if (err)
1725         goto fail;
1726
1727     err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
1728     if (err)
1729         goto fail;
1730
1731     avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
1732                              vulkan_frame_free, hwfc, 0);
1733     if (!avbuf)
1734         goto fail;
1735
1736     return avbuf;
1737
1738 fail:
1739     vulkan_frame_free(hwfc, (uint8_t *)f);
1740     return NULL;
1741 }
1742
1743 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
1744 {
1745     VulkanFramesPriv *fp = hwfc->internal->priv;
1746
1747     free_exec_ctx(hwfc, &fp->conv_ctx);
1748     free_exec_ctx(hwfc, &fp->upload_ctx);
1749     free_exec_ctx(hwfc, &fp->download_ctx);
1750 }
1751
1752 static int vulkan_frames_init(AVHWFramesContext *hwfc)
1753 {
1754     int err;
1755     AVVkFrame *f;
1756     AVVulkanFramesContext *hwctx = hwfc->hwctx;
1757     VulkanFramesPriv *fp = hwfc->internal->priv;
1758     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
1759     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
1760
1761     /* Default pool flags */
    if (!hwctx->tiling)
        hwctx->tiling = p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
                                               VK_IMAGE_TILING_OPTIMAL;
1764
1765     if (!hwctx->usage)
1766         hwctx->usage = DEFAULT_USAGE_FLAGS;
1767
1768     err = create_exec_ctx(hwfc, &fp->conv_ctx,
1769                           dev_hwctx->queue_family_comp_index,
1770                           GET_QUEUE_COUNT(dev_hwctx, 0, 1, 0));
1771     if (err)
1772         return err;
1773
1774     err = create_exec_ctx(hwfc, &fp->upload_ctx,
1775                           dev_hwctx->queue_family_tx_index,
1776                           GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
1777     if (err)
1778         return err;
1779
1780     err = create_exec_ctx(hwfc, &fp->download_ctx,
1781                           dev_hwctx->queue_family_tx_index, 1);
1782     if (err)
1783         return err;
1784
1785     /* Test to see if allocation will fail */
1786     err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
1787                        hwctx->create_pnext);
1788     if (err)
1789         return err;
1790
1791     vulkan_frame_free(hwfc, (uint8_t *)f);
1792
    /* If the user did not specify a pool, hwfc->pool will be set to the
     * internal one in hwcontext.c just after this gets called */
1795     if (!hwfc->pool) {
1796         hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
1797                                                              hwfc, vulkan_pool_alloc,
1798                                                              NULL);
1799         if (!hwfc->internal->pool_internal)
1800             return AVERROR(ENOMEM);
1801     }
1802
1803     return 0;
1804 }
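/* A rough sketch of how user code reaches the init path above (the locals
 * are illustrative, the API calls are the standard hwcontext ones):
 *
 *     AVBufferRef *frames_ref = av_hwframe_ctx_alloc(vk_device_ref);
 *     AVHWFramesContext *fc = (AVHWFramesContext *)frames_ref->data;
 *     fc->format    = AV_PIX_FMT_VULKAN;
 *     fc->sw_format = AV_PIX_FMT_NV12;
 *     fc->width     = 1920;
 *     fc->height    = 1080;
 *     int ret = av_hwframe_ctx_init(frames_ref); // calls vulkan_frames_init()
 */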
1805
1806 static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
1807 {
1808     frame->buf[0] = av_buffer_pool_get(hwfc->pool);
1809     if (!frame->buf[0])
1810         return AVERROR(ENOMEM);
1811
1812     frame->data[0] = frame->buf[0]->data;
1813     frame->format  = AV_PIX_FMT_VULKAN;
1814     frame->width   = hwfc->width;
1815     frame->height  = hwfc->height;
1816
1817     return 0;
1818 }
1819
1820 static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
1821                                        enum AVHWFrameTransferDirection dir,
1822                                        enum AVPixelFormat **formats)
1823 {
1824     enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
1825     if (!fmts)
1826         return AVERROR(ENOMEM);
1827
1828     fmts[0] = hwfc->sw_format;
1829     fmts[1] = AV_PIX_FMT_NONE;
1830
1831     *formats = fmts;
1832     return 0;
1833 }
1834
1835 typedef struct VulkanMapping {
1836     AVVkFrame *frame;
1837     int flags;
1838 } VulkanMapping;
1839
1840 static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
1841 {
1842     VulkanMapping *map = hwmap->priv;
1843     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1844     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1845
1846     /* Check if buffer needs flushing */
1847     if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
1848         !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1849         VkResult ret;
1850         VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
1851
1852         for (int i = 0; i < planes; i++) {
1853             flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1854             flush_ranges[i].memory = map->frame->mem[i];
1855             flush_ranges[i].size   = VK_WHOLE_SIZE;
1856         }
1857
1858         ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
1859                                         flush_ranges);
1860         if (ret != VK_SUCCESS) {
1861             av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
1862                    vk_ret2str(ret));
1863         }
1864     }
1865
1866     for (int i = 0; i < planes; i++)
1867         vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
1868
1869     av_free(map);
1870 }
1871
1872 static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
1873                                    const AVFrame *src, int flags)
1874 {
1875     VkResult ret;
1876     int err, mapped_mem_count = 0;
1877     AVVkFrame *f = (AVVkFrame *)src->data[0];
1878     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1879     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1880
    VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
    if (!map)
        return AVERROR(ENOMEM);
1884
1885     if (src->format != AV_PIX_FMT_VULKAN) {
1886         av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
1887                av_get_pix_fmt_name(src->format));
1888         err = AVERROR(EINVAL);
1889         goto fail;
1890     }
1891
    if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
        (f->tiling != VK_IMAGE_TILING_LINEAR)) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, the memory must be "
               "host-visible and the image linearly tiled!\n");
1896         err = AVERROR(EINVAL);
1897         goto fail;
1898     }
1899
1900     dst->width  = src->width;
1901     dst->height = src->height;
1902
1903     for (int i = 0; i < planes; i++) {
1904         ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
1905                           VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
1906         if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
                   vk_ret2str(ret));
1909             err = AVERROR_EXTERNAL;
1910             goto fail;
1911         }
1912         mapped_mem_count++;
1913     }
1914
1915     /* Check if the memory contents matter */
1916     if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
1917         !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1918         VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
1919         for (int i = 0; i < planes; i++) {
1920             map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
1921             map_mem_ranges[i].size = VK_WHOLE_SIZE;
1922             map_mem_ranges[i].memory = f->mem[i];
1923         }
1924
1925         ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes,
1926                                              map_mem_ranges);
1927         if (ret != VK_SUCCESS) {
1928             av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
1929                    vk_ret2str(ret));
1930             err = AVERROR_EXTERNAL;
1931             goto fail;
1932         }
1933     }
1934
1935     for (int i = 0; i < planes; i++) {
1936         VkImageSubresource sub = {
1937             .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1938         };
1939         VkSubresourceLayout layout;
1940         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
1941         dst->linesize[i] = layout.rowPitch;
1942     }
1943
1944     map->frame = f;
1945     map->flags = flags;
1946
1947     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
1948                                 &vulkan_unmap_frame, map);
1949     if (err < 0)
1950         goto fail;
1951
1952     return 0;
1953
1954 fail:
1955     for (int i = 0; i < mapped_mem_count; i++)
1956         vkUnmapMemory(hwctx->act_dev, f->mem[i]);
1957
1958     av_free(map);
1959     return err;
1960 }
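/* Note on the path above: mapping only works for frames that are both
 * host-visible and linearly tiled (e.g. on devices created with the
 * linear_images option). Coherence for non-coherent memory is handled
 * manually: vkInvalidateMappedMemoryRanges() on map when the contents are to
 * be read, vkFlushMappedMemoryRanges() on unmap when they were written. */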
1961
1962 #if CONFIG_LIBDRM
1963 static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
1964 {
1965     VulkanMapping *map = hwmap->priv;
1966     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
1967     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
1968
1969     for (int i = 0; i < planes; i++) {
1970         vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
1971         vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
1972         vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
1973     }
1974
1975     av_freep(&map->frame);
1976 }
1977
1978 static const struct {
1979     uint32_t drm_fourcc;
1980     VkFormat vk_format;
1981 } vulkan_drm_format_map[] = {
1982     { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
1983     { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
1984     { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
1985     { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
1986     { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
1987     { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
1988     { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
1989     { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
1990     { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
1991     { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
1992 };
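/* Only single-plane DRM formats appear here: multi-plane DRM frames are
 * described layer by layer, and each layer gets its own single-plane
 * VkImage using one of the formats above. */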
1993
1994 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
1995 {
1996     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
1997         if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
1998             return vulkan_drm_format_map[i].vk_format;
1999     return VK_FORMAT_UNDEFINED;
2000 }
2001
2002 static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
2003                                           AVDRMFrameDescriptor *desc)
2004 {
2005     int err = 0;
2006     VkResult ret;
2007     AVVkFrame *f;
2008     int bind_counts = 0;
2009     AVHWDeviceContext *ctx = hwfc->device_ctx;
2010     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2011     VulkanDevicePriv *p = ctx->internal->priv;
2012     VulkanFramesPriv *fp = hwfc->internal->priv;
2013     AVVulkanFramesContext *frames_hwctx = hwfc->hwctx;
2014     const int has_modifiers = !!(p->extensions & EXT_DRM_MODIFIER_FLAGS);
    VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { { 0 } };
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
    VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { { 0 } };
2018     VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
2019
2020     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
2021
2022     for (int i = 0; i < desc->nb_layers; i++) {
2023         if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
2024             av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
2025                    desc->layers[i].format);
2026             return AVERROR(EINVAL);
2027         }
2028     }
2029
2030     if (!(f = av_vk_frame_alloc())) {
2031         av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
2032         err = AVERROR(ENOMEM);
2033         goto fail;
2034     }
2035
2036     f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
2037                 desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
2038                 VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
2039
2040     for (int i = 0; i < desc->nb_layers; i++) {
2041         const int planes = desc->layers[i].nb_planes;
2042         VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
2043             .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
2044             .drmFormatModifier = desc->objects[0].format_modifier,
2045             .drmFormatModifierPlaneCount = planes,
2046             .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data,
2047         };
2048
2049         VkExternalMemoryImageCreateInfo einfo = {
2050             .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2051             .pNext       = has_modifiers ? &drm_info : NULL,
2052             .handleTypes = htype,
2053         };
2054
2055         VkSemaphoreCreateInfo sem_spawn = {
2056             .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2057         };
2058
2059         VkImageCreateInfo create_info = {
2060             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2061             .pNext                 = &einfo,
2062             .imageType             = VK_IMAGE_TYPE_2D,
2063             .format                = drm_to_vulkan_fmt(desc->layers[i].format),
2064             .extent.depth          = 1,
2065             .mipLevels             = 1,
2066             .arrayLayers           = 1,
2067             .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
2068             .tiling                = f->tiling,
2069             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
2070             .usage                 = frames_hwctx->usage,
2071             .samples               = VK_SAMPLE_COUNT_1_BIT,
2072             .pQueueFamilyIndices   = p->qfs,
2073             .queueFamilyIndexCount = p->num_qfs,
2074             .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2075                                                       VK_SHARING_MODE_EXCLUSIVE,
2076         };
2077
        /* get_plane_wh() takes int pointers, while VkExtent3D stores uint32_t */
        int p_w, p_h;
        get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
        create_info.extent.width  = p_w;
        create_info.extent.height = p_h;
2080
2081         for (int j = 0; j < planes; j++) {
2082             plane_data[j].offset     = desc->layers[i].planes[j].offset;
2083             plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
            plane_data[j].size       = 0; /* The spec says so for all three of these */
2085             plane_data[j].arrayPitch = 0;
2086             plane_data[j].depthPitch = 0;
2087         }
2088
2089         /* Create image */
2090         ret = vkCreateImage(hwctx->act_dev, &create_info,
2091                             hwctx->alloc, &f->img[i]);
2092         if (ret != VK_SUCCESS) {
2093             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
2094                    vk_ret2str(ret));
2095             err = AVERROR(EINVAL);
2096             goto fail;
2097         }
2098
2099         ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
2100                                 hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }
2106
        /* Ideally we'd import a foreign semaphore via vkImportSemaphoreFdKHR,
         * but unfortunately neither DRM nor VAAPI offer us anything we could
         * import and synchronize with, so instead we just signal the
         * semaphore we created ourselves. */
2111
2112         f->layout[i] = create_info.initialLayout;
2113         f->access[i] = 0x0;
2114     }
2115
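    /* Import each DRM object (dmabuf) backing the frame as VkDeviceMemory.
     * The descriptor's FD is dup()'d because a successful import transfers
     * ownership of the passed FD to the Vulkan implementation, while the
     * source AVDRMFrameDescriptor still owns the original. */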
2116     for (int i = 0; i < desc->nb_objects; i++) {
2117         int use_ded_mem = 0;
2118         VkMemoryFdPropertiesKHR fdmp = {
2119             .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
2120         };
2121         VkMemoryRequirements req = {
2122             .size = desc->objects[i].size,
2123         };
2124         VkImportMemoryFdInfoKHR idesc = {
2125             .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
2126             .handleType = htype,
2127             .fd         = dup(desc->objects[i].fd),
2128         };
2129         VkMemoryDedicatedAllocateInfo ded_alloc = {
2130             .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
2131             .pNext = &idesc,
2132         };
2133
2134         ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
2135                                              idesc.fd, &fdmp);
2136         if (ret != VK_SUCCESS) {
2137             av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
2138                    vk_ret2str(ret));
2139             err = AVERROR_EXTERNAL;
2140             close(idesc.fd);
2141             goto fail;
2142         }
2143
2144         req.memoryTypeBits = fdmp.memoryTypeBits;
2145
        /* Dedicated allocation only makes sense if there's a one-to-one mapping
         * between images and the memory backing them, so only check for it in
         * that case. */
2149         if (desc->nb_layers == desc->nb_objects) {
2150             VkImageMemoryRequirementsInfo2 req_desc = {
2151                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
2152                 .image = f->img[i],
2153             };
2154             VkMemoryDedicatedRequirements ded_req = {
2155                 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
2156             };
2157             VkMemoryRequirements2 req2 = {
2158                 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2159                 .pNext = &ded_req,
2160             };
2161
2162             vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);
2163
2164             use_ded_mem = ded_req.prefersDedicatedAllocation |
2165                           ded_req.requiresDedicatedAllocation;
2166             if (use_ded_mem)
2167                 ded_alloc.image = f->img[i];
2168         }
2169
2170         err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
2171                         use_ded_mem ? &ded_alloc : ded_alloc.pNext,
2172                         &f->flags, &f->mem[i]);
2173         if (err) {
2174             close(idesc.fd);
2175             return err;
2176         }
2177
2178         f->size[i] = desc->objects[i].size;
2179     }
2180
2181     for (int i = 0; i < desc->nb_layers; i++) {
2182         const int planes = desc->layers[i].nb_planes;
2183         const int signal_p = has_modifiers && (planes > 1);
2184         for (int j = 0; j < planes; j++) {
2185             VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2186                                            j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
2187                                                     VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
2188
2189             plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
2190             plane_info[bind_counts].planeAspect = aspect;
2191
2192             bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
2193             bind_info[bind_counts].pNext  = signal_p ? &plane_info[bind_counts] : NULL;
2194             bind_info[bind_counts].image  = f->img[i];
2195             bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
2196             bind_info[bind_counts].memoryOffset = desc->layers[i].planes[j].offset;
2197             bind_counts++;
2198         }
2199     }
2200
2201     /* Bind the allocated memory to the images */
2202     ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
2203     if (ret != VK_SUCCESS) {
2204         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
2205                vk_ret2str(ret));
2206         return AVERROR_EXTERNAL;
2207     }
2208
    /* NOTE: This becomes completely unnecessary once we can import semaphores
     * from DRM. Until then, we have to signal the semaphores ourselves.
     * We're reusing the exec context that's also used for uploads/downloads. */
2212     err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_RO_SHADER);
2213     if (err)
2214         goto fail;
2215
2216     *frame = f;
2217
2218     return 0;
2219
2220 fail:
2221     for (int i = 0; i < desc->nb_layers; i++) {
2222         vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
2223         vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
2224     }
2225     for (int i = 0; i < desc->nb_objects; i++)
2226         vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
2227
2228     av_free(f);
2229
2230     return err;
2231 }
2232
2233 static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
2234                                const AVFrame *src, int flags)
2235 {
2236     int err = 0;
2237     AVVkFrame *f;
2238     VulkanMapping *map = NULL;
2239
2240     err = vulkan_map_from_drm_frame_desc(hwfc, &f,
2241                                          (AVDRMFrameDescriptor *)src->data[0]);
2242     if (err)
2243         return err;
2244
2245     /* The unmapping function will free this */
2246     dst->data[0] = (uint8_t *)f;
2247     dst->width   = src->width;
2248     dst->height  = src->height;
2249
    map = av_mallocz(sizeof(VulkanMapping));
    if (!map) {
        err = AVERROR(ENOMEM);
        goto fail;
    }
2253
2254     map->frame = f;
2255     map->flags = flags;
2256
2257     err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
2258                                 &vulkan_unmap_from, map);
2259     if (err < 0)
2260         goto fail;
2261
2262     av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
2263
2264     return 0;
2265
2266 fail:
    vulkan_frame_free(hwfc, (uint8_t *)f); /* opaque must be the AVHWFramesContext */
2268     av_free(map);
2269     return err;
2270 }
2271
2272 #if CONFIG_VAAPI
2273 static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
2274                                  AVFrame *dst, const AVFrame *src,
2275                                  int flags)
2276 {
2277     int err;
2278     AVFrame *tmp = av_frame_alloc();
2279     AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2280     AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
2281     VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];
2282
2283     if (!tmp)
2284         return AVERROR(ENOMEM);
2285
    /* We have to sync manually since, as the comment above notes, there are
     * no semaphores we could import and wait on */
2287     vaSyncSurface(vaapi_ctx->display, surface_id);
2288
2289     tmp->format = AV_PIX_FMT_DRM_PRIME;
2290
2291     err = av_hwframe_map(tmp, src, flags);
2292     if (err < 0)
2293         goto fail;
2294
2295     err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
2296     if (err < 0)
2297         goto fail;
2298
2299     err = ff_hwframe_map_replace(dst, src);
2300
2301 fail:
2302     av_frame_free(&tmp);
2303     return err;
2304 }
2305 #endif
2306 #endif
2307
2308 #if CONFIG_CUDA
2309 static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
2310                                  AVBufferRef *cuda_hwfc,
2311                                  const AVFrame *frame)
2312 {
2313     int err;
2314     VkResult ret;
2315     AVVkFrame *dst_f;
2316     AVVkFrameInternal *dst_int;
2317     AVHWDeviceContext *ctx = hwfc->device_ctx;
2318     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2319     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2320     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2321     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
2322     VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR);
2323
2324     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
2325     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2326     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2327     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2328     CudaFunctions *cu = cu_internal->cuda_dl;
2329     CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
2330                                                      CU_AD_FORMAT_UNSIGNED_INT8;
2331
2332     dst_f = (AVVkFrame *)frame->data[0];
2333
2334     dst_int = dst_f->internal;
2335     if (!dst_int || !dst_int->cuda_fc_ref) {
2336         if (!dst_f->internal)
2337             dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
2338
2339         if (!dst_int) {
2340             err = AVERROR(ENOMEM);
2341             goto fail;
2342         }
2343
2344         dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
2345         if (!dst_int->cuda_fc_ref) {
2346             err = AVERROR(ENOMEM);
2347             goto fail;
2348         }
2349
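        /* Per-plane import chain: export the plane's VkDeviceMemory as an
         * opaque FD, import that as CUDA external memory, get a mipmapped
         * array view of it and keep level 0 as the CUarray to copy through.
         * The plane's semaphore is exported and imported the same way. */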
2350         for (int i = 0; i < planes; i++) {
2351             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
2352                 .offset = 0,
2353                 .arrayDesc = {
2354                     .Depth = 0,
2355                     .Format = cufmt,
2356                     .NumChannels = 1 + ((planes == 2) && i),
2357                     .Flags = 0,
2358                 },
2359                 .numLevels = 1,
2360             };
2361             CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
2362                 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
2363                 .size = dst_f->size[i],
2364             };
2365             VkMemoryGetFdInfoKHR export_info = {
2366                 .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2367                 .memory     = dst_f->mem[i],
2368                 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
2369             };
2370             VkSemaphoreGetFdInfoKHR sem_export = {
2371                 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
2372                 .semaphore = dst_f->sem[i],
2373                 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2374             };
2375             CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
2376                 .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
2377             };
2378
2379             int p_w, p_h;
2380             get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
2381
2382             tex_desc.arrayDesc.Width = p_w;
2383             tex_desc.arrayDesc.Height = p_h;
2384
2385             ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
2386                                        &ext_desc.handle.fd);
2387             if (ret != VK_SUCCESS) {
2388                 av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
2389                 err = AVERROR_EXTERNAL;
2390                 goto fail;
2391             }
2392
2393             ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
2394             if (ret < 0) {
2395                 err = AVERROR_EXTERNAL;
2396                 goto fail;
2397             }
2398
2399             ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
2400                                                                        dst_int->ext_mem[i],
2401                                                                        &tex_desc));
2402             if (ret < 0) {
2403                 err = AVERROR_EXTERNAL;
2404                 goto fail;
2405             }
2406
2407             ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
2408                                                         dst_int->cu_mma[i], 0));
2409             if (ret < 0) {
2410                 err = AVERROR_EXTERNAL;
2411                 goto fail;
2412             }
2413
2414             ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
2415                                           &ext_sem_desc.handle.fd);
2416             if (ret != VK_SUCCESS) {
2417                 av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
2418                        vk_ret2str(ret));
2419                 err = AVERROR_EXTERNAL;
2420                 goto fail;
2421             }
2422
2423             ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
2424                                                          &ext_sem_desc));
2425             if (ret < 0) {
2426                 err = AVERROR_EXTERNAL;
2427                 goto fail;
2428             }
2429         }
2430     }
2431
2432     return 0;
2433
2434 fail:
2435     return err;
2436 }
2437
2438 static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
2439                                           AVFrame *dst, const AVFrame *src)
2440 {
2441     int err;
2442     VkResult ret;
2443     CUcontext dummy;
2444     AVVkFrame *dst_f;
2445     AVVkFrameInternal *dst_int;
2446     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2447     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
2448
2449     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
2450     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
2451     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
2452     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
2453     CudaFunctions *cu = cu_internal->cuda_dl;
2454     CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
2455     CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
2456
2457     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
2458     if (ret < 0)
2459         return AVERROR_EXTERNAL;
2460
2461     dst_f = (AVVkFrame *)dst->data[0];
2462
    err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }
2468
2469     dst_int = dst_f->internal;
2470
2471     ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
2472                                                      planes, cuda_dev->stream));
2473     if (ret < 0) {
2474         err = AVERROR_EXTERNAL;
2475         goto fail;
2476     }
2477
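    /* Copy plane by plane from CUDA device memory into the imported arrays;
     * WidthInBytes is derived from comp[i].step, so interleaved semi-planar
     * chroma (e.g. NV12's UV plane) copies its full byte width. */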
2478     for (int i = 0; i < planes; i++) {
2479         CUDA_MEMCPY2D cpy = {
2480             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
2481             .srcDevice     = (CUdeviceptr)src->data[i],
2482             .srcPitch      = src->linesize[i],
2483             .srcY          = 0,
2484
2485             .dstMemoryType = CU_MEMORYTYPE_ARRAY,
2486             .dstArray      = dst_int->cu_array[i],
2487         };
2488
2489         int p_w, p_h;
2490         get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
2491
2492         cpy.WidthInBytes = p_w * desc->comp[i].step;
2493         cpy.Height = p_h;
2494
2495         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
2496         if (ret < 0) {
2497             err = AVERROR_EXTERNAL;
2498             goto fail;
2499         }
2500     }
2501
2502     ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
2503                                                        planes, cuda_dev->stream));
2504     if (ret < 0) {
2505         err = AVERROR_EXTERNAL;
2506         goto fail;
2507     }
2508
2509     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2510
    av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");
2512
2513     return 0;
2514
2515 fail:
2516     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
2517     vulkan_free_internal(dst_int);
2518     dst_f->internal = NULL;
2519     av_buffer_unref(&dst->buf[0]);
2520     return err;
2521 }
2522 #endif
2523
2524 static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
2525                          const AVFrame *src, int flags)
2526 {
2527     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2528
2529     switch (src->format) {
2530 #if CONFIG_LIBDRM
2531 #if CONFIG_VAAPI
2532     case AV_PIX_FMT_VAAPI:
2533         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2534             return vulkan_map_from_vaapi(hwfc, dst, src, flags);
2535 #endif
2536     case AV_PIX_FMT_DRM_PRIME:
2537         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2538             return vulkan_map_from_drm(hwfc, dst, src, flags);
2539 #endif
2540     default:
2541         return AVERROR(ENOSYS);
2542     }
2543 }
2544
2545 #if CONFIG_LIBDRM
2546 typedef struct VulkanDRMMapping {
2547     AVDRMFrameDescriptor drm_desc;
2548     AVVkFrame *source;
2549 } VulkanDRMMapping;
2550
2551 static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
2552 {
2553     AVDRMFrameDescriptor *drm_desc = hwmap->priv;
2554
2555     for (int i = 0; i < drm_desc->nb_objects; i++)
2556         close(drm_desc->objects[i].fd);
2557
2558     av_free(drm_desc);
2559 }
2560
2561 static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
2562 {
2563     for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
2564         if (vulkan_drm_format_map[i].vk_format == vkfmt)
2565             return vulkan_drm_format_map[i].drm_fourcc;
2566     return DRM_FORMAT_INVALID;
2567 }
2568
2569 static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
2570                              const AVFrame *src, int flags)
2571 {
2572     int err = 0;
2573     VkResult ret;
2574     AVVkFrame *f = (AVVkFrame *)src->data[0];
2575     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2576     VulkanFramesPriv *fp = hwfc->internal->priv;
2577     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
2578     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
2579     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
2580     VkImageDrmFormatModifierPropertiesEXT drm_mod = {
2581         .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
2582     };
2583
2584     AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
2585     if (!drm_desc)
2586         return AVERROR(ENOMEM);
2587
    err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        goto end;
2595
2596     if (p->extensions & EXT_DRM_MODIFIER_FLAGS) {
2597         VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT);
2598         ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
2599                                                            &drm_mod);
2600         if (ret != VK_SUCCESS) {
2601             av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
2602             err = AVERROR_EXTERNAL;
2603             goto end;
2604         }
2605     }
2606
2607     for (int i = 0; (i < planes) && (f->mem[i]); i++) {
2608         VkMemoryGetFdInfoKHR export_info = {
2609             .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
2610             .memory     = f->mem[i],
2611             .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2612         };
2613
2614         ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
2615                                    &drm_desc->objects[i].fd);
2616         if (ret != VK_SUCCESS) {
2617             av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
2618             err = AVERROR_EXTERNAL;
2619             goto end;
2620         }
2621
2622         drm_desc->nb_objects++;
2623         drm_desc->objects[i].size = f->size[i];
2624         drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
2625     }
2626
2627     drm_desc->nb_layers = planes;
2628     for (int i = 0; i < drm_desc->nb_layers; i++) {
2629         VkSubresourceLayout layout;
2630         VkImageSubresource sub = {
2631             .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ?
2632                           VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2633                           VK_IMAGE_ASPECT_COLOR_BIT,
2634         };
2635         VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];
2636
2637         drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
2638         drm_desc->layers[i].nb_planes = 1;
2639
2640         if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
2641             av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
2642             err = AVERROR_PATCHWELCOME;
2643             goto end;
2644         }
2645
2646         drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
2647
2648         if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
2649             continue;
2650
2651         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
2652         drm_desc->layers[i].planes[0].offset       = layout.offset;
2653         drm_desc->layers[i].planes[0].pitch        = layout.rowPitch;
2654     }
2655
    /* Attach the unmap callback only once the descriptor is fully built, so
     * until then the error path below remains the sole owner of drm_desc
     * and its FDs */
    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
                                &vulkan_unmap_to_drm, drm_desc);
    if (err < 0)
        goto end;

    dst->width   = src->width;
    dst->height  = src->height;
    dst->data[0] = (uint8_t *)drm_desc;
2659
2660     av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");
2661
2662     return 0;
2663
end:
    for (int i = 0; i < drm_desc->nb_objects; i++)
        close(drm_desc->objects[i].fd);
    av_free(drm_desc);
    return err;
2667 }
2668
2669 #if CONFIG_VAAPI
2670 static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
2671                                const AVFrame *src, int flags)
2672 {
2673     int err;
2674     AVFrame *tmp = av_frame_alloc();
2675     if (!tmp)
2676         return AVERROR(ENOMEM);
2677
2678     tmp->format = AV_PIX_FMT_DRM_PRIME;
2679
2680     err = vulkan_map_to_drm(hwfc, tmp, src, flags);
2681     if (err < 0)
2682         goto fail;
2683
2684     err = av_hwframe_map(dst, tmp, flags);
2685     if (err < 0)
2686         goto fail;
2687
2688     err = ff_hwframe_map_replace(dst, src);
2689
2690 fail:
2691     av_frame_free(&tmp);
2692     return err;
2693 }
2694 #endif
2695 #endif
2696
2697 static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
2698                            const AVFrame *src, int flags)
2699 {
2700     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
2701
2702     switch (dst->format) {
2703 #if CONFIG_LIBDRM
2704     case AV_PIX_FMT_DRM_PRIME:
2705         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2706             return vulkan_map_to_drm(hwfc, dst, src, flags);
2707 #if CONFIG_VAAPI
2708     case AV_PIX_FMT_VAAPI:
2709         if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY)
2710             return vulkan_map_to_vaapi(hwfc, dst, src, flags);
2711 #endif
2712 #endif
2713     default:
2714         return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
2715     }
2716 }
2717
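/* Staging buffer wrapper for image uploads/downloads. mapped_mem marks
 * buffers created around imported host memory (see host_mapped in
 * create_buf() below), which map_buffers() must not vkMapMemory() again. */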
2718 typedef struct ImageBuffer {
2719     VkBuffer buf;
2720     VkDeviceMemory mem;
2721     VkMemoryPropertyFlagBits flags;
2722     int mapped_mem;
2723 } ImageBuffer;
2724
2725 static void free_buf(void *opaque, uint8_t *data)
2726 {
2727     AVHWDeviceContext *ctx = opaque;
2728     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2729     ImageBuffer *vkbuf = (ImageBuffer *)data;
2730
2731     if (vkbuf->buf)
2732         vkDestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc);
2733     if (vkbuf->mem)
2734         vkFreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc);
2735
2736     av_free(data);
2737 }
2738
2739 static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
2740 {
2741     size_t size;
2742     *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
2743     size = height*(*stride);
2744     size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment);
2745     return size;
2746 }
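/* Worked example: a 1921 pixel wide, 8-bit gray plane on a device whose
 * optimalBufferCopyRowPitchAlignment is 128 gets its stride padded from
 * 1921 to 2048 bytes; with a height of 1080 the buffer size is then
 * 2048*1080 bytes, rounded up once more to minMemoryMapAlignment. */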
2747
2748 static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
2749                       VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
2750                       size_t size, uint32_t req_memory_bits, int host_mapped,
2751                       void *create_pnext, void *alloc_pnext)
2752 {
2753     int err;
2754     VkResult ret;
2755     int use_ded_mem;
2756     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2757
2758     VkBufferCreateInfo buf_spawn = {
2759         .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2760         .pNext       = create_pnext,
2761         .usage       = usage,
2762         .size        = size,
2763         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2764     };
2765
2766     VkBufferMemoryRequirementsInfo2 req_desc = {
2767         .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
2768     };
2769     VkMemoryDedicatedAllocateInfo ded_alloc = {
2770         .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
2771         .pNext = alloc_pnext,
2772     };
2773     VkMemoryDedicatedRequirements ded_req = {
2774         .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
2775     };
2776     VkMemoryRequirements2 req = {
2777         .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2778         .pNext = &ded_req,
2779     };
2780
2781     ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf));
2782     if (!vkbuf)
2783         return AVERROR(ENOMEM);
2784
2785     vkbuf->mapped_mem = host_mapped;
2786
    ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkbuf->buf);
2788     if (ret != VK_SUCCESS) {
2789         av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
2790                vk_ret2str(ret));
2791         err = AVERROR_EXTERNAL;
2792         goto fail;
2793     }
2794
2795     req_desc.buffer = vkbuf->buf;
2796
2797     vkGetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
2798
2799     /* In case the implementation prefers/requires dedicated allocation */
2800     use_ded_mem = ded_req.prefersDedicatedAllocation |
2801                   ded_req.requiresDedicatedAllocation;
2802     if (use_ded_mem)
2803         ded_alloc.buffer = vkbuf->buf;
2804
2805     /* Additional requirements imposed on us */
2806     if (req_memory_bits)
2807         req.memoryRequirements.memoryTypeBits &= req_memory_bits;
2808
2809     err = alloc_mem(ctx, &req.memoryRequirements, flags,
2810                     use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
2811                     &vkbuf->flags, &vkbuf->mem);
2812     if (err)
2813         goto fail;
2814
2815     ret = vkBindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0);
2816     if (ret != VK_SUCCESS) {
2817         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
2818                vk_ret2str(ret));
2819         err = AVERROR_EXTERNAL;
2820         goto fail;
2821     }
2822
2823     *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0);
2824     if (!(*buf)) {
2825         err = AVERROR(ENOMEM);
2826         goto fail;
2827     }
2828
2829     return 0;
2830
2831 fail:
2832     free_buf(ctx, (uint8_t *)vkbuf);
2833     return err;
2834 }
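/* A minimal sketch (compiled-out) of allocating a host-visible staging buffer
 * with the helper above; "dev_ctx" and "example_create_staging" are
 * hypothetical, and dev_ctx is assumed to be a valid Vulkan device context.
 * No extra memory-type requirements or pNext chains are passed. */
#if 0
static int example_create_staging(AVHWDeviceContext *dev_ctx, AVBufferRef **buf,
                                  size_t size)
{
    return create_buf(dev_ctx, buf,
                      VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                      size,
                      0,    /* no required memory type bits */
                      0,    /* not backed by an imported host pointer */
                      NULL, /* create_pnext */
                      NULL  /* alloc_pnext */);
}
#endif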
2835
2836 /* Skips mapping of host-mapped buffers but still invalidates them */
2837 static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[],
2838                        int nb_buffers, int invalidate)
2839 {
2840     VkResult ret;
2841     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2842     VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
2843     int invalidate_count = 0;
2844
2845     for (int i = 0; i < nb_buffers; i++) {
2846         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
2847         if (vkbuf->mapped_mem)
2848             continue;
2849
2850         ret = vkMapMemory(hwctx->act_dev, vkbuf->mem, 0,
2851                           VK_WHOLE_SIZE, 0, (void **)&mem[i]);
2852         if (ret != VK_SUCCESS) {
2853             av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
2854                    vk_ret2str(ret));
2855             return AVERROR_EXTERNAL;
2856         }
2857     }
2858
2859     if (!invalidate)
2860         return 0;
2861
2862     for (int i = 0; i < nb_buffers; i++) {
2863         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
2864         const VkMappedMemoryRange ival_buf = {
2865             .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
2866             .memory = vkbuf->mem,
2867             .size   = VK_WHOLE_SIZE,
2868         };
2869
2870         /* For host-imported memory Vulkan says to use platform-defined
2871          * sync methods, but doesn't really say not to call flush or invalidate
2872          * on the original host pointers. It does explicitly allow doing that on
2873          * host-mapped pointers which are then mapped again using vkMapMemory,
2874          * and known implementations return the original pointers when mapped
2875          * again. */
2876         if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
2877             continue;
2878
2879         invalidate_ctx[invalidate_count++] = ival_buf;
2880     }
2881
2882     if (invalidate_count) {
2883         ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
2884                                              invalidate_ctx);
2885         if (ret != VK_SUCCESS)
2886             av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
2887                    vk_ret2str(ret));
2888     }
2889
2890     return 0;
2891 }
2892
2893 static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
2894                          int nb_buffers, int flush)
2895 {
2896     int err = 0;
2897     VkResult ret;
2898     AVVulkanDeviceContext *hwctx = ctx->hwctx;
2899     VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
2900     int flush_count = 0;
2901
2902     if (flush) {
2903         for (int i = 0; i < nb_buffers; i++) {
2904             ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
2905             const VkMappedMemoryRange flush_buf = {
2906                 .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
2907                 .memory = vkbuf->mem,
2908                 .size   = VK_WHOLE_SIZE,
2909             };
2910
2911             if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
2912                 continue;
2913
2914             flush_ctx[flush_count++] = flush_buf;
2915         }
2916     }
2917
2918     if (flush_count) {
2919         ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
2920         if (ret != VK_SUCCESS) {
2921             av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
2922                     vk_ret2str(ret));
2923             err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
2924         }
2925     }
2926
2927     for (int i = 0; i < nb_buffers; i++) {
2928         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
2929         if (vkbuf->mapped_mem)
2930             continue;
2931
2932         vkUnmapMemory(hwctx->act_dev, vkbuf->mem);
2933     }
2934
2935     return err;
2936 }
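/* A compiled-out sketch of the map/write/flush cycle using the two helpers
 * above on a single buffer from create_buf(); "example_fill_buffer" is a
 * hypothetical name, and "src"/"size" are assumed to describe valid host
 * memory that fits in the buffer. */
#if 0
static int example_fill_buffer(AVHWDeviceContext *dev_ctx, AVBufferRef *buf,
                               const uint8_t *src, size_t size)
{
    uint8_t *mem[AV_NUM_DATA_POINTERS];
    int err = map_buffers(dev_ctx, &buf, mem, 1, 0 /* no invalidate needed */);
    if (err)
        return err;

    memcpy(mem[0], src, size);

    /* Flush on unmap so non-coherent memory becomes visible to the device */
    return unmap_buffers(dev_ctx, &buf, 1, 1);
}
#endif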
2937
2938 static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
2939                               AVBufferRef **bufs, size_t *buf_offsets,
2940                               const int *buf_stride, int w,
2941                               int h, enum AVPixelFormat pix_fmt, int to_buf)
2942 {
2943     int err;
2944     AVVkFrame *frame = (AVVkFrame *)f->data[0];
2945     VulkanFramesPriv *fp = hwfc->internal->priv;
2946
2947     int bar_num = 0;
2948     VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
2949
2950     const int planes = av_pix_fmt_count_planes(pix_fmt);
2951     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
2952
2953     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
2954     VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx;
2955     VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx);
2956
2957     VkSubmitInfo s_info = {
2958         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
2959         .pSignalSemaphores    = frame->sem,
2960         .pWaitSemaphores      = frame->sem,
2961         .pWaitDstStageMask    = sem_wait_dst,
2962         .signalSemaphoreCount = planes,
2963         .waitSemaphoreCount   = planes,
2964     };
2965
2966     if ((err = wait_start_exec_ctx(hwfc, ectx)))
2967         return err;
2968
2969     /* Change the image layout to something more optimal for transfers */
2970     for (int i = 0; i < planes; i++) {
2971         VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
2972                                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
2973         VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
2974                                             VK_ACCESS_TRANSFER_WRITE_BIT;
2975
2976         sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
2977
2978         /* If the layout matches and we have read access skip the barrier */
2979         if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
2980             continue;
2981
2982         img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
2983         img_bar[bar_num].srcAccessMask = 0x0;
2984         img_bar[bar_num].dstAccessMask = new_access;
2985         img_bar[bar_num].oldLayout = frame->layout[i];
2986         img_bar[bar_num].newLayout = new_layout;
2987         img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2988         img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
2989         img_bar[bar_num].image = frame->img[i];
2990         img_bar[bar_num].subresourceRange.levelCount = 1;
2991         img_bar[bar_num].subresourceRange.layerCount = 1;
2992         img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
2993
2994         frame->layout[i] = img_bar[bar_num].newLayout;
2995         frame->access[i] = img_bar[bar_num].dstAccessMask;
2996
2997         bar_num++;
2998     }
2999
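    /* Record all the planes' layout transitions as a single batched barrier,
     * synchronized against the subsequent transfer stage */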
3000     if (bar_num)
3001         vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
3002                              VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
3003                              0, NULL, 0, NULL, bar_num, img_bar);
3004
3005     /* Schedule a copy for each plane */
3006     for (int i = 0; i < planes; i++) {
3007         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
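        /* bufferRowLength and bufferImageHeight are measured in texels, not
         * bytes, hence the byte stride is divided by the texel step below */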
3008         VkBufferImageCopy buf_reg = {
3009             .bufferOffset = buf_offsets[i],
3010             .bufferRowLength = buf_stride[i] / desc->comp[i].step,
3011             .imageSubresource.layerCount = 1,
3012             .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
3013             .imageOffset = { 0, 0, 0, },
3014         };
3015
3016         int p_w, p_h;
3017         get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);
3018
3019         buf_reg.bufferImageHeight = p_h;
3020         buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
3021
3022         if (to_buf)
3023             vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
3024                                    vkbuf->buf, 1, &buf_reg);
3025         else
3026             vkCmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
3027                                    frame->layout[i], 1, &buf_reg);
3028     }
3029
3030     /* When uploading, if the source is refcounted, do this asynchronously by
3031      * keeping the buffers as a submission dependency.
3032      * The hwcontext is guaranteed not to be freed until all frames are freed
3033      * in the frames_uninit function.
3034      * When downloading to a buffer, do this synchronously and wait for the
3035      * queue submission to finish executing */
3036     if (!to_buf) {
3037         int ref;
3038         for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
3039             if (!f->buf[ref])
3040                 break;
3041             if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
3042                 return err;
3043         }
3044         if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
3045             return err;
3046         return submit_exec_ctx(hwfc, ectx, &s_info, !ref);
3047     } else {
3048         return submit_exec_ctx(hwfc, ectx, &s_info, 1);
3049     }
3050 }
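/* Note: every submission above waits on, and then re-signals, each plane's
 * semaphore, so image transfers are serialized against any other queued work
 * that uses the same frame. */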
3051
3052 static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
3053                                 const AVFrame *swf, int from)
3054 {
3055     int err = 0;
3056     VkResult ret;
3057     AVVkFrame *f = (AVVkFrame *)vkf->data[0];
3058     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
3059     AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
3060     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
3061
3062     AVFrame tmp;
3063     AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
3064     size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };
3065
3066     int p_w, p_h;
3067     const int planes = av_pix_fmt_count_planes(swf->format);
3068
3069     int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
3070     const int map_host = !!(p->extensions & EXT_EXTERNAL_HOST_MEMORY);
3071
3072     VK_LOAD_PFN(hwctx->inst, vkGetMemoryHostPointerPropertiesEXT);
3073
3074     if (swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format)) {
3075         av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
3076         return AVERROR(EINVAL);
3077     }
3078
3079     if (swf->width > hwfc->width || swf->height > hwfc->height)
3080         return AVERROR(EINVAL);
3081
3082     /* For linear, host-visible images */
3083     if (f->tiling == VK_IMAGE_TILING_LINEAR &&
3084         f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
3085         AVFrame *map = av_frame_alloc();
3086         if (!map)
3087             return AVERROR(ENOMEM);
3088         map->format = swf->format;
3089
3090         err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE);
3091         if (err) {
            av_frame_free(&map); /* don't leak the mapping frame on error */
3092             return err;
        }
3093
3094         err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf);
3095         av_frame_free(&map);
3096         return err;
3097     }
3098
3099     /* Create buffers */
3100     for (int i = 0; i < planes; i++) {
3101         size_t req_size;
3102
3103         VkExternalMemoryBufferCreateInfo create_desc = {
3104             .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
3105             .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3106         };
3107
3108         VkImportMemoryHostPointerInfoEXT import_desc = {
3109             .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
3110             .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3111         };
3112
3113         VkMemoryHostPointerPropertiesEXT p_props = {
3114             .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
3115         };
3116
3117         get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
3118
3119         tmp.linesize[i] = FFABS(swf->linesize[i]);
3120
3121         /* Do not import host memory for planes with a negative stride */
3122         if (map_host && swf->linesize[i] > 0) {
3123             size_t offs;
3124             offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
3125             import_desc.pHostPointer = swf->data[i] - offs;
3126
3127             /* We have to compensate for the few extra bytes of padding we
3128              * completely ignore at the start */
3129             req_size = FFALIGN(offs + tmp.linesize[i] * p_h,
3130                                p->hprops.minImportedHostPointerAlignment);
3131
3132             ret = pfn_vkGetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
3133                                                           import_desc.handleType,
3134                                                           import_desc.pHostPointer,
3135                                                           &p_props);
3136
3137             if (ret == VK_SUCCESS) {
3138                 host_mapped[i] = 1;
3139                 buf_offsets[i] = offs;
3140             }
3141         }
3142
3143         if (!host_mapped[i])
3144             req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);
3145
3146         err = create_buf(dev_ctx, &bufs[i],
3147                          from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
3148                                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
3149                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
3150                          req_size, p_props.memoryTypeBits, host_mapped[i],
3151                          host_mapped[i] ? &create_desc : NULL,
3152                          host_mapped[i] ? &import_desc : NULL);
3153         if (err)
3154             goto end;
3155     }
3156
3157     if (!from) {
3158         /* Map, copy the software frame into the buffers, unmap */
3159         if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
3160             goto end;
3161
3162         for (int i = 0; i < planes; i++) {
3163             if (host_mapped[i])
3164                 continue;
3165
3166             get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
3167
3168             av_image_copy_plane(tmp.data[i], tmp.linesize[i],
3169                                 (const uint8_t *)swf->data[i], swf->linesize[i],
3170                                 FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
3171                                 p_h);
3172         }
3173
3174         if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
3175             goto end;
3176     }
3177
3178     /* Copy buffers into/from image */
3179     err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
3180                              swf->width, swf->height, swf->format, from);
    if (err)
        goto end;
3181
3182     if (from) {
3183         /* Map, copy the buffers back into the software frame, unmap */
3184         if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
3185             goto end;
3186
3187         for (int i = 0; i < planes; i++) {
3188             if (host_mapped[i])
3189                 continue;
3190
3191             get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
3192
3193             av_image_copy_plane(swf->data[i], swf->linesize[i],
3194                                 (const uint8_t *)tmp.data[i], tmp.linesize[i],
3195                                 FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
3196                                 p_h);
3197         }
3198
3199         if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
3200             goto end;
3201     }
3202
3203 end:
3204     for (int i = 0; i < planes; i++)
3205         av_buffer_unref(&bufs[i]);
3206
3207     return err;
3208 }
3209
3210 static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
3211                                    const AVFrame *src)
3212 {
3213     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
3214
3215     switch (src->format) {
3216 #if CONFIG_CUDA
3217     case AV_PIX_FMT_CUDA:
3218         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
3219             (p->extensions & EXT_EXTERNAL_FD_SEM))
3220             return vulkan_transfer_data_from_cuda(hwfc, dst, src);
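        /* no break: without both FD extensions we fall through and reject
         * below, since src->hw_frames_ctx is set for CUDA frames */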
3221 #endif
3222     default:
3223         if (src->hw_frames_ctx)
3224             return AVERROR(ENOSYS);
3225         else
3226             return vulkan_transfer_data(hwfc, dst, src, 0);
3227     }
3228 }
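/* A compiled-out sketch of the public entry point that lands in
 * vulkan_transfer_data_to() above; "example_upload" is a hypothetical name,
 * and "vk_frame" is assumed to come from av_hwframe_get_buffer() on a Vulkan
 * frames context. */
#if 0
static int example_upload(AVFrame *vk_frame, const AVFrame *sw_frame)
{
    /* Uploads sw_frame (e.g. NV12 in host memory) into vk_frame */
    return av_hwframe_transfer_data(vk_frame, sw_frame, 0);
}
#endif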
3229
3230 #if CONFIG_CUDA
3231 static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
3232                                         const AVFrame *src)
3233 {
3234     int err;
3235     VkResult ret;
3236     CUcontext dummy;
3237     AVVkFrame *dst_f;
3238     AVVkFrameInternal *dst_int;
3239     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
3240     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
3241
3242     AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
3243     AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
3244     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
3245     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
3246     CudaFunctions *cu = cu_internal->cuda_dl;
3247
3248     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
3249     if (ret < 0)
3250         return AVERROR_EXTERNAL;
3251
3252     dst_f = (AVVkFrame *)src->data[0]; /* despite the name, the source (Vulkan) frame */
3253
3254     err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
3255     if (err < 0) {
3256         CHECK_CU(cu->cuCtxPopCurrent(&dummy));
3257         return err;
3258     }
3259
3260     dst_int = dst_f->internal;
3261
3262     for (int i = 0; i < planes; i++) {
3263         CUDA_MEMCPY2D cpy = {
3264             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
3265             .dstDevice     = (CUdeviceptr)dst->data[i],
3266             .dstPitch      = dst->linesize[i],
3267             .dstY          = 0,
3268
3269             .srcMemoryType = CU_MEMORYTYPE_ARRAY,
3270             .srcArray      = dst_int->cu_array[i],
3271         };
3272
3273         int w, h;
3274         get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);
3275
3276         cpy.WidthInBytes = w * desc->comp[i].step;
3277         cpy.Height = h;
3278
3279         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
3280         if (ret < 0) {
3281             err = AVERROR_EXTERNAL;
3282             goto fail;
3283         }
3284     }
3285
3286     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
3287
3288     av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n");
3289
3290     return 0;
3291
3292 fail:
3293     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
3294     vulkan_free_internal(dst_int);
3295     dst_f->internal = NULL;
3296     av_buffer_unref(&dst->buf[0]);
3297     return err;
3298 }
3299 #endif
3300
3301 static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
3302                                      const AVFrame *src)
3303 {
3304     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
3305
3306     switch (dst->format) {
3307 #if CONFIG_CUDA
3308     case AV_PIX_FMT_CUDA:
3309         if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) &&
3310             (p->extensions & EXT_EXTERNAL_FD_SEM))
3311             return vulkan_transfer_data_to_cuda(hwfc, dst, src);
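        /* no break: fall through to the rejection below when the FD
         * extensions are unavailable (dst->hw_frames_ctx is set) */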
3312 #endif
3313     default:
3314         if (dst->hw_frames_ctx)
3315             return AVERROR(ENOSYS);
3316         else
3317             return vulkan_transfer_data(hwfc, src, dst, 1);
3318     }
3319 }
3320
3321 static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
3322                                    AVHWFramesContext *src_fc, int flags)
3323 {
3324     return vulkan_frames_init(dst_fc);
3325 }
3326
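/* Returns a zeroed AVVkFrame; users should allocate the struct through this
 * helper rather than via sizeof(), so it can be extended without breaking
 * ABI. Must be freed with av_free(). */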
3327 AVVkFrame *av_vk_frame_alloc(void)
3328 {
3329     return av_mallocz(sizeof(AVVkFrame));
3330 }
3331
3332 const HWContextType ff_hwcontext_type_vulkan = {
3333     .type                   = AV_HWDEVICE_TYPE_VULKAN,
3334     .name                   = "Vulkan",
3335
3336     .device_hwctx_size      = sizeof(AVVulkanDeviceContext),
3337     .device_priv_size       = sizeof(VulkanDevicePriv),
3338     .frames_hwctx_size      = sizeof(AVVulkanFramesContext),
3339     .frames_priv_size       = sizeof(VulkanFramesPriv),
3340
3341     .device_init            = &vulkan_device_init,
3342     .device_create          = &vulkan_device_create,
3343     .device_derive          = &vulkan_device_derive,
3344
3345     .frames_get_constraints = &vulkan_frames_get_constraints,
3346     .frames_init            = vulkan_frames_init,
3347     .frames_get_buffer      = vulkan_get_buffer,
3348     .frames_uninit          = vulkan_frames_uninit,
3349
3350     .transfer_get_formats   = vulkan_transfer_get_formats,
3351     .transfer_data_to       = vulkan_transfer_data_to,
3352     .transfer_data_from     = vulkan_transfer_data_from,
3353
3354     .map_to                 = vulkan_map_to,
3355     .map_from               = vulkan_map_from,
3356     .frames_derive_to       = &vulkan_frames_derive_to,
3357
3358     .pix_fmts = (const enum AVPixelFormat []) {
3359         AV_PIX_FMT_VULKAN,
3360         AV_PIX_FMT_NONE
3361     },
3362 };