git.sesse.net Git - ffmpeg/blobdiff - libavutil/hwcontext_vulkan.c
hwcontext_vulkan: don't error on unavailable user-specified extensions
[ffmpeg] / libavutil / hwcontext_vulkan.c
index b62d0a6c6c421e7e33ff5f4b8b7e94ec509224d0..e3df428049de21bf828d27ea752c6d0b652e1386 100644 (file)
@@ -53,6 +53,10 @@ typedef struct VulkanDevicePriv {
     VkPhysicalDeviceProperties props;
     VkPhysicalDeviceMemoryProperties mprops;
 
+    /* Queues */
+    uint32_t qfs[3];
+    int num_qfs;
+
     /* Debug callback */
     VkDebugUtilsMessengerEXT debug_ctx;
 
@@ -81,7 +85,7 @@ typedef struct AVVkFrameInternal {
     CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
     CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
     CUarray cu_array[AV_NUM_DATA_POINTERS];
-    CUexternalSemaphore cu_sem;
+    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
 #endif
 } AVVkFrameInternal;
 
@@ -370,10 +374,8 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                 av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
                 ADD_VAL_TO_LIST(extension_names, extensions_found, token);
             } else {
-                av_log(ctx, AV_LOG_ERROR, "%s extension \"%s\" not found!\n",
+                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                        mod, token);
-                err = AVERROR(EINVAL);
-                goto fail;
             }
             token = av_strtok(NULL, "+", &save);
         }
@@ -615,6 +617,7 @@ end:
 static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
 {
     uint32_t num;
+    float *weights;
     VkQueueFamilyProperties *qs = NULL;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     int graph_index = -1, comp_index = -1, tx_index = -1;
@@ -653,41 +656,53 @@ static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
                  (i != comp_index), tx_index)
 
 #undef SEARCH_FLAGS
-#define QF_FLAGS(flags)                                                        \
-    ((flags) & VK_QUEUE_GRAPHICS_BIT      ) ? "(graphics) " : "",              \
-    ((flags) & VK_QUEUE_COMPUTE_BIT       ) ? "(compute) "  : "",              \
-    ((flags) & VK_QUEUE_TRANSFER_BIT      ) ? "(transfer) " : "",              \
-    ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) "   : ""
-
-    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, "
-           "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags));
-
+#define ADD_QUEUE(fidx, graph, comp, tx)                                                 \
+    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \
+           fidx, qs[fidx].queueCount, graph ? "graphics " : "",                          \
+           comp ? "compute " : "", tx ? "transfers " : "");                              \
+    av_log(ctx, AV_LOG_VERBOSE, "    QF %i flags: %s%s%s%s\n", fidx,                     \
+           ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "",         \
+           ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "",           \
+           ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "",        \
+           ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "");    \
+    pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx;                                \
+    pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount;                       \
+    weights = av_malloc(qs[fidx].queueCount * sizeof(float));                            \
+    pc[cd->queueCreateInfoCount].pQueuePriorities = weights;                             \
+    if (!weights)                                                                        \
+        goto fail;                                                                       \
+    for (int i = 0; i < qs[fidx].queueCount; i++)                                        \
+        weights[i] = 1.0f;                                                               \
+    cd->queueCreateInfoCount++;
+
+    ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0)
     hwctx->queue_family_index      = graph_index;
-    hwctx->queue_family_tx_index   = graph_index;
     hwctx->queue_family_comp_index = graph_index;
-
-    pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index;
+    hwctx->queue_family_tx_index   = graph_index;
 
     if (comp_index != -1) {
-        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, "
-               "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags));
-        hwctx->queue_family_tx_index                    = comp_index;
-        hwctx->queue_family_comp_index                  = comp_index;
-        pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index;
+        ADD_QUEUE(comp_index, 0, 1, tx_index < 0)
+        hwctx->queue_family_tx_index   = comp_index;
+        hwctx->queue_family_comp_index = comp_index;
     }
 
     if (tx_index != -1) {
-        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, "
-               "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags));
-        hwctx->queue_family_tx_index                    = tx_index;
-        pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index;
+        ADD_QUEUE(tx_index, 0, 0, 1)
+        hwctx->queue_family_tx_index = tx_index;
     }
 
-#undef QF_FLAGS
-
+#undef ADD_QUEUE
     av_free(qs);
 
     return 0;
+
+fail:
+    av_freep(&pc[0].pQueuePriorities);
+    av_freep(&pc[1].pQueuePriorities);
+    av_freep(&pc[2].pQueuePriorities);
+    av_free(qs);
+
+    return AVERROR(ENOMEM);
 }
 
 static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
@@ -790,15 +805,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     VulkanDevicePriv *p = ctx->internal->priv;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VkDeviceQueueCreateInfo queue_create_info[3] = {
-        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-            .pQueuePriorities = (float []){ 1.0f },
-            .queueCount       = 1, },
-        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-            .pQueuePriorities = (float []){ 1.0f },
-            .queueCount       = 1, },
-        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-            .pQueuePriorities = (float []){ 1.0f },
-            .queueCount       = 1, },
+        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
+        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
+        { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, },
     };
 
     VkDeviceCreateInfo dev_info = {
@@ -832,12 +841,20 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
         goto end;
 
     if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
-                                &dev_info.enabledExtensionCount, 0)))
+                                &dev_info.enabledExtensionCount, 0))) {
+        av_free((void *)queue_create_info[0].pQueuePriorities);
+        av_free((void *)queue_create_info[1].pQueuePriorities);
+        av_free((void *)queue_create_info[2].pQueuePriorities);
         goto end;
+    }
 
     ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                          &hwctx->act_dev);
 
+    av_free((void *)queue_create_info[0].pQueuePriorities);
+    av_free((void *)queue_create_info[1].pQueuePriorities);
+    av_free((void *)queue_create_info[2].pQueuePriorities);
+
     if (ret != VK_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
                vk_ret2str(ret));
@@ -897,6 +914,14 @@ if (n >= queue_num) {
 
 #undef CHECK_QUEUE
 
+    p->qfs[p->num_qfs++] = hwctx->queue_family_index;
+    if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) &&
+        (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index))
+        p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index;
+    if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) &&
+        (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index))
+        p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;
+
     /* Create exec context - if there's something invalid this will error out */
     err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
     if (err)
@@ -1111,10 +1136,9 @@ static void vulkan_free_internal(AVVkFrameInternal *internal)
         AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
         CudaFunctions *cu = cu_internal->cuda_dl;
 
-        if (internal->cu_sem)
-            CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem));
-
         for (int i = 0; i < planes; i++) {
+            if (internal->cu_sem[i])
+                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
             if (internal->cu_mma[i])
                 CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
             if (internal->ext_mem[i])
@@ -1140,10 +1164,9 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
     for (int i = 0; i < planes; i++) {
         vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
         vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
     }
 
-    vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
-
     av_free(f);
 }
 
@@ -1213,12 +1236,14 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
 enum PrepMode {
     PREP_MODE_WRITE,
     PREP_MODE_RO_SHADER,
+    PREP_MODE_EXTERNAL_EXPORT,
 };
 
 static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                          AVVkFrame *frame, enum PrepMode pmode)
 {
     VkResult ret;
+    uint32_t dst_qf;
     VkImageLayout new_layout;
     VkAccessFlags new_access;
     AVHWDeviceContext *ctx = hwfc->device_ctx;
@@ -1237,18 +1262,32 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         .commandBufferCount   = 1,
         .pCommandBuffers      = &ectx->buf,
 
-        .pSignalSemaphores    = &frame->sem,
-        .signalSemaphoreCount = 1,
+        .pSignalSemaphores    = frame->sem,
+        .signalSemaphoreCount = planes,
     };
 
+    VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+    for (int i = 0; i < planes; i++)
+        wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+
     switch (pmode) {
     case PREP_MODE_WRITE:
         new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
         new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
         break;
     case PREP_MODE_RO_SHADER:
         new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
         new_access = VK_ACCESS_TRANSFER_READ_BIT;
+        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
+        break;
+    case PREP_MODE_EXTERNAL_EXPORT:
+        new_layout = VK_IMAGE_LAYOUT_GENERAL;
+        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
+        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
+        s_info.pWaitSemaphores = frame->sem;
+        s_info.pWaitDstStageMask = wait_st;
+        s_info.waitSemaphoreCount = planes;
         break;
     }
 
@@ -1266,7 +1305,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         img_bar[i].oldLayout = frame->layout[i];
         img_bar[i].newLayout = new_layout;
         img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        img_bar[i].dstQueueFamilyIndex = dst_qf;
         img_bar[i].image = frame->img[i];
         img_bar[i].subresourceRange.levelCount = 1;
         img_bar[i].subresourceRange.layerCount = 1;
@@ -1333,21 +1372,24 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
         const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
 
         VkImageCreateInfo image_create_info = {
-            .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
-            .pNext         = create_pnext,
-            .imageType     = VK_IMAGE_TYPE_2D,
-            .format        = img_fmts[i],
-            .extent.width  = p_w,
-            .extent.height = p_h,
-            .extent.depth  = 1,
-            .mipLevels     = 1,
-            .arrayLayers   = 1,
-            .flags         = VK_IMAGE_CREATE_ALIAS_BIT,
-            .tiling        = tiling,
-            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
-            .usage         = usage,
-            .sharingMode   = VK_SHARING_MODE_EXCLUSIVE,
-            .samples       = VK_SAMPLE_COUNT_1_BIT,
+            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+            .pNext                 = create_pnext,
+            .imageType             = VK_IMAGE_TYPE_2D,
+            .format                = img_fmts[i],
+            .extent.width          = p_w,
+            .extent.height         = p_h,
+            .extent.depth          = 1,
+            .mipLevels             = 1,
+            .arrayLayers           = 1,
+            .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
+            .tiling                = tiling,
+            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
+            .usage                 = usage,
+            .samples               = VK_SAMPLE_COUNT_1_BIT,
+            .pQueueFamilyIndices   = p->qfs,
+            .queueFamilyIndexCount = p->num_qfs,
+            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+                                                      VK_SHARING_MODE_EXCLUSIVE,
         };
 
         ret = vkCreateImage(hwctx->act_dev, &image_create_info,
@@ -1359,19 +1401,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
             goto fail;
         }
 
+        /* Create semaphore */
+        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
+                                hwctx->alloc, &f->sem[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+                   vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+
         f->layout[i] = image_create_info.initialLayout;
         f->access[i] = 0x0;
     }
 
-    /* Create semaphore */
-    ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
-                            hwctx->alloc, &f->sem);
-    if (ret != VK_SUCCESS) {
-        av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
-               vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
-    }
-
     f->flags     = 0x0;
     f->tiling    = tiling;
 
@@ -1693,10 +1735,9 @@ static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
     for (int i = 0; i < planes; i++) {
         vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
         vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
+        vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
     }
 
-    vkDestroySemaphore(hwctx->act_dev, map->frame->sem, hwctx->alloc);
-
     av_freep(&map->frame);
 }
 
@@ -1740,9 +1781,6 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
     VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
     VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
     VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
-    VkSemaphoreCreateInfo sem_spawn = {
-        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
-    };
 
     VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
 
@@ -1816,26 +1854,32 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
             .handleTypes = htype,
         };
 
+        VkSemaphoreCreateInfo sem_spawn = {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        };
+
         const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
         const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
 
         VkImageCreateInfo image_create_info = {
-            .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
-            .pNext         = &einfo,
-            .imageType     = VK_IMAGE_TYPE_2D,
-            .format        = drm_to_vulkan_fmt(desc->layers[i].format),
-            .extent.width  = p_w,
-            .extent.height = p_h,
-            .extent.depth  = 1,
-            .mipLevels     = 1,
-            .arrayLayers   = 1,
-            .flags         = VK_IMAGE_CREATE_ALIAS_BIT |
-                             (signal_p ? VK_IMAGE_CREATE_DISJOINT_BIT : 0x0),
-            .tiling        = f->tiling,
-            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
-            .usage         = DEFAULT_USAGE_FLAGS,
-            .sharingMode   = VK_SHARING_MODE_EXCLUSIVE,
-            .samples       = VK_SAMPLE_COUNT_1_BIT,
+            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+            .pNext                 = &einfo,
+            .imageType             = VK_IMAGE_TYPE_2D,
+            .format                = drm_to_vulkan_fmt(desc->layers[i].format),
+            .extent.width          = p_w,
+            .extent.height         = p_h,
+            .extent.depth          = 1,
+            .mipLevels             = 1,
+            .arrayLayers           = 1,
+            .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
+            .tiling                = f->tiling,
+            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
+            .usage                 = DEFAULT_USAGE_FLAGS,
+            .samples               = VK_SAMPLE_COUNT_1_BIT,
+            .pQueueFamilyIndices   = p->qfs,
+            .queueFamilyIndexCount = p->num_qfs,
+            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+                                                      VK_SHARING_MODE_EXCLUSIVE,
         };
 
         for (int j = 0; j < planes; j++) {
@@ -1856,6 +1900,19 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
             goto fail;
         }
 
+        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
+                                hwctx->alloc, &f->sem[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+                   vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+
+        /* We'd import a semaphore onto the one we created using
+         * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
+         * offer us anything we could import and sync with, so instead
+         * just signal the semaphore we created. */
+
         f->layout[i] = image_create_info.initialLayout;
         f->access[i] = 0x0;
 
@@ -1876,19 +1933,6 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
         }
     }
 
-    ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
-                            hwctx->alloc, &f->sem);
-    if (ret != VK_SUCCESS) {
-        av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
-               vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
-    }
-
-    /* We'd import a semaphore onto the one we created using
-     * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
-     * offer us anything we could import and sync with, so instead
-     * just signal the semaphore we created. */
-
     /* Bind the allocated memory to the images */
     ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
     if (ret != VK_SUCCESS) {
@@ -1909,11 +1953,12 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
     return 0;
 
 fail:
-    for (int i = 0; i < desc->nb_layers; i++)
+    for (int i = 0; i < desc->nb_layers; i++) {
         vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
+        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
+    }
     for (int i = 0; i < desc->nb_objects; i++)
         vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
-    vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
 
     av_free(f);
 
@@ -2023,15 +2068,6 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
 
     dst_int = dst_f->internal;
     if (!dst_int || !dst_int->cuda_fc_ref) {
-        VkSemaphoreGetFdInfoKHR sem_export = {
-            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
-            .semaphore = dst_f->sem,
-            .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
-        };
-        CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
-            .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
-        };
-
         if (!dst_f->internal)
             dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
 
@@ -2070,6 +2106,14 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                 .memory     = dst_f->mem[i],
                 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
             };
+            VkSemaphoreGetFdInfoKHR sem_export = {
+                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+                .semaphore = dst_f->sem[i],
+                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+            };
+            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
+                .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
+            };
 
             ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
                                        &ext_desc.handle.fd);
@@ -2099,22 +2143,22 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                 err = AVERROR_EXTERNAL;
                 goto fail;
             }
-        }
 
-        ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
-                                      &ext_sem_desc.handle.fd);
-        if (ret != VK_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
-                   vk_ret2str(ret));
-            err = AVERROR_EXTERNAL;
-            goto fail;
-        }
+            ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
+                                          &ext_sem_desc.handle.fd);
+            if (ret != VK_SUCCESS) {
+                av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
+                       vk_ret2str(ret));
+                err = AVERROR_EXTERNAL;
+                goto fail;
+            }
 
-        ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem,
-                                                     &ext_sem_desc));
-        if (ret < 0) {
-            err = AVERROR_EXTERNAL;
-            goto fail;
+            ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
+                                                         &ext_sem_desc));
+            if (ret < 0) {
+                err = AVERROR_EXTERNAL;
+                goto fail;
+            }
         }
     }
 
@@ -2140,8 +2184,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
     CudaFunctions *cu = cu_internal->cuda_dl;
-    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par = { 0 };
-    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par = { 0 };
+    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
+    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
 
     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
     if (ret < 0) {
@@ -2157,8 +2201,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
     }
     dst_int = dst_f->internal;
 
-    ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&dst_int->cu_sem, &s_w_par,
-                                                     1, cuda_dev->stream));
+    ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
+                                                     planes, cuda_dev->stream));
     if (ret < 0) {
         err = AVERROR_EXTERNAL;
         goto fail;
@@ -2186,8 +2230,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
         }
     }
 
-    ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&dst_int->cu_sem, &s_s_par,
-                                                       1, cuda_dev->stream));
+    ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
+                                                       planes, cuda_dev->stream));
     if (ret < 0) {
         err = AVERROR_EXTERNAL;
         goto fail;
@@ -2271,6 +2315,10 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
     if (!drm_desc)
         return AVERROR(ENOMEM);
 
+    err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_EXTERNAL_EXPORT);
+    if (err < 0)
+        goto end;
+
     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
     if (err < 0)
         goto end;
@@ -2327,7 +2375,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
 
         drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
 
-        if (f->tiling != VK_IMAGE_TILING_OPTIMAL)
+        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
             continue;
 
         vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
@@ -2563,11 +2611,11 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .commandBufferCount   = 1,
         .pCommandBuffers      = &s->cmd.buf,
-        .pSignalSemaphores    = &frame->sem,
-        .pWaitSemaphores      = &frame->sem,
+        .pSignalSemaphores    = frame->sem,
+        .pWaitSemaphores      = frame->sem,
         .pWaitDstStageMask    = sem_wait_dst,
-        .signalSemaphoreCount = 1,
-        .waitSemaphoreCount   = 1,
+        .signalSemaphoreCount = planes,
+        .waitSemaphoreCount   = planes,
     };
 
     ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);