]> git.sesse.net Git - ffmpeg/blobdiff - libavutil/hwcontext_vulkan.c
avcodec/avcodec: Use avcodec_close() on avcodec_open2() failure
[ffmpeg] / libavutil / hwcontext_vulkan.c
index f1ee9693e18d4f0608db72a53446009eba55379a..48327ad98f965073a20084daa0b4a02b3fa05e0d 100644 (file)
@@ -138,38 +138,75 @@ typedef struct AVVkFrameInternal {
 
 static const struct {
     enum AVPixelFormat pixfmt;
-    const VkFormat vkfmts[3];
+    const VkFormat vkfmts[4];
 } vk_pixfmt_map[] = {
     { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
     { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
     { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
 
     { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+    { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
     { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
     { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
 
-    { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
 
+    { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+    { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+
+    { AV_PIX_FMT_YUV420P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
     { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+    { AV_PIX_FMT_YUV422P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
     { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+    { AV_PIX_FMT_YUV444P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
     { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
 
-    { AV_PIX_FMT_ABGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
+    { AV_PIX_FMT_YUVA420P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    /* There is no AV_PIX_FMT_YUVA420P12 */
+    { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+    { AV_PIX_FMT_YUVA422P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+    { AV_PIX_FMT_YUVA444P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
     { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
     { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
     { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
     { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
     { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
     { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
+    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
     { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
     { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
     { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
-    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
     { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },
 
+    /* Lower priority as there's an endianess-dependent overlap between these
+     * and rgba/bgr0, and PACK32 formats are more limited */
+    { AV_PIX_FMT_BGR32,  { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
+    { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
+
+    { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
+
+    { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
     { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+    { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
 };
 
 const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
@@ -210,6 +247,8 @@ enum VulkanExtensions {
     EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  2, /* VK_KHR_external_memory_fd */
     EXT_EXTERNAL_FD_SEM        = 1ULL <<  3, /* VK_KHR_external_semaphore_fd */
     EXT_EXTERNAL_HOST_MEMORY   = 1ULL <<  4, /* VK_EXT_external_memory_host */
+    EXT_PUSH_DESCRIPTORS       = 1ULL <<  5, /* VK_KHR_push_descriptor */
+    EXT_HOST_QUERY_RESET       = 1ULL <<  6, /* VK_EXT_host_query_reset */
 
     EXT_NO_FLAG                = 1ULL << 63,
 };
@@ -229,6 +268,8 @@ static const VulkanOptExtension optional_device_exts[] = {
     { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,        EXT_DRM_MODIFIER_FLAGS,     },
     { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,            EXT_EXTERNAL_FD_SEM,        },
     { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,             EXT_EXTERNAL_HOST_MEMORY,   },
+    { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,                  EXT_PUSH_DESCRIPTORS,       },
+    { VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,                 EXT_HOST_QUERY_RESET,       },
 };
 
 /* Converts return values to strings */
@@ -1007,6 +1048,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     vkGetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);
 #define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;
     COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
+    COPY_FEATURE(hwctx->device_features, shaderStorageImageReadWithoutFormat)
+    COPY_FEATURE(hwctx->device_features, shaderStorageImageWriteWithoutFormat)
     COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
     COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
     COPY_FEATURE(hwctx->device_features, shaderInt64)
@@ -1509,6 +1552,23 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     return submit_exec_ctx(hwfc, ectx, &s_info, 0);
 }
 
+static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
+                                int frame_w, int frame_h, int plane)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
+
+    /* Currently always true unless gray + alpha support is added */
+    if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
+        !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
+        *w = frame_w;
+        *h = frame_h;
+        return;
+    }
+
+    *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
+    *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
+}
+
 static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                         VkImageTiling tiling, VkImageUsageFlagBits usage,
                         void *create_pnext)
@@ -1540,19 +1600,11 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
 
     /* Create the images */
     for (int i = 0; i < planes; i++) {
-        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
-        int w = hwfc->width;
-        int h = hwfc->height;
-        const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
-        const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
-
-        VkImageCreateInfo image_create_info = {
+        VkImageCreateInfo create_info = {
             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
             .pNext                 = create_pnext,
             .imageType             = VK_IMAGE_TYPE_2D,
             .format                = img_fmts[i],
-            .extent.width          = p_w,
-            .extent.height         = p_h,
             .extent.depth          = 1,
             .mipLevels             = 1,
             .arrayLayers           = 1,
@@ -1567,7 +1619,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                                                       VK_SHARING_MODE_EXCLUSIVE,
         };
 
-        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
+        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
+                     format, hwfc->width, hwfc->height, i);
+
+        ret = vkCreateImage(hwctx->act_dev, &create_info,
                             hwctx->alloc, &f->img[i]);
         if (ret != VK_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
@@ -1585,7 +1640,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
             return AVERROR_EXTERNAL;
         }
 
-        f->layout[i] = image_create_info.initialLayout;
+        f->layout[i] = create_info.initialLayout;
         f->access[i] = 0x0;
     }
 
@@ -1638,7 +1693,7 @@ static void try_export_flags(AVHWFramesContext *hwfc,
     }
 }
 
-static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
+static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
 {
     int err;
     AVVkFrame *f;
@@ -1954,7 +2009,7 @@ static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
 }
 
 static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
-                                          AVDRMFrameDescriptor *desc)
+                                          const AVFrame *src)
 {
     int err = 0;
     VkResult ret;
@@ -1965,7 +2020,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
     VulkanDevicePriv *p = ctx->internal->priv;
     VulkanFramesPriv *fp = hwfc->internal->priv;
     AVVulkanFramesContext *frames_hwctx = hwfc->hwctx;
-    const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
+    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
     const int has_modifiers = !!(p->extensions & EXT_DRM_MODIFIER_FLAGS);
     VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
     VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
@@ -2011,16 +2066,11 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
             .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
         };
 
-        const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
-        const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
-
-        VkImageCreateInfo image_create_info = {
+        VkImageCreateInfo create_info = {
             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
             .pNext                 = &einfo,
             .imageType             = VK_IMAGE_TYPE_2D,
             .format                = drm_to_vulkan_fmt(desc->layers[i].format),
-            .extent.width          = p_w,
-            .extent.height         = p_h,
             .extent.depth          = 1,
             .mipLevels             = 1,
             .arrayLayers           = 1,
@@ -2035,6 +2085,9 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
                                                       VK_SHARING_MODE_EXCLUSIVE,
         };
 
+        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
+                     hwfc->sw_format, src->width, src->height, i);
+
         for (int j = 0; j < planes; j++) {
             plane_data[j].offset     = desc->layers[i].planes[j].offset;
             plane_data[j].rowPitch   = desc->layers[i].planes[j].pitch;
@@ -2044,7 +2097,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
         }
 
         /* Create image */
-        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
+        ret = vkCreateImage(hwctx->act_dev, &create_info,
                             hwctx->alloc, &f->img[i]);
         if (ret != VK_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
@@ -2066,7 +2119,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
          * offer us anything we could import and sync with, so instead
          * just signal the semaphore we created. */
 
-        f->layout[i] = image_create_info.initialLayout;
+        f->layout[i] = create_info.initialLayout;
         f->access[i] = 0x0;
     }
 
@@ -2194,9 +2247,7 @@ static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
     AVVkFrame *f;
     VulkanMapping *map = NULL;
 
-    err = vulkan_map_from_drm_frame_desc(hwfc, &f,
-                                         (AVDRMFrameDescriptor *)src->data[0]);
-    if (err)
+    if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src)))
         return err;
 
     /* The unmapping function will free this */
@@ -2308,10 +2359,6 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
                 .offset = 0,
                 .arrayDesc = {
-                    .Width  = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
-                                    : hwfc->width,
-                    .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
-                                    : hwfc->height,
                     .Depth = 0,
                     .Format = cufmt,
                     .NumChannels = 1 + ((planes == 2) && i),
@@ -2337,6 +2384,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                 .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
             };
 
+            int p_w, p_h;
+            get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
+
+            tex_desc.arrayDesc.Width = p_w;
+            tex_desc.arrayDesc.Height = p_h;
+
             ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
                                        &ext_desc.handle.fd);
             if (ret != VK_SUCCESS) {
@@ -2439,12 +2492,14 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
 
             .dstMemoryType = CU_MEMORYTYPE_ARRAY,
             .dstArray      = dst_int->cu_array[i],
-            .WidthInBytes  = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
-                                    : hwfc->width) * desc->comp[i].step,
-            .Height        = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
-                                   : hwfc->height,
         };
 
+        int p_w, p_h;
+        get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
+
+        cpy.WidthInBytes = p_w * desc->comp[i].step;
+        cpy.Height = p_h;
+
         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
         if (ret < 0) {
             err = AVERROR_EXTERNAL;
@@ -2958,22 +3013,20 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
     /* Schedule a copy for each plane */
     for (int i = 0; i < planes; i++) {
         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
-        const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
-        const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
         VkBufferImageCopy buf_reg = {
             .bufferOffset = buf_offsets[i],
-            /* Buffer stride isn't in bytes, it's in samples, the implementation
-             * uses the image's VkFormat to know how many bytes per sample
-             * the buffer has. So we have to convert by dividing. Stupid.
-             * Won't work with YUVA or other planar formats with alpha. */
             .bufferRowLength = buf_stride[i] / desc->comp[i].step,
-            .bufferImageHeight = p_h,
             .imageSubresource.layerCount = 1,
             .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
             .imageOffset = { 0, 0, 0, },
-            .imageExtent = { p_w, p_h, 1, },
         };
 
+        int p_w, p_h;
+        get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);
+
+        buf_reg.bufferImageHeight = p_h;
+        buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
+
         if (to_buf)
             vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
                                    vkbuf->buf, 1, &buf_reg);
@@ -3018,8 +3071,8 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
     AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
     size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };
 
+    int p_w, p_h;
     const int planes = av_pix_fmt_count_planes(swf->format);
-    int log2_chroma = av_pix_fmt_desc_get(swf->format)->log2_chroma_h;
 
     int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
     const int map_host = !!(p->extensions & EXT_EXTERNAL_HOST_MEMORY);
@@ -3054,8 +3107,6 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
     /* Create buffers */
     for (int i = 0; i < planes; i++) {
         size_t req_size;
-        int h = swf->height;
-        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
 
         VkExternalMemoryBufferCreateInfo create_desc = {
             .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
@@ -3071,6 +3122,8 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
             .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
         };
 
+        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+
         tmp.linesize[i] = FFABS(swf->linesize[i]);
 
         /* Do not map images with a negative stride */
@@ -3081,7 +3134,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
 
             /* We have to compensate for the few extra bytes of padding we
              * completely ignore at the start */
-            req_size = FFALIGN(offs + tmp.linesize[i] * p_height,
+            req_size = FFALIGN(offs + tmp.linesize[i] * p_h,
                                p->hprops.minImportedHostPointerAlignment);
 
             ret = pfn_vkGetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
@@ -3096,7 +3149,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
         }
 
         if (!host_mapped[i])
-            req_size = get_req_buffer_size(p, &tmp.linesize[i], p_height);
+            req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);
 
         err = create_buf(dev_ctx, &bufs[i],
                          from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
@@ -3115,16 +3168,15 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
             goto end;
 
         for (int i = 0; i < planes; i++) {
-            int h = swf->height;
-            int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
-
             if (host_mapped[i])
                 continue;
 
+            get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+
             av_image_copy_plane(tmp.data[i], tmp.linesize[i],
                                 (const uint8_t *)swf->data[i], swf->linesize[i],
                                 FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
-                                p_height);
+                                p_h);
         }
 
         if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
@@ -3141,16 +3193,15 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
             goto end;
 
         for (int i = 0; i < planes; i++) {
-            int h = swf->height;
-            int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
-
             if (host_mapped[i])
                 continue;
 
+            get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+
             av_image_copy_plane(swf->data[i], swf->linesize[i],
                                 (const uint8_t *)tmp.data[i], tmp.linesize[i],
                                 FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
-                                p_height);
+                                p_h);
         }
 
         if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
@@ -3201,6 +3252,8 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
     CudaFunctions *cu = cu_internal->cuda_dl;
+    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
+    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
 
     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
     if (ret < 0)
@@ -3216,6 +3269,13 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
 
     dst_int = dst_f->internal;
 
+    ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
+                                                     planes, cuda_dev->stream));
+    if (ret < 0) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
     for (int i = 0; i < planes; i++) {
         CUDA_MEMCPY2D cpy = {
             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
@@ -3225,12 +3285,14 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
 
             .srcMemoryType = CU_MEMORYTYPE_ARRAY,
             .srcArray      = dst_int->cu_array[i],
-            .WidthInBytes  = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
-                                    : hwfc->width) * desc->comp[i].step,
-            .Height        = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
-                                   : hwfc->height,
         };
 
+        int w, h;
+        get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);
+
+        cpy.WidthInBytes = w * desc->comp[i].step;
+        cpy.Height = h;
+
         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
         if (ret < 0) {
             err = AVERROR_EXTERNAL;
@@ -3238,6 +3300,13 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
         }
     }
 
+    ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
+                                                       planes, cuda_dev->stream));
+    if (ret < 0) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 
     av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n");