X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavutil%2Fhwcontext_vulkan.c;h=960fd0b6ceadd8aee3091ddcaa0ddaa3b8de832c;hb=4a6581e968d38f009140e055e29be9953279cc7b;hp=aca34a35a8b200d70e56394fed8cc8e54179222e;hpb=dccd07f66dfc236206bd37633a4666dbe666c0d5;p=ffmpeg diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index aca34a35a8b..960fd0b6cea 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -16,6 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#define VK_NO_PROTOTYPES + #include "config.h" #include "pixdesc.h" #include "avstring.h" @@ -24,6 +26,9 @@ #include "hwcontext_internal.h" #include "hwcontext_vulkan.h" +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, + const char *name); + #if CONFIG_LIBDRM #include #include @@ -41,26 +46,163 @@ #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) #endif +enum VulkanExtensions { + EXT_EXTERNAL_DMABUF_MEMORY = 1 << 0, /* VK_EXT_external_memory_dma_buf */ + EXT_DRM_MODIFIER_FLAGS = 1 << 1, /* VK_EXT_image_drm_format_modifier */ + EXT_EXTERNAL_FD_MEMORY = 1 << 2, /* VK_KHR_external_memory_fd */ + EXT_EXTERNAL_FD_SEM = 1 << 3, /* VK_KHR_external_semaphore_fd */ + EXT_EXTERNAL_HOST_MEMORY = 1 << 4, /* VK_EXT_external_memory_host */ + EXT_PUSH_DESCRIPTORS = 1 << 5, /* VK_KHR_push_descriptor */ + EXT_DEBUG_UTILS = 1 << 6, /* VK_EXT_debug_utils */ + + EXT_NO_FLAG = 1 << 31, +}; + +#define FN_LIST(MACRO) \ + /* Instance */ \ + MACRO(0, 0, EXT_NO_FLAG, EnumerateInstanceExtensionProperties) \ + MACRO(0, 0, EXT_NO_FLAG, CreateInstance) \ + MACRO(1, 0, EXT_NO_FLAG, DestroyInstance) \ + \ + /* Debug */ \ + MACRO(1, 0, EXT_NO_FLAG, CreateDebugUtilsMessengerEXT) \ + MACRO(1, 0, EXT_NO_FLAG, DestroyDebugUtilsMessengerEXT) \ + \ + /* Device */ \ + MACRO(1, 0, EXT_NO_FLAG, GetDeviceProcAddr) \ + MACRO(1, 0, EXT_NO_FLAG, CreateDevice) \ + MACRO(1, 0, EXT_NO_FLAG, GetPhysicalDeviceFeatures) \ + MACRO(1, 0, EXT_NO_FLAG, DestroyDevice) \ + \ + MACRO(1, 0, EXT_NO_FLAG, EnumeratePhysicalDevices) \ + MACRO(1, 0, EXT_NO_FLAG, EnumerateDeviceExtensionProperties) \ + \ + MACRO(1, 0, EXT_NO_FLAG, GetPhysicalDeviceProperties2) \ + MACRO(1, 0, EXT_NO_FLAG, GetPhysicalDeviceMemoryProperties) \ + MACRO(1, 0, EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \ + MACRO(1, 0, EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \ + MACRO(1, 0, EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \ + \ + /* Command pool */ \ + MACRO(1, 1, EXT_NO_FLAG, CreateCommandPool) \ + MACRO(1, 1, EXT_NO_FLAG, DestroyCommandPool) \ + \ + /* Command buffer */ \ + MACRO(1, 1, EXT_NO_FLAG, AllocateCommandBuffers) \ + MACRO(1, 1, EXT_NO_FLAG, BeginCommandBuffer) \ + MACRO(1, 1, EXT_NO_FLAG, EndCommandBuffer) \ + MACRO(1, 1, EXT_NO_FLAG, FreeCommandBuffers) \ + \ + /* Queue */ \ + MACRO(1, 1, EXT_NO_FLAG, GetDeviceQueue) \ + MACRO(1, 1, EXT_NO_FLAG, QueueSubmit) \ + \ + /* Fences */ \ + MACRO(1, 1, EXT_NO_FLAG, CreateFence) \ + MACRO(1, 1, EXT_NO_FLAG, WaitForFences) \ + MACRO(1, 1, EXT_NO_FLAG, ResetFences) \ + MACRO(1, 1, EXT_NO_FLAG, DestroyFence) \ + \ + /* Semaphores */ \ + MACRO(1, 1, EXT_EXTERNAL_FD_SEM, GetSemaphoreFdKHR) \ + MACRO(1, 1, EXT_NO_FLAG, CreateSemaphore) \ + MACRO(1, 1, EXT_NO_FLAG, DestroySemaphore) \ + \ + /* Memory */ \ + MACRO(1, 1, EXT_EXTERNAL_FD_MEMORY, GetMemoryFdKHR) \ + MACRO(1, 1, EXT_NO_FLAG, GetMemoryFdPropertiesKHR) \ + MACRO(1, 1, EXT_EXTERNAL_HOST_MEMORY, GetMemoryHostPointerPropertiesEXT) \ + MACRO(1, 1, EXT_NO_FLAG, 
AllocateMemory) \ + MACRO(1, 1, EXT_NO_FLAG, MapMemory) \ + MACRO(1, 1, EXT_NO_FLAG, FlushMappedMemoryRanges) \ + MACRO(1, 1, EXT_NO_FLAG, InvalidateMappedMemoryRanges) \ + MACRO(1, 1, EXT_NO_FLAG, UnmapMemory) \ + MACRO(1, 1, EXT_NO_FLAG, FreeMemory) \ + \ + /* Commands */ \ + MACRO(1, 1, EXT_NO_FLAG, CmdPipelineBarrier) \ + MACRO(1, 1, EXT_NO_FLAG, CmdCopyBufferToImage) \ + MACRO(1, 1, EXT_NO_FLAG, CmdCopyImageToBuffer) \ + \ + /* Buffer */ \ + MACRO(1, 1, EXT_NO_FLAG, GetBufferMemoryRequirements2) \ + MACRO(1, 1, EXT_NO_FLAG, CreateBuffer) \ + MACRO(1, 1, EXT_NO_FLAG, BindBufferMemory) \ + MACRO(1, 1, EXT_NO_FLAG, DestroyBuffer) \ + \ + /* Image */ \ + MACRO(1, 1, EXT_DRM_MODIFIER_FLAGS, GetImageDrmFormatModifierPropertiesEXT) \ + MACRO(1, 1, EXT_NO_FLAG, GetImageMemoryRequirements2) \ + MACRO(1, 1, EXT_NO_FLAG, CreateImage) \ + MACRO(1, 1, EXT_NO_FLAG, BindImageMemory2) \ + MACRO(1, 1, EXT_NO_FLAG, GetImageSubresourceLayout) \ + MACRO(1, 1, EXT_NO_FLAG, DestroyImage) + +#define PFN_DEF(req_inst, req_dev, ext_flag, name) \ + PFN_vk##name name; + +typedef struct VulkanFunctions { + FN_LIST(PFN_DEF) +} VulkanFunctions; + +#define PFN_LOAD_INFO(req_inst, req_dev, ext_flag, name) \ + { \ + req_inst, \ + req_dev, \ + offsetof(VulkanFunctions, name), \ + ext_flag, \ + { "vk"#name, "vk"#name"EXT", "vk"#name"KHR" } \ + }, + +typedef struct VulkanFunctionsLoadInfo { + int req_inst; + int req_dev; + size_t struct_offset; + enum VulkanExtensions ext_flag; + const char *names[3]; +} VulkanFunctionsLoadInfo; + +static const VulkanFunctionsLoadInfo vk_load_info[] = { + FN_LIST(PFN_LOAD_INFO) +}; + +typedef struct VulkanQueueCtx { + VkFence fence; + VkQueue queue; + int was_synchronous; + + /* Buffer dependencies */ + AVBufferRef **buf_deps; + int nb_buf_deps; + int buf_deps_alloc_size; +} VulkanQueueCtx; + typedef struct VulkanExecCtx { VkCommandPool pool; - VkCommandBuffer buf; - VkQueue queue; - VkFence fence; + VkCommandBuffer *bufs; + VulkanQueueCtx *queues; + int nb_queues; + int cur_queue_idx; } VulkanExecCtx; typedef struct VulkanDevicePriv { + /* Vulkan loader functions */ + VulkanFunctions vkfn; + /* Properties */ - VkPhysicalDeviceProperties props; + VkPhysicalDeviceProperties2 props; VkPhysicalDeviceMemoryProperties mprops; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; + + /* Queues */ + uint32_t qfs[3]; + int num_qfs; /* Debug callback */ VkDebugUtilsMessengerEXT debug_ctx; - /* Image uploading */ - VulkanExecCtx cmd; - /* Extensions */ - uint64_t extensions; + enum VulkanExtensions extensions; /* Settings */ int use_linear_images; @@ -70,7 +212,12 @@ typedef struct VulkanDevicePriv { } VulkanDevicePriv; typedef struct VulkanFramesPriv { - VulkanExecCtx cmd; + /* Image conversions */ + VulkanExecCtx conv_ctx; + + /* Image transfers */ + VulkanExecCtx upload_ctx; + VulkanExecCtx download_ctx; } VulkanFramesPriv; typedef struct AVVkFrameInternal { @@ -81,12 +228,19 @@ typedef struct AVVkFrameInternal { CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS]; CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS]; CUarray cu_array[AV_NUM_DATA_POINTERS]; - CUexternalSemaphore cu_sem; + CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS]; #endif } AVVkFrameInternal; -#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \ - vkGetInstanceProcAddr(inst, #name) +#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) ( \ + graph ? hwctx->nb_graphics_queues : \ + comp ? (hwctx->nb_comp_queues ? \ + hwctx->nb_comp_queues : hwctx->nb_graphics_queues) : \ + tx ? (hwctx->nb_tx_queues ? 
hwctx->nb_tx_queues : \ + (hwctx->nb_comp_queues ? \ + hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) : \ + 0 \ +) #define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \ VK_IMAGE_USAGE_STORAGE_BIT | \ @@ -109,38 +263,75 @@ typedef struct AVVkFrameInternal { static const struct { enum AVPixelFormat pixfmt; - const VkFormat vkfmts[3]; + const VkFormat vkfmts[4]; } vk_pixfmt_map[] = { { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + + { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_ABGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, + { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + /* There is no AV_PIX_FMT_YUVA420P12 */ + { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } 
},
+    { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
     { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
     { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
     { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
     { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
     { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
     { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
     { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
     { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
     { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
-    { AV_PIX_FMT_0BGR,   { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
     { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },
+
+    /* Lower priority as there's an endianness-dependent overlap between these
+     * and rgba/bgr0, and PACK32 formats are more limited */
+    { AV_PIX_FMT_BGR32,  { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
+    { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
+
+    { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
+
+    { AV_PIX_FMT_GBRAP,    { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+    { AV_PIX_FMT_GBRAP16,  { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
     { AV_PIX_FMT_GBRPF32,  { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+    { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
 };
 
 const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
@@ -151,9 +342,12 @@ const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
     return NULL;
 }
 
-static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
+static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
                                int linear)
 {
+    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+    VulkanDevicePriv *priv = dev_ctx->internal->priv;
+    VulkanFunctions *vk = &priv->vkfn;
     const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
     int planes = av_pix_fmt_count_planes(p);
 
@@ -165,7 +359,7 @@ static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat
         VkFormatProperties2 prop = {
             .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
         };
-        vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
+        vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
         flags = linear ? prop.formatProperties.linearTilingFeatures :
                          prop.formatProperties.optimalTilingFeatures;
 
         if (!(flags & DEFAULT_USAGE_FLAGS))
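The table above maps every supported software format to one VkFormat per plane: planar YUV is emulated with independent single- and dual-channel R8/R16/R32 images rather than Vulkan's native multi-planar formats, which keeps each plane individually bindable. A minimal sketch of how user code might inspect the mapping exposed by av_vkfmt_from_pixfmt() (the probe helper below is illustrative, not part of the patch):

    #include <stdio.h>
    #include <libavutil/pixdesc.h>
    #include <libavutil/hwcontext_vulkan.h>

    /* Print the per-plane Vulkan format chosen for a pixel format.
     * av_vkfmt_from_pixfmt() returns NULL if the format is not representable. */
    static void probe_vk_formats(enum AVPixelFormat p)
    {
        const VkFormat *fmts = av_vkfmt_from_pixfmt(p);
        if (!fmts) {
            printf("%s: unsupported\n", av_get_pix_fmt_name(p));
            return;
        }
        for (int i = 0; i < av_pix_fmt_count_planes(p); i++)
            printf("%s: plane %d -> VkFormat %d\n",
                   av_get_pix_fmt_name(p), i, (int)fmts[i]);
    }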
@@ -175,23 +369,54 @@
     return 1;
 }
 
-enum VulkanExtensions {
-    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL << 0, /* VK_EXT_external_memory_dma_buf */
-    EXT_DRM_MODIFIER_FLAGS     = 1ULL << 1, /* VK_EXT_image_drm_format_modifier */
-    EXT_EXTERNAL_FD_MEMORY     = 1ULL << 2, /* VK_KHR_external_memory_fd */
-    EXT_EXTERNAL_FD_SEM        = 1ULL << 3, /* VK_KHR_external_semaphore_fd */
+static int load_functions(AVHWDeviceContext *ctx, int has_inst, int has_dev)
+{
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VulkanDevicePriv *p = ctx->internal->priv;
+    VulkanFunctions *vk = &p->vkfn;
 
-    EXT_OPTIONAL               = 1ULL << 62,
-    EXT_REQUIRED               = 1ULL << 63,
-};
+    for (int i = 0; i < FF_ARRAY_ELEMS(vk_load_info); i++) {
+        const VulkanFunctionsLoadInfo *load = &vk_load_info[i];
+        PFN_vkVoidFunction fn;
+
+        if (load->req_dev && !has_dev)
+            continue;
+        if (load->req_inst && !has_inst)
+            continue;
+
+        for (int j = 0; j < FF_ARRAY_ELEMS(load->names); j++) {
+            const char *name = load->names[j];
+
+            if (load->req_dev)
+                fn = vk->GetDeviceProcAddr(hwctx->act_dev, name);
+            else if (load->req_inst)
+                fn = hwctx->get_proc_addr(hwctx->inst, name);
+            else
+                fn = hwctx->get_proc_addr(NULL, name);
+
+            if (fn)
+                break;
+        }
+
+        if (!fn && ((p->extensions &~ EXT_NO_FLAG) & load->ext_flag)) {
+            av_log(ctx, AV_LOG_ERROR, "Loader error, function \"%s\" indicated "
+                   "as supported, but got NULL function pointer!\n", load->names[0]);
+            return AVERROR_EXTERNAL;
+        }
+
+        *(PFN_vkVoidFunction *)((uint8_t *)vk + load->struct_offset) = fn;
+    }
+
+    return 0;
+}
 
 typedef struct VulkanOptExtension {
     const char *name;
-    uint64_t flag;
+    enum VulkanExtensions flag;
 } VulkanOptExtension;
 
 static const VulkanOptExtension optional_instance_exts[] = {
-    { VK_KHR_SURFACE_EXTENSION_NAME, EXT_OPTIONAL },
+    /* For future use */
 };
 
 static const VulkanOptExtension optional_device_exts[] = {
@@ -199,6 +424,9 @@ static const VulkanOptExtension optional_device_exts[] = {
     { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,          EXT_EXTERNAL_DMABUF_MEMORY, },
     { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,        EXT_DRM_MODIFIER_FLAGS,     },
     { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,            EXT_EXTERNAL_FD_SEM,        },
+    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,             EXT_EXTERNAL_HOST_MEMORY,   },
+    { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,                  EXT_PUSH_DESCRIPTORS,       },
+    { VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,                 EXT_NO_FLAG,                },
 };
 
 /* Converts return values to strings */
@@ -271,6 +499,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
     const char *tstr;
     const char **extension_names = NULL;
     VulkanDevicePriv *p = ctx->internal->priv;
+    VulkanFunctions *vk = &p->vkfn;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     int err = 0, found, extensions_found = 0;
 
@@ -294,11 +523,11 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                 goto fail;
             }
         }
-        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
+        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
         sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
         if (!sup_ext)
             return AVERROR(ENOMEM);
-        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
+        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
     } else {
         mod = "device";
         optional_exts = optional_device_exts;
@@ -311,19 +540,17 @@ static int 
check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, goto fail; } } - vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, - &sup_ext_count, NULL); + vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, + &sup_ext_count, NULL); sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); if (!sup_ext) return AVERROR(ENOMEM); - vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, - &sup_ext_count, sup_ext); + vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, + &sup_ext_count, sup_ext); } for (int i = 0; i < optional_exts_num; i++) { - int req = optional_exts[i].flag & EXT_REQUIRED; tstr = optional_exts[i].name; - found = 0; for (int j = 0; j < sup_ext_count; j++) { if (!strcmp(tstr, sup_ext[j].extensionName)) { @@ -331,20 +558,11 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, break; } } - if (!found) { - int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE; - av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr); - if (req) { - err = AVERROR(EINVAL); - goto end; - } + if (!found) continue; - } - if (!req) - p->extensions |= optional_exts[i].flag; av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr); - + p->extensions |= optional_exts[i].flag; ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); } @@ -360,6 +578,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, if (found) { av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr); ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + p->extensions |= EXT_DEBUG_UTILS; } else { av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n", tstr); @@ -379,13 +598,11 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, } } if (found) { - av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr); + av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token); ADD_VAL_TO_LIST(extension_names, extensions_found, token); } else { - av_log(ctx, AV_LOG_ERROR, "%s extension \"%s\" not found!\n", + av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n", mod, token); - err = AVERROR(EINVAL); - goto fail; } token = av_strtok(NULL, "+", &save); } @@ -414,6 +631,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) int err = 0; VkResult ret; VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0); const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10); @@ -430,6 +648,16 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) .pApplicationInfo = &application_info, }; + if (!hwctx->get_proc_addr) { + hwctx->get_proc_addr = vkGetInstanceProcAddr; + } + + err = load_functions(ctx, 0, 0); + if (err < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n"); + return err; + } + /* Check for present/missing extensions */ err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames, &inst_props.enabledExtensionCount, debug_mode); @@ -437,13 +665,13 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) return err; if (debug_mode) { - static const char *layers[] = { "VK_LAYER_LUNARG_standard_validation" }; + static const char *layers[] = { "VK_LAYER_KHRONOS_validation" }; inst_props.ppEnabledLayerNames = layers; inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers); } /* Try to 
create the instance */ - ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); + ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); /* Check for errors */ if (ret != VK_SUCCESS) { @@ -455,6 +683,12 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) return AVERROR_EXTERNAL; } + err = load_functions(ctx, 1, 0); + if (err < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n"); + return err; + } + if (debug_mode) { VkDebugUtilsMessengerCreateInfoEXT dbg = { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, @@ -468,10 +702,9 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) .pfnUserCallback = vk_dbg_callback, .pUserData = ctx, }; - VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT); - pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg, - hwctx->alloc, &p->debug_ctx); + vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg, + hwctx->alloc, &p->debug_ctx); } hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames; @@ -506,13 +739,14 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) int err = 0, choice = -1; uint32_t num; VkResult ret; + VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; VkPhysicalDevice *devices = NULL; VkPhysicalDeviceIDProperties *idp = NULL; VkPhysicalDeviceProperties2 *prop = NULL; - VulkanDevicePriv *p = ctx->internal->priv; AVVulkanDeviceContext *hwctx = ctx->hwctx; - ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL); + ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL); if (ret != VK_SUCCESS || !num) { av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret)); return AVERROR(ENODEV); @@ -522,7 +756,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) if (!devices) return AVERROR(ENOMEM); - ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices); + ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n", vk_ret2str(ret)); @@ -548,7 +782,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; prop[i].pNext = &idp[i]; - vkGetPhysicalDeviceProperties2(devices[i], &prop[i]); + vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]); av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i, prop[i].properties.deviceName, vk_dev_type(prop[i].properties.deviceType), @@ -613,10 +847,9 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) } end: - if (choice > -1) { - p->dev_is_nvidia = (prop[choice].properties.vendorID == 0x10de); + if (choice > -1) hwctx->phys_dev = devices[choice]; - } + av_free(devices); av_free(prop); av_free(idp); @@ -627,13 +860,16 @@ end: static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) { uint32_t num; + float *weights; VkQueueFamilyProperties *qs = NULL; + VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; int graph_index = -1, comp_index = -1, tx_index = -1; VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos; /* First get the number of queue families */ - vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); if (!num) { av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); return 
AVERROR_EXTERNAL; @@ -645,7 +881,7 @@ static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) return AVERROR(ENOMEM); /* Finally retrieve the queue families */ - vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs); + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs); #define SEARCH_FLAGS(expr, out) \ for (int i = 0; i < num; i++) { \ @@ -665,48 +901,65 @@ static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) (i != comp_index), tx_index) #undef SEARCH_FLAGS -#define QF_FLAGS(flags) \ - ((flags) & VK_QUEUE_GRAPHICS_BIT ) ? "(graphics) " : "", \ - ((flags) & VK_QUEUE_COMPUTE_BIT ) ? "(compute) " : "", \ - ((flags) & VK_QUEUE_TRANSFER_BIT ) ? "(transfer) " : "", \ - ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "" - - av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, " - "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags)); - +#define ADD_QUEUE(fidx, graph, comp, tx) \ + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (total queues: %i) for %s%s%s\n", \ + fidx, qs[fidx].queueCount, graph ? "graphics " : "", \ + comp ? "compute " : "", tx ? "transfers " : ""); \ + av_log(ctx, AV_LOG_VERBOSE, " QF %i flags: %s%s%s%s\n", fidx, \ + ((qs[fidx].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? "(graphics) " : "", \ + ((qs[fidx].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? "(compute) " : "", \ + ((qs[fidx].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? "(transfers) " : "", \ + ((qs[fidx].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : ""); \ + pc[cd->queueCreateInfoCount].queueFamilyIndex = fidx; \ + pc[cd->queueCreateInfoCount].queueCount = qs[fidx].queueCount; \ + weights = av_malloc(qs[fidx].queueCount * sizeof(float)); \ + pc[cd->queueCreateInfoCount].pQueuePriorities = weights; \ + if (!weights) \ + goto fail; \ + for (int i = 0; i < qs[fidx].queueCount; i++) \ + weights[i] = 1.0f; \ + cd->queueCreateInfoCount++; + + ADD_QUEUE(graph_index, 1, comp_index < 0, tx_index < 0 && comp_index < 0) hwctx->queue_family_index = graph_index; - hwctx->queue_family_tx_index = graph_index; hwctx->queue_family_comp_index = graph_index; - - pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index; + hwctx->queue_family_tx_index = graph_index; + hwctx->nb_graphics_queues = qs[graph_index].queueCount; if (comp_index != -1) { - av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, " - "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags)); - hwctx->queue_family_tx_index = comp_index; - hwctx->queue_family_comp_index = comp_index; - pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index; + ADD_QUEUE(comp_index, 0, 1, tx_index < 0) + hwctx->queue_family_tx_index = comp_index; + hwctx->queue_family_comp_index = comp_index; + hwctx->nb_comp_queues = qs[comp_index].queueCount; } if (tx_index != -1) { - av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, " - "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags)); - hwctx->queue_family_tx_index = tx_index; - pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index; + ADD_QUEUE(tx_index, 0, 0, 1) + hwctx->queue_family_tx_index = tx_index; + hwctx->nb_tx_queues = qs[tx_index].queueCount; } -#undef QF_FLAGS - +#undef ADD_QUEUE av_free(qs); return 0; + +fail: + av_freep(&pc[0].pQueuePriorities); + av_freep(&pc[1].pQueuePriorities); + av_freep(&pc[2].pQueuePriorities); + av_free(qs); + + return AVERROR(ENOMEM); } -static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd, - int 
queue_family_index)
+static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
+                           int queue_family_index, int num_queues)
 {
     VkResult ret;
-    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    VulkanFunctions *vk = &p->vkfn;
 
     VkCommandPoolCreateInfo cqueue_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
@@ -716,72 +969,224 @@ static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
     VkCommandBufferAllocateInfo cbuf_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-        .commandBufferCount = 1,
+        .commandBufferCount = num_queues,
     };
 
-    VkFenceCreateInfo fence_spawn = {
-        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
-    };
+    cmd->nb_queues = num_queues;
 
-    ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
-                        hwctx->alloc, &cmd->fence);
+    /* Create command pool */
+    ret = vk->CreateCommandPool(hwctx->act_dev, &cqueue_create,
+                                hwctx->alloc, &cmd->pool);
     if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+        av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n",
                vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
-                              hwctx->alloc, &cmd->pool);
+    cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
+    if (!cmd->bufs)
+        return AVERROR(ENOMEM);
+
+    cbuf_create.commandPool = cmd->pool;
+
+    /* Allocate command buffers */
+    ret = vk->AllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
     if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+        av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
                vk_ret2str(ret));
+        av_freep(&cmd->bufs);
         return AVERROR_EXTERNAL;
     }
 
-    cbuf_create.commandPool = cmd->pool;
+    cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
+    if (!cmd->queues)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < num_queues; i++) {
+        VulkanQueueCtx *q = &cmd->queues[i];
+        vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
+        q->was_synchronous = 1;
+    }
+
+    return 0;
+}
+
+static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
+{
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    VulkanFunctions *vk = &p->vkfn;
+
+    if (cmd->queues) {
+        for (int i = 0; i < cmd->nb_queues; i++) {
+            VulkanQueueCtx *q = &cmd->queues[i];
+
+            /* Make sure all queues have finished executing */
+            if (q->fence && !q->was_synchronous) {
+                vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+                vk->ResetFences(hwctx->act_dev, 1, &q->fence);
+            }
+
+            /* Free the fence */
+            if (q->fence)
+                vk->DestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);
+
+            /* Free buffer dependencies */
+            for (int j = 0; j < q->nb_buf_deps; j++)
+                av_buffer_unref(&q->buf_deps[j]);
+            av_free(q->buf_deps);
+        }
+    }
+
+    if (cmd->bufs)
+        vk->FreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs);
+    if (cmd->pool)
+        vk->DestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
+
+    av_freep(&cmd->queues);
+    av_freep(&cmd->bufs);
+    cmd->pool = NULL;
+}
+
+static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
+{
+    return cmd->bufs[cmd->cur_queue_idx];
+}
+
+static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
+{
+    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
+
+    for (int j = 0; j < q->nb_buf_deps; j++)
+        av_buffer_unref(&q->buf_deps[j]);
+    q->nb_buf_deps = 0;
+}
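With this change an execution context is no longer a single command buffer guarded by one fence: create_exec_ctx() takes num_queues, pulls that many VkQueues from the family, and gives each its own command buffer, fence, and AVBufferRef dependency list, so submissions can rotate round-robin and overlap. The intended calling pattern, sketched as a hypothetical caller of the exec-context helpers (wait_start_exec_ctx(), add_buf_dep_exec_ctx() and submit_exec_ctx() follow below; error paths trimmed):

    static int record_and_submit(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                                 AVBufferRef *dep)
    {
        VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx);
        VkSubmitInfo s_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO };
        int err;

        /* Waits on (and resets) this queue's fence if a previous asynchronous
         * submission is still in flight, then begins the command buffer */
        if ((err = wait_start_exec_ctx(hwfc, ectx)) < 0)
            return err;

        /* ... record transfer commands into cmd_buf here ... */

        /* Keep 'dep' referenced until the fence proves the GPU is done */
        if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &dep, 1)) < 0)
            return err;

        /* 0 = asynchronous: submit, then rotate to the next queue */
        return submit_exec_ctx(hwfc, ectx, &s_info, 0);
    }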
+
+static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
+{
+    VkResult ret;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
+    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    VulkanFunctions *vk = &p->vkfn;
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
 
-    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
+    /* Create the fence and don't wait for it initially */
+    if (!q->fence) {
+        VkFenceCreateInfo fence_spawn = {
+            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+        };
+        ret = vk->CreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
+                              &q->fence);
+        if (ret != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+                   vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    } else if (!q->was_synchronous) {
+        vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+        vk->ResetFences(hwctx->act_dev, 1, &q->fence);
+    }
+
+    /* Discard queue dependencies */
+    unref_exec_ctx_deps(hwfc, cmd);
+
+    ret = vk->BeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
     if (ret != VK_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+        av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
                vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
-                     &cmd->queue);
+    return 0;
+}
+
+static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
+                                AVBufferRef * const *deps, int nb_deps)
+{
+    AVBufferRef **dst;
+    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
+
+    if (!deps || !nb_deps)
+        return 0;
+
+    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
+                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
+    if (!dst)
+        goto err;
+
+    q->buf_deps = dst;
+
+    for (int i = 0; i < nb_deps; i++) {
+        q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
+        if (!q->buf_deps[q->nb_buf_deps])
+            goto err;
+        q->nb_buf_deps++;
+    }
 
     return 0;
+
+err:
+    unref_exec_ctx_deps(hwfc, cmd);
+    return AVERROR(ENOMEM);
 }
 
-static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
+static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
+                           VkSubmitInfo *s_info, int synchronous)
 {
-    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VkResult ret;
+    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
+    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    VulkanFunctions *vk = &p->vkfn;
 
-    if (cmd->fence)
-        vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc);
-    if (cmd->buf)
-        vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf);
-    if (cmd->pool)
-        vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
+    ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
+    if (ret != VK_SUCCESS) {
+        av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
+               vk_ret2str(ret));
+        unref_exec_ctx_deps(hwfc, cmd);
+        return AVERROR_EXTERNAL;
+    }
+
+    s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
+    s_info->commandBufferCount = 1;
+
+    ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence);
+    if (ret != VK_SUCCESS) {
+        unref_exec_ctx_deps(hwfc, cmd);
+        return AVERROR_EXTERNAL;
+    }
+
+    q->was_synchronous = synchronous;
+
+    if (synchronous) {
+        AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+        vk->WaitForFences(hwctx->act_dev, 
1, &q->fence, VK_TRUE, UINT64_MAX); + vk->ResetFences(hwctx->act_dev, 1, &q->fence); + unref_exec_ctx_deps(hwfc, cmd); + } else { /* Rotate queues */ + cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues; + } + + return 0; } static void vulkan_device_free(AVHWDeviceContext *ctx) { VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; - free_exec_ctx(ctx, &p->cmd); - - vkDestroyDevice(hwctx->act_dev, hwctx->alloc); + vk->DestroyDevice(hwctx->act_dev, hwctx->alloc); - if (p->debug_ctx) { - VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT); - pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx, - hwctx->alloc); - } + if (p->debug_ctx) + vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx, + hwctx->alloc); - vkDestroyInstance(hwctx->inst, hwctx->alloc); + vk->DestroyInstance(hwctx->inst, hwctx->alloc); for (int i = 0; i < hwctx->nb_enabled_inst_extensions; i++) av_free((void *)hwctx->enabled_inst_extensions[i]); @@ -800,25 +1205,23 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkResult ret; AVDictionaryEntry *opt_d; VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; + VkPhysicalDeviceFeatures dev_features = { 0 }; VkDeviceQueueCreateInfo queue_create_info[3] = { - { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .pQueuePriorities = (float []){ 1.0f }, - .queueCount = 1, }, - { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .pQueuePriorities = (float []){ 1.0f }, - .queueCount = 1, }, - { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .pQueuePriorities = (float []){ 1.0f }, - .queueCount = 1, }, + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, }, + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, }, + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, }, }; VkDeviceCreateInfo dev_info = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = &hwctx->device_features, .pQueueCreateInfos = queue_create_info, .queueCreateInfoCount = 0, }; + hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; ctx->free = vulkan_device_free; /* Create an instance if not given one */ @@ -829,26 +1232,36 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, if ((err = find_device(ctx, dev_select))) goto end; - vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props); - av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName); - av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n"); - av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyOffsetAlignment: %li\n", - p->props.limits.optimalBufferCopyOffsetAlignment); - av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %li\n", - p->props.limits.optimalBufferCopyRowPitchAlignment); - av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %li\n", - p->props.limits.minMemoryMapAlignment); + vk->GetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features); + + /* Try to keep in sync with libplacebo */ +#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME; + COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended) + COPY_FEATURE(hwctx->device_features, shaderStorageImageReadWithoutFormat) + COPY_FEATURE(hwctx->device_features, shaderStorageImageWriteWithoutFormat) + COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics) + COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics) + COPY_FEATURE(hwctx->device_features, shaderInt64) +#undef COPY_FEATURE 
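Vulkan devices may only use features that were explicitly enabled when vkCreateDevice() was called, which is why the block above queries everything the hardware offers and then copies across just the handful of features FFmpeg (and, per the comment, libplacebo) actually relies on; the enabled set reaches the driver through hwctx->device_features, chained into dev_info.pNext. The same opt-in pattern in isolation, as a sketch against the raw Vulkan API (phys_dev is assumed to be a valid VkPhysicalDevice):

    VkPhysicalDeviceFeatures  avail;   /* everything the device supports */
    VkPhysicalDeviceFeatures2 enabled = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
    };
    VkDeviceCreateInfo dev_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pNext = &enabled, /* enabled features travel on the pNext chain */
    };

    vkGetPhysicalDeviceFeatures(phys_dev, &avail);
    enabled.features.shaderInt64 = avail.shaderInt64; /* opt in selectively */
    /* ... fill in queues/extensions, then vkCreateDevice(phys_dev, &dev_info, ...) */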
/* Search queue family */ if ((err = search_queue_families(ctx, &dev_info))) goto end; if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames, - &dev_info.enabledExtensionCount, 0))) + &dev_info.enabledExtensionCount, 0))) { + av_free((void *)queue_create_info[0].pQueuePriorities); + av_free((void *)queue_create_info[1].pQueuePriorities); + av_free((void *)queue_create_info[2].pQueuePriorities); goto end; + } - ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc, - &hwctx->act_dev); + ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc, + &hwctx->act_dev); + + av_free((void *)queue_create_info[0].pQueuePriorities); + av_free((void *)queue_create_info[1].pQueuePriorities); + av_free((void *)queue_create_info[2].pQueuePriorities); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n", @@ -878,19 +1291,46 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) uint32_t queue_num; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; /* Set device extension flags */ for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) { for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) { if (!strcmp(hwctx->enabled_dev_extensions[i], optional_device_exts[j].name)) { + av_log(ctx, AV_LOG_VERBOSE, "Using device extension %s\n", + hwctx->enabled_dev_extensions[i]); p->extensions |= optional_device_exts[j].flag; break; } } } - vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL); + err = load_functions(ctx, 1, 0); + if (err < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n"); + return err; + } + + p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + p->props.pNext = &p->hprops; + p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT; + + vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props); + av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", + p->props.properties.deviceName); + av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n"); + av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %"PRIu64"\n", + p->props.properties.limits.optimalBufferCopyRowPitchAlignment); + av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n", + p->props.properties.limits.minMemoryMapAlignment); + if (p->extensions & EXT_EXTERNAL_HOST_MEMORY) + av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n", + p->hprops.minImportedHostPointerAlignment); + + p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de); + + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL); if (!queue_num) { av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); return AVERROR_EXTERNAL; @@ -909,13 +1349,16 @@ if (n >= queue_num) { #undef CHECK_QUEUE - /* Create exec context - if there's something invalid this will error out */ - err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index); - if (err) - return err; + p->qfs[p->num_qfs++] = hwctx->queue_family_index; + if ((hwctx->queue_family_tx_index != hwctx->queue_family_index) && + (hwctx->queue_family_tx_index != hwctx->queue_family_comp_index)) + p->qfs[p->num_qfs++] = hwctx->queue_family_tx_index; + if ((hwctx->queue_family_comp_index != hwctx->queue_family_index) && + (hwctx->queue_family_comp_index != hwctx->queue_family_tx_index)) + p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index; /* Get device capabilities */ - vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); + 
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); return 0; } @@ -937,7 +1380,8 @@ static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device, } static int vulkan_device_derive(AVHWDeviceContext *ctx, - AVHWDeviceContext *src_ctx, int flags) + AVHWDeviceContext *src_ctx, + AVDictionary *opts, int flags) { av_unused VulkanDeviceSelection dev_select = { 0 }; @@ -961,7 +1405,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, if (strstr(vendor, "AMD")) dev_select.vendor_id = 0x1002; - return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + return vulkan_device_create_internal(ctx, &dev_select, opts, flags); } #endif case AV_HWDEVICE_TYPE_DRM: { @@ -979,7 +1423,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, drmFreeDevice(&drm_dev_info); - return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + return vulkan_device_create_internal(ctx, &dev_select, opts, flags); } #endif #if CONFIG_CUDA @@ -998,7 +1442,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, dev_select.has_uuid = 1; - return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + return vulkan_device_create_internal(ctx, &dev_select, opts, flags); } #endif default: @@ -1011,11 +1455,10 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, AVHWFramesConstraints *constraints) { int count = 0; - AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) - count += pixfmt_is_supported(hwctx, i, p->use_linear_images); + count += pixfmt_is_supported(ctx, i, p->use_linear_images); #if CONFIG_CUDA if (p->dev_is_nvidia) @@ -1029,7 +1472,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, count = 0; for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) - if (pixfmt_is_supported(hwctx, i, p->use_linear_images)) + if (pixfmt_is_supported(ctx, i, p->use_linear_images)) constraints->valid_sw_formats[count++] = i; #if CONFIG_CUDA @@ -1040,8 +1483,8 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, constraints->min_width = 0; constraints->min_height = 0; - constraints->max_width = p->props.limits.maxImageDimension2D; - constraints->max_height = p->props.limits.maxImageDimension2D; + constraints->max_width = p->props.properties.limits.maxImageDimension2D; + constraints->max_height = p->props.properties.limits.maxImageDimension2D; constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat)); if (!constraints->valid_hw_formats) @@ -1054,33 +1497,35 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, } static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits req_flags, const void *alloc_extension, VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) { VkResult ret; int index = -1; VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; AVVulkanDeviceContext *dev_hwctx = ctx->hwctx; VkMemoryAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = alloc_extension, + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = alloc_extension, + .allocationSize = req->size, }; - /* Align if we need to */ - if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment); - - alloc_info.allocationSize = req->size; - /* The vulkan spec requires memory types to be sorted in 
the "optimal" * order, so the first matching type we find will be the best/fastest one */ for (int i = 0; i < p->mprops.memoryTypeCount; i++) { + const VkMemoryType *type = &p->mprops.memoryTypes[i]; + /* The memory type must be supported by the requirements (bitfield) */ if (!(req->memoryTypeBits & (1 << i))) continue; /* The memory type flags must include our properties */ - if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + if ((type->propertyFlags & req_flags) != req_flags) + continue; + + /* The memory type must be large enough */ + if (req->size > p->mprops.memoryHeaps[type->heapIndex].size) continue; /* Found a suitable memory type */ @@ -1096,8 +1541,8 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, alloc_info.memoryTypeIndex = index; - ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info, - dev_hwctx->alloc, mem); + ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info, + dev_hwctx->alloc, mem); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", vk_ret2str(ret)); @@ -1123,10 +1568,9 @@ static void vulkan_free_internal(AVVkFrameInternal *internal) AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; CudaFunctions *cu = cu_internal->cuda_dl; - if (internal->cu_sem) - CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem)); - for (int i = 0; i < planes; i++) { + if (internal->cu_sem[i]) + CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i])); if (internal->cu_mma[i]) CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i])); if (internal->ext_mem[i]) @@ -1145,17 +1589,18 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) AVVkFrame *f = (AVVkFrame *)data; AVHWFramesContext *hwfc = opaque; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; int planes = av_pix_fmt_count_planes(hwfc->sw_format); vulkan_free_internal(f->internal); for (int i = 0; i < planes; i++) { - vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } - vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc); - av_free(f); } @@ -1165,6 +1610,8 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, int err; VkResult ret; AVHWDeviceContext *ctx = hwfc->device_ctx; + VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; @@ -1188,7 +1635,11 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, .pNext = &ded_req, }; - vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req); + vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req); + + if (f->tiling == VK_IMAGE_TILING_LINEAR) + req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size, + p->props.properties.limits.minMemoryMapAlignment); /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | @@ -1212,7 +1663,7 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, } /* Bind the allocated memory to the images */ - ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info); + ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info); if 
(ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", vk_ret2str(ret)); @@ -1225,48 +1676,55 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, enum PrepMode { PREP_MODE_WRITE, PREP_MODE_RO_SHADER, + PREP_MODE_EXTERNAL_EXPORT, }; static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, AVVkFrame *frame, enum PrepMode pmode) { - VkResult ret; + int err; + uint32_t dst_qf; VkImageLayout new_layout; VkAccessFlags new_access; - AVHWDeviceContext *ctx = hwfc->device_ctx; - AVVulkanDeviceContext *hwctx = ctx->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - VkSubmitInfo s_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .commandBufferCount = 1, - .pCommandBuffers = &ectx->buf, - - .pSignalSemaphores = &frame->sem, - .signalSemaphoreCount = 1, + .pSignalSemaphores = frame->sem, + .signalSemaphoreCount = planes, }; + VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; + for (int i = 0; i < planes; i++) + wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + switch (pmode) { case PREP_MODE_WRITE: new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; new_access = VK_ACCESS_TRANSFER_WRITE_BIT; + dst_qf = VK_QUEUE_FAMILY_IGNORED; break; case PREP_MODE_RO_SHADER: new_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; new_access = VK_ACCESS_TRANSFER_READ_BIT; + dst_qf = VK_QUEUE_FAMILY_IGNORED; + break; + case PREP_MODE_EXTERNAL_EXPORT: + new_layout = VK_IMAGE_LAYOUT_GENERAL; + new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; + s_info.pWaitSemaphores = frame->sem; + s_info.pWaitDstStageMask = wait_st; + s_info.waitSemaphoreCount = planes; break; } - ret = vkBeginCommandBuffer(ectx->buf, &cmd_start); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; + if ((err = wait_start_exec_ctx(hwfc, ectx))) + return err; /* Change the image layout to something more optimal for writes. 
* This also signals the newly created semaphore, making it usable @@ -1278,7 +1736,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, img_bar[i].oldLayout = frame->layout[i]; img_bar[i].newLayout = new_layout; img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].dstQueueFamilyIndex = dst_qf; img_bar[i].image = frame->img[i]; img_bar[i].subresourceRange.levelCount = 1; img_bar[i].subresourceRange.layerCount = 1; @@ -1288,23 +1746,29 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, frame->access[i] = img_bar[i].dstAccessMask; } - vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - 0, NULL, 0, NULL, planes, img_bar); + vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx), + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, NULL, 0, NULL, planes, img_bar); - ret = vkEndCommandBuffer(ectx->buf); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; + return submit_exec_ctx(hwfc, ectx, &s_info, 0); +} - ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence); - if (ret != VK_SUCCESS) { - return AVERROR_EXTERNAL; - } else { - vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX); - vkResetFences(hwctx->act_dev, 1, &ectx->fence); +static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, + int frame_w, int frame_h, int plane) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); + + /* Currently always true unless gray + alpha support is added */ + if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB || + !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) { + *w = frame_w; + *h = frame_h; + return; } - return 0; + *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w); + *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h); } static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, @@ -1315,6 +1779,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, VkResult ret; AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; enum AVPixelFormat format = hwfc->sw_format; const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format); @@ -1338,32 +1803,30 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, /* Create the images */ for (int i = 0; i < planes; i++) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); - int w = hwfc->width; - int h = hwfc->height; - const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w; - const int p_h = i > 0 ? 
AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h; - - VkImageCreateInfo image_create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = create_pnext, - .imageType = VK_IMAGE_TYPE_2D, - .format = img_fmts[i], - .extent.width = p_w, - .extent.height = p_h, - .extent.depth = 1, - .mipLevels = 1, - .arrayLayers = 1, - .flags = VK_IMAGE_CREATE_ALIAS_BIT, - .tiling = tiling, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .usage = usage, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .samples = VK_SAMPLE_COUNT_1_BIT, + VkImageCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = create_pnext, + .imageType = VK_IMAGE_TYPE_2D, + .format = img_fmts[i], + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .flags = VK_IMAGE_CREATE_ALIAS_BIT, + .tiling = tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = usage, + .samples = VK_SAMPLE_COUNT_1_BIT, + .pQueueFamilyIndices = p->qfs, + .queueFamilyIndexCount = p->num_qfs, + .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; - ret = vkCreateImage(hwctx->act_dev, &image_create_info, - hwctx->alloc, &f->img[i]); + get_plane_wh(&create_info.extent.width, &create_info.extent.height, + format, hwfc->width, hwfc->height, i); + + ret = vk->CreateImage(hwctx->act_dev, &create_info, + hwctx->alloc, &f->img[i]); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", vk_ret2str(ret)); @@ -1371,17 +1834,17 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, goto fail; } - f->layout[i] = image_create_info.initialLayout; - f->access[i] = 0x0; - } + /* Create semaphore */ + ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[i]); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } - /* Create semaphore */ - ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; + f->layout[i] = create_info.initialLayout; + f->access[i] = 0x0; } f->flags = 0x0; @@ -1404,6 +1867,8 @@ static void try_export_flags(AVHWFramesContext *hwfc, VkResult ret; AVVulkanFramesContext *hwctx = hwfc->hwctx; AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; VkExternalImageFormatProperties eprops = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, }; @@ -1425,15 +1890,15 @@ static void try_export_flags(AVHWFramesContext *hwfc, .flags = VK_IMAGE_CREATE_ALIAS_BIT, }; - ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev, - &pinfo, &props); + ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev, + &pinfo, &props); if (ret == VK_SUCCESS) { *iexp |= exp; *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes; } } -static AVBufferRef *vulkan_pool_alloc(void *opaque, int size) +static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) { int err; AVVkFrame *f; @@ -1441,6 +1906,7 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, int size) AVHWFramesContext *hwfc = opaque; AVVulkanFramesContext *hwctx = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFramesPriv *fp = hwfc->internal->priv; VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; VkExternalMemoryHandleTypeFlags e = 
0x0; @@ -1472,7 +1938,7 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, int size) if (err) goto fail; - err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_WRITE); + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE); if (err) goto fail; @@ -1492,7 +1958,9 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc) { VulkanFramesPriv *fp = hwfc->internal->priv; - free_exec_ctx(hwfc->device_ctx, &fp->cmd); + free_exec_ctx(hwfc, &fp->conv_ctx); + free_exec_ctx(hwfc, &fp->upload_ctx); + free_exec_ctx(hwfc, &fp->download_ctx); } static int vulkan_frames_init(AVHWFramesContext *hwfc) @@ -1504,36 +1972,46 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - if (hwfc->pool) - return 0; - /* Default pool flags */ hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; - hwctx->usage |= DEFAULT_USAGE_FLAGS; + if (!hwctx->usage) + hwctx->usage = DEFAULT_USAGE_FLAGS; + + err = create_exec_ctx(hwfc, &fp->conv_ctx, + dev_hwctx->queue_family_comp_index, + GET_QUEUE_COUNT(dev_hwctx, 0, 1, 0)); + if (err) + return err; + + err = create_exec_ctx(hwfc, &fp->upload_ctx, + dev_hwctx->queue_family_tx_index, + GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1)); + if (err) + return err; - err = create_exec_ctx(hwfc->device_ctx, &fp->cmd, - dev_hwctx->queue_family_tx_index); + err = create_exec_ctx(hwfc, &fp->download_ctx, + dev_hwctx->queue_family_tx_index, 1); if (err) return err; /* Test to see if allocation will fail */ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->create_pnext); - if (err) { - free_exec_ctx(hwfc->device_ctx, &p->cmd); + if (err) return err; - } vulkan_frame_free(hwfc, (uint8_t *)f); - hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame), - hwfc, vulkan_pool_alloc, - NULL); - if (!hwfc->internal->pool_internal) { - free_exec_ctx(hwfc->device_ctx, &p->cmd); - return AVERROR(ENOMEM); + /* If user did not specify a pool, hwfc->pool will be set to the internal one + * in hwcontext.c just after this gets called */ + if (!hwfc->pool) { + hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame), + hwfc, vulkan_pool_alloc, + NULL); + if (!hwfc->internal->pool_internal) + return AVERROR(ENOMEM); } return 0; @@ -1578,6 +2056,8 @@ static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) VulkanMapping *map = hwmap->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; /* Check if buffer needs flushing */ if ((map->flags & AV_HWFRAME_MAP_WRITE) && @@ -1591,8 +2071,8 @@ static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) flush_ranges[i].size = VK_WHOLE_SIZE; } - ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes, - flush_ranges); + ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, planes, + flush_ranges); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n", vk_ret2str(ret)); @@ -1600,7 +2080,7 @@ static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) } for (int i = 0; i < planes; i++) - vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]); + vk->UnmapMemory(hwctx->act_dev, map->frame->mem[i]); av_free(map); } @@ -1613,6 +2093,8 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, 
AVVkFrame *f = (AVVkFrame *)src->data[0]; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; VulkanMapping *map = av_mallocz(sizeof(VulkanMapping)); if (!map) @@ -1637,8 +2119,8 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, dst->height = src->height; for (int i = 0; i < planes; i++) { - ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0, - VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); + ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0, + VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n", vk_ret2str(ret)); @@ -1658,8 +2140,8 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, map_mem_ranges[i].memory = f->mem[i]; } - ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes, - map_mem_ranges); + ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes, + map_mem_ranges); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", vk_ret2str(ret)); @@ -1673,7 +2155,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, }; VkSubresourceLayout layout; - vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); + vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); dst->linesize[i] = layout.rowPitch; } @@ -1689,7 +2171,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, fail: for (int i = 0; i < mapped_mem_count; i++) - vkUnmapMemory(hwctx->act_dev, f->mem[i]); + vk->UnmapMemory(hwctx->act_dev, f->mem[i]); av_free(map); return err; @@ -1701,14 +2183,15 @@ static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) VulkanMapping *map = hwmap->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; for (int i = 0; i < planes; i++) { - vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc); - vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc); + vk->DestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc); + vk->DestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc); } - vkDestroySemaphore(hwctx->act_dev, map->frame->sem, hwctx->alloc); - av_freep(&map->frame); } @@ -1737,7 +2220,7 @@ static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc) } static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame, - AVDRMFrameDescriptor *desc) + const AVFrame *src) { int err = 0; VkResult ret; @@ -1746,17 +2229,15 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f AVHWDeviceContext *ctx = hwfc->device_ctx; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; - const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format); - const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS; + VulkanFunctions *vk = &p->vkfn; + VulkanFramesPriv *fp = hwfc->internal->priv; + AVVulkanFramesContext *frames_hwctx = hwfc->hwctx; + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; + const int has_modifiers = !!(p->extensions & EXT_DRM_MODIFIER_FLAGS); VkSubresourceLayout 
plane_data[AV_NUM_DATA_POINTERS] = { 0 }; VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 }; VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 }; VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - }; - - VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR); for (int i = 0; i < desc->nb_layers; i++) { if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) { @@ -1772,49 +2253,12 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f goto fail; } - for (int i = 0; i < desc->nb_objects; i++) { - VkMemoryFdPropertiesKHR fdmp = { - .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, - }; - VkMemoryRequirements req = { - .size = desc->objects[i].size, - }; - VkImportMemoryFdInfoKHR idesc = { - .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - .handleType = htype, - .fd = dup(desc->objects[i].fd), - }; - - ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype, - idesc.fd, &fdmp); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - close(idesc.fd); - goto fail; - } - - req.memoryTypeBits = fdmp.memoryTypeBits; - - err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - &idesc, &f->flags, &f->mem[i]); - if (err) { - close(idesc.fd); - return err; - } - - f->size[i] = desc->objects[i].size; - } - f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; for (int i = 0; i < desc->nb_layers; i++) { const int planes = desc->layers[i].nb_planes; - const int signal_p = has_modifiers && (planes > 1); - VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, .drmFormatModifier = desc->objects[0].format_modifier, @@ -1828,28 +2272,32 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f .handleTypes = htype, }; - const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width; - const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height; - - VkImageCreateInfo image_create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = &einfo, - .imageType = VK_IMAGE_TYPE_2D, - .format = drm_to_vulkan_fmt(desc->layers[i].format), - .extent.width = p_w, - .extent.height = p_h, - .extent.depth = 1, - .mipLevels = 1, - .arrayLayers = 1, - .flags = VK_IMAGE_CREATE_ALIAS_BIT | - (signal_p ? 
VK_IMAGE_CREATE_DISJOINT_BIT : 0x0), - .tiling = f->tiling, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ - .usage = DEFAULT_USAGE_FLAGS, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .samples = VK_SAMPLE_COUNT_1_BIT, + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, }; + VkImageCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = &einfo, + .imageType = VK_IMAGE_TYPE_2D, + .format = drm_to_vulkan_fmt(desc->layers[i].format), + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .flags = VK_IMAGE_CREATE_ALIAS_BIT, + .tiling = f->tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ + .usage = frames_hwctx->usage, + .samples = VK_SAMPLE_COUNT_1_BIT, + .pQueueFamilyIndices = p->qfs, + .queueFamilyIndexCount = p->num_qfs, + .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, + }; + + get_plane_wh(&create_info.extent.width, &create_info.extent.height, + hwfc->sw_format, src->width, src->height, i); + for (int j = 0; j < planes; j++) { plane_data[j].offset = desc->layers[i].planes[j].offset; plane_data[j].rowPitch = desc->layers[i].planes[j].pitch; @@ -1859,8 +2307,8 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f } /* Create image */ - ret = vkCreateImage(hwctx->act_dev, &image_create_info, - hwctx->alloc, &f->img[i]); + ret = vk->CreateImage(hwctx->act_dev, &create_info, + hwctx->alloc, &f->img[i]); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", vk_ret2str(ret)); @@ -1868,9 +2316,91 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f goto fail; } - f->layout[i] = image_create_info.initialLayout; + ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[i]); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* We'd import a semaphore onto the one we created using + * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI + * offer us anything we could import and sync with, so instead + * just signal the semaphore we created. */
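For reference, the import that the comment above wishes for would be a single call if DRM or VAAPI ever handed us a sync FD. A minimal sketch of what it would look like, assuming an ImportSemaphoreFdKHR entry were added to the FN_LIST loader table (the helper name and its placement are hypothetical, not part of this patch):

static int import_sync_fd(AVHWFramesContext *hwfc, VkSemaphore sem, int fd)
{
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    VulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VkImportSemaphoreFdInfoKHR import_info = {
        .sType      = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
        .semaphore  = sem,
        /* A SYNC_FD payload is consumed by the next wait, so the spec
         * requires the import to be flagged as temporary */
        .flags      = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
        .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
        .fd         = fd, /* ownership of the FD passes to the driver */
    };
    /* Assumes an ImportSemaphoreFdKHR member exists in VulkanFunctions */
    VkResult ret = vk->ImportSemaphoreFdKHR(hwctx->act_dev, &import_info);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Failed to import sync FD: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    return 0;
}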
+ + f->layout[i] = create_info.initialLayout; f->access[i] = 0x0; + } + for (int i = 0; i < desc->nb_objects; i++) { + int use_ded_mem = 0; + VkMemoryFdPropertiesKHR fdmp = { + .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, + }; + VkMemoryRequirements req = { + .size = desc->objects[i].size, + }; + VkImportMemoryFdInfoKHR idesc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, + .handleType = htype, + .fd = dup(desc->objects[i].fd), + }; + VkMemoryDedicatedAllocateInfo ded_alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = &idesc, + }; + + ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev, htype, + idesc.fd, &fdmp); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + close(idesc.fd); + goto fail; + } + + req.memoryTypeBits = fdmp.memoryTypeBits; + + /* Dedicated allocation only makes sense if there's a one-to-one mapping + * between images and the memory backing them, so only check in this + * case. */ + if (desc->nb_layers == desc->nb_objects) { + VkImageMemoryRequirementsInfo2 req_desc = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, + .image = f->img[i], + }; + VkMemoryDedicatedRequirements ded_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + }; + VkMemoryRequirements2 req2 = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = &ded_req, + }; + + vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2); + + use_ded_mem = ded_req.prefersDedicatedAllocation | + ded_req.requiresDedicatedAllocation; + if (use_ded_mem) + ded_alloc.image = f->img[i]; + } + + err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + use_ded_mem ? &ded_alloc : ded_alloc.pNext, + &f->flags, &f->mem[i]); + if (err) { + close(idesc.fd); + return err; + } + + f->size[i] = desc->objects[i].size; + } + + for (int i = 0; i < desc->nb_layers; i++) { + const int planes = desc->layers[i].nb_planes; + const int signal_p = has_modifiers && (planes > 1); for (int j = 0; j < planes; j++) { VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT : j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT : @@ -1888,21 +2418,8 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f } } - ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - /* We'd import a semaphore onto the one we created using - * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI - * offer us anything we could import and sync with, so instead - * just signal the semaphore we created. */ - /* Bind the allocated memory to the images */ - ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info); + ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", vk_ret2str(ret));
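The bind_info array consumed by the vk->BindImageMemory2() call above is filled out per memory plane, but the hunk elides that setup. As a rough, self-contained sketch of the pattern (hypothetical helper; the real loop above additionally maps DRM layers and objects onto the entries): one VkBindImageMemoryInfo per plane is chained to a VkBindImagePlaneMemoryInfo naming the plane aspect, and everything is bound in one call, since plain vkBindImageMemory cannot bind images created with VK_IMAGE_CREATE_DISJOINT_BIT.

static void fill_disjoint_bind(VkBindImageMemoryInfo *bind_info,
                               VkBindImagePlaneMemoryInfo *plane_info,
                               VkImage img, VkDeviceMemory *mem, int nb_planes)
{
    for (int j = 0; j < nb_planes; j++) {
        plane_info[j] = (VkBindImagePlaneMemoryInfo) {
            .sType       = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO,
            .planeAspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                           j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                    VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT,
        };
        bind_info[j] = (VkBindImageMemoryInfo) {
            .sType        = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
            .pNext        = &plane_info[j], /* selects the plane to bind */
            .image        = img,            /* every entry aliases one image */
            .memory       = mem[j],
            .memoryOffset = 0,
        };
    }
}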
@@ -1912,7 +2429,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f /* NOTE: This is completely unnecessary once we can import * semaphores from DRM. Otherwise we have to activate the semaphores. * We're reusing the exec context that's also used for uploads/downloads. */ - err = prepare_frame(hwfc, &p->cmd, f, PREP_MODE_RO_SHADER); + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_RO_SHADER); if (err) goto fail; @@ -1921,11 +2438,12 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f return 0; fail: - for (int i = 0; i < desc->nb_layers; i++) - vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + for (int i = 0; i < desc->nb_layers; i++) { + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); + } for (int i = 0; i < desc->nb_objects; i++) - vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); - vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); av_free(f); @@ -1939,9 +2457,7 @@ static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst, AVVkFrame *f; VulkanMapping *map = NULL; - err = vulkan_map_from_drm_frame_desc(hwfc, &f, - (AVDRMFrameDescriptor *)src->data[0]); - if (err) + if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src))) return err; /* The unmapping function will free this */ @@ -2020,8 +2536,8 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, AVVulkanDeviceContext *hwctx = ctx->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); - VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR); - VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR); + VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data; AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; @@ -2035,15 +2551,6 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, dst_int = dst_f->internal; if (!dst_int || !dst_int->cuda_fc_ref) { - VkSemaphoreGetFdInfoKHR sem_export = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, - .semaphore = dst_f->sem, - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, - }; - CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { - .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, - }; - if (!dst_f->internal) dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); @@ -2062,10 +2569,6 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { .offset = 0, .arrayDesc = { - .Width = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) - : hwfc->width, - .Height = i > 0 ?
AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) - : hwfc->height, .Depth = 0, .Format = cufmt, .NumChannels = 1 + ((planes == 2) && i), @@ -2082,9 +2585,23 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, .memory = dst_f->mem[i], .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, }; + VkSemaphoreGetFdInfoKHR sem_export = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = dst_f->sem[i], + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { + .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, + }; - ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info, - &ext_desc.handle.fd); + int p_w, p_h; + get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i); + + tex_desc.arrayDesc.Width = p_w; + tex_desc.arrayDesc.Height = p_h; + + ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info, + &ext_desc.handle.fd); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); err = AVERROR_EXTERNAL; @@ -2111,22 +2628,22 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, err = AVERROR_EXTERNAL; goto fail; } - } - ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export, - &ext_sem_desc.handle.fd); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } + ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export, + &ext_sem_desc.handle.fd); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } - ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem, - &ext_sem_desc)); - if (ret < 0) { - err = AVERROR_EXTERNAL; - goto fail; + ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i], + &ext_sem_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } } } @@ -2152,25 +2669,25 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; CudaFunctions *cu = cu_internal->cuda_dl; - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par = { 0 }; - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); - if (ret < 0) { - err = AVERROR_EXTERNAL; - goto fail; - } + if (ret < 0) + return AVERROR_EXTERNAL; dst_f = (AVVkFrame *)dst->data[0]; ret = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst); if (ret < 0) { - goto fail; + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return ret; } + dst_int = dst_f->internal; - ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&dst_int->cu_sem, &s_w_par, - 1, cuda_dev->stream)); + ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, + planes, cuda_dev->stream)); if (ret < 0) { err = AVERROR_EXTERNAL; goto fail; @@ -2185,12 +2702,14 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, .dstMemoryType = CU_MEMORYTYPE_ARRAY, .dstArray = dst_int->cu_array[i], - .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) - : hwfc->width) * desc->comp[i].step, - .Height = i > 0 ? 
AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) - : hwfc->height, }; + int p_w, p_h; + get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i); + + cpy.WidthInBytes = p_w * desc->comp[i].step; + cpy.Height = p_h; + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); if (ret < 0) { err = AVERROR_EXTERNAL; @@ -2198,8 +2717,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, } } - ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&dst_int->cu_sem, &s_s_par, - 1, cuda_dev->stream)); + ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, + planes, cuda_dev->stream)); if (ret < 0) { err = AVERROR_EXTERNAL; goto fail; @@ -2272,9 +2791,10 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, VkResult ret; AVVkFrame *f = (AVVkFrame *)src->data[0]; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; + VulkanFramesPriv *fp = hwfc->internal->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR); VkImageDrmFormatModifierPropertiesEXT drm_mod = { .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, }; @@ -2283,14 +2803,17 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, if (!drm_desc) return AVERROR(ENOMEM); + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT); + if (err < 0) + goto end; + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc); if (err < 0) goto end; if (p->extensions & EXT_DRM_MODIFIER_FLAGS) { - VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT); - ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], - &drm_mod); + ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], + &drm_mod); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n"); err = AVERROR_EXTERNAL; @@ -2305,8 +2828,8 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; - ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info, - &drm_desc->objects[i].fd); + ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info, + &drm_desc->objects[i].fd); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); err = AVERROR_EXTERNAL; @@ -2339,10 +2862,10 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1); - if (f->tiling != VK_IMAGE_TILING_OPTIMAL) + if (f->tiling == VK_IMAGE_TILING_OPTIMAL) continue; - vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); + vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); drm_desc->layers[i].planes[0].offset = layout.offset; drm_desc->layers[i].planes[0].pitch = layout.rowPitch; } @@ -2413,74 +2936,142 @@ typedef struct ImageBuffer { VkBuffer buf; VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; + int mapped_mem; } ImageBuffer; -static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf) +static void free_buf(void *opaque, uint8_t *data) { + AVHWDeviceContext *ctx = opaque; AVVulkanDeviceContext *hwctx = ctx->hwctx; - if (!buf) - return; + VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; + ImageBuffer *vkbuf = (ImageBuffer *)data; + + if (vkbuf->buf) + 
vk->DestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc); + if (vkbuf->mem) + vk->FreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc); - vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc); - vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc); + av_free(data); } -static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height, - int *stride, VkBufferUsageFlags usage, - VkMemoryPropertyFlagBits flags, void *create_pnext, - void *alloc_pnext) +static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) +{ + size_t size; + *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment); + size = height*(*stride); + size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment); + return size; +} + +static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags, + size_t size, uint32_t req_memory_bits, int host_mapped, + void *create_pnext, void *alloc_pnext) { int err; VkResult ret; - VkMemoryRequirements req; + int use_ded_mem; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; VkBufferCreateInfo buf_spawn = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = create_pnext, .usage = usage, + .size = size, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; - *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment); - buf_spawn.size = height*(*stride); + VkBufferMemoryRequirementsInfo2 req_desc = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, + }; + VkMemoryDedicatedAllocateInfo ded_alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = alloc_pnext, + }; + VkMemoryDedicatedRequirements ded_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + }; + VkMemoryRequirements2 req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = &ded_req, + }; + + ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf)); + if (!vkbuf) + return AVERROR(ENOMEM); + + vkbuf->mapped_mem = host_mapped; - ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf); + ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", vk_ret2str(ret)); - return AVERROR_EXTERNAL; + err = AVERROR_EXTERNAL; + goto fail; } - vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req); + req_desc.buffer = vkbuf->buf; + + vk->GetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req); + + /* In case the implementation prefers/requires dedicated allocation */ + use_ded_mem = ded_req.prefersDedicatedAllocation | + ded_req.requiresDedicatedAllocation; + if (use_ded_mem) + ded_alloc.buffer = vkbuf->buf; - err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem); + /* Additional requirements imposed on us */ + if (req_memory_bits) + req.memoryRequirements.memoryTypeBits &= req_memory_bits; + + err = alloc_mem(ctx, &req.memoryRequirements, flags, + use_ded_mem ? 
&ded_alloc : (void *)ded_alloc.pNext, + &vkbuf->flags, &vkbuf->mem); if (err) - return err; + goto fail; - ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0); + ret = vk->BindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", vk_ret2str(ret)); - free_buf(ctx, buf); - return AVERROR_EXTERNAL; + err = AVERROR_EXTERNAL; + goto fail; + } + + *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0); + if (!(*buf)) { + err = AVERROR(ENOMEM); + goto fail; } return 0; + +fail: + free_buf(ctx, (uint8_t *)vkbuf); + return err; } -static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[], +/* Skips mapping of host-mapped buffers but still invalidates them */ +static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[], int nb_buffers, int invalidate) { VkResult ret; AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS]; int invalidate_count = 0; for (int i = 0; i < nb_buffers; i++) { - ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0, - VK_WHOLE_SIZE, 0, (void **)&mem[i]); + ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; + if (vkbuf->mapped_mem) + continue; + + ret = vk->MapMemory(hwctx->act_dev, vkbuf->mem, 0, + VK_WHOLE_SIZE, 0, (void **)&mem[i]); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", vk_ret2str(ret)); @@ -2492,19 +3083,28 @@ static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[], return 0; for (int i = 0; i < nb_buffers; i++) { + ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; const VkMappedMemoryRange ival_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = vkbuf->mem, .size = VK_WHOLE_SIZE, }; + + /* For host-imported memory, Vulkan says to use platform-defined + * sync methods, but doesn't actually forbid calling flush or invalidate + * on the original host pointers. It does explicitly allow doing so on + * host-mapped pointers which are then mapped again using vkMapMemory, + * but known implementations return the original pointers when mapped + * again. */
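The other constraint host import has to deal with is alignment: VK_EXT_external_memory_host requires both the imported pointer and the allocation size to be multiples of minImportedHostPointerAlignment. The arithmetic that the transfer code further down inlines boils down to this sketch (hypothetical helper, shown only for clarity):

static void align_host_import(const uint8_t *ptr, size_t len, size_t align,
                              const uint8_t **aligned_ptr, size_t *aligned_len,
                              size_t *offs)
{
    /* Round the pointer down to the previous alignment boundary */
    *offs        = (uintptr_t)ptr % align;
    *aligned_ptr = ptr - *offs;
    /* Pad the size up so the aligned range still covers the original
     * data; the leading offs bytes are later skipped via the buffer
     * offset of the copy */
    *aligned_len = FFALIGN(*offs + len, align);
}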
+ if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; + invalidate_ctx[invalidate_count++] = ival_buf; } if (invalidate_count) { - ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count, - invalidate_ctx); + ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count, + invalidate_ctx); if (ret != VK_SUCCESS) av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n", vk_ret2str(ret)); @@ -2513,30 +3113,35 @@ static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, return 0; } -static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, +static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, int nb_buffers, int flush) { int err = 0; VkResult ret; AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS]; int flush_count = 0; if (flush) { for (int i = 0; i < nb_buffers; i++) { + ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; const VkMappedMemoryRange flush_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = vkbuf->mem, .size = VK_WHOLE_SIZE, }; + + if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; + flush_ctx[flush_count++] = flush_buf; } } if (flush_count) { - ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx); + ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", vk_ret2str(ret)); @@ -2544,19 +3149,27 @@ static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, } } - for (int i = 0; i < nb_buffers; i++) - vkUnmapMemory(hwctx->act_dev, buf[i].mem); + for (int i = 0; i < nb_buffers; i++) { + ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; + if (vkbuf->mapped_mem) + continue; + + vk->UnmapMemory(hwctx->act_dev, vkbuf->mem); + } return err; } -static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame, - ImageBuffer *buffer, const int *buf_stride, int w, +static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, + AVBufferRef **bufs, size_t *buf_offsets, + const int *buf_stride, int w, int h, enum AVPixelFormat pix_fmt, int to_buf) { - VkResult ret; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - VulkanDevicePriv *s = ctx->internal->priv; + int err; + AVVkFrame *frame = (AVVkFrame *)f->data[0]; + VulkanFramesPriv *fp = hwfc->internal->priv; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; int bar_num = 0; VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS]; @@ -2564,30 +3177,21 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame, const int planes = av_pix_fmt_count_planes(pix_fmt); const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + VulkanExecCtx *ectx = to_buf ?
&fp->download_ctx : &fp->upload_ctx; + VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx); VkSubmitInfo s_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .commandBufferCount = 1, - .pCommandBuffers = &s->cmd.buf, - .pSignalSemaphores = &frame->sem, - .pWaitSemaphores = &frame->sem, + .pSignalSemaphores = frame->sem, + .pWaitSemaphores = frame->sem, .pWaitDstStageMask = sem_wait_dst, - .signalSemaphoreCount = 1, - .waitSemaphoreCount = 1, + .signalSemaphoreCount = planes, + .waitSemaphoreCount = planes, }; - ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } + if ((err = wait_start_exec_ctx(hwfc, ectx))) + return err; /* Change the image layout to something more optimal for transfers */ for (int i = 0; i < planes; i++) { @@ -2621,80 +3225,84 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame, } if (bar_num) - vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - 0, NULL, 0, NULL, bar_num, img_bar); + vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - 0, NULL, 0, NULL, bar_num, img_bar); /* Schedule a copy for each plane */ for (int i = 0; i < planes; i++) { - const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w; - const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h; + ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; VkBufferImageCopy buf_reg = { - .bufferOffset = 0, - /* Buffer stride isn't in bytes, it's in samples, the implementation - * uses the image's VkFormat to know how many bytes per sample - * the buffer has. So we have to convert by dividing. Stupid. - * Won't work with YUVA or other planar formats with alpha. */ + .bufferOffset = buf_offsets[i], .bufferRowLength = buf_stride[i] / desc->comp[i].step, - .bufferImageHeight = p_h, .imageSubresource.layerCount = 1, .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .imageOffset = { 0, 0, 0, }, - .imageExtent = { p_w, p_h, 1, }, }; - if (to_buf) - vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i], - buffer[i].buf, 1, &buf_reg); - else - vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i], - frame->layout[i], 1, &buf_reg); - } + int p_w, p_h; + get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i); - ret = vkEndCommandBuffer(s->cmd.buf); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } + buf_reg.bufferImageHeight = p_h; + buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; - /* Wait for the download/upload to finish if uploading, otherwise the - * semaphore will take care of synchronization when uploading */ - ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; + if (to_buf) + vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i], + vkbuf->buf, 1, &buf_reg); + else + vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i], + frame->layout[i], 1, &buf_reg); + } + + /* When uploading, do this asynchronously if the source is refcounted by + * keeping the buffers as a submission dependency. + * The hwcontext is guaranteed to not be freed until all frames are freed + * in the frames_uninit function. + * When downloading to a buffer, do this synchronously and wait for the + * queue submission to finish executing */
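Sketched out, the dependency scheme that comment describes looks roughly like this (illustrative only; the actual bookkeeping lives in the add_buf_dep_exec_ctx()/submit_exec_ctx() helpers earlier in the file): a submission takes a reference on every buffer it reads, and those references are dropped only once the fence of that submission has signalled.

typedef struct PendingDeps {
    VkFence      fence;
    AVBufferRef *deps[AV_NUM_DATA_POINTERS];
    int          nb_deps;
} PendingDeps;

static int retain_dep(PendingDeps *pd, AVBufferRef *buf)
{
    if (pd->nb_deps >= AV_NUM_DATA_POINTERS)
        return AVERROR(EINVAL);
    pd->deps[pd->nb_deps] = av_buffer_ref(buf);
    if (!pd->deps[pd->nb_deps])
        return AVERROR(ENOMEM);
    pd->nb_deps++;
    return 0;
}

static void release_deps(VulkanFunctions *vk, VkDevice dev, PendingDeps *pd)
{
    /* Waits synchronously for simplicity; the helpers in this file instead
     * check the fence the next time the queue slot is reused, which is
     * what keeps uploads asynchronous */
    vk->WaitForFences(dev, 1, &pd->fence, VK_TRUE, UINT64_MAX);
    for (int i = 0; i < pd->nb_deps; i++)
        av_buffer_unref(&pd->deps[i]);
    pd->nb_deps = 0;
}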
+ if (!to_buf) { + int ref; + for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) { + if (!f->buf[ref]) + break; + if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) + return err; + } + if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes))) + return err; + return submit_exec_ctx(hwfc, ectx, &s_info, !ref); } else { - vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX); - vkResetFences(hwctx->act_dev, 1, &s->cmd.fence); + return submit_exec_ctx(hwfc, ectx, &s_info, 1); } - - return 0; } -/* Technically we can use VK_EXT_external_memory_host to upload and download, - * however the alignment requirements make this unfeasible as both the pointer - * and the size of each plane need to be aligned to the minimum alignment - * requirement, which on all current implementations (anv, radv) is 4096. - * If the requirement gets relaxed (unlikely) this can easily be implemented. */ -static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src) +static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, + const AVFrame *swf, int from) { int err = 0; - AVFrame tmp; - AVVkFrame *f = (AVVkFrame *)dst->data[0]; + VkResult ret; + AVVkFrame *f = (AVVkFrame *)vkf->data[0]; AVHWDeviceContext *dev_ctx = hwfc->device_ctx; - ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } }; - const int planes = av_pix_fmt_count_planes(src->format); - int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h; + AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VulkanFunctions *vk = &p->vkfn; + + AVFrame tmp; + AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; + size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 }; + + int p_w, p_h; + const int planes = av_pix_fmt_count_planes(swf->format); + + int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; + const int map_host = !!(p->extensions & EXT_EXTERNAL_HOST_MEMORY); - if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) { - av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n"); + if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); return AVERROR(EINVAL); } - if (src->width > hwfc->width || src->height > hwfc->height) + if (swf->width > hwfc->width || swf->height > hwfc->height) return AVERROR(EINVAL); /* For linear, host visible images */ @@ -2703,53 +3311,130 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst, AVFrame *map = av_frame_alloc(); if (!map) return AVERROR(ENOMEM); - map->format = src->format; + map->format = swf->format; - err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE); + err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE); if (err) - goto end; + return err; - err = av_frame_copy(map, src); + err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf); av_frame_free(&map); - goto end; + return err; } /* Create buffers */ for (int i = 0; i < planes; i++) { - int h = src->height; - int p_height = i > 0 ?
AV_CEIL_RSHIFT(h, log2_chroma) : h; + size_t req_size; - tmp.linesize[i] = FFABS(src->linesize[i]); - err = create_buf(dev_ctx, &buf[i], p_height, - &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL); + VkExternalMemoryBufferCreateInfo create_desc = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + }; + + VkImportMemoryHostPointerInfoEXT import_desc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + }; + + VkMemoryHostPointerPropertiesEXT p_props = { + .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, + }; + + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + + tmp.linesize[i] = FFABS(swf->linesize[i]); + + /* Do not map images with a negative stride */ + if (map_host && swf->linesize[i] > 0) { + size_t offs; + offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment; + import_desc.pHostPointer = swf->data[i] - offs; + + /* We have to compensate for the few extra bytes of padding we + * completely ignore at the start */ + req_size = FFALIGN(offs + tmp.linesize[i] * p_h, + p->hprops.minImportedHostPointerAlignment); + + ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev, + import_desc.handleType, + import_desc.pHostPointer, + &p_props); + + if (ret == VK_SUCCESS) { + host_mapped[i] = 1; + buf_offsets[i] = offs; + } + } + + if (!host_mapped[i]) + req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h); + + err = create_buf(dev_ctx, &bufs[i], + from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + req_size, p_props.memoryTypeBits, host_mapped[i], + host_mapped[i] ? &create_desc : NULL, + host_mapped[i] ? 
&import_desc : NULL); if (err) goto end; } - /* Map, copy image to buffer, unmap */ - if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0))) - goto end; + if (!from) { + /* Map, copy image to buffer, unmap */ + if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) + goto end; - av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data, - src->linesize, src->format, src->width, src->height); + for (int i = 0; i < planes; i++) { + if (host_mapped[i]) + continue; - if ((err = unmap_buffers(dev_ctx, buf, planes, 1))) - goto end; + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); - /* Copy buffers to image */ - err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize, - src->width, src->height, src->format, 0); + av_image_copy_plane(tmp.data[i], tmp.linesize[i], + (const uint8_t *)swf->data[i], swf->linesize[i], + FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), + p_h); + } + + if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) + goto end; + } + + /* Copy buffers into/from image */ + err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize, + swf->width, swf->height, swf->format, from); + + if (from) { + /* Map, copy buffer to frame, unmap */ + if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) + goto end; + + for (int i = 0; i < planes; i++) { + if (host_mapped[i]) + continue; + + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + + av_image_copy_plane(swf->data[i], swf->linesize[i], + (const uint8_t *)tmp.data[i], tmp.linesize[i], + FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), + p_h); + } + + if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) + goto end; + } end: for (int i = 0; i < planes; i++) - free_buf(dev_ctx, &buf[i]); + av_buffer_unref(&bufs[i]); return err; } static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src) + const AVFrame *src) { av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; @@ -2764,13 +3449,13 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, if (src->hw_frames_ctx) return AVERROR(ENOSYS); else - return vulkan_transfer_data_from_mem(hwfc, dst, src); + return vulkan_transfer_data(hwfc, dst, src, 0); } } #if CONFIG_CUDA static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src) + const AVFrame *src) { int err; VkResult ret; @@ -2785,22 +3470,30 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; CudaFunctions *cu = cu_internal->cuda_dl; + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); - if (ret < 0) { - err = AVERROR_EXTERNAL; - goto fail; - } + if (ret < 0) + return AVERROR_EXTERNAL; dst_f = (AVVkFrame *)src->data[0]; err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src); if (err < 0) { - goto fail; + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return err; } dst_int = dst_f->internal; + ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, + planes, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + for (int i = 0; i < planes; i++) { CUDA_MEMCPY2D cpy = { .dstMemoryType = CU_MEMORYTYPE_DEVICE, @@ -2810,12 +3503,14 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, .srcMemoryType =
CU_MEMORYTYPE_ARRAY, .srcArray = dst_int->cu_array[i], - .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) - : hwfc->width) * desc->comp[i].step, - .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) - : hwfc->height, }; + int w, h; + get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i); + + cpy.WidthInBytes = w * desc->comp[i].step; + cpy.Height = h; + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); if (ret < 0) { err = AVERROR_EXTERNAL; @@ -2823,6 +3518,13 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, } } + ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, + planes, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n"); @@ -2838,69 +3540,6 @@ fail: } #endif -static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src) -{ - int err = 0; - AVFrame tmp; - AVVkFrame *f = (AVVkFrame *)src->data[0]; - AVHWDeviceContext *dev_ctx = hwfc->device_ctx; - ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } }; - const int planes = av_pix_fmt_count_planes(dst->format); - int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h; - - if (dst->width > hwfc->width || dst->height > hwfc->height) - return AVERROR(EINVAL); - - /* For linear, host visiable images */ - if (f->tiling == VK_IMAGE_TILING_LINEAR && - f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { - AVFrame *map = av_frame_alloc(); - if (!map) - return AVERROR(ENOMEM); - map->format = dst->format; - - err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ); - if (err) - return err; - - err = av_frame_copy(dst, map); - av_frame_free(&map); - return err; - } - - /* Create buffers */ - for (int i = 0; i < planes; i++) { - int h = dst->height; - int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h; - - tmp.linesize[i] = FFABS(dst->linesize[i]); - err = create_buf(dev_ctx, &buf[i], p_height, - &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL); - } - - /* Copy image to buffer */ - if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize, - dst->width, dst->height, dst->format, 1))) - goto end; - - /* Map, copy buffer to frame, unmap */ - if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1))) - goto end; - - av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data, - tmp.linesize, dst->format, dst->width, dst->height); - - err = unmap_buffers(dev_ctx, buf, planes, 0); - -end: - for (int i = 0; i < planes; i++) - free_buf(dev_ctx, &buf[i]); - - return err; -} - static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, const AVFrame *src) { @@ -2917,10 +3556,16 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, if (dst->hw_frames_ctx) return AVERROR(ENOSYS); else - return vulkan_transfer_data_to_mem(hwfc, dst, src); + return vulkan_transfer_data(hwfc, src, dst, 1); } } +static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc, + AVHWFramesContext *src_fc, int flags) +{ + return vulkan_frames_init(dst_fc); +} + AVVkFrame *av_vk_frame_alloc(void) { return av_mallocz(sizeof(AVVkFrame)); @@ -2950,6 +3595,7 @@ const HWContextType ff_hwcontext_type_vulkan = { .map_to = vulkan_map_to, .map_from = vulkan_map_from, + .frames_derive_to = &vulkan_frames_derive_to, .pix_fmts = (const enum AVPixelFormat []) { AV_PIX_FMT_VULKAN,
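For context, this is how the transfer paths above are reached from the public API; a minimal usage sketch (hypothetical snippet, error paths trimmed; hw_frame is expected to come from av_frame_alloc()):

#include <libavutil/hwcontext.h>
#include <libavutil/frame.h>

static int upload_to_vulkan(const AVFrame *sw_frame, AVFrame *hw_frame)
{
    AVBufferRef *dev_ref = NULL, *frames_ref;
    AVHWFramesContext *frames;
    int err = av_hwdevice_ctx_create(&dev_ref, AV_HWDEVICE_TYPE_VULKAN,
                                     NULL, NULL, 0);
    if (err < 0)
        return err;

    frames_ref = av_hwframe_ctx_alloc(dev_ref);
    if (!frames_ref) {
        av_buffer_unref(&dev_ref);
        return AVERROR(ENOMEM);
    }

    frames            = (AVHWFramesContext *)frames_ref->data;
    frames->format    = AV_PIX_FMT_VULKAN;
    frames->sw_format = sw_frame->format;  /* e.g. AV_PIX_FMT_NV12 */
    frames->width     = sw_frame->width;
    frames->height    = sw_frame->height;

    err = av_hwframe_ctx_init(frames_ref);      /* runs vulkan_frames_init() */
    if (err >= 0)                               /* grabs a pooled AVVkFrame */
        err = av_hwframe_get_buffer(frames_ref, hw_frame, 0);
    if (err >= 0)                               /* upload via the code above */
        err = av_hwframe_transfer_data(hw_frame, sw_frame, 0);

    av_buffer_unref(&frames_ref);
    av_buffer_unref(&dev_ref);
    return err;
}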