static const struct {
enum AVPixelFormat pixfmt;
- const VkFormat vkfmts[3];
+ const VkFormat vkfmts[4];
} vk_pixfmt_map[] = {
{ AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
{ AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
{ AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
{ AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
{ AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
{ AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
- { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+
+ { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
{ AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+ { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
{ AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+ { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
{ AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_ABGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
+ { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ /* There is no AV_PIX_FMT_YUVA420P12 */
+ { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+ { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+ { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
{ AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } },
{ AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } },
{ AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } },
{ AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } },
{ AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } },
{ AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
{ AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
{ AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
{ AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } },
- { AV_PIX_FMT_0BGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
{ AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } },
+ /* Lower priority as there's an endianness-dependent overlap between these
+ * and rgba/bgr0, and PACK32 formats are more limited */
+ { AV_PIX_FMT_BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
+ { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
+
+ { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
+
+ { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
{ AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
EXT_EXTERNAL_FD_MEMORY = 1ULL << 2, /* VK_KHR_external_memory_fd */
EXT_EXTERNAL_FD_SEM = 1ULL << 3, /* VK_KHR_external_semaphore_fd */
EXT_EXTERNAL_HOST_MEMORY = 1ULL << 4, /* VK_EXT_external_memory_host */
+ EXT_PUSH_DESCRIPTORS = 1ULL << 5, /* VK_KHR_push_descriptor */
+ EXT_HOST_QUERY_RESET = 1ULL << 6, /* VK_EXT_host_query_reset */
EXT_NO_FLAG = 1ULL << 63,
};
{ VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, },
{ VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_SEM, },
{ VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, EXT_EXTERNAL_HOST_MEMORY, },
+ { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, EXT_PUSH_DESCRIPTORS, },
+ { VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, EXT_HOST_QUERY_RESET, },
};
/* Converts return values to strings */
cmd->nb_queues = num_queues;
- cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
- if (!cmd->queues)
- return AVERROR(ENOMEM);
-
- cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
- if (!cmd->bufs)
- return AVERROR(ENOMEM);
-
/* Create command pool */
ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
hwctx->alloc, &cmd->pool);
return AVERROR_EXTERNAL;
}
+ cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
+ if (!cmd->bufs)
+ return AVERROR(ENOMEM);
+
cbuf_create.commandPool = cmd->pool;
/* Allocate command buffer */
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
vk_ret2str(ret));
+ av_freep(&cmd->bufs);
return AVERROR_EXTERNAL;
}
+ cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
+ if (!cmd->queues)
+ return AVERROR(ENOMEM);
+
for (int i = 0; i < num_queues; i++) {
VulkanQueueCtx *q = &cmd->queues[i];
vkGetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
{
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- /* Make sure all queues have finished executing */
- for (int i = 0; i < cmd->nb_queues; i++) {
- VulkanQueueCtx *q = &cmd->queues[i];
+ if (cmd->queues) {
+ for (int i = 0; i < cmd->nb_queues; i++) {
+ VulkanQueueCtx *q = &cmd->queues[i];
- if (q->fence && !q->was_synchronous) {
- vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vkResetFences(hwctx->act_dev, 1, &q->fence);
- }
+ /* Make sure all queues have finished executing */
+ if (q->fence && !q->was_synchronous) {
+ vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+ vkResetFences(hwctx->act_dev, 1, &q->fence);
+ }
- /* Free the fence */
- if (q->fence)
- vkDestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);
+ /* Free the fence */
+ if (q->fence)
+ vkDestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);
- /* Free buffer dependencies */
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- av_free(q->buf_deps);
+ /* Free buffer dependencies */
+ for (int j = 0; j < q->nb_buf_deps; j++)
+ av_buffer_unref(&q->buf_deps[j]);
+ av_free(q->buf_deps);
+ }
}
if (cmd->bufs)
if (cmd->pool)
vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
- av_freep(&cmd->bufs);
av_freep(&cmd->queues);
+ av_freep(&cmd->bufs);
+ cmd->pool = NULL;
}
static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
vkGetPhysicalDeviceFeatures(hwctx->phys_dev, &dev_features);
#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.NAME;
COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
+ COPY_FEATURE(hwctx->device_features, shaderStorageImageReadWithoutFormat)
+ COPY_FEATURE(hwctx->device_features, shaderStorageImageWriteWithoutFormat)
COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
COPY_FEATURE(hwctx->device_features, shaderInt64)
}
static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
VkResult ret;
/* The vulkan spec requires memory types to be sorted in the "optimal"
* order, so the first matching type we find will be the best/fastest one */
for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
+ const VkMemoryType *type = &p->mprops.memoryTypes[i];
+
/* The memory type must be supported by the requirements (bitfield) */
if (!(req->memoryTypeBits & (1 << i)))
continue;
/* The memory type flags must include our properties */
- if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+ if ((type->propertyFlags & req_flags) != req_flags)
+ continue;
+
+ /* The memory type must be large enough */
+ if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
continue;
/* Found a suitable memory type */
return submit_exec_ctx(hwfc, ectx, &s_info, 0);
}
+/* Computes the dimensions (*w, *h) of plane 'plane' for a frame_w x frame_h
+ * frame in the given pixel format. The luma plane (0), the alpha plane (3),
+ * RGB formats and non-planar formats use the full frame size; chroma planes
+ * of planar formats are reduced by the format's log2 chroma shift factors,
+ * rounding up via AV_CEIL_RSHIFT. */
+static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
+ int frame_w, int frame_h, int plane)
+{
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
+
+ /* Currently always true unless gray + alpha support is added */
+ if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
+ !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
+ *w = frame_w;
+ *h = frame_h;
+ return;
+ }
+
+ *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
+ *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
+}
+
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
VkImageTiling tiling, VkImageUsageFlagBits usage,
void *create_pnext)
/* Create the images */
for (int i = 0; i < planes; i++) {
- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
- int w = hwfc->width;
- int h = hwfc->height;
- const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
- const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
-
- VkImageCreateInfo image_create_info = {
+ VkImageCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = create_pnext,
.imageType = VK_IMAGE_TYPE_2D,
.format = img_fmts[i],
- .extent.width = p_w,
- .extent.height = p_h,
.extent.depth = 1,
.mipLevels = 1,
.arrayLayers = 1,
VK_SHARING_MODE_EXCLUSIVE,
};
- ret = vkCreateImage(hwctx->act_dev, &image_create_info,
+ get_plane_wh(&create_info.extent.width, &create_info.extent.height,
+ format, hwfc->width, hwfc->height, i);
+
+ ret = vkCreateImage(hwctx->act_dev, &create_info,
hwctx->alloc, &f->img[i]);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
return AVERROR_EXTERNAL;
}
- f->layout[i] = image_create_info.initialLayout;
+ f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
}
}
}
-static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
+static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
{
int err;
AVVkFrame *f;
dev_hwctx->queue_family_comp_index,
GET_QUEUE_COUNT(dev_hwctx, 0, 1, 0));
if (err)
- goto fail;
+ return err;
err = create_exec_ctx(hwfc, &fp->upload_ctx,
dev_hwctx->queue_family_tx_index,
GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
if (err)
- goto fail;
+ return err;
err = create_exec_ctx(hwfc, &fp->download_ctx,
dev_hwctx->queue_family_tx_index, 1);
if (err)
- goto fail;
+ return err;
/* Test to see if allocation will fail */
err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
hwctx->create_pnext);
if (err)
- goto fail;
+ return err;
vulkan_frame_free(hwfc, (uint8_t *)f);
hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
hwfc, vulkan_pool_alloc,
NULL);
- if (!hwfc->internal->pool_internal) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ if (!hwfc->internal->pool_internal)
+ return AVERROR(ENOMEM);
}
return 0;
-
-fail:
- free_exec_ctx(hwfc, &fp->conv_ctx);
- free_exec_ctx(hwfc, &fp->upload_ctx);
- free_exec_ctx(hwfc, &fp->download_ctx);
-
- return err;
}
static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
}
static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
- AVDRMFrameDescriptor *desc)
+ const AVFrame *src)
{
int err = 0;
VkResult ret;
VulkanDevicePriv *p = ctx->internal->priv;
VulkanFramesPriv *fp = hwfc->internal->priv;
AVVulkanFramesContext *frames_hwctx = hwfc->hwctx;
- const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format);
- const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS;
+ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
+ const int has_modifiers = !!(p->extensions & EXT_DRM_MODIFIER_FLAGS);
VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS] = { 0 };
VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
goto fail;
}
- for (int i = 0; i < desc->nb_objects; i++) {
- VkMemoryFdPropertiesKHR fdmp = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
- };
- VkMemoryRequirements req = {
- .size = desc->objects[i].size,
- };
- VkImportMemoryFdInfoKHR idesc = {
- .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
- .handleType = htype,
- .fd = dup(desc->objects[i].fd),
- };
-
- ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
- idesc.fd, &fdmp);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
- vk_ret2str(ret));
- err = AVERROR_EXTERNAL;
- close(idesc.fd);
- goto fail;
- }
-
- req.memoryTypeBits = fdmp.memoryTypeBits;
-
- err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- &idesc, &f->flags, &f->mem[i]);
- if (err) {
- close(idesc.fd);
- return err;
- }
-
- f->size[i] = desc->objects[i].size;
- }
-
f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
for (int i = 0; i < desc->nb_layers; i++) {
const int planes = desc->layers[i].nb_planes;
- const int signal_p = has_modifiers && (planes > 1);
-
VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
.drmFormatModifier = desc->objects[0].format_modifier,
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
};
- const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
- const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
-
- VkImageCreateInfo image_create_info = {
+ VkImageCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = &einfo,
.imageType = VK_IMAGE_TYPE_2D,
.format = drm_to_vulkan_fmt(desc->layers[i].format),
- .extent.width = p_w,
- .extent.height = p_h,
.extent.depth = 1,
.mipLevels = 1,
.arrayLayers = 1,
VK_SHARING_MODE_EXCLUSIVE,
};
+ get_plane_wh(&create_info.extent.width, &create_info.extent.height,
+ hwfc->sw_format, src->width, src->height, i);
+
for (int j = 0; j < planes; j++) {
plane_data[j].offset = desc->layers[i].planes[j].offset;
plane_data[j].rowPitch = desc->layers[i].planes[j].pitch;
}
/* Create image */
- ret = vkCreateImage(hwctx->act_dev, &image_create_info,
+ ret = vkCreateImage(hwctx->act_dev, &create_info,
hwctx->alloc, &f->img[i]);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
* offer us anything we could import and sync with, so instead
* just signal the semaphore we created. */
- f->layout[i] = image_create_info.initialLayout;
+ f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
+ }
+
+ for (int i = 0; i < desc->nb_objects; i++) {
+ int use_ded_mem = 0;
+ VkMemoryFdPropertiesKHR fdmp = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
+ };
+ VkMemoryRequirements req = {
+ .size = desc->objects[i].size,
+ };
+ VkImportMemoryFdInfoKHR idesc = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
+ .handleType = htype,
+ .fd = dup(desc->objects[i].fd),
+ };
+ VkMemoryDedicatedAllocateInfo ded_alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = &idesc,
+ };
+
+ ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
+ idesc.fd, &fdmp);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ close(idesc.fd);
+ goto fail;
+ }
+
+ req.memoryTypeBits = fdmp.memoryTypeBits;
+
+ /* Dedicated allocation only makes sense if there's a one to one mapping
+ * between images and the memory backing them, so only check in this
+ * case. */
+ if (desc->nb_layers == desc->nb_objects) {
+ VkImageMemoryRequirementsInfo2 req_desc = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
+ .image = f->img[i],
+ };
+ VkMemoryDedicatedRequirements ded_req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ };
+ VkMemoryRequirements2 req2 = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ .pNext = &ded_req,
+ };
+ vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);
+
+ use_ded_mem = ded_req.prefersDedicatedAllocation |
+ ded_req.requiresDedicatedAllocation;
+ if (use_ded_mem)
+ ded_alloc.image = f->img[i];
+ }
+
+ err = alloc_mem(ctx, &req, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ use_ded_mem ? &ded_alloc : ded_alloc.pNext,
+ &f->flags, &f->mem[i]);
+ if (err) {
+ close(idesc.fd);
+ return err;
+ }
+
+ f->size[i] = desc->objects[i].size;
+ }
+
+ for (int i = 0; i < desc->nb_layers; i++) {
+ const int planes = desc->layers[i].nb_planes;
+ const int signal_p = has_modifiers && (planes > 1);
for (int j = 0; j < planes; j++) {
VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
AVVkFrame *f;
VulkanMapping *map = NULL;
- err = vulkan_map_from_drm_frame_desc(hwfc, &f,
- (AVDRMFrameDescriptor *)src->data[0]);
- if (err)
+ if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src)))
return err;
/* The unmapping function will free this */
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
.offset = 0,
.arrayDesc = {
- .Width = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
- : hwfc->width,
- .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
- : hwfc->height,
.Depth = 0,
.Format = cufmt,
.NumChannels = 1 + ((planes == 2) && i),
.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
};
+ int p_w, p_h;
+ get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
+
+ tex_desc.arrayDesc.Width = p_w;
+ tex_desc.arrayDesc.Height = p_h;
+
ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
&ext_desc.handle.fd);
if (ret != VK_SUCCESS) {
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
- if (ret < 0) {
- err = AVERROR_EXTERNAL;
- goto fail;
- }
+ if (ret < 0)
+ return AVERROR_EXTERNAL;
dst_f = (AVVkFrame *)dst->data[0];
ret = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
if (ret < 0) {
- goto fail;
+ CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+ return ret;
}
+
dst_int = dst_f->internal;
ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
.dstMemoryType = CU_MEMORYTYPE_ARRAY,
.dstArray = dst_int->cu_array[i],
- .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
- : hwfc->width) * desc->comp[i].step,
- .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
- : hwfc->height,
};
+ int p_w, p_h;
+ get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
+
+ cpy.WidthInBytes = p_w * desc->comp[i].step;
+ cpy.Height = p_h;
+
ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
if (ret < 0) {
err = AVERROR_EXTERNAL;
av_free(data);
}
-static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf, size_t imp_size,
- int height, int *stride, VkBufferUsageFlags usage,
- VkMemoryPropertyFlagBits flags, void *create_pnext,
- void *alloc_pnext)
+/* Returns the staging-buffer size needed for a plane of the given height:
+ * pads *stride up to the device's optimalBufferCopyRowPitchAlignment
+ * (writing the aligned value back through the pointer), then rounds the
+ * total height*stride size up to minMemoryMapAlignment. */
+static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
+{
+ size_t size;
+ *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
+ size = height*(*stride);
+ size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment);
+ return size;
+}
+
+static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
+ size_t size, uint32_t req_memory_bits, int host_mapped,
+ void *create_pnext, void *alloc_pnext)
{
int err;
VkResult ret;
int use_ded_mem;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
- VulkanDevicePriv *p = ctx->internal->priv;
VkBufferCreateInfo buf_spawn = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = create_pnext,
.usage = usage,
+ .size = size,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
if (!vkbuf)
return AVERROR(ENOMEM);
- vkbuf->mapped_mem = !!imp_size;
-
- if (!vkbuf->mapped_mem) {
- *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
- buf_spawn.size = height*(*stride);
- buf_spawn.size = FFALIGN(buf_spawn.size, p->props.properties.limits.minMemoryMapAlignment);
- } else {
- buf_spawn.size = imp_size;
- }
+ vkbuf->mapped_mem = host_mapped;
ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ err = AVERROR_EXTERNAL;
+ goto fail;
}
req_desc.buffer = vkbuf->buf;
if (use_ded_mem)
ded_alloc.buffer = vkbuf->buf;
+ /* Additional requirements imposed on us */
+ if (req_memory_bits)
+ req.memoryRequirements.memoryTypeBits &= req_memory_bits;
+
err = alloc_mem(ctx, &req.memoryRequirements, flags,
use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
&vkbuf->flags, &vkbuf->mem);
if (err)
- return err;
+ goto fail;
ret = vkBindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
vk_ret2str(ret));
- free_buf(ctx, (uint8_t *)vkbuf);
- return AVERROR_EXTERNAL;
+ err = AVERROR_EXTERNAL;
+ goto fail;
}
*buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0);
if (!(*buf)) {
- free_buf(ctx, (uint8_t *)vkbuf);
- return AVERROR(ENOMEM);
+ err = AVERROR(ENOMEM);
+ goto fail;
}
return 0;
+
+fail:
+ free_buf(ctx, (uint8_t *)vkbuf);
+ return err;
}
/* Skips mapping of host mapped buffers but still invalidates them */
.memory = vkbuf->mem,
.size = VK_WHOLE_SIZE,
};
+
+ /* For host imported memory Vulkan says to use platform-defined
+ * sync methods, but doesn't really say not to call flush or invalidate
+ * on original host pointers. It does explicitly allow to do that on
+ * host-mapped pointers which are then mapped again using vkMapMemory,
+ * but known implementations return the original pointers when mapped
+ * again. */
if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
continue;
+
invalidate_ctx[invalidate_count++] = ival_buf;
}
.memory = vkbuf->mem,
.size = VK_WHOLE_SIZE,
};
+
if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
continue;
+
flush_ctx[flush_count++] = flush_buf;
}
}
}
static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
- AVBufferRef **bufs, const int *buf_stride, int w,
+ AVBufferRef **bufs, size_t *buf_offsets,
+ const int *buf_stride, int w,
int h, enum AVPixelFormat pix_fmt, int to_buf)
{
int err;
/* Schedule a copy for each plane */
for (int i = 0; i < planes; i++) {
ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
- const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
- const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
VkBufferImageCopy buf_reg = {
- .bufferOffset = 0,
- /* Buffer stride isn't in bytes, it's in samples, the implementation
- * uses the image's VkFormat to know how many bytes per sample
- * the buffer has. So we have to convert by dividing. Stupid.
- * Won't work with YUVA or other planar formats with alpha. */
+ .bufferOffset = buf_offsets[i],
.bufferRowLength = buf_stride[i] / desc->comp[i].step,
- .bufferImageHeight = p_h,
.imageSubresource.layerCount = 1,
.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.imageOffset = { 0, 0, 0, },
- .imageExtent = { p_w, p_h, 1, },
};
+ int p_w, p_h;
+ get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);
+
+ buf_reg.bufferImageHeight = p_h;
+ buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
+
if (to_buf)
vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
vkbuf->buf, 1, &buf_reg);
}
}
-static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
- const AVFrame *src)
+static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
+ const AVFrame *swf, int from)
{
int err = 0;
- AVFrame tmp;
- AVVkFrame *f = (AVVkFrame *)dst->data[0];
+ VkResult ret;
+ AVVkFrame *f = (AVVkFrame *)vkf->data[0];
AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
- AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
- const int planes = av_pix_fmt_count_planes(src->format);
- int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
+ AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+
+ AVFrame tmp;
+ AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
+ size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };
+
+ int p_w, p_h;
+ const int planes = av_pix_fmt_count_planes(swf->format);
+
int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
- int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY;
+ const int map_host = !!(p->extensions & EXT_EXTERNAL_HOST_MEMORY);
- if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) {
- av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
+ VK_LOAD_PFN(hwctx->inst, vkGetMemoryHostPointerPropertiesEXT);
+
+ if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
+ av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
return AVERROR(EINVAL);
}
- if (src->width > hwfc->width || src->height > hwfc->height)
+ if (swf->width > hwfc->width || swf->height > hwfc->height)
return AVERROR(EINVAL);
/* For linear, host visiable images */
AVFrame *map = av_frame_alloc();
if (!map)
return AVERROR(ENOMEM);
- map->format = src->format;
+ map->format = swf->format;
- err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
+ err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE);
if (err)
return err;
- err = av_frame_copy(map, src);
+ err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf);
av_frame_free(&map);
return err;
}
/* Create buffers */
for (int i = 0; i < planes; i++) {
- int h = src->height;
- int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
- size_t p_size = FFABS(src->linesize[i]) * p_height;
+ size_t req_size;
+
+ VkExternalMemoryBufferCreateInfo create_desc = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+ };
VkImportMemoryHostPointerInfoEXT import_desc = {
.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
- .pHostPointer = src->data[i],
};
- /* We can only map images with positive stride and alignment appropriate
- * for the device. */
- host_mapped[i] = map_host && src->linesize[i] > 0 &&
- !(p_size % p->hprops.minImportedHostPointerAlignment) &&
- !(((uintptr_t)import_desc.pHostPointer) %
- p->hprops.minImportedHostPointerAlignment);
- p_size = host_mapped[i] ? p_size : 0;
-
- tmp.linesize[i] = FFABS(src->linesize[i]);
- err = create_buf(dev_ctx, &bufs[i], p_size, p_height, &tmp.linesize[i],
- VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
+ VkMemoryHostPointerPropertiesEXT p_props = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
+ };
+
+ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+
+ tmp.linesize[i] = FFABS(swf->linesize[i]);
+
+ /* Do not map images with a negative stride */
+ if (map_host && swf->linesize[i] > 0) {
+ size_t offs;
+ offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
+ import_desc.pHostPointer = swf->data[i] - offs;
+
+ /* We have to compensate for the few extra bytes of padding we
+ * completely ignore at the start */
+ req_size = FFALIGN(offs + tmp.linesize[i] * p_h,
+ p->hprops.minImportedHostPointerAlignment);
+
+ ret = pfn_vkGetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
+ import_desc.handleType,
+ import_desc.pHostPointer,
+ &p_props);
+
+ if (ret == VK_SUCCESS) {
+ host_mapped[i] = 1;
+ buf_offsets[i] = offs;
+ }
+ }
+
+ if (!host_mapped[i])
+ req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);
+
+ err = create_buf(dev_ctx, &bufs[i],
+ from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+ req_size, p_props.memoryTypeBits, host_mapped[i],
+ host_mapped[i] ? &create_desc : NULL,
host_mapped[i] ? &import_desc : NULL);
if (err)
goto end;
}
- /* Map, copy image to buffer, unmap */
- if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
- goto end;
+ if (!from) {
+ /* Map, copy image to buffer, unmap */
+ if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
+ goto end;
- for (int i = 0; i < planes; i++) {
- int h = src->height;
- int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
+ for (int i = 0; i < planes; i++) {
+ if (host_mapped[i])
+ continue;
- if (host_mapped[i])
- continue;
+ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
- av_image_copy_plane(tmp.data[i], tmp.linesize[i],
- (const uint8_t *)src->data[i], src->linesize[i],
- FFMIN(tmp.linesize[i], src->linesize[i]), p_height);
+ av_image_copy_plane(tmp.data[i], tmp.linesize[i],
+ (const uint8_t *)swf->data[i], swf->linesize[i],
+ FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
+ p_h);
+ }
+
+ if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
+ goto end;
}
- if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
- goto end;
+ /* Copy buffers into/from image */
+ err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
+ swf->width, swf->height, swf->format, from);
+
+ if (from) {
+ /* Map, copy image to buffer, unmap */
+ if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
+ goto end;
- /* Copy buffers to image */
- err = transfer_image_buf(hwfc, dst, bufs, tmp.linesize,
- src->width, src->height, src->format, 0);
+ for (int i = 0; i < planes; i++) {
+ if (host_mapped[i])
+ continue;
+
+ get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+
+ av_image_copy_plane(swf->data[i], swf->linesize[i],
+ (const uint8_t *)tmp.data[i], tmp.linesize[i],
+ FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
+ p_h);
+ }
+
+ if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
+ goto end;
+ }
end:
for (int i = 0; i < planes; i++)
}
static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
- const AVFrame *src)
+ const AVFrame *src)
{
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
if (src->hw_frames_ctx)
return AVERROR(ENOSYS);
else
- return vulkan_transfer_data_from_mem(hwfc, dst, src);
+ return vulkan_transfer_data(hwfc, dst, src, 0);
}
}
#if CONFIG_CUDA
static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
- const AVFrame *src)
+ const AVFrame *src)
{
int err;
VkResult ret;
AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
CudaFunctions *cu = cu_internal->cuda_dl;
+ CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
+ CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
- if (ret < 0) {
- err = AVERROR_EXTERNAL;
- goto fail;
- }
+ if (ret < 0)
+ return AVERROR_EXTERNAL;
dst_f = (AVVkFrame *)src->data[0];
err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
if (err < 0) {
- goto fail;
+ CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+ return err;
}
dst_int = dst_f->internal;
+ ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
+ planes, cuda_dev->stream));
+ if (ret < 0) {
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
for (int i = 0; i < planes; i++) {
CUDA_MEMCPY2D cpy = {
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
.srcMemoryType = CU_MEMORYTYPE_ARRAY,
.srcArray = dst_int->cu_array[i],
- .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w)
- : hwfc->width) * desc->comp[i].step,
- .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h)
- : hwfc->height,
};
+ int w, h;
+ get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);
+
+ cpy.WidthInBytes = w * desc->comp[i].step;
+ cpy.Height = h;
+
ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
if (ret < 0) {
err = AVERROR_EXTERNAL;
}
}
+ ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
+ planes, cuda_dev->stream));
+ if (ret < 0) {
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n");
}
#endif
-static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
- const AVFrame *src)
-{
- int err = 0;
- AVFrame tmp;
- AVVkFrame *f = (AVVkFrame *)src->data[0];
- AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
- AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
- const int planes = av_pix_fmt_count_planes(dst->format);
- int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
- VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
- int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY;
-
- if (dst->width > hwfc->width || dst->height > hwfc->height)
- return AVERROR(EINVAL);
-
- /* For linear, host visiable images */
- if (f->tiling == VK_IMAGE_TILING_LINEAR &&
- f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
- AVFrame *map = av_frame_alloc();
- if (!map)
- return AVERROR(ENOMEM);
- map->format = dst->format;
-
- err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ);
- if (err)
- return err;
-
- err = av_frame_copy(dst, map);
- av_frame_free(&map);
- return err;
- }
-
- /* Create buffers */
- for (int i = 0; i < planes; i++) {
- int h = dst->height;
- int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
- size_t p_size = FFABS(dst->linesize[i]) * p_height;
-
- VkImportMemoryHostPointerInfoEXT import_desc = {
- .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
- .pHostPointer = dst->data[i],
- };
-
- /* We can only map images with positive stride and alignment appropriate
- * for the device. */
- host_mapped[i] = map_host && dst->linesize[i] > 0 &&
- !(p_size % p->hprops.minImportedHostPointerAlignment) &&
- !(((uintptr_t)import_desc.pHostPointer) %
- p->hprops.minImportedHostPointerAlignment);
- p_size = host_mapped[i] ? p_size : 0;
-
- tmp.linesize[i] = FFABS(dst->linesize[i]);
- err = create_buf(dev_ctx, &bufs[i], p_size, p_height,
- &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
- host_mapped[i] ? &import_desc : NULL);
- if (err)
- goto end;
- }
-
- /* Copy image to buffer */
- if ((err = transfer_image_buf(hwfc, src, bufs, tmp.linesize,
- dst->width, dst->height, dst->format, 1)))
- goto end;
-
- /* Map, copy buffer to frame, unmap */
- if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 1)))
- goto end;
-
- for (int i = 0; i < planes; i++) {
- int h = dst->height;
- int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
-
- if (host_mapped[i])
- continue;
-
- av_image_copy_plane(dst->data[i], dst->linesize[i],
- (const uint8_t *)tmp.data[i], tmp.linesize[i],
- FFMIN(tmp.linesize[i], dst->linesize[i]), p_height);
- }
-
- err = unmap_buffers(dev_ctx, bufs, planes, 0);
-
-end:
- for (int i = 0; i < planes; i++)
- av_buffer_unref(&bufs[i]);
-
- return err;
-}
-
static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
const AVFrame *src)
{
if (dst->hw_frames_ctx)
return AVERROR(ENOSYS);
else
- return vulkan_transfer_data_to_mem(hwfc, dst, src);
+ return vulkan_transfer_data(hwfc, src, dst, 1);
}
}