2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
23 #include "hwcontext_cuda.h"
28 #define CUDA_FRAME_ALIGNMENT 256
/**
 * Private per-frames-context state of the CUDA backend.
 */
typedef struct CUDAFramesContext {
    /* Chroma subsampling shifts of the software format; filled in by
     * av_pix_fmt_get_chroma_sub_sample() in cuda_frames_init(). */
    int shift_width;
    int shift_height;
} CUDAFramesContext;
34 static const enum AVPixelFormat supported_formats[] = {
40 static void cuda_buffer_free(void *opaque, uint8_t *data)
42 AVHWFramesContext *ctx = opaque;
43 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
47 cuCtxPushCurrent(hwctx->cuda_ctx);
49 cuMemFree((CUdeviceptr)data);
51 cuCtxPopCurrent(&dummy);
54 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
56 AVHWFramesContext *ctx = opaque;
57 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
59 AVBufferRef *ret = NULL;
60 CUcontext dummy = NULL;
64 err = cuCtxPushCurrent(hwctx->cuda_ctx);
65 if (err != CUDA_SUCCESS) {
66 av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
70 err = cuMemAlloc(&data, size);
71 if (err != CUDA_SUCCESS)
74 ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
81 cuCtxPopCurrent(&dummy);
85 static int cuda_frames_init(AVHWFramesContext *ctx)
87 CUDAFramesContext *priv = ctx->internal->priv;
88 int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
91 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
92 if (ctx->sw_format == supported_formats[i])
95 if (i == FF_ARRAY_ELEMS(supported_formats)) {
96 av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
97 av_get_pix_fmt_name(ctx->sw_format));
98 return AVERROR(ENOSYS);
101 av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
106 switch (ctx->sw_format) {
107 case AV_PIX_FMT_NV12:
108 case AV_PIX_FMT_YUV420P:
109 size = aligned_width * ctx->height * 3 / 2;
111 case AV_PIX_FMT_YUV444P:
112 size = aligned_width * ctx->height * 3;
116 ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
117 if (!ctx->internal->pool_internal)
118 return AVERROR(ENOMEM);
124 static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
126 int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
128 frame->buf[0] = av_buffer_pool_get(ctx->pool);
130 return AVERROR(ENOMEM);
132 switch (ctx->sw_format) {
133 case AV_PIX_FMT_NV12:
134 frame->data[0] = frame->buf[0]->data;
135 frame->data[1] = frame->data[0] + aligned_width * ctx->height;
136 frame->linesize[0] = aligned_width;
137 frame->linesize[1] = aligned_width;
139 case AV_PIX_FMT_YUV420P:
140 frame->data[0] = frame->buf[0]->data;
141 frame->data[2] = frame->data[0] + aligned_width * ctx->height;
142 frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4;
143 frame->linesize[0] = aligned_width;
144 frame->linesize[1] = aligned_width / 2;
145 frame->linesize[2] = aligned_width / 2;
147 case AV_PIX_FMT_YUV444P:
148 frame->data[0] = frame->buf[0]->data;
149 frame->data[1] = frame->data[0] + aligned_width * ctx->height;
150 frame->data[2] = frame->data[1] + aligned_width * ctx->height;
151 frame->linesize[0] = aligned_width;
152 frame->linesize[1] = aligned_width;
153 frame->linesize[2] = aligned_width;
156 av_frame_unref(frame);
160 frame->format = AV_PIX_FMT_CUDA;
161 frame->width = ctx->width;
162 frame->height = ctx->height;
167 static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
168 enum AVHWFrameTransferDirection dir,
169 enum AVPixelFormat **formats)
171 enum AVPixelFormat *fmts;
173 fmts = av_malloc_array(2, sizeof(*fmts));
175 return AVERROR(ENOMEM);
177 fmts[0] = ctx->sw_format;
178 fmts[1] = AV_PIX_FMT_NONE;
185 static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
188 CUDAFramesContext *priv = ctx->internal->priv;
189 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
195 err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
196 if (err != CUDA_SUCCESS)
197 return AVERROR_UNKNOWN;
199 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
200 CUDA_MEMCPY2D cpy = {
201 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
202 .dstMemoryType = CU_MEMORYTYPE_HOST,
203 .srcDevice = (CUdeviceptr)src->data[i],
204 .dstHost = dst->data[i],
205 .srcPitch = src->linesize[i],
206 .dstPitch = dst->linesize[i],
207 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
208 .Height = src->height >> (i ? priv->shift_height : 0),
211 err = cuMemcpy2D(&cpy);
212 if (err != CUDA_SUCCESS) {
213 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
214 return AVERROR_UNKNOWN;
218 cuCtxPopCurrent(&dummy);
223 static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
226 CUDAFramesContext *priv = ctx->internal->priv;
227 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
233 err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
234 if (err != CUDA_SUCCESS)
235 return AVERROR_UNKNOWN;
237 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
238 CUDA_MEMCPY2D cpy = {
239 .srcMemoryType = CU_MEMORYTYPE_HOST,
240 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
241 .srcHost = src->data[i],
242 .dstDevice = (CUdeviceptr)dst->data[i],
243 .srcPitch = src->linesize[i],
244 .dstPitch = dst->linesize[i],
245 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
246 .Height = src->height >> (i ? priv->shift_height : 0),
249 err = cuMemcpy2D(&cpy);
250 if (err != CUDA_SUCCESS) {
251 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
252 return AVERROR_UNKNOWN;
256 cuCtxPopCurrent(&dummy);
261 static void cuda_device_free(AVHWDeviceContext *ctx)
263 AVCUDADeviceContext *hwctx = ctx->hwctx;
264 cuCtxDestroy(hwctx->cuda_ctx);
267 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
268 AVDictionary *opts, int flags)
270 AVCUDADeviceContext *hwctx = ctx->hwctx;
277 device_idx = strtol(device, NULL, 0);
280 if (err != CUDA_SUCCESS) {
281 av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
282 return AVERROR_UNKNOWN;
285 err = cuDeviceGet(&cu_device, device_idx);
286 if (err != CUDA_SUCCESS) {
287 av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
288 return AVERROR_UNKNOWN;
291 err = cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
292 if (err != CUDA_SUCCESS) {
293 av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
294 return AVERROR_UNKNOWN;
297 cuCtxPopCurrent(&dummy);
299 ctx->free = cuda_device_free;
304 const HWContextType ff_hwcontext_type_cuda = {
305 .type = AV_HWDEVICE_TYPE_CUDA,
308 .device_hwctx_size = sizeof(AVCUDADeviceContext),
309 .frames_priv_size = sizeof(CUDAFramesContext),
311 .device_create = cuda_device_create,
312 .frames_init = cuda_frames_init,
313 .frames_get_buffer = cuda_get_buffer,
314 .transfer_get_formats = cuda_transfer_get_formats,
315 .transfer_data_to = cuda_transfer_data_to,
316 .transfer_data_from = cuda_transfer_data_from,
318 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },