2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
23 #include "hwcontext_cuda_internal.h"
/* Alignment passed to FFALIGN() for frame row widths when sizing the pool
 * buffers and laying out planes in this file. */
28 #define CUDA_FRAME_ALIGNMENT 256
/* Private per-frames-context state of the CUDA backend; its size is
 * registered through frames_priv_size in ff_hwcontext_type_cuda. */
30 typedef struct CUDAFramesContext {
/* Chroma subsampling shifts of sw_format, filled in by cuda_frames_init().
 * shift_height is used to derive the height of the non-first planes when
 * copying frame data. */
31 int shift_width, shift_height;
/* Software pixel formats that cuda_frames_init() accepts as sw_format. */
34 static const enum AVPixelFormat supported_formats[] = {
/**
 * Buffer free callback installed by cuda_pool_alloc().
 *
 * Pushes the device's CUDA context, releases the device allocation with
 * cuMemFree() and pops the context again. The results of the CUDA calls
 * are not checked.
 *
 * @param opaque the AVHWFramesContext that was given to av_buffer_create()
 * @param data   the CUDA device pointer, carried as a uint8_t pointer
 */
42 static void cuda_buffer_free(void *opaque, uint8_t *data)
44 AVHWFramesContext *ctx = opaque;
45 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
46 CudaFunctions *cu = hwctx->internal->cuda_dl;
/* Make the device context current around the free. */
50 cu->cuCtxPushCurrent(hwctx->cuda_ctx);
/* The buffer data pointer is really a CUdeviceptr; cast it back. */
52 cu->cuMemFree((CUdeviceptr)data);
/* Undo the push above; the popped handle is discarded. */
54 cu->cuCtxPopCurrent(&dummy);
/**
 * Buffer pool allocation callback (registered in cuda_frames_init()).
 *
 * Allocates size bytes of CUDA device memory with the device context made
 * current and wraps the device pointer in an AVBufferRef whose free
 * callback is cuda_buffer_free().
 *
 * @param opaque the AVHWFramesContext owning the pool
 * @param size   number of bytes requested from cuMemAlloc()
 * @return presumably the new buffer reference, or NULL on failure (ret
 *         starts out as NULL) -- NOTE(review): the return statements and
 *         the failure handling are not visible here; confirm.
 */
57 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
59 AVHWFramesContext *ctx = opaque;
60 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
61 CudaFunctions *cu = hwctx->internal->cuda_dl;
63 AVBufferRef *ret = NULL;
64 CUcontext dummy = NULL;
/* Make the device context current for the allocation. */
68 err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
69 if (err != CUDA_SUCCESS) {
70 av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
74 err = cu->cuMemAlloc(&data, size);
75 if (err != CUDA_SUCCESS)
/* Wrap the device pointer; ctx is handed to cuda_buffer_free() as its
 * opaque argument when the buffer is released. */
78 ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
/* Pop the context pushed above. */
85 cu->cuCtxPopCurrent(&dummy);
/**
 * frames_init callback: validate the software format and set up the
 * internal buffer pool.
 *
 * Checks ctx->sw_format against supported_formats, stores the chroma
 * subsampling shifts in the private context, computes the per-frame buffer
 * size from the aligned width and the height, and initializes
 * ctx->internal->pool_internal with cuda_pool_alloc() as allocator.
 *
 * @param ctx frames context being initialized
 * @return AVERROR(ENOSYS) if sw_format is not supported, AVERROR(ENOMEM) if
 *         the pool cannot be created; the success return is not visible
 *         here (presumably 0 -- TODO confirm).
 */
89 static int cuda_frames_init(AVHWFramesContext *ctx)
91 CUDAFramesContext *priv = ctx->internal->priv;
92 int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
/* Linear search; i reaches the element count when nothing matched. */
95 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
96 if (ctx->sw_format == supported_formats[i])
99 if (i == FF_ARRAY_ELEMS(supported_formats)) {
100 av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
101 av_get_pix_fmt_name(ctx->sw_format));
102 return AVERROR(ENOSYS);
/* Remember the chroma shifts; the transfer functions use shift_height. */
105 av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
/* Buffer size per frame, by format.
 * NOTE(review): the products are formed from int operands; very large
 * dimensions could overflow -- confirm that callers bound width/height. */
110 switch (ctx->sw_format) {
111 case AV_PIX_FMT_NV12:
112 case AV_PIX_FMT_YUV420P:
/* One and a half times aligned_width * height. */
113 size = aligned_width * ctx->height * 3 / 2;
115 case AV_PIX_FMT_YUV444P:
116 case AV_PIX_FMT_P010:
117 case AV_PIX_FMT_P016:
/* Three times aligned_width * height. */
118 size = aligned_width * ctx->height * 3;
/* NOTE(review): unlike the other av_log() calls in this file, this message
 * has no trailing newline. */
121 av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation.");
/* ctx is passed as the opaque pointer that cuda_pool_alloc() receives. */
125 ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
126 if (!ctx->internal->pool_internal)
127 return AVERROR(ENOMEM);
/**
 * frames_get_buffer callback: take a buffer from ctx->pool and describe its
 * plane layout in frame.
 *
 * All planes live in the single buffer frame->buf[0]; data pointers and
 * linesizes are derived from the aligned row width and ctx->height
 * according to ctx->sw_format. The frame is then tagged as AV_PIX_FMT_CUDA
 * with the context's width and height.
 *
 * @param ctx   frames context providing the pool, format and dimensions
 * @param frame frame to fill in
 * @return AVERROR(ENOMEM) presumably when no buffer could be obtained from
 *         the pool; other return statements are not visible here -- TODO
 *         confirm.
 */
133 static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
136 int width_in_bytes = ctx->width;
/* P010/P016 get a separate row width treatment; the adjustment itself is
 * not visible here (presumably two bytes per sample -- TODO confirm). */
138 if (ctx->sw_format == AV_PIX_FMT_P010 ||
139 ctx->sw_format == AV_PIX_FMT_P016) {
142 aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
144 frame->buf[0] = av_buffer_pool_get(ctx->pool);
146 return AVERROR(ENOMEM);
148 switch (ctx->sw_format) {
/* Two planes: plane 1 starts right after aligned_width * height bytes of
 * plane 0 and uses the same linesize. */
149 case AV_PIX_FMT_NV12:
150 case AV_PIX_FMT_P010:
151 case AV_PIX_FMT_P016:
152 frame->data[0] = frame->buf[0]->data;
153 frame->data[1] = frame->data[0] + aligned_width * ctx->height;
154 frame->linesize[0] = aligned_width;
155 frame->linesize[1] = aligned_width;
/* Three planes; note that data[2] is placed directly after plane 0 and
 * data[1] follows it, i.e. plane 2 precedes plane 1 in memory. Planes 1 and
 * 2 use half the linesize of plane 0. */
157 case AV_PIX_FMT_YUV420P:
158 frame->data[0] = frame->buf[0]->data;
159 frame->data[2] = frame->data[0] + aligned_width * ctx->height;
160 frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4;
161 frame->linesize[0] = aligned_width;
162 frame->linesize[1] = aligned_width / 2;
163 frame->linesize[2] = aligned_width / 2;
/* Three equally sized planes stored back to back. */
165 case AV_PIX_FMT_YUV444P:
166 frame->data[0] = frame->buf[0]->data;
167 frame->data[1] = frame->data[0] + aligned_width * ctx->height;
168 frame->data[2] = frame->data[1] + aligned_width * ctx->height;
169 frame->linesize[0] = aligned_width;
170 frame->linesize[1] = aligned_width;
171 frame->linesize[2] = aligned_width;
/* Presumably the fallback for formats without a layout above: the frame,
 * including the buffer just obtained, is released -- TODO confirm. */
174 av_frame_unref(frame);
178 frame->format = AV_PIX_FMT_CUDA;
179 frame->width = ctx->width;
180 frame->height = ctx->height;
/**
 * transfer_get_formats callback: build the list of formats usable for
 * transfers with this frames context.
 *
 * The list has two entries: ctx->sw_format followed by the AV_PIX_FMT_NONE
 * terminator. dir is not referenced in the visible lines.
 *
 * @param ctx     frames context whose sw_format is reported
 * @param dir     transfer direction
 * @param formats presumably receives the allocated list -- the assignment
 *                is not visible here; TODO confirm
 * @return AVERROR(ENOMEM), presumably when the allocation fails; the
 *         success return is not visible here.
 */
185 static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
186 enum AVHWFrameTransferDirection dir,
187 enum AVPixelFormat **formats)
189 enum AVPixelFormat *fmts;
191 fmts = av_malloc_array(2, sizeof(*fmts));
193 return AVERROR(ENOMEM);
195 fmts[0] = ctx->sw_format;
196 fmts[1] = AV_PIX_FMT_NONE;
/**
 * transfer_data_from callback: copy the planes of a CUDA frame (src, device
 * memory) into dst (host memory).
 *
 * With the device context made current, each plane for which src->data[i]
 * is set is copied with cuMemcpy2D().
 *
 * @param ctx frames context the source frame belongs to
 * @param dst destination frame in host memory
 * @return AVERROR_UNKNOWN if the context cannot be made current or a copy
 *         fails; the success return is not visible here.
 *
 * NOTE(review): the error return inside the loop comes before the
 * cuCtxPopCurrent() call, so on a failed copy the context pushed at the top
 * appears to stay current -- confirm and consider popping on that path.
 */
203 static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
206 CUDAFramesContext *priv = ctx->internal->priv;
207 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
208 CudaFunctions *cu = device_hwctx->internal->cuda_dl;
214 err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
215 if (err != CUDA_SUCCESS)
216 return AVERROR_UNKNOWN;
/* Stop at the first unset plane pointer or at the end of the data array. */
218 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
219 CUDA_MEMCPY2D cpy = {
220 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
221 .dstMemoryType = CU_MEMORYTYPE_HOST,
222 .srcDevice = (CUdeviceptr)src->data[i],
223 .dstHost = dst->data[i],
224 .srcPitch = src->linesize[i],
225 .dstPitch = dst->linesize[i],
/* Copy no more bytes per row than the narrower of the two pitches. */
226 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
/* Plane 0 has the full height; the other planes are reduced by the
 * vertical chroma shift. */
227 .Height = src->height >> (i ? priv->shift_height : 0),
230 err = cu->cuMemcpy2D(&cpy);
231 if (err != CUDA_SUCCESS) {
232 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
233 return AVERROR_UNKNOWN;
237 cu->cuCtxPopCurrent(&dummy);
/**
 * transfer_data_to callback: copy the planes of a host frame (src) into a
 * CUDA frame (dst, device memory).
 *
 * With the device context made current, each plane for which src->data[i]
 * is set is copied with cuMemcpy2D().
 *
 * @param ctx frames context the destination frame belongs to
 * @param dst destination frame in device memory
 * @return AVERROR_UNKNOWN if the context cannot be made current or a copy
 *         fails; the success return is not visible here.
 *
 * NOTE(review): the error return inside the loop comes before the
 * cuCtxPopCurrent() call, so on a failed copy the context pushed at the top
 * appears to stay current -- confirm and consider popping on that path.
 */
242 static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
245 CUDAFramesContext *priv = ctx->internal->priv;
246 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
247 CudaFunctions *cu = device_hwctx->internal->cuda_dl;
253 err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
254 if (err != CUDA_SUCCESS)
255 return AVERROR_UNKNOWN;
/* Stop at the first unset plane pointer or at the end of the data array. */
257 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
258 CUDA_MEMCPY2D cpy = {
259 .srcMemoryType = CU_MEMORYTYPE_HOST,
260 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
261 .srcHost = src->data[i],
262 .dstDevice = (CUdeviceptr)dst->data[i],
263 .srcPitch = src->linesize[i],
264 .dstPitch = dst->linesize[i],
/* Copy no more bytes per row than the narrower of the two pitches. */
265 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
/* Plane 0 has the full height; the other planes are reduced by the
 * vertical chroma shift. */
266 .Height = src->height >> (i ? priv->shift_height : 0),
269 err = cu->cuMemcpy2D(&cpy);
270 if (err != CUDA_SUCCESS) {
/* NOTE(review): this is the host-to-device direction, yet the message says
 * "from the CUDA frame" -- looks copied from cuda_transfer_data_from(). */
271 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
272 return AVERROR_UNKNOWN;
276 cu->cuCtxPopCurrent(&dummy);
/**
 * device_uninit callback: release what the CUDA device context holds.
 *
 * If the internal state exists, the CUDA context is destroyed only when it
 * was flagged as allocated by this code (is_allocated, set in
 * cuda_device_create()) and is non-NULL; the loaded CUDA function table is
 * then freed. Finally the internal state itself is freed.
 *
 * @param ctx device context to tear down
 */
281 static void cuda_device_uninit(AVHWDeviceContext *ctx)
283 AVCUDADeviceContext *hwctx = ctx->hwctx;
285 if (hwctx->internal) {
/* Contexts not marked is_allocated are left alone. */
286 if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
287 hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
288 hwctx->cuda_ctx = NULL;
290 cuda_free_functions(&hwctx->internal->cuda_dl);
293 av_freep(&hwctx->internal);
/**
 * device_init callback: make sure the internal state and the CUDA function
 * table exist.
 *
 * Allocates a zeroed internal structure if none is set yet and loads the
 * CUDA functions if cuda_dl is not populated. An error is logged when
 * loading fails.
 *
 * @param ctx device context to initialize
 * @return AVERROR(ENOMEM) if the internal structure cannot be allocated;
 *         the remaining return statements are not visible here.
 */
296 static int cuda_device_init(AVHWDeviceContext *ctx)
298 AVCUDADeviceContext *hwctx = ctx->hwctx;
/* Keep an already present internal structure as it is. */
301 if (!hwctx->internal) {
302 hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
303 if (!hwctx->internal)
304 return AVERROR(ENOMEM);
307 if (!hwctx->internal->cuda_dl) {
308 ret = cuda_load_functions(&hwctx->internal->cuda_dl);
310 av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
/* Presumably the failure path: undo the partial initialization -- TODO
 * confirm. */
318 cuda_device_uninit(ctx);
/**
 * device_create callback: create a CUDA context on the requested device.
 *
 * Parses the device string as a device index, runs cuda_device_init(),
 * looks up the device with cuDeviceGet(), creates a context on it with
 * CU_CTX_SCHED_BLOCKING_SYNC, pops that context again and marks it as
 * allocated by this code so that cuda_device_uninit() destroys it. opts and
 * flags are not referenced in the visible lines.
 *
 * @param ctx    device context to fill in
 * @param device device index as a string
 * @param opts   creation options
 * @param flags  creation flags
 * @return AVERROR_UNKNOWN on the failure path, after cuda_device_uninit();
 *         the success return is not visible here.
 */
322 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
323 AVDictionary *opts, int flags)
325 AVCUDADeviceContext *hwctx = ctx->hwctx;
/* Base 0 lets strtol() pick the radix from the string's prefix.
 * NOTE(review): with a NULL end pointer, malformed or trailing text is not
 * detected; whether a NULL device is guarded against is not visible here. */
333 device_idx = strtol(device, NULL, 0);
335 if (cuda_device_init(ctx) < 0)
338 cu = hwctx->internal->cuda_dl;
341 if (err != CUDA_SUCCESS) {
342 av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
346 err = cu->cuDeviceGet(&cu_device, device_idx);
347 if (err != CUDA_SUCCESS) {
348 av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
352 err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
353 if (err != CUDA_SUCCESS) {
354 av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
/* Pop the context right after creating it; the functions in this file push
 * it again around their CUDA calls. */
358 cu->cuCtxPopCurrent(&dummy);
/* Tells cuda_device_uninit() that the context is ours to destroy. */
360 hwctx->internal->is_allocated = 1;
365 cuda_device_uninit(ctx);
366 return AVERROR_UNKNOWN;
/* Descriptor of the CUDA hardware context type: the sizes of the device
 * context and of the private frames context, plus the device, frames and
 * transfer callbacks defined in this file. */
369 const HWContextType ff_hwcontext_type_cuda = {
370 .type = AV_HWDEVICE_TYPE_CUDA,
373 .device_hwctx_size = sizeof(AVCUDADeviceContext),
374 .frames_priv_size = sizeof(CUDAFramesContext),
376 .device_create = cuda_device_create,
377 .device_init = cuda_device_init,
378 .device_uninit = cuda_device_uninit,
379 .frames_init = cuda_frames_init,
380 .frames_get_buffer = cuda_get_buffer,
381 .transfer_get_formats = cuda_transfer_get_formats,
382 .transfer_data_to = cuda_transfer_data_to,
383 .transfer_data_from = cuda_transfer_data_from,
/* AV_PIX_FMT_CUDA is the only pixel format listed, followed by the
 * AV_PIX_FMT_NONE terminator. */
385 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },