2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
23 #include "hwcontext_cuda_internal.h"
/*
 * Alignment value passed to FFALIGN() when the frame width is rounded up in
 * cuda_frames_init() and cuda_get_buffer().
 */
28 #define CUDA_FRAME_ALIGNMENT 256
/*
 * Private per-frames-context state; its size is registered through
 * .frames_priv_size in ff_hwcontext_type_cuda.
 */
30 typedef struct CUDAFramesContext {
/*
 * Chroma subsampling shifts, filled in by cuda_frames_init() through
 * av_pix_fmt_get_chroma_sub_sample(). shift_height is used by the transfer
 * functions to scale the row count of every plane other than plane 0.
 */
31 int shift_width, shift_height;
/*
 * Software pixel formats accepted by this backend. The table is copied into
 * the constraints by cuda_frames_get_constraints() and searched by
 * cuda_frames_init(). (The entries themselves are not visible in this
 * excerpt.)
 */
34 static const enum AVPixelFormat supported_formats[] = {
/*
 * Fill in the hardware frame constraints for CUDA.
 *
 * valid_sw_formats receives a copy of supported_formats[] and valid_hw_formats
 * receives the single entry AV_PIX_FMT_CUDA; both lists are terminated with
 * AV_PIX_FMT_NONE.
 *
 * Returns AVERROR(ENOMEM) if either list cannot be allocated.
 */
43 static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
45 AVHWFramesConstraints *constraints)
/* One extra slot is reserved for the AV_PIX_FMT_NONE terminator. */
49 constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
50 sizeof(*constraints->valid_sw_formats));
51 if (!constraints->valid_sw_formats)
52 return AVERROR(ENOMEM);
54 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
55 constraints->valid_sw_formats[i] = supported_formats[i];
56 constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
/* Two entries: AV_PIX_FMT_CUDA plus the terminator. */
58 constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
59 if (!constraints->valid_hw_formats)
/*
 * NOTE(review): valid_sw_formats is not released on this path; presumably the
 * caller frees the whole constraints struct on failure -- confirm.
 */
60 return AVERROR(ENOMEM);
62 constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
63 constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
/*
 * Free callback installed by cuda_pool_alloc() through av_buffer_create().
 *
 * opaque is the AVHWFramesContext that owns the buffer; data is the device
 * pointer that was stored as the buffer's data pointer.
 */
68 static void cuda_buffer_free(void *opaque, uint8_t *data)
70 AVHWFramesContext *ctx = opaque;
71 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
72 CudaFunctions *cu = hwctx->internal->cuda_dl;
/*
 * Push the device's CUDA context, free the allocation, then pop the context
 * again. None of the three return codes is checked in the lines visible here.
 */
76 cu->cuCtxPushCurrent(hwctx->cuda_ctx);
78 cu->cuMemFree((CUdeviceptr)data);
80 cu->cuCtxPopCurrent(&dummy);
/*
 * Buffer pool allocator (registered in cuda_frames_init() through
 * av_buffer_pool_init2()).
 *
 * Allocates size bytes with cuMemAlloc() and wraps the device pointer in an
 * AVBufferRef whose free callback is cuda_buffer_free(). opaque is the
 * AVHWFramesContext and is forwarded to that callback.
 *
 * NOTE(review): the statements taken on the error paths and the final return
 * are not visible in this excerpt.
 */
83 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
85 AVHWFramesContext *ctx = opaque;
86 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
87 CudaFunctions *cu = hwctx->internal->cuda_dl;
89 AVBufferRef *ret = NULL;
90 CUcontext dummy = NULL;
/* Push the device's CUDA context before allocating. */
94 err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
95 if (err != CUDA_SUCCESS) {
96 av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
100 err = cu->cuMemAlloc(&data, size);
101 if (err != CUDA_SUCCESS)
/* The device pointer is stored as the buffer's data pointer. */
104 ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
/* Pop the context pushed above. */
111 cu->cuCtxPopCurrent(&dummy);
/*
 * Initialise a CUDA frames context.
 *
 * Checks ctx->sw_format against supported_formats[], records the chroma
 * subsampling shifts in the private context, computes a per-frame buffer size
 * from the aligned width and the height, and creates the internal buffer pool
 * with cuda_pool_alloc() as its allocator.
 *
 * Returns AVERROR(ENOSYS) for an unsupported sw_format and AVERROR(ENOMEM) if
 * the pool cannot be created.
 */
115 static int cuda_frames_init(AVHWFramesContext *ctx)
117 CUDAFramesContext *priv = ctx->internal->priv;
/* Here the width is aligned as given in ctx->width. */
118 int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
/* Linear search; i ends at the array size when nothing matched. */
121 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
122 if (ctx->sw_format == supported_formats[i])
125 if (i == FF_ARRAY_ELEMS(supported_formats)) {
126 av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
127 av_get_pix_fmt_name(ctx->sw_format));
128 return AVERROR(ENOSYS);
/* Stored for the transfer functions, which read priv->shift_height. */
131 av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
/*
 * Buffer size per format, as a multiple of aligned_width * height.
 * NOTE(review): the products are computed in int; very large dimensions
 * could overflow -- confirm whether dimensions are bounded upstream.
 */
136 switch (ctx->sw_format) {
137 case AV_PIX_FMT_NV12:
138 case AV_PIX_FMT_YUV420P:
139 size = aligned_width * ctx->height * 3 / 2;
141 case AV_PIX_FMT_YUV444P:
142 case AV_PIX_FMT_P010:
143 case AV_PIX_FMT_P016:
144 size = aligned_width * ctx->height * 3;
146 case AV_PIX_FMT_YUV444P16:
147 size = aligned_width * ctx->height * 6;
/*
 * NOTE(review): unlike the other log messages in this file, this one has no
 * trailing "\n".
 */
150 av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation.");
/* ctx is passed as the pool's opaque pointer and reaches cuda_pool_alloc(). */
154 ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
155 if (!ctx->internal->pool_internal)
156 return AVERROR(ENOMEM);
/*
 * Take one buffer from ctx->pool and set up the frame's plane pointers and
 * line sizes inside it according to ctx->sw_format.
 *
 * All planes live in the single buffer frame->buf[0]. On success the frame is
 * tagged as AV_PIX_FMT_CUDA with the context's width and height.
 * AVERROR(ENOMEM) is returned on the pool-allocation failure path.
 */
162 static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
165 int width_in_bytes = ctx->width;
/*
 * NOTE(review): the body of this branch is not visible in this excerpt;
 * presumably it enlarges width_in_bytes for these 16-bit formats -- confirm.
 */
167 if (ctx->sw_format == AV_PIX_FMT_P010 ||
168 ctx->sw_format == AV_PIX_FMT_P016 ||
169 ctx->sw_format == AV_PIX_FMT_YUV444P16) {
172 aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
174 frame->buf[0] = av_buffer_pool_get(ctx->pool);
176 return AVERROR(ENOMEM);
178 switch (ctx->sw_format) {
/* Two planes: plane 1 starts right after plane 0, same line size. */
179 case AV_PIX_FMT_NV12:
180 case AV_PIX_FMT_P010:
181 case AV_PIX_FMT_P016:
182 frame->data[0] = frame->buf[0]->data;
183 frame->data[1] = frame->data[0] + aligned_width * ctx->height;
184 frame->linesize[0] = aligned_width;
185 frame->linesize[1] = aligned_width;
/*
 * Three planes; note that data[2] is placed directly after plane 0 and
 * data[1] follows data[2]. Planes 1 and 2 use half the line size.
 */
187 case AV_PIX_FMT_YUV420P:
188 frame->data[0] = frame->buf[0]->data;
189 frame->data[2] = frame->data[0] + aligned_width * ctx->height;
190 frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4;
191 frame->linesize[0] = aligned_width;
192 frame->linesize[1] = aligned_width / 2;
193 frame->linesize[2] = aligned_width / 2;
/* Three equally sized planes stored back to back. */
195 case AV_PIX_FMT_YUV444P:
196 case AV_PIX_FMT_YUV444P16:
197 frame->data[0] = frame->buf[0]->data;
198 frame->data[1] = frame->data[0] + aligned_width * ctx->height;
199 frame->data[2] = frame->data[1] + aligned_width * ctx->height;
200 frame->linesize[0] = aligned_width;
201 frame->linesize[1] = aligned_width;
202 frame->linesize[2] = aligned_width;
/* Unhandled format: drop the buffer reference taken above. */
205 av_frame_unref(frame);
209 frame->format = AV_PIX_FMT_CUDA;
210 frame->width = ctx->width;
211 frame->height = ctx->height;
/*
 * Build the list of formats usable for transfers: only ctx->sw_format,
 * followed by the AV_PIX_FMT_NONE terminator.
 *
 * dir is not referenced in the lines visible here. AVERROR(ENOMEM) is
 * returned on the allocation-failure path.
 */
216 static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
217 enum AVHWFrameTransferDirection dir,
218 enum AVPixelFormat **formats)
220 enum AVPixelFormat *fmts;
222 fmts = av_malloc_array(2, sizeof(*fmts));
224 return AVERROR(ENOMEM);
226 fmts[0] = ctx->sw_format;
227 fmts[1] = AV_PIX_FMT_NONE;
/*
 * Download a CUDA frame into host memory (device -> host).
 *
 * Each plane with a non-NULL src->data[i] is copied with cuMemcpy2D() while
 * the device's CUDA context is pushed.
 *
 * Returns AVERROR_UNKNOWN if the context cannot be pushed or a copy fails.
 */
234 static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
237 CUDAFramesContext *priv = ctx->internal->priv;
238 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
239 CudaFunctions *cu = device_hwctx->internal->cuda_dl;
245 err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
246 if (err != CUDA_SUCCESS)
247 return AVERROR_UNKNOWN;
/* Stop at the first NULL plane pointer of the source frame. */
249 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
250 CUDA_MEMCPY2D cpy = {
251 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
252 .dstMemoryType = CU_MEMORYTYPE_HOST,
253 .srcDevice = (CUdeviceptr)src->data[i],
254 .dstHost = dst->data[i],
255 .srcPitch = src->linesize[i],
256 .dstPitch = dst->linesize[i],
/* Copy only as many bytes per row as both frames have room for. */
257 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
/* Plane 0 uses the full height; other planes are shifted by shift_height. */
258 .Height = src->height >> (i ? priv->shift_height : 0),
261 err = cu->cuMemcpy2D(&cpy);
262 if (err != CUDA_SUCCESS) {
263 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
/*
 * NOTE(review): this return is reached with the context still pushed; the
 * matching cuCtxPopCurrent() below is skipped on this path.
 */
264 return AVERROR_UNKNOWN;
268 cu->cuCtxPopCurrent(&dummy);
/*
 * Upload a host frame into a CUDA frame (host -> device).
 *
 * Each plane with a non-NULL src->data[i] is copied with cuMemcpy2D() while
 * the device's CUDA context is pushed.
 *
 * Returns AVERROR_UNKNOWN if the context cannot be pushed or a copy fails.
 */
273 static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
276 CUDAFramesContext *priv = ctx->internal->priv;
277 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
278 CudaFunctions *cu = device_hwctx->internal->cuda_dl;
284 err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
285 if (err != CUDA_SUCCESS)
286 return AVERROR_UNKNOWN;
/* Stop at the first NULL plane pointer of the source frame. */
288 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
289 CUDA_MEMCPY2D cpy = {
290 .srcMemoryType = CU_MEMORYTYPE_HOST,
291 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
292 .srcHost = src->data[i],
293 .dstDevice = (CUdeviceptr)dst->data[i],
294 .srcPitch = src->linesize[i],
295 .dstPitch = dst->linesize[i],
/* Copy only as many bytes per row as both frames have room for. */
296 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
/* Plane 0 uses the full height; other planes are shifted by shift_height. */
297 .Height = src->height >> (i ? priv->shift_height : 0),
300 err = cu->cuMemcpy2D(&cpy);
301 if (err != CUDA_SUCCESS) {
/*
 * NOTE(review): the message says "from the CUDA frame" although this
 * function copies host memory to the device; it looks copied from
 * cuda_transfer_data_from().
 */
302 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
/*
 * NOTE(review): this return is reached with the context still pushed; the
 * matching cuCtxPopCurrent() below is skipped on this path.
 */
303 return AVERROR_UNKNOWN;
307 cu->cuCtxPopCurrent(&dummy);
/*
 * Release the device context's internal state.
 *
 * The CUDA context is destroyed only when the is_allocated flag is set
 * (cuda_device_create() sets it after creating the context); the loaded CUDA
 * function table and the internal struct itself are then freed.
 */
312 static void cuda_device_uninit(AVHWDeviceContext *ctx)
314 AVCUDADeviceContext *hwctx = ctx->hwctx;
316 if (hwctx->internal) {
317 if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
318 hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
/* Clear the handle so it is not destroyed twice. */
319 hwctx->cuda_ctx = NULL;
321 cuda_free_functions(&hwctx->internal->cuda_dl);
324 av_freep(&hwctx->internal);
/*
 * Make sure the device context has its internal struct and a loaded CUDA
 * function table.
 *
 * Both steps are skipped when the corresponding pointer is already set.
 * Returns AVERROR(ENOMEM) if the internal struct cannot be allocated.
 */
327 static int cuda_device_init(AVHWDeviceContext *ctx)
329 AVCUDADeviceContext *hwctx = ctx->hwctx;
332 if (!hwctx->internal) {
/* Zero-initialised, so cuda_dl and is_allocated start out cleared. */
333 hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
334 if (!hwctx->internal)
335 return AVERROR(ENOMEM);
338 if (!hwctx->internal->cuda_dl) {
339 ret = cuda_load_functions(&hwctx->internal->cuda_dl);
341 av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
/*
 * NOTE(review): presumably the error path -- the label and the return that
 * surround this call are not visible in this excerpt.
 */
349 cuda_device_uninit(ctx);
/*
 * Create a CUDA device context from a device string.
 *
 * The device string is parsed as an integer index; the function then runs
 * cuda_device_init(), looks the device up with cuDeviceGet(), creates a
 * context on it with cuCtxCreate() and marks the context as owned
 * (is_allocated) so that cuda_device_uninit() destroys it.
 *
 * On the failure path cuda_device_uninit() is called and AVERROR_UNKNOWN is
 * returned. opts and flags are not referenced in the lines visible here.
 */
353 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
354 AVDictionary *opts, int flags)
356 AVCUDADeviceContext *hwctx = ctx->hwctx;
/*
 * Base 0 lets strtol() accept decimal, octal and hexadecimal notation.
 * NOTE(review): no end pointer or range check, so a non-numeric string
 * silently selects device 0.
 */
364 device_idx = strtol(device, NULL, 0);
366 if (cuda_device_init(ctx) < 0)
369 cu = hwctx->internal->cuda_dl;
/* The call whose result is tested here is not visible in this excerpt. */
372 if (err != CUDA_SUCCESS) {
373 av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
377 err = cu->cuDeviceGet(&cu_device, device_idx);
378 if (err != CUDA_SUCCESS) {
379 av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
383 err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
384 if (err != CUDA_SUCCESS) {
385 av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
/*
 * Presumably removes the freshly created context from the calling thread's
 * context stack -- confirm against the cuCtxCreate() documentation.
 */
389 cu->cuCtxPopCurrent(&dummy);
/* Tells cuda_device_uninit() that this context must be destroyed there. */
391 hwctx->internal->is_allocated = 1;
396 cuda_device_uninit(ctx);
397 return AVERROR_UNKNOWN;
/*
 * Descriptor for the CUDA hwcontext backend: it records the sizes of the
 * device and frames private structures and wires up the callbacks defined
 * in this file.
 */
400 const HWContextType ff_hwcontext_type_cuda = {
401 .type = AV_HWDEVICE_TYPE_CUDA,
404 .device_hwctx_size = sizeof(AVCUDADeviceContext),
405 .frames_priv_size = sizeof(CUDAFramesContext),
407 .device_create = cuda_device_create,
408 .device_init = cuda_device_init,
409 .device_uninit = cuda_device_uninit,
410 .frames_get_constraints = cuda_frames_get_constraints,
411 .frames_init = cuda_frames_init,
412 .frames_get_buffer = cuda_get_buffer,
413 .transfer_get_formats = cuda_transfer_get_formats,
414 .transfer_data_to = cuda_transfer_data_to,
415 .transfer_data_from = cuda_transfer_data_from,
/* The only hardware pixel format exposed by this backend. */
417 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },