2 * This file is part of Libav.
4 * Libav is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * Libav is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with Libav; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
23 #include "hwcontext_cuda.h"
/* Private per-frames-context state of the CUDA backend (its size is
 * registered as frames_priv_size in ff_hwcontext_type_cuda). */
28 typedef struct CUDAFramesContext {
/* Chroma shift values of the software format, filled in by
 * av_pix_fmt_get_chroma_sub_sample() in cuda_frames_init().
 * shift_height is used as the right-shift applied to the frame height
 * for planes other than plane 0 in the transfer functions; shift_width
 * is stored but not read in the code visible here. */
29 int shift_width, shift_height;
/* Software pixel formats accepted by this backend. cuda_frames_init()
 * rejects any sw_format not in this list, and
 * cuda_frames_get_constraints() copies the list into valid_sw_formats.
 * NOTE(review): the initializer entries are not visible in this excerpt. */
32 static const enum AVPixelFormat supported_formats[] = {
/*
 * Fill in the frame constraints for the CUDA backend.
 *
 * valid_sw_formats receives a copy of supported_formats and
 * valid_hw_formats receives the single entry AV_PIX_FMT_CUDA; both lists
 * are terminated with AV_PIX_FMT_NONE.
 *
 * Returns AVERROR(ENOMEM) if either list cannot be allocated.
 * NOTE(review): one parameter line between ctx and constraints is not
 * visible in this excerpt.
 */
40 static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
42 AVHWFramesConstraints *constraints)
/* One extra element for the AV_PIX_FMT_NONE terminator. */
46 constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
47 sizeof(*constraints->valid_sw_formats));
48 if (!constraints->valid_sw_formats)
49 return AVERROR(ENOMEM);
51 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
52 constraints->valid_sw_formats[i] = supported_formats[i];
53 constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
/* Two elements: AV_PIX_FMT_CUDA plus the terminator. */
55 constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
56 if (!constraints->valid_hw_formats)
/* NOTE(review): valid_sw_formats is not freed here; presumably the
 * caller releases the constraints structure on error - confirm. */
57 return AVERROR(ENOMEM);
59 constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
60 constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
/*
 * AVBuffer free callback for buffers created in cuda_pool_alloc().
 *
 * opaque is the AVHWFramesContext that owns the buffer; data is the
 * device pointer that was stored as the buffer's data pointer.
 */
65 static void cuda_buffer_free(void *opaque, uint8_t *data)
67 AVHWFramesContext *ctx = opaque;
68 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
/* Make the device's CUDA context current for the duration of the free.
 * The return values of the push and of cuMemFree are not checked in
 * the lines visible here. */
72 cuCtxPushCurrent(hwctx->cuda_ctx);
74 cuMemFree((CUdeviceptr)data);
/* Restore the previously current context; the popped handle is unused. */
76 cuCtxPopCurrent(&dummy);
/*
 * Buffer pool allocator: allocate `size` bytes of CUDA device memory and
 * wrap the device pointer in an AVBufferRef whose free callback is
 * cuda_buffer_free().
 *
 * opaque is the AVHWFramesContext passed to av_buffer_pool_init2() in
 * cuda_frames_init().
 *
 * NOTE(review): the failure paths (after the context-push error log,
 * after a failed cuMemAlloc, and after av_buffer_create) and the final
 * return are not visible in this excerpt; `ret` starts out as NULL.
 */
79 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
81 AVHWFramesContext *ctx = opaque;
82 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
84 AVBufferRef *ret = NULL;
85 CUcontext dummy = NULL;
/* The allocation is made with the device's CUDA context current. */
89 err = cuCtxPushCurrent(hwctx->cuda_ctx);
90 if (err != CUDA_SUCCESS) {
91 av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
95 err = cuMemAlloc(&data, size);
96 if (err != CUDA_SUCCESS)
/* The device pointer is stored as the buffer's data pointer; ctx is
 * handed to cuda_buffer_free() as its opaque argument. */
99 ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
/* Undo the context push made above. */
106 cuCtxPopCurrent(&dummy);
/*
 * Initialize a CUDA frames context.
 *
 * Checks that ctx->sw_format is one of supported_formats, records the
 * chroma shift values of that format in the private context, and
 * creates the internal buffer pool with a per-frame buffer size derived
 * from ctx->width, ctx->height and the format.
 *
 * Returns AVERROR(ENOSYS) for an unsupported software format and
 * AVERROR(ENOMEM) if the pool cannot be created.
 */
110 static int cuda_frames_init(AVHWFramesContext *ctx)
112 CUDAFramesContext *priv = ctx->internal->priv;
/* Search for sw_format in the supported list; i reaching the array size
 * below means it was not found. */
115 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
116 if (ctx->sw_format == supported_formats[i])
119 if (i == FF_ARRAY_ELEMS(supported_formats)) {
120 av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
121 av_get_pix_fmt_name(ctx->sw_format));
122 return AVERROR(ENOSYS);
125 av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
/* Size in bytes of one frame buffer holding all planes back to back.
 * NOTE(review): any condition guarding this section (e.g. whether a
 * user-supplied pool exists) is not visible in this excerpt.
 * NOTE(review): the products are presumably evaluated in int and could
 * overflow for very large dimensions - confirm the type of `size`. */
130 switch (ctx->sw_format) {
131 case AV_PIX_FMT_NV12:
132 case AV_PIX_FMT_YUV420P:
133 size = ctx->width * ctx->height * 3 / 2;
135 case AV_PIX_FMT_P010:
136 size = ctx->width * ctx->height * 3;
138 case AV_PIX_FMT_YUV444P:
139 size = ctx->width * ctx->height * 3;
141 case AV_PIX_FMT_YUV444P16:
142 size = ctx->width * ctx->height * 6;
/* Pool buffers are allocated by cuda_pool_alloc(), with ctx as opaque. */
146 ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
147 if (!ctx->internal->pool_internal)
148 return AVERROR(ENOMEM);
/*
 * Get a frame buffer from the pool and set up the frame's plane
 * pointers and line sizes for ctx->sw_format.
 *
 * All planes live in the single pool buffer frame->buf[0]. Line sizes
 * are derived directly from ctx->width, with no extra row padding.
 * On success the frame is tagged as AV_PIX_FMT_CUDA with the context's
 * width and height.
 */
154 static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
156 frame->buf[0] = av_buffer_pool_get(ctx->pool);
/* NOTE(review): the condition guarding this return is not visible;
 * presumably it is taken when the pool returned no buffer. */
158 return AVERROR(ENOMEM);
160 switch (ctx->sw_format) {
/* Two planes, one byte per sample in the line size: plane 1 starts
 * width * height bytes after plane 0. */
161 case AV_PIX_FMT_NV12:
162 frame->data[0] = frame->buf[0]->data;
163 frame->data[1] = frame->data[0] + ctx->width * ctx->height;
164 frame->linesize[0] = ctx->width;
165 frame->linesize[1] = ctx->width;
/* Three planes; note that plane 2 is placed directly after plane 0 and
 * plane 1 comes last, width * height / 4 bytes after plane 2. */
167 case AV_PIX_FMT_YUV420P:
168 frame->data[0] = frame->buf[0]->data;
169 frame->data[2] = frame->data[0] + ctx->width * ctx->height;
170 frame->data[1] = frame->data[2] + ctx->width * ctx->height / 4;
171 frame->linesize[0] = ctx->width;
172 frame->linesize[1] = ctx->width / 2;
173 frame->linesize[2] = ctx->width / 2;
/* Two planes with line sizes of 2 * width bytes. */
175 case AV_PIX_FMT_P010:
176 frame->data[0] = frame->buf[0]->data;
177 frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height;
178 frame->linesize[0] = 2 * ctx->width;
179 frame->linesize[1] = 2 * ctx->width;
/* Three planes of width * height bytes each, in index order. */
181 case AV_PIX_FMT_YUV444P:
182 frame->data[0] = frame->buf[0]->data;
183 frame->data[1] = frame->data[0] + ctx->width * ctx->height;
184 frame->data[2] = frame->data[1] + ctx->width * ctx->height;
185 frame->linesize[0] = ctx->width;
186 frame->linesize[1] = ctx->width;
187 frame->linesize[2] = ctx->width;
/* Three planes of 2 * width * height bytes each, in index order. */
189 case AV_PIX_FMT_YUV444P16:
190 frame->data[0] = frame->buf[0]->data;
191 frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height;
192 frame->data[2] = frame->data[1] + 2 * ctx->width * ctx->height;
193 frame->linesize[0] = 2 * ctx->width;
194 frame->linesize[1] = 2 * ctx->width;
195 frame->linesize[2] = 2 * ctx->width;
/* NOTE(review): the enclosing label is not visible; presumably this is
 * the fallback for a format not handled above, releasing the buffer
 * obtained from the pool - confirm. */
198 av_frame_unref(frame);
202 frame->format = AV_PIX_FMT_CUDA;
203 frame->width = ctx->width;
204 frame->height = ctx->height;
/*
 * Build the list of software formats usable for transfers with this
 * frames context: exactly ctx->sw_format, followed by the
 * AV_PIX_FMT_NONE terminator.
 *
 * dir is not referenced in the lines visible here, so the same list
 * appears to be produced for both transfer directions.
 * NOTE(review): the store of the list into *formats and the success
 * return are not visible in this excerpt.
 */
209 static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
210 enum AVHWFrameTransferDirection dir,
211 enum AVPixelFormat **formats)
213 enum AVPixelFormat *fmts;
/* Two elements: the software format plus the terminator. */
215 fmts = av_malloc_array(2, sizeof(*fmts));
/* NOTE(review): the guarding condition is not visible; presumably taken
 * when the allocation above failed. */
217 return AVERROR(ENOMEM);
219 fmts[0] = ctx->sw_format;
220 fmts[1] = AV_PIX_FMT_NONE;
227 static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
230 CUDAFramesContext *priv = ctx->internal->priv;
231 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
237 err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
238 if (err != CUDA_SUCCESS)
239 return AVERROR_UNKNOWN;
241 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
242 CUDA_MEMCPY2D cpy = {
243 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
244 .dstMemoryType = CU_MEMORYTYPE_HOST,
245 .srcDevice = (CUdeviceptr)src->data[i],
246 .dstHost = dst->data[i],
247 .srcPitch = src->linesize[i],
248 .dstPitch = dst->linesize[i],
249 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
250 .Height = src->height >> (i ? priv->shift_height : 0),
253 err = cuMemcpy2D(&cpy);
254 if (err != CUDA_SUCCESS) {
255 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
256 return AVERROR_UNKNOWN;
260 cuCtxPopCurrent(&dummy);
265 static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
268 CUDAFramesContext *priv = ctx->internal->priv;
269 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
275 err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
276 if (err != CUDA_SUCCESS)
277 return AVERROR_UNKNOWN;
279 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
280 CUDA_MEMCPY2D cpy = {
281 .srcMemoryType = CU_MEMORYTYPE_HOST,
282 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
283 .srcHost = src->data[i],
284 .dstDevice = (CUdeviceptr)dst->data[i],
285 .srcPitch = src->linesize[i],
286 .dstPitch = dst->linesize[i],
287 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
288 .Height = src->height >> (i ? priv->shift_height : 0),
291 err = cuMemcpy2D(&cpy);
292 if (err != CUDA_SUCCESS) {
293 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
294 return AVERROR_UNKNOWN;
298 cuCtxPopCurrent(&dummy);
/*
 * Device context free callback (installed as ctx->free by
 * cuda_device_create()): destroys the CUDA context stored in the
 * device's hwctx.
 */
303 static void cuda_device_free(AVHWDeviceContext *ctx)
305 AVCUDADeviceContext *hwctx = ctx->hwctx;
306 cuCtxDestroy(hwctx->cuda_ctx);
/*
 * Create a CUDA device context.
 *
 * The device string is parsed as an integer device index, the
 * corresponding CUDA device is looked up, and a new CUDA context is
 * created on it and stored in hwctx->cuda_ctx. cuda_device_free() is
 * installed as the context's free callback.
 *
 * opts and flags are not referenced in the lines visible here.
 * Returns AVERROR_UNKNOWN if any of the CUDA calls fails.
 */
309 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
310 AVDictionary *opts, int flags)
312 AVCUDADeviceContext *hwctx = ctx->hwctx;
/* Base 0 lets strtol accept decimal, octal (0...) and hex (0x...).
 * NOTE(review): strtol would dereference a NULL device string unless a
 * guard exists on a line not visible in this excerpt - confirm. */
319 device_idx = strtol(device, NULL, 0);
/* NOTE(review): the call that sets err here is not visible; judging by
 * the message it is the driver API initialization. */
322 if (err != CUDA_SUCCESS) {
323 av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
324 return AVERROR_UNKNOWN;
327 err = cuDeviceGet(&cu_device, device_idx);
328 if (err != CUDA_SUCCESS) {
329 av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
330 return AVERROR_UNKNOWN;
/* Context creation flags are 0. */
333 err = cuCtxCreate(&hwctx->cuda_ctx, 0, cu_device);
334 if (err != CUDA_SUCCESS) {
335 av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
336 return AVERROR_UNKNOWN;
/* Pop the current context right after creation; the other functions in
 * this file push hwctx->cuda_ctx themselves around each CUDA call. */
339 cuCtxPopCurrent(&dummy);
341 ctx->free = cuda_device_free;
/*
 * Backend descriptor for the CUDA hardware context type: wires the
 * static functions of this file into the generic hwcontext callbacks.
 * NOTE(review): some initializer lines and the closing of the
 * initializer are not visible in this excerpt.
 */
346 const HWContextType ff_hwcontext_type_cuda = {
347 .type = AV_HWDEVICE_TYPE_CUDA,
/* Sizes of the backend-specific device hwctx and of the private
 * per-frames-context state. */
350 .device_hwctx_size = sizeof(AVCUDADeviceContext),
351 .frames_priv_size = sizeof(CUDAFramesContext),
353 .device_create = cuda_device_create,
354 .frames_get_constraints = cuda_frames_get_constraints,
355 .frames_init = cuda_frames_init,
356 .frames_get_buffer = cuda_get_buffer,
357 .transfer_get_formats = cuda_transfer_get_formats,
358 .transfer_data_to = cuda_transfer_data_to,
359 .transfer_data_from = cuda_transfer_data_from,
/* Hardware pixel formats handled by this backend, AV_PIX_FMT_NONE
 * terminated. */
361 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },