]> git.sesse.net Git - ffmpeg/commitdiff
avcodec: Implement mpeg4 nvdec hwaccel
authorPhilip Langdale <philipl@overt.org>
Thu, 16 Nov 2017 04:59:29 +0000 (20:59 -0800)
committerPhilip Langdale <philipl@overt.org>
Mon, 20 Nov 2017 15:21:41 +0000 (07:21 -0800)
This was predictably nightmarish, given how ridiculous mpeg4 is.
I had to stare at the cuvid parser output for a long time to work
out what each field was supposed to be, and even then, I still don't
fully understand some of them. Particularly:

vop_coded: If I'm reading the decoder correctly, this flag will always
           be 1 as the decoder will not pass the hwaccel any frame
           where it is not 1.
divx_flags: There's obviously no documentation on what the possible
            flags are. I simply observed that this is '0' for a
            normal bitstream and '5' for packed b-frames.
gmc_enabled: I had a number of guesses as to what this mapped to.
             I chose the current condition because it matches when the
             cuvid parser was setting the flag.

Also note that as with the vdpau hwaccel, the decoder needs to
consume the entire frame and not the slice.

Changelog
configure
libavcodec/Makefile
libavcodec/allcodecs.c
libavcodec/h263dec.c
libavcodec/nvdec.c
libavcodec/nvdec_mpeg4.c [new file with mode: 0644]
libavcodec/version.h

index 5a9d183aeda239340090227610abe9ee5fa6a373..74ed35cfe6ba810dbfaeedd0fc5da48cdcf5eec3 100644 (file)
--- a/Changelog
+++ b/Changelog
@@ -13,7 +13,7 @@ version <next>:
 - PCE support for extended channel layouts in the AAC encoder
 - native aptX encoder and decoder
 - Raw aptX muxer and demuxer
-- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2, VC1 and VP9 hwaccel decoding
+- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1 and VP9 hwaccel decoding
 - Intel QSV-accelerated overlay filter
 - mcompand audio filter
 - acontrast audio filter
index 7ac9a8d3918a20713d1d1aad51bfeef85477d621..25c3124ca602134d3b8af07a08523b7b8645ead1 100755 (executable)
--- a/configure
+++ b/configure
@@ -2735,6 +2735,8 @@ mpeg2_xvmc_hwaccel_select="mpeg2video_decoder"
 mpeg4_cuvid_hwaccel_select="mpeg4_cuvid_decoder"
 mpeg4_mediacodec_hwaccel_deps="mediacodec"
 mpeg4_mmal_hwaccel_deps="mmal"
+mpeg4_nvdec_hwaccel_deps="nvdec"
+mpeg4_nvdec_hwaccel_select="mpeg4_decoder"
 mpeg4_vaapi_hwaccel_deps="vaapi"
 mpeg4_vaapi_hwaccel_select="mpeg4_decoder"
 mpeg4_vdpau_hwaccel_deps="vdpau"
index 0573454c7bff111330cc5d93b66969cf4b4b1900..2af957ab7279eeb181c12c05e5d83d0ec957f328 100644 (file)
@@ -861,6 +861,7 @@ OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)        += vaapi_mpeg2.o
 OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL)        += vdpau_mpeg12.o
 OBJS-$(CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
 OBJS-$(CONFIG_MPEG2_XVMC_HWACCEL)         += mpegvideo_xvmc.o
+OBJS-$(CONFIG_MPEG4_NVDEC_HWACCEL)        += nvdec_mpeg4.o
 OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)        += vaapi_mpeg4.o
 OBJS-$(CONFIG_MPEG4_VDPAU_HWACCEL)        += vdpau_mpeg4.o
 OBJS-$(CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
index e9df7049de2a11ed1617949149820c4ae28eb669..85c38c83aa46736f19a4f06f2f035c4a73e74e7c 100644 (file)
@@ -106,6 +106,7 @@ static void register_all(void)
     REGISTER_HWACCEL(MPEG4_CUVID,       mpeg4_cuvid);
     REGISTER_HWACCEL(MPEG4_MEDIACODEC,  mpeg4_mediacodec);
     REGISTER_HWACCEL(MPEG4_MMAL,        mpeg4_mmal);
+    REGISTER_HWACCEL(MPEG4_NVDEC,       mpeg4_nvdec);
     REGISTER_HWACCEL(MPEG4_VAAPI,       mpeg4_vaapi);
     REGISTER_HWACCEL(MPEG4_VDPAU,       mpeg4_vdpau);
     REGISTER_HWACCEL(MPEG4_VIDEOTOOLBOX, mpeg4_videotoolbox);
index c7cf4bc0c2e3fb65cb296343048bfbd8145fcb7e..b222de793b8c42b55d0b102c51b609ec1702ae76 100644 (file)
@@ -714,6 +714,9 @@ const enum AVPixelFormat ff_h263_hwaccel_pixfmt_list_420[] = {
 #if CONFIG_H263_VAAPI_HWACCEL || CONFIG_MPEG4_VAAPI_HWACCEL
     AV_PIX_FMT_VAAPI,
 #endif
+#if CONFIG_MPEG4_NVDEC_HWACCEL
+    AV_PIX_FMT_CUDA,
+#endif
 #if CONFIG_MPEG4_VDPAU_HWACCEL
     AV_PIX_FMT_VDPAU,
 #endif
index d5cf1058cb7b41609d6a088e5482492d4443188d..efcd47a7f752eeaa1bd9950e7b9c1b89b40ce02d 100644 (file)
@@ -56,6 +56,7 @@ static int map_avcodec_id(enum AVCodecID id)
     case AV_CODEC_ID_HEVC:       return cudaVideoCodec_HEVC;
     case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1;
     case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
+    case AV_CODEC_ID_MPEG4:      return cudaVideoCodec_MPEG4;
     case AV_CODEC_ID_VC1:        return cudaVideoCodec_VC1;
     case AV_CODEC_ID_VP9:        return cudaVideoCodec_VP9;
     case AV_CODEC_ID_WMV3:       return cudaVideoCodec_VC1;
diff --git a/libavcodec/nvdec_mpeg4.c b/libavcodec/nvdec_mpeg4.c
new file mode 100644 (file)
index 0000000..a0f9280
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * MPEG-4 Part 2 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2017 Philip Langdale
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "mpeg4video.h"
+#include "nvdec.h"
+#include "decode.h"
+
+/*
+ * hwaccel start_frame callback for MPEG-4 Part 2.
+ *
+ * Rebuilds the CUVIDPICPARAMS stored in the NVDEC hwaccel context from the
+ * current MPEG-4 decoder state, copies both quantisation matrices, and then
+ * submits the complete buffer it was given via ff_nvdec_simple_decode_slice().
+ *
+ * Returns the negative value reported by ff_nvdec_start_frame() if that call
+ * fails, otherwise whatever ff_nvdec_simple_decode_slice() returns.
+ */
+static int nvdec_mpeg4_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    Mpeg4DecContext *m = avctx->priv_data;
+    MpegEncContext *s = &m->m;
+
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
+    CUVIDMPEG4PICPARAMS *ppc = &pp->CodecSpecific.mpeg4;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+    AVFrame *cur_frame = s->current_picture.f;
+
+    int ret, i;
+
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
+    if (ret < 0)
+        return ret;
+
+    // Per-frame NVDEC data hangs off the frame's private_ref; it supplies the
+    // index used as CurrPicIdx below (presumably set up by ff_nvdec_start_frame()
+    // - confirm)
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    // Assigning a compound literal zeroes every field that is not named here
+    *pp = (CUVIDPICPARAMS) {
+        // Picture size in 16x16 macroblocks, rounded up
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
+        .CurrPicIdx        = cf->idx,
+
+        .intra_pic_flag    = s->pict_type == AV_PICTURE_TYPE_I,
+        // I, P and S pictures are flagged as references; any other type is not
+        .ref_pic_flag      = s->pict_type == AV_PICTURE_TYPE_I ||
+                             s->pict_type == AV_PICTURE_TYPE_P ||
+                             s->pict_type == AV_PICTURE_TYPE_S,
+
+        .CodecSpecific.mpeg4 = {
+            .ForwardRefIdx                = ff_nvdec_get_ref_idx(s->last_picture.f),
+            .BackwardRefIdx               = ff_nvdec_get_ref_idx(s->next_picture.f),
+
+            .video_object_layer_width     = s->width,
+            .video_object_layer_height    = s->height,
+            .vop_time_increment_bitcount  = m->time_increment_bits,
+            .top_field_first              = s->top_field_first,
+            .resync_marker_disable        = !m->resync_marker,
+            .quant_type                   = s->mpeg_quant,
+            .quarter_sample               = s->quarter_sample,
+            .short_video_header           = avctx->codec->id == AV_CODEC_ID_H263,
+            // The flag bits are undocumented; 0 (normal stream) and 5 (packed
+            // B-frames) are the values observed in cuvid parser output
+            .divx_flags                   = s->divx_packed ? 5 : 0,
+
+            // Rebased so that AV_PICTURE_TYPE_I maps to 0
+            .vop_coding_type              = s->pict_type - AV_PICTURE_TYPE_I,
+            // NOTE(review): hard-coded on the assumption that VOPs which are not
+            // coded never reach the hwaccel - confirm against the decoder
+            .vop_coded                    = 1,
+            .vop_rounding_type            = s->no_rounding,
+            .alternate_vertical_scan_flag = s->alternate_scan,
+            .interlaced                   = !s->progressive_sequence,
+            .vop_fcode_forward            = s->f_code,
+            .vop_fcode_backward           = s->b_code,
+            // Second entry of each pair is the field-based time shifted right by one
+            .trd                          = { s->pp_time, s->pp_field_time >> 1 },
+            .trb                          = { s->pb_time, s->pb_field_time >> 1 },
+
+            // Only S pictures of a stream whose sprite usage is GMC enable this
+            .gmc_enabled                  = s->pict_type == AV_PICTURE_TYPE_S &&
+                                            m->vol_sprite_usage == GMC_SPRITE,
+        }
+    };
+
+    // NOTE(review): the matrices are copied in the order the decoder stores
+    // them. If that storage is IDCT-permuted they may need to be un-permuted
+    // before being handed to NVDEC - confirm.
+    for (i = 0; i < 64; ++i) {
+        ppc->QuantMatrixIntra[i] = s->intra_matrix[i];
+        ppc->QuantMatrixInter[i] = s->inter_matrix[i];
+    }
+
+    // We need to pass the full frame buffer and not just the slice
+    return ff_nvdec_simple_decode_slice(avctx, buffer, size);
+}
+
+/*
+ * hwaccel decode_slice callback for MPEG-4 Part 2.
+ *
+ * Deliberately does nothing and reports success: nvdec_mpeg4_start_frame()
+ * already submits the whole frame buffer, so all arguments are ignored here.
+ */
+static int nvdec_mpeg4_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    return 0;
+}
+
+/*
+ * hwaccel frame_params callback for MPEG-4 Part 2.
+ *
+ * Forwards to ff_nvdec_frame_params() and returns its result unchanged.
+ */
+static int nvdec_mpeg4_frame_params(AVCodecContext *avctx,
+                                    AVBufferRef *hw_frames_ctx)
+{
+    // A picture needs at most two references: one for P prediction and one
+    // more for B prediction
+    const int max_refs = 2;
+
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, max_refs);
+}
+
+/*
+ * NVDEC hwaccel descriptor for AV_CODEC_ID_MPEG4, producing AV_PIX_FMT_CUDA
+ * frames. start_frame does the per-picture work and decode_slice is a no-op;
+ * init, uninit and end_frame use the shared ff_nvdec_* helpers.
+ */
+AVHWAccel ff_mpeg4_nvdec_hwaccel = {
+    .name                 = "mpeg4_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_MPEG4,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_mpeg4_start_frame,
+    .end_frame            = ff_nvdec_simple_end_frame,
+    .decode_slice         = nvdec_mpeg4_decode_slice,
+    .frame_params         = nvdec_mpeg4_frame_params,
+    .init                 = ff_nvdec_decode_init,
+    .uninit               = ff_nvdec_decode_uninit,
+    .priv_data_size       = sizeof(NVDECContext),
+};
index ff54670ea90b6ef01102dc480dc9534ab6189007..c8550bca9a5756cbbd394bb0ca08edca02278995 100644 (file)
@@ -29,7 +29,7 @@
 
 #define LIBAVCODEC_VERSION_MAJOR  58
 #define LIBAVCODEC_VERSION_MINOR   3
-#define LIBAVCODEC_VERSION_MICRO 104
+#define LIBAVCODEC_VERSION_MICRO 105
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \