git.sesse.net Git - ffmpeg/blob - libavcodec/nvdec_vp9.c

   1 /*
   2  * VP9 HW decode acceleration through NVDEC
   3  *
   4  * Copyright (c) 2016 Timo Rothenpieler
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/pixdesc.h"
  24
  25 #include "avcodec.h"
  26 #include "nvdec.h"
  27 #include "decode.h"
  28 #include "internal.h"
  29 #include "vp9shared.h"
  30
  31 static unsigned char get_ref_idx(AVFrame *frame)
  32 {
  33     FrameDecodeData *fdd;
  34     NVDECFrame *cf;
  35
  36     if (!frame || !frame->private_ref)
  37         return 255;
  38
  39     fdd = (FrameDecodeData*)frame->private_ref->data;
  40     cf  = (NVDECFrame*)fdd->hwaccel_priv;
  41
  42     return cf->idx;
  43 }
  44
  45 static int nvdec_vp9_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
  46 {
  47     VP9SharedContext *h = avctx->priv_data;
  48     const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
  49
  50     NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
  51     CUVIDPICPARAMS     *pp = &ctx->pic_params;
  52     CUVIDVP9PICPARAMS *ppc = &pp->CodecSpecific.vp9;
  53     FrameDecodeData *fdd;
  54     NVDECFrame *cf;
  55     AVFrame *cur_frame = h->frames[CUR_FRAME].tf.f;
  56
  57     int ret, i;
  58
  59     ret = ff_nvdec_start_frame(avctx, cur_frame);
  60     if (ret < 0)
  61         return ret;
  62
  63     fdd = (FrameDecodeData*)cur_frame->private_ref->data;
  64     cf  = (NVDECFrame*)fdd->hwaccel_priv;
  65
  66     *pp = (CUVIDPICPARAMS) {
  67         .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
  68         .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
  69         .CurrPicIdx        = cf->idx,
  70
  71         .CodecSpecific.vp9 = {
  72             .width                    = cur_frame->width,
  73             .height                   = cur_frame->height,
  74
  75             .LastRefIdx               = get_ref_idx(h->refs[h->h.refidx[0]].f),
  76             .GoldenRefIdx             = get_ref_idx(h->refs[h->h.refidx[1]].f),
  77             .AltRefIdx                = get_ref_idx(h->refs[h->h.refidx[2]].f),
  78
  79             .profile                  = h->h.profile,
  80             .frameContextIdx          = h->h.framectxid,
  81             .frameType                = !h->h.keyframe,
  82             .showFrame                = !h->h.invisible,
  83             .errorResilient           = h->h.errorres,
  84             .frameParallelDecoding    = h->h.parallelmode,
  85             .subSamplingX             = pixdesc->log2_chroma_w,
  86             .subSamplingY             = pixdesc->log2_chroma_h,
  87             .intraOnly                = h->h.intraonly,
  88             .allow_high_precision_mv  = h->h.keyframe ? 0 : h->h.highprecisionmvs,
  89             .refreshEntropyProbs      = h->h.refreshctx,
  90
  91             .bitDepthMinus8Luma       = pixdesc->comp[0].depth - 8,
  92             .bitDepthMinus8Chroma     = pixdesc->comp[1].depth - 8,
  93
  94             .loopFilterLevel          = h->h.filter.level,
  95             .loopFilterSharpness      = h->h.filter.sharpness,
  96             .modeRefLfEnabled         = h->h.lf_delta.enabled,
  97
  98             .log2_tile_columns        = h->h.tiling.log2_tile_cols,
  99             .log2_tile_rows           = h->h.tiling.log2_tile_rows,
 100
 101             .segmentEnabled           = h->h.segmentation.enabled,
 102             .segmentMapUpdate         = h->h.segmentation.update_map,
 103             .segmentMapTemporalUpdate = h->h.segmentation.temporal,
 104             .segmentFeatureMode       = h->h.segmentation.absolute_vals,
 105
 106             .qpYAc                    = h->h.yac_qi,
 107             .qpYDc                    = h->h.ydc_qdelta,
 108             .qpChDc                   = h->h.uvdc_qdelta,
 109             .qpChAc                   = h->h.uvac_qdelta,
 110
 111             .resetFrameContext        = h->h.resetctx,
 112             .mcomp_filter_type        = h->h.filtermode ^ (h->h.filtermode <= 1),
 113
 114             .frameTagSize             = h->h.uncompressed_header_size,
 115             .offsetToDctParts         = h->h.compressed_header_size,
 116
 117             .refFrameSignBias[0]      = 0,
 118         }
 119     };
 120
 121     for (i = 0; i < 2; i++)
 122         ppc->mbModeLfDelta[i] = h->h.lf_delta.mode[i];
 123
 124     for (i = 0; i < 4; i++)
 125         ppc->mbRefLfDelta[i] = h->h.lf_delta.ref[i];
 126
 127     for (i = 0; i < 7; i++)
 128         ppc->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];
 129
 130     for (i = 0; i < 3; i++) {
 131         ppc->activeRefIdx[i] = h->h.refidx[i];
 132         ppc->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
 133         ppc->refFrameSignBias[i + 1] = h->h.signbias[i];
 134     }
 135
 136     for (i = 0; i < 8; i++) {
 137         ppc->segmentFeatureEnable[i][0] = h->h.segmentation.feat[i].q_enabled;
 138         ppc->segmentFeatureEnable[i][1] = h->h.segmentation.feat[i].lf_enabled;
 139         ppc->segmentFeatureEnable[i][2] = h->h.segmentation.feat[i].ref_enabled;
 140         ppc->segmentFeatureEnable[i][3] = h->h.segmentation.feat[i].skip_enabled;
 141
 142         ppc->segmentFeatureData[i][0] = h->h.segmentation.feat[i].q_val;
 143         ppc->segmentFeatureData[i][1] = h->h.segmentation.feat[i].lf_val;
 144         ppc->segmentFeatureData[i][2] = h->h.segmentation.feat[i].ref_val;
 145         ppc->segmentFeatureData[i][3] = 0;
 146     }
 147
 148     switch (avctx->colorspace) {
 149     default:
 150     case AVCOL_SPC_UNSPECIFIED:
 151         ppc->colorSpace = 0;
 152         break;
 153     case AVCOL_SPC_BT470BG:
 154         ppc->colorSpace = 1;
 155         break;
 156     case AVCOL_SPC_BT709:
 157         ppc->colorSpace = 2;
 158         break;
 159     case AVCOL_SPC_SMPTE170M:
 160         ppc->colorSpace = 3;
 161         break;
 162     case AVCOL_SPC_SMPTE240M:
 163         ppc->colorSpace = 4;
 164         break;
 165     case AVCOL_SPC_BT2020_NCL:
 166         ppc->colorSpace = 5;
 167         break;
 168     case AVCOL_SPC_RESERVED:
 169         ppc->colorSpace = 6;
 170         break;
 171     case AVCOL_SPC_RGB:
 172         ppc->colorSpace = 7;
 173         break;
 174     }
 175
 176     return 0;
 177 }
 178
 179 static int nvdec_vp9_end_frame(AVCodecContext *avctx)
 180 {
 181     NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
 182     int ret = ff_nvdec_end_frame(avctx);
 183     ctx->bitstream = NULL;
 184     return ret;
 185 }
 186
 187 static int nvdec_vp9_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
 188 {
 189     NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
 190     void *tmp;
 191
 192     tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
 193                           (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
 194     if (!tmp)
 195         return AVERROR(ENOMEM);
 196     ctx->slice_offsets = tmp;
 197
 198     if (!ctx->bitstream)
 199         ctx->bitstream = (uint8_t*)buffer;
 200
 201     ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream;
 202     ctx->bitstream_len += size;
 203     ctx->nb_slices++;
 204
 205     return 0;
 206 }
 207
 208 static int nvdec_vp9_frame_params(AVCodecContext *avctx,
 209                                   AVBufferRef *hw_frames_ctx)
 210 {
 211     // VP9 uses a fixed size pool of 8 possible reference frames
 212     return ff_nvdec_frame_params(avctx, hw_frames_ctx, 8);
 213 }
 214
 215 AVHWAccel ff_vp9_nvdec_hwaccel = {
 216     .name                 = "vp9_nvdec",
 217     .type                 = AVMEDIA_TYPE_VIDEO,
 218     .id                   = AV_CODEC_ID_VP9,
 219     .pix_fmt              = AV_PIX_FMT_CUDA,
 220     .start_frame          = nvdec_vp9_start_frame,
 221     .end_frame            = nvdec_vp9_end_frame,
 222     .decode_slice         = nvdec_vp9_decode_slice,
 223     .frame_params         = nvdec_vp9_frame_params,
 224     .init                 = ff_nvdec_decode_init,
 225     .uninit               = ff_nvdec_decode_uninit,
 226     .priv_data_size       = sizeof(NVDECContext),
 227 };