2 * VP9 HW decode acceleration through NVDEC
4 * Copyright (c) 2016 Timo Rothenpieler
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/pixdesc.h"
29 #include "vp9shared.h"
// Resolve the NVDEC per-frame data attached to a reference frame.
// Follows frame->private_ref->data (a FrameDecodeData) to its hwaccel_priv
// (an NVDECFrame). The result type is unsigned char.
// NOTE(review): the embedded numbering jumps 31 -> 36 -> 39/40 -> 45, so the
// local declarations and both return statements are not visible in this
// excerpt. Presumably the normal path returns cf->idx (the same field used for
// CurrPicIdx in nvdec_vp9_start_frame) -- confirm against the full file, along
// with the value returned when the guard below fires.
31 static unsigned char get_ref_idx(AVFrame *frame)
// Guard: the frame pointer may be NULL, or the frame may carry no private_ref.
36 if (!frame || !frame->private_ref)
39 fdd = (FrameDecodeData*)frame->private_ref->data;
40 cf = (NVDECFrame*)fdd->hwaccel_priv;
// hwaccel start_frame callback for VP9: translate the parsed VP9 frame header
// held in the VP9SharedContext (avctx->priv_data) into the CUVIDPICPARAMS
// stored in the NVDEC context (ctx->pic_params).
//
// avctx  - codec context; priv_data is read as VP9SharedContext and
//          internal->hwaccel_priv_data as NVDECContext.
// buffer - not referenced in the lines visible in this excerpt.
// size   - not referenced in the lines visible in this excerpt.
//
// NOTE(review): the embedded numbering has gaps (e.g. 53-54, 56-58, 60-62,
// 149, 151-152, 169-178), so local declarations, the error check after
// ff_nvdec_start_frame(), the switch case bodies and the final return are not
// visible here.
45 static int nvdec_vp9_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
47 VP9SharedContext *h = avctx->priv_data;
// Descriptor of the software pixel format; supplies chroma subsampling and
// bit depth below.
// NOTE(review): no NULL check on pixdesc is visible before it is dereferenced.
48 const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
50 NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
// pp is the generic picture parameter struct, ppc its VP9-specific member.
51 CUVIDPICPARAMS *pp = &ctx->pic_params;
52 CUVIDVP9PICPARAMS *ppc = &pp->CodecSpecific.vp9;
55 AVFrame *cur_frame = h->frames[CUR_FRAME].tf.f;
// Shared NVDEC per-frame setup; its result is stored in ret.
59 ret = ff_nvdec_start_frame(avctx, cur_frame);
// Fetch the NVDECFrame attached to the current frame (same chain of
// dereferences as in get_ref_idx, without the NULL guard).
63 fdd = (FrameDecodeData*)cur_frame->private_ref->data;
64 cf = (NVDECFrame*)fdd->hwaccel_priv;
// Overwrite the whole struct with a compound literal; members not named here
// are zero-initialized by the language rules for designated initializers.
66 *pp = (CUVIDPICPARAMS) {
// Frame dimensions in 16-pixel units, rounded up.
67 .PicWidthInMbs = (cur_frame->width + 15) / 16,
68 .FrameHeightInMbs = (cur_frame->height + 15) / 16,
69 .CurrPicIdx = cf->idx,
71 .CodecSpecific.vp9 = {
72 .width = cur_frame->width,
73 .height = cur_frame->height,
// The three active references, selected through h->h.refidx[0..2] and mapped
// through get_ref_idx().
75 .LastRefIdx = get_ref_idx(h->refs[h->h.refidx[0]].f),
76 .GoldenRefIdx = get_ref_idx(h->refs[h->h.refidx[1]].f),
77 .AltRefIdx = get_ref_idx(h->refs[h->h.refidx[2]].f),
79 .profile = h->h.profile,
80 .frameContextIdx = h->h.framectxid,
// Inverted flags: frameType is 0 for a keyframe and 1 otherwise; showFrame is
// the negation of the "invisible" header flag.
81 .frameType = !h->h.keyframe,
82 .showFrame = !h->h.invisible,
83 .errorResilient = h->h.errorres,
84 .frameParallelDecoding = h->h.parallelmode,
85 .subSamplingX = pixdesc->log2_chroma_w,
86 .subSamplingY = pixdesc->log2_chroma_h,
87 .intraOnly = h->h.intraonly,
// High-precision motion vectors are forced off on keyframes.
88 .allow_high_precision_mv = h->h.keyframe ? 0 : h->h.highprecisionmvs,
89 .refreshEntropyProbs = h->h.refreshctx,
// Bit depth is passed as an offset from 8, taken from components 0 and 1 of
// the pixel format descriptor.
91 .bitDepthMinus8Luma = pixdesc->comp[0].depth - 8,
92 .bitDepthMinus8Chroma = pixdesc->comp[1].depth - 8,
94 .loopFilterLevel = h->h.filter.level,
95 .loopFilterSharpness = h->h.filter.sharpness,
96 .modeRefLfEnabled = h->h.lf_delta.enabled,
98 .log2_tile_columns = h->h.tiling.log2_tile_cols,
99 .log2_tile_rows = h->h.tiling.log2_tile_rows,
101 .segmentEnabled = h->h.segmentation.enabled,
102 .segmentMapUpdate = h->h.segmentation.update_map,
103 .segmentMapTemporalUpdate = h->h.segmentation.temporal,
104 .segmentFeatureMode = h->h.segmentation.absolute_vals,
// Quantizer: the luma AC index plus the three delta values from the header.
106 .qpYAc = h->h.yac_qi,
107 .qpYDc = h->h.ydc_qdelta,
108 .qpChDc = h->h.uvdc_qdelta,
109 .qpChAc = h->h.uvac_qdelta,
111 .resetFrameContext = h->h.resetctx,
// The XOR swaps filtermode values 0 and 1 and leaves values >= 2 unchanged.
// NOTE(review): presumably this remaps the decoder's filter enum order to the
// order the driver expects -- confirm against both enum definitions.
112 .mcomp_filter_type = h->h.filtermode ^ (h->h.filtermode <= 1),
// Header sizes: the uncompressed header size goes to frameTagSize and the
// compressed header size to offsetToDctParts.
114 .frameTagSize = h->h.uncompressed_header_size,
115 .offsetToDctParts = h->h.compressed_header_size,
// Slot 0 is fixed at 0; slots 1..3 are filled from h->h.signbias[] in the
// 3-iteration loop below.
117 .refFrameSignBias[0] = 0,
// Array members are copied element by element after the literal assignment.
// Loop-filter deltas: 2 mode deltas and 4 reference deltas.
121 for (i = 0; i < 2; i++)
122 ppc->mbModeLfDelta[i] = h->h.lf_delta.mode[i];
124 for (i = 0; i < 4; i++)
125 ppc->mbRefLfDelta[i] = h->h.lf_delta.ref[i];
// 7 segmentation tree probabilities.
127 for (i = 0; i < 7; i++)
128 ppc->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];
// Per-reference data for the 3 active references; note the +1 offset into
// refFrameSignBias.
130 for (i = 0; i < 3; i++) {
131 ppc->activeRefIdx[i] = h->h.refidx[i];
132 ppc->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
133 ppc->refFrameSignBias[i + 1] = h->h.signbias[i];
// Per-segment features for 8 segments. Column order is q, lf, ref, skip.
136 for (i = 0; i < 8; i++) {
137 ppc->segmentFeatureEnable[i][0] = h->h.segmentation.feat[i].q_enabled;
138 ppc->segmentFeatureEnable[i][1] = h->h.segmentation.feat[i].lf_enabled;
139 ppc->segmentFeatureEnable[i][2] = h->h.segmentation.feat[i].ref_enabled;
140 ppc->segmentFeatureEnable[i][3] = h->h.segmentation.feat[i].skip_enabled;
142 ppc->segmentFeatureData[i][0] = h->h.segmentation.feat[i].q_val;
143 ppc->segmentFeatureData[i][1] = h->h.segmentation.feat[i].lf_val;
144 ppc->segmentFeatureData[i][2] = h->h.segmentation.feat[i].ref_val;
// The fourth (skip) data slot is always written as 0.
145 ppc->segmentFeatureData[i][3] = 0;
// Dispatch on the codec context's colorspace.
// NOTE(review): only the case labels are visible in this excerpt. The case
// bodies, and any default label or further cases, are hidden by the numbering
// gaps, so the value assigned for each colorspace cannot be stated from here.
148 switch (avctx->colorspace) {
150 case AVCOL_SPC_UNSPECIFIED:
153 case AVCOL_SPC_BT470BG:
156 case AVCOL_SPC_BT709:
159 case AVCOL_SPC_SMPTE170M:
162 case AVCOL_SPC_SMPTE240M:
165 case AVCOL_SPC_BT2020_NCL:
168 case AVCOL_SPC_RESERVED:
// hwaccel end_frame callback for VP9: run the shared NVDEC end-of-frame step,
// then drop the context's bitstream pointer.
// NOTE(review): the return statement is not visible in this excerpt (the
// embedded numbering jumps 183 -> 187); presumably ret is returned -- confirm.
179 static int nvdec_vp9_end_frame(AVCodecContext *avctx)
181 NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
182 int ret = ff_nvdec_end_frame(avctx);
// Cleared after ff_nvdec_end_frame() has run; nvdec_vp9_decode_slice assigns
// this pointer from the caller's buffer.
183 ctx->bitstream = NULL;
// hwaccel decode_slice callback for VP9: record one chunk of bitstream data
// in the NVDEC context.
//
// avctx  - codec context; internal->hwaccel_priv_data is the NVDECContext.
// buffer - start of the data for this call.
// size   - byte length of that data; added to ctx->bitstream_len.
//
// Returns AVERROR(ENOMEM) on the allocation-failure path.
// NOTE(review): the embedded numbering has gaps (190-191, 194, 197-198, 200,
// 203-207), so the tmp declaration, both if-conditions, any update of
// ctx->nb_slices and the success return are not visible in this excerpt.
187 static int nvdec_vp9_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
189 NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
// Request room for one more slice offset (nb_slices + 1 entries). The result
// goes into tmp first, so ctx->slice_offsets is only replaced further down.
192 tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
193 (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
195 return AVERROR(ENOMEM);
196 ctx->slice_offsets = tmp;
// The cast drops the const qualifier from the caller's buffer.
// NOTE(review): presumably guarded by a check that ctx->bitstream is still
// unset, so only the first call of a frame sets the base -- the guarding line
// is not visible here.
199 ctx->bitstream = (uint8_t*)buffer;
// The offset of this chunk is stored relative to ctx->bitstream.
201 ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream;
202 ctx->bitstream_len += size;
// hwaccel frame_params callback for VP9: forwards avctx and hw_frames_ctx to
// ff_nvdec_frame_params() with a count of 8 and returns its result unchanged.
208 static int nvdec_vp9_frame_params(AVCodecContext *avctx,
209 AVBufferRef *hw_frames_ctx)
211 // VP9 uses a fixed size pool of 8 possible reference frames
212 return ff_nvdec_frame_params(avctx, hw_frames_ctx, 8);
// Hardware acceleration descriptor that wires the VP9 callbacks defined in
// this file, plus the shared ff_nvdec_decode_init / ff_nvdec_decode_uninit,
// into an AVHWAccel for codec id AV_CODEC_ID_VP9 with pixel format
// AV_PIX_FMT_CUDA.
// NOTE(review): the embedded numbering jumps 215 -> 217, so at least one
// initializer line (and the closing brace) is not visible in this excerpt.
215 AVHWAccel ff_vp9_nvdec_hwaccel = {
217 .type = AVMEDIA_TYPE_VIDEO,
218 .id = AV_CODEC_ID_VP9,
219 .pix_fmt = AV_PIX_FMT_CUDA,
220 .start_frame = nvdec_vp9_start_frame,
221 .end_frame = nvdec_vp9_end_frame,
222 .decode_slice = nvdec_vp9_decode_slice,
223 .frame_params = nvdec_vp9_frame_params,
224 .init = ff_nvdec_decode_init,
225 .uninit = ff_nvdec_decode_uninit,
// Size of the per-codec private context allocated for this hwaccel.
226 .priv_data_size = sizeof(NVDECContext),