2 * MPEG-H Part 2 / HEVC / H.265 HW decode acceleration through VDPAU
4 * Copyright (c) 2013 Philip Langdale
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <vdpau/vdpau.h>
27 #include "hevc_data.h"
31 #include "vdpau_internal.h"
// Fill the VDPAU HEVC picture info for the current frame (h->ref).
//
// Reads the active SPS, PPS and slice header from the HEVCContext stored in
// avctx->priv_data and copies them into the VdpPictureInfoHEVC held in the
// frame's hwaccel picture context. It then fills the RefPics, PicOrderCntVal
// and IsLongTerm arrays from the DPB and the three RefPicSet index lists.
//
// avctx:  codec context; priv_data is used as the HEVCContext.
// buffer: passed through unchanged to ff_vdpau_common_start_frame().
// size:   passed through unchanged to ff_vdpau_common_start_frame().
// Returns the value returned by ff_vdpau_common_start_frame().
//
// NOTE(review): the embedded line numbering in this copy has gaps, so
// brace-only and other short lines (block openers and closers, increments,
// some comment endings) are not visible here. Confirm the control flow
// against the complete file before relying on these notes.
33 static int vdpau_hevc_start_frame(AVCodecContext *avctx,
34 const uint8_t *buffer, uint32_t size)
36 HEVCContext *h = avctx->priv_data;
37 HEVCFrame *pic = h->ref;
38 struct vdpau_picture_context *pic_ctx = pic->hwaccel_picture_private;
40 VdpPictureInfoHEVC *info = &pic_ctx->info.hevc;
// info2 points at the hevc_444 member of the same pic_ctx->info and is only
// declared when the VDPAU headers define VDP_YCBCR_FORMAT_Y_U_V_444.
41 #ifdef VDP_YCBCR_FORMAT_Y_U_V_444
42 VdpPictureInfoHEVC444 *info2 = &pic_ctx->info.hevc_444;
45 const HEVCSPS *sps = h->ps.sps;
46 const HEVCPPS *pps = h->ps.pps;
47 const SliceHeader *sh = &h->sh;
// The PPS scaling list is used when the PPS flags its own list data as
// present; otherwise the SPS scaling list is used.
48 const ScalingList *sl = pps->scaling_list_data_present_flag ?
49 &pps->scaling_list : &sps->scaling_list;
51 /* init VdpPictureInfoHEVC */
54 info->chroma_format_idc = sps->chroma_format_idc;
55 info->separate_colour_plane_flag = sps->separate_colour_plane_flag;
56 info->pic_width_in_luma_samples = sps->width;
57 info->pic_height_in_luma_samples = sps->height;
58 info->bit_depth_luma_minus8 = sps->bit_depth - 8;
// Chroma depth is filled from the same sps->bit_depth as luma.
// NOTE(review): presumably luma and chroma depths are always equal here; confirm.
59 info->bit_depth_chroma_minus8 = sps->bit_depth - 8;
60 info->log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4;
// The assignment below indexes temporal_layer with max_sub_layers - 1,
// i.e. it always takes the entry of the last sub-layer listed in the SPS.
61 /* Provide the value corresponding to the nuh_temporal_id of the frame
63 info->sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1;
64 info->log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3;
65 info->log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size;
66 info->log2_min_transform_block_size_minus2 = sps->log2_min_tb_size - 2;
67 info->log2_diff_max_min_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size;
68 info->max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
69 info->max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
70 info->scaling_list_enabled_flag = sps->scaling_list_enable_flag;
71 /* Scaling lists, in diagonal order, to be used for this frame. */
// Outer loop: matrix id i. Inner loops: j is the position in diagonal scan
// order; pos = width * y + x is the index used to read the source list.
72 for (size_t i = 0; i < 6; i++) {
73 for (size_t j = 0; j < 16; j++) {
74 /* Scaling List for 4x4 quantization matrix,
75 indexed as ScalingList4x4[matrixId][i]. */
76 uint8_t pos = 4 * ff_hevc_diag_scan4x4_y[j] + ff_hevc_diag_scan4x4_x[j];
77 info->ScalingList4x4[i][j] = sl->sl[0][i][pos];
79 for (size_t j = 0; j < 64; j++) {
80 uint8_t pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j];
81 /* Scaling List for 8x8 quantization matrix,
82 indexed as ScalingList8x8[matrixId][i]. */
83 info->ScalingList8x8[i][j] = sl->sl[1][i][pos];
84 /* Scaling List for 16x16 quantization matrix,
85 indexed as ScalingList16x16[matrixId][i]. */
86 info->ScalingList16x16[i][j] = sl->sl[2][i][pos];
// The 32x32 entries read source matrix i * 3.
// NOTE(review): no condition restricting i is visible around the 32x32
// assignments in this copy, while the outer loop runs i up to 5; confirm
// the guard and the array bounds against the complete file.
88 /* Scaling List for 32x32 quantization matrix,
89 indexed as ScalingList32x32[matrixId][i]. */
90 info->ScalingList32x32[i][j] = sl->sl[3][i * 3][pos];
93 /* Scaling List DC Coefficients for 16x16,
94 indexed as ScalingListDCCoeff16x16[matrixId]. */
95 info->ScalingListDCCoeff16x16[i] = sl->sl_dc[0][i];
97 /* Scaling List DC Coefficients for 32x32,
98 indexed as ScalingListDCCoeff32x32[matrixId]. */
99 info->ScalingListDCCoeff32x32[i] = sl->sl_dc[1][i * 3];
102 info->amp_enabled_flag = sps->amp_enabled_flag;
103 info->sample_adaptive_offset_enabled_flag = sps->sao_enabled;
104 info->pcm_enabled_flag = sps->pcm_enabled_flag;
105 if (info->pcm_enabled_flag) {
106 /* Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
107 info->pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1;
108 /* Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
109 info->pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1;
110 /* Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
111 info->log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3;
112 /* Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
113 info->log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size;
114 /* Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
115 info->pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag;
117 /* Per spec, when zero, assume short_term_ref_pic_set_sps_flag
119 info->num_short_term_ref_pic_sets = sps->nb_st_rps;
120 info->long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag;
121 /* Only needed if long_term_ref_pics_present_flag is set. Ignored
123 info->num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps;
124 info->sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag;
125 info->strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag;
127 /* Copy the HEVC Picture Parameter Set bitstream fields. */
128 info->dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag;
129 info->output_flag_present_flag = pps->output_flag_present_flag;
130 info->num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
131 info->sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag;
132 info->cabac_init_present_flag = pps->cabac_init_present_flag;
133 info->num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1;
134 info->num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1;
135 info->init_qp_minus26 = pps->pic_init_qp_minus26;
136 info->constrained_intra_pred_flag = pps->constrained_intra_pred_flag;
137 info->transform_skip_enabled_flag = pps->transform_skip_enabled_flag;
138 info->cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag;
139 /* Only needed if cu_qp_delta_enabled_flag is set. Ignored otherwise. */
140 info->diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
141 info->pps_cb_qp_offset = pps->cb_qp_offset;
142 info->pps_cr_qp_offset = pps->cr_qp_offset;
143 info->pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag;
144 info->weighted_pred_flag = pps->weighted_pred_flag;
145 info->weighted_bipred_flag = pps->weighted_bipred_flag;
146 info->transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag;
147 info->tiles_enabled_flag = pps->tiles_enabled_flag;
148 info->entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag;
149 if (info->tiles_enabled_flag) {
150 /* Only valid if tiles_enabled_flag is set. Ignored otherwise. */
151 info->num_tile_columns_minus1 = pps->num_tile_columns - 1;
152 /* Only valid if tiles_enabled_flag is set. Ignored otherwise. */
153 info->num_tile_rows_minus1 = pps->num_tile_rows - 1;
154 /* Only valid if tiles_enabled_flag is set. Ignored otherwise. */
155 info->uniform_spacing_flag = pps->uniform_spacing_flag;
156 /* Only need to set 0..num_tile_columns_minus1. The struct
157 definition reserves up to the maximum of 20. Invalid values are
159 for (ssize_t i = 0; i < pps->num_tile_columns; i++) {
160 info->column_width_minus1[i] = pps->column_width[i] - 1;
162 /* Only need to set 0..num_tile_rows_minus1. The struct
163 definition reserves up to the maximum of 22. Invalid values are
165 for (ssize_t i = 0; i < pps->num_tile_rows; i++) {
166 info->row_height_minus1[i] = pps->row_height[i] - 1;
168 /* Only needed if tiles_enabled_flag is set. Invalid values are
170 info->loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag;
172 info->pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag;
173 info->deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag;
174 /* Only valid if deblocking_filter_control_present_flag is set. Ignored
176 info->deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag;
177 /* Only valid if deblocking_filter_control_present_flag is set. Ignored
179 info->pps_deblocking_filter_disabled_flag = pps->disable_dbf;
180 /* Only valid if deblocking_filter_control_present_flag is set and
181 pps_deblocking_filter_disabled_flag is not set. Ignored otherwise.*/
// The stored beta and tc offsets are halved to produce the div2 fields.
182 info->pps_beta_offset_div2 = pps->beta_offset / 2;
183 /* Only valid if deblocking_filter_control_present_flag is set and
184 pps_deblocking_filter_disabled_flag is not set. Ignored otherwise. */
185 info->pps_tc_offset_div2 = pps->tc_offset / 2;
186 info->lists_modification_present_flag = pps->lists_modification_present_flag;
187 info->log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2;
188 info->slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag;
190 /* Set to 1 if nal_unit_type is equal to IDR_W_RADL or IDR_N_LP.
191 Set to zero otherwise. */
192 info->IDRPicFlag = IS_IDR(h);
193 /* Set to 1 if nal_unit_type in the range of BLA_W_LP to
194 RSV_IRAP_VCL23, inclusive. Set to zero otherwise.*/
195 info->RAPPicFlag = IS_IRAP(h);
// CurrRpsIdx starts as sps->nb_st_rps. When the slice selects an RPS from
// the SPS, it is replaced by the index whose address matches
// sh->short_term_rps.
196 /* See section 7.4.7.1 of the specification. */
197 info->CurrRpsIdx = sps->nb_st_rps;
198 if (sh->short_term_ref_pic_set_sps_flag == 1) {
199 for (size_t i = 0; i < sps->nb_st_rps; i++) {
200 if (sh->short_term_rps == &sps->st_rps[i]) {
201 info->CurrRpsIdx = i;
206 /* See section 7.4.7.2 of the specification. */
207 info->NumPocTotalCurr = ff_hevc_frame_nb_refs(h);
208 if (sh->short_term_ref_pic_set_sps_flag == 0 && sh->short_term_rps) {
209 /* Corresponds to specification field, NumDeltaPocs[RefRpsIdx].
210 Only applicable when short_term_ref_pic_set_sps_flag == 0.
211 Implementations will ignore this value in other cases. See 7.4.8. */
212 info->NumDeltaPocsOfRefRpsIdx = sh->short_term_rps->rps_idx_num_delta_pocs;
214 /* Section 7.6.3.1 of the H.265/HEVC Specification defines the syntax of
215 the slice_segment_header. This header contains information that
216 some VDPAU implementations may choose to skip. The VDPAU API
217 requires client applications to track the number of bits used in the
218 slice header for structures associated with short term and long term
219 reference pictures. First, VDPAU requires the number of bits used by
220 the short_term_ref_pic_set array in the slice_segment_header. */
221 info->NumShortTermPictureSliceHeaderBits = sh->short_term_ref_pic_set_size;
222 /* Second, VDPAU requires the number of bits used for long term reference
223 pictures in the slice_segment_header. This is equal to the number
224 of bits used for the contents of the block beginning with
225 "if(long_term_ref_pics_present_flag)". */
226 info->NumLongTermPictureSliceHeaderBits = sh->long_term_ref_pic_set_size;
228 /* The value of PicOrderCntVal of the picture in the access unit
229 containing the SEI message. The picture being decoded. */
230 info->CurrPicOrderCntVal = h->poc;
// Mark all 16 reference slots as unused before filling them from the DPB.
232 /* Slice Decoding Process - Reference Picture Sets */
233 for (size_t i = 0; i < 16; i++) {
234 info->RefPics[i] = VDP_INVALID_HANDLE;
235 info->PicOrderCntVal[i] = 0;
236 info->IsLongTerm[i] = 0;
// Walk the DPB: i is the DPB index, j is the RefPics slot being written.
// The current frame and frames without a short-term or long-term reference
// flag are skipped.
238 for (size_t i = 0, j = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
239 const HEVCFrame *frame = &h->DPB[i];
240 if (frame != h->ref && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF |
241 HEVC_FRAME_FLAG_SHORT_REF))) {
// NOTE(review): the condition guarding this warning and the update of j
// are not visible in this copy.
243 av_log(avctx, AV_LOG_WARNING,
244 "VDPAU only supports up to 16 references in the DPB. "
245 "This frame may not be decoded correctly.\n");
248 /* Array of video reference surfaces.
249 Set any unused positions to VDP_INVALID_HANDLE. */
250 info->RefPics[j] = ff_vdpau_get_surface_id(frame->frame);
251 /* Array of picture order counts. These correspond to positions
252 in the RefPics array. */
253 info->PicOrderCntVal[j] = frame->poc;
254 /* Array used to specify whether a particular RefPic is
255 a long term reference. A value of "1" indicates a long-term
257 // XXX: Setting this caused glitches in the nvidia implementation
258 // Always setting it to zero, produces correct results
259 //info->IsLongTerm[j] = frame->flags & HEVC_FRAME_FLAG_LONG_REF;
260 info->IsLongTerm[j] = 0;
// Each of the three counts below is taken from h->rps and clamped to 8,
// with a warning logged when the clamp applies.
264 /* Copy of specification field, see Section 8.3.2 of the
265 H.265/HEVC Specification. */
266 info->NumPocStCurrBefore = h->rps[ST_CURR_BEF].nb_refs;
267 if (info->NumPocStCurrBefore > 8) {
268 av_log(avctx, AV_LOG_WARNING,
269 "VDPAU only supports up to 8 references in StCurrBefore. "
270 "This frame may not be decoded correctly.\n");
271 info->NumPocStCurrBefore = 8;
273 /* Copy of specification field, see Section 8.3.2 of the
274 H.265/HEVC Specification. */
275 info->NumPocStCurrAfter = h->rps[ST_CURR_AFT].nb_refs;
276 if (info->NumPocStCurrAfter > 8) {
277 av_log(avctx, AV_LOG_WARNING,
278 "VDPAU only supports up to 8 references in StCurrAfter. "
279 "This frame may not be decoded correctly.\n");
280 info->NumPocStCurrAfter = 8;
282 /* Copy of specification field, see Section 8.3.2 of the
283 H.265/HEVC Specification. */
284 info->NumPocLtCurr = h->rps[LT_CURR].nb_refs;
285 if (info->NumPocLtCurr > 8) {
286 av_log(avctx, AV_LOG_WARNING,
287 "VDPAU only supports up to 8 references in LtCurr. "
288 "This frame may not be decoded correctly.\n");
289 info->NumPocLtCurr = 8;
// The three loops below share one pattern: for each reference in the RPS
// list, get its surface id, search the 16 RefPics slots for that id, and
// store the matching slot index k at output position j.
// NOTE(review): the loop bounds use the unclamped nb_refs, and the lines
// that advance j or stop the search are not visible in this copy; confirm
// the output arrays cannot be overrun.
291 /* Reference Picture Set list, one of the short-term RPS. These
292 correspond to positions in the RefPics array. */
293 for (ssize_t i = 0, j = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) {
294 HEVCFrame *frame = h->rps[ST_CURR_BEF].ref[i];
297 uintptr_t id = ff_vdpau_get_surface_id(frame->frame);
298 for (size_t k = 0; k < 16; k++) {
299 if (id == info->RefPics[k]) {
300 info->RefPicSetStCurrBefore[j] = k;
307 av_log(avctx, AV_LOG_WARNING, "missing surface: %p\n",
311 av_log(avctx, AV_LOG_WARNING, "missing STR Before frame: %zd\n", i);
314 /* Reference Picture Set list, one of the short-term RPS. These
315 correspond to positions in the RefPics array. */
316 for (ssize_t i = 0, j = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) {
317 HEVCFrame *frame = h->rps[ST_CURR_AFT].ref[i];
320 uintptr_t id = ff_vdpau_get_surface_id(frame->frame);
321 for (size_t k = 0; k < 16; k++) {
322 if (id == info->RefPics[k]) {
323 info->RefPicSetStCurrAfter[j] = k;
330 av_log(avctx, AV_LOG_WARNING, "missing surface: %p\n",
334 av_log(avctx, AV_LOG_WARNING, "missing STR After frame: %zd\n", i);
337 /* Reference Picture Set list, one of the long-term RPS. These
338 correspond to positions in the RefPics array. */
339 for (ssize_t i = 0, j = 0; i < h->rps[LT_CURR].nb_refs; i++) {
340 HEVCFrame *frame = h->rps[LT_CURR].ref[i];
343 uintptr_t id = ff_vdpau_get_surface_id(frame->frame);
344 for (size_t k = 0; k < 16; k++) {
345 if (id == info->RefPics[k]) {
346 info->RefPicSetLtCurr[j] = k;
353 av_log(avctx, AV_LOG_WARNING, "missing surface: %p\n",
357 av_log(avctx, AV_LOG_WARNING, "missing LTR frame: %zd\n", i);
// Range-extension fields are written through info2 and are only compiled
// when VDP_YCBCR_FORMAT_Y_U_V_444 is defined. Each extension flag is set to
// 1 or 0 according to the matching SPS / PPS range-extension flag.
361 #ifdef VDP_YCBCR_FORMAT_Y_U_V_444
362 if (sps->sps_range_extension_flag) {
363 info2->sps_range_extension_flag = 1;
364 info2->transformSkipRotationEnableFlag = sps->transform_skip_rotation_enabled_flag;
365 info2->transformSkipContextEnableFlag = sps->transform_skip_context_enabled_flag;
366 info2->implicitRdpcmEnableFlag = sps->implicit_rdpcm_enabled_flag;
367 info2->explicitRdpcmEnableFlag = sps->explicit_rdpcm_enabled_flag;
368 info2->extendedPrecisionProcessingFlag = sps->extended_precision_processing_flag;
369 info2->intraSmoothingDisabledFlag = sps->intra_smoothing_disabled_flag;
370 info2->highPrecisionOffsetsEnableFlag = sps->high_precision_offsets_enabled_flag;
371 info2->persistentRiceAdaptationEnableFlag = sps->persistent_rice_adaptation_enabled_flag;
372 info2->cabacBypassAlignmentEnableFlag = sps->cabac_bypass_alignment_enabled_flag;
374 info2->sps_range_extension_flag = 0;
376 if (pps->pps_range_extensions_flag) {
377 info2->pps_range_extension_flag = 1;
378 info2->log2MaxTransformSkipSize = pps->log2_max_transform_skip_block_size;
379 info2->crossComponentPredictionEnableFlag = pps->cross_component_prediction_enabled_flag;
380 info2->chromaQpAdjustmentEnableFlag = pps->chroma_qp_offset_list_enabled_flag;
381 info2->diffCuChromaQpAdjustmentDepth = pps->diff_cu_chroma_qp_offset_depth;
382 info2->chromaQpAdjustmentTableSize = pps->chroma_qp_offset_list_len_minus1 + 1;
383 info2->log2SaoOffsetScaleLuma = pps->log2_sao_offset_scale_luma;
384 info2->log2SaoOffsetScaleChroma = pps->log2_sao_offset_scale_chroma;
385 for (ssize_t i = 0; i < info2->chromaQpAdjustmentTableSize; i++)
387 info2->cb_qp_adjustment[i] = pps->cb_qp_offset_list[i];
388 info2->cr_qp_adjustment[i] = pps->cr_qp_offset_list[i];
392 info2->pps_range_extension_flag = 0;
// Hand the picture context and the caller-supplied buffer to the shared
// VDPAU start-of-frame helper; its result is this function's return value.
396 return ff_vdpau_common_start_frame(pic_ctx, buffer, size);
// Three-byte sequence 00 00 01 that vdpau_hevc_decode_slice() adds to the
// picture context buffer list immediately before each slice buffer.
399 static const uint8_t start_code_prefix[3] = { 0x00, 0x00, 0x01 };
// Queue one slice for the current frame (h->ref).
//
// Two buffers are added to the frame's picture context through
// ff_vdpau_add_buffer(): first the 3-byte start_code_prefix, then the
// caller-supplied slice data.
//
// avctx:  codec context; priv_data is used as the HEVCContext.
// buffer: slice data, added as given.
// size:   length of buffer in bytes, passed as given.
//
// NOTE(review): the declaration of val, any checks on it and the return
// statement are not visible in this copy; confirm the error handling and
// return value against the complete file.
401 static int vdpau_hevc_decode_slice(AVCodecContext *avctx,
402 const uint8_t *buffer, uint32_t size)
404 HEVCContext *h = avctx->priv_data;
405 struct vdpau_picture_context *pic_ctx = h->ref->hwaccel_picture_private;
408 val = ff_vdpau_add_buffer(pic_ctx, start_code_prefix, 3);
412 val = ff_vdpau_add_buffer(pic_ctx, buffer, size);
// Finish the current frame (h->ref) by passing its AVFrame and hwaccel
// picture context to ff_vdpau_common_end_frame().
//
// avctx: codec context; priv_data is used as the HEVCContext.
//
// NOTE(review): the declaration of val, any check on it and the return
// statement are not visible in this copy; confirm the return value against
// the complete file.
419 static int vdpau_hevc_end_frame(AVCodecContext *avctx)
421 HEVCContext *h = avctx->priv_data;
422 struct vdpau_picture_context *pic_ctx = h->ref->hwaccel_picture_private;
425 val = ff_vdpau_common_end_frame(avctx, h->ref->frame, pic_ctx);
// Choose the VDPAU decoder profile for the stream and initialise the shared
// VDPAU decoder state.
//
// avctx->profile is mapped as follows:
//   FF_PROFILE_HEVC_MAIN               -> VDP_DECODER_PROFILE_HEVC_MAIN
//   FF_PROFILE_HEVC_MAIN_10            -> VDP_DECODER_PROFILE_HEVC_MAIN_10
//   FF_PROFILE_HEVC_MAIN_STILL_PICTURE -> VDP_DECODER_PROFILE_HEVC_MAIN_STILL
//   FF_PROFILE_HEVC_REXT               -> VDP_DECODER_PROFILE_HEVC_MAIN_444
// avctx->level is passed through as the level argument.
//
// Returns the result of ff_vdpau_common_init(). AVERROR(ENOTSUP) is returned
// from inside the switch, presumably for profiles not listed above.
// NOTE(review): the label for that branch and the statements ending each
// case are not visible in this copy; confirm against the complete file.
432 static int vdpau_hevc_init(AVCodecContext *avctx)
434 VdpDecoderProfile profile;
435 uint32_t level = avctx->level;
437 switch (avctx->profile) {
438 case FF_PROFILE_HEVC_MAIN:
439 profile = VDP_DECODER_PROFILE_HEVC_MAIN;
441 case FF_PROFILE_HEVC_MAIN_10:
442 profile = VDP_DECODER_PROFILE_HEVC_MAIN_10;
444 case FF_PROFILE_HEVC_MAIN_STILL_PICTURE:
445 profile = VDP_DECODER_PROFILE_HEVC_MAIN_STILL;
447 case FF_PROFILE_HEVC_REXT:
448 profile = VDP_DECODER_PROFILE_HEVC_MAIN_444;
451 return AVERROR(ENOTSUP);
454 return ff_vdpau_common_init(avctx, profile, level);
// Hardware acceleration descriptor for HEVC decoding with the VDPAU pixel
// format. The start_frame, end_frame, decode_slice and init callbacks are
// the static functions defined in this file; uninit and frame_params use
// the shared ff_vdpau_common_* helpers.
// NOTE(review): the end of this initializer is not visible in this copy.
457 const AVHWAccel ff_hevc_vdpau_hwaccel = {
458 .name = "hevc_vdpau",
459 .type = AVMEDIA_TYPE_VIDEO,
460 .id = AV_CODEC_ID_HEVC,
461 .pix_fmt = AV_PIX_FMT_VDPAU,
462 .start_frame = vdpau_hevc_start_frame,
463 .end_frame = vdpau_hevc_end_frame,
464 .decode_slice = vdpau_hevc_decode_slice,
// Size of the per-frame private data that the callbacks read through
// hwaccel_picture_private.
465 .frame_priv_data_size = sizeof(struct vdpau_picture_context),
466 .init = vdpau_hevc_init,
467 .uninit = ff_vdpau_common_uninit,
468 .frame_params = ff_vdpau_common_frame_params,
469 .priv_data_size = sizeof(VDPAUContext),
470 .caps_internal = HWACCEL_CAP_ASYNC_SAFE,