git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40 #include "profiles.h"
  41
/*
 * Three four-entry tables with external linkage (index range 0..3).
 * For every index i the values satisfy
 *     ff_hevc_qpel_extra[i] == ff_hevc_qpel_extra_before[i] +
 *                              ff_hevc_qpel_extra_after[i]
 * and index 0 needs no extra at all.
 * NOTE(review): presumably the index is the fractional (quarter-sample) part
 * of a luma motion vector and the values count the additional samples the
 * interpolation filter reads before/after the block -- confirm against the
 * code that uses these tables.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 3 };
const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 4, 4, 4 };
const uint8_t ff_hevc_qpel_extra[4]        = { 0, 7, 7, 7 };
  45
/*
 * Scan-order lookup tables.
 *
 * The "*_x" / "*_y" tables map a scan position to a coordinate; the "*_inv"
 * tables go the other way and map a coordinate pair to a scan position (each
 * of them is a permutation of 0..N-1).
 * NOTE(review): the "*_inv" tables look like they are indexed [y][x] --
 * confirm against the code that reads them.
 */

/* Trivial scan of a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/* Row-by-row scan of a 2x2 grid: positions visit (x,y) = (0,0) (1,0) (0,1) (1,1). */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };

static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };

/* Row-by-row scan of a 4x4 grid: x cycles 0..3 while y advances every four positions. */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,
    0, 1, 2, 3,
    0, 1, 2, 3,
    0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
};

/*
 * Coordinate -> position table for an 8x8 grid. The grid is covered in four
 * runs of 16 consecutive positions: each half-width (4 column) strip of the
 * upper four rows is numbered row by row (0..15, then 16..31), followed by
 * the same pattern for the lower four rows (32..47, then 48..63).
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3, 16, 17, 18, 19, },
    {  4,  5,  6,  7, 20, 21, 22, 23, },
    {  8,  9, 10, 11, 24, 25, 26, 27, },
    { 12, 13, 14, 15, 28, 29, 30, 31, },
    { 32, 33, 34, 35, 48, 49, 50, 51, },
    { 36, 37, 38, 39, 52, 53, 54, 55, },
    { 40, 41, 42, 43, 56, 57, 58, 59, },
    { 44, 45, 46, 47, 60, 61, 62, 63, },
};

/* Diagonal scan of a 2x2 grid: positions visit (x,y) = (0,0) (0,1) (1,0) (1,1). */
static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };

static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };

/*
 * Coordinate -> position tables for the diagonal scan. Positions are handed
 * out anti-diagonal by anti-diagonal; inside one anti-diagonal the position
 * grows while the first index decreases and the second index increases.
 */
static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2, },
    { 1, 3, },
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9, },
    { 1,  4,  8, 12, },
    { 3,  7, 11, 14, },
    { 6, 10, 13, 15, },
};

/* Same diagonal pattern applied to the full 8x8 grid (no 4x4 sub-division). */
static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35, },
    {  1,  4,  8, 13, 19, 26, 34, 42, },
    {  3,  7, 12, 18, 25, 33, 41, 48, },
    {  6, 11, 17, 24, 32, 40, 47, 53, },
    { 10, 16, 23, 31, 39, 46, 52, 57, },
    { 15, 22, 30, 38, 45, 51, 56, 60, },
    { 21, 29, 37, 44, 50, 55, 59, 62, },
    { 28, 36, 43, 49, 54, 58, 61, 63, },
};
 103
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
 108
 109 /**
 110  * Section 5.7
 111  */
 112
 113 /* free everything allocated  by pic_arrays_init() */
 114 static void pic_arrays_free(HEVCContext *s)
 115 {
 116     av_freep(&s->sao);
 117     av_freep(&s->deblock);
 118
 119     av_freep(&s->skip_flag);
 120     av_freep(&s->tab_ct_depth);
 121
 122     av_freep(&s->tab_ipm);
 123     av_freep(&s->cbf_luma);
 124     av_freep(&s->is_pcm);
 125
 126     av_freep(&s->qp_y_tab);
 127     av_freep(&s->tab_slice_address);
 128     av_freep(&s->filter_slice_edges);
 129
 130     av_freep(&s->horizontal_bs);
 131     av_freep(&s->vertical_bs);
 132
 133     av_buffer_pool_uninit(&s->tab_mvf_pool);
 134     av_buffer_pool_uninit(&s->rpl_tab_pool);
 135 }
 136
 137 /* allocate arrays that depend on frame dimensions */
 138 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 139 {
 140     int log2_min_cb_size = sps->log2_min_cb_size;
 141     int width            = sps->width;
 142     int height           = sps->height;
 143     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 144                            ((height >> log2_min_cb_size) + 1);
 145     int ctb_count        = sps->ctb_width * sps->ctb_height;
 146     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 147
 148     s->bs_width  = width  >> 3;
 149     s->bs_height = height >> 3;
 150
 151     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 152     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 153     if (!s->sao || !s->deblock)
 154         goto fail;
 155
 156     s->skip_flag    = av_malloc(pic_size_in_ctb);
 157     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 158     if (!s->skip_flag || !s->tab_ct_depth)
 159         goto fail;
 160
 161     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 162     s->tab_ipm  = av_mallocz(min_pu_size);
 163     s->is_pcm   = av_malloc(min_pu_size);
 164     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 165         goto fail;
 166
 167     s->filter_slice_edges = av_malloc(ctb_count);
 168     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 169                                       sizeof(*s->tab_slice_address));
 170     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 171                                       sizeof(*s->qp_y_tab));
 172     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 173         goto fail;
 174
 175     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 176     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 177     if (!s->horizontal_bs || !s->vertical_bs)
 178         goto fail;
 179
 180     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 181                                           av_buffer_alloc);
 182     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 183                                           av_buffer_allocz);
 184     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 185         goto fail;
 186
 187     return 0;
 188
 189 fail:
 190     pic_arrays_free(s);
 191     return AVERROR(ENOMEM);
 192 }
 193
 194 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 195 {
 196     int i = 0;
 197     int j = 0;
 198     uint8_t luma_weight_l0_flag[16];
 199     uint8_t chroma_weight_l0_flag[16];
 200     uint8_t luma_weight_l1_flag[16];
 201     uint8_t chroma_weight_l1_flag[16];
 202
 203     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
 204     if (s->ps.sps->chroma_format_idc != 0) {
 205         int delta = get_se_golomb(gb);
 206         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 207     }
 208
 209     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 210         luma_weight_l0_flag[i] = get_bits1(gb);
 211         if (!luma_weight_l0_flag[i]) {
 212             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 213             s->sh.luma_offset_l0[i] = 0;
 214         }
 215     }
 216     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 217         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 218             chroma_weight_l0_flag[i] = get_bits1(gb);
 219     } else {
 220         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 221             chroma_weight_l0_flag[i] = 0;
 222     }
 223     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 224         if (luma_weight_l0_flag[i]) {
 225             int delta_luma_weight_l0 = get_se_golomb(gb);
 226             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 227             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 228         }
 229         if (chroma_weight_l0_flag[i]) {
 230             for (j = 0; j < 2; j++) {
 231                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 232                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 233                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 234                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 235                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 236             }
 237         } else {
 238             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 239             s->sh.chroma_offset_l0[i][0] = 0;
 240             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 241             s->sh.chroma_offset_l0[i][1] = 0;
 242         }
 243     }
 244     if (s->sh.slice_type == B_SLICE) {
 245         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 246             luma_weight_l1_flag[i] = get_bits1(gb);
 247             if (!luma_weight_l1_flag[i]) {
 248                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 249                 s->sh.luma_offset_l1[i] = 0;
 250             }
 251         }
 252         if (s->ps.sps->chroma_format_idc != 0) {
 253             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 254                 chroma_weight_l1_flag[i] = get_bits1(gb);
 255         } else {
 256             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 257                 chroma_weight_l1_flag[i] = 0;
 258         }
 259         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 260             if (luma_weight_l1_flag[i]) {
 261                 int delta_luma_weight_l1 = get_se_golomb(gb);
 262                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 263                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 264             }
 265             if (chroma_weight_l1_flag[i]) {
 266                 for (j = 0; j < 2; j++) {
 267                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 268                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 269                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 270                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 271                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 272                 }
 273             } else {
 274                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 275                 s->sh.chroma_offset_l1[i][0] = 0;
 276                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 277                 s->sh.chroma_offset_l1[i][1] = 0;
 278             }
 279         }
 280     }
 281 }
 282
 283 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 284 {
 285     const HEVCSPS *sps = s->ps.sps;
 286     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 287     int prev_delta_msb = 0;
 288     unsigned int nb_sps = 0, nb_sh;
 289     int i;
 290
 291     rps->nb_refs = 0;
 292     if (!sps->long_term_ref_pics_present_flag)
 293         return 0;
 294
 295     if (sps->num_long_term_ref_pics_sps > 0)
 296         nb_sps = get_ue_golomb_long(gb);
 297     nb_sh = get_ue_golomb_long(gb);
 298
 299     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 300         return AVERROR_INVALIDDATA;
 301
 302     rps->nb_refs = nb_sh + nb_sps;
 303
 304     for (i = 0; i < rps->nb_refs; i++) {
 305         uint8_t delta_poc_msb_present;
 306
 307         if (i < nb_sps) {
 308             uint8_t lt_idx_sps = 0;
 309
 310             if (sps->num_long_term_ref_pics_sps > 1)
 311                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 312
 313             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 314             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 315         } else {
 316             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 317             rps->used[i] = get_bits1(gb);
 318         }
 319
 320         delta_poc_msb_present = get_bits1(gb);
 321         if (delta_poc_msb_present) {
 322             int delta = get_ue_golomb_long(gb);
 323
 324             if (i && i != nb_sps)
 325                 delta += prev_delta_msb;
 326
 327             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 328             prev_delta_msb = delta;
 329         }
 330     }
 331
 332     return 0;
 333 }
 334
 335 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
 336                                  const HEVCSPS *sps)
 337 {
 338     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 339     unsigned int num = 0, den = 0;
 340
 341     avctx->pix_fmt             = sps->pix_fmt;
 342     avctx->coded_width         = sps->width;
 343     avctx->coded_height        = sps->height;
 344     avctx->width               = sps->output_width;
 345     avctx->height              = sps->output_height;
 346     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 347     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 348     avctx->level               = sps->ptl.general_ptl.level_idc;
 349
 350     ff_set_sar(avctx, sps->vui.sar);
 351
 352     if (sps->vui.video_signal_type_present_flag)
 353         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 354                                                             : AVCOL_RANGE_MPEG;
 355     else
 356         avctx->color_range = AVCOL_RANGE_MPEG;
 357
 358     if (sps->vui.colour_description_present_flag) {
 359         avctx->color_primaries = sps->vui.colour_primaries;
 360         avctx->color_trc       = sps->vui.transfer_characteristic;
 361         avctx->colorspace      = sps->vui.matrix_coeffs;
 362     } else {
 363         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 364         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 365         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 366     }
 367
 368     if (vps->vps_timing_info_present_flag) {
 369         num = vps->vps_num_units_in_tick;
 370         den = vps->vps_time_scale;
 371     } else if (sps->vui.vui_timing_info_present_flag) {
 372         num = sps->vui.vui_num_units_in_tick;
 373         den = sps->vui.vui_time_scale;
 374     }
 375
 376     if (num != 0 && den != 0)
 377         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 378                   num, den, 1 << 30);
 379 }
 380
 381 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 382 {
 383     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
 384     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 385     int ret;
 386
 387     pic_arrays_free(s);
 388     s->ps.sps = NULL;
 389     s->ps.vps = NULL;
 390
 391     if (!sps)
 392         return 0;
 393
 394     ret = pic_arrays_init(s, sps);
 395     if (ret < 0)
 396         goto fail;
 397
 398     export_stream_params(s->avctx, &s->ps, sps);
 399
 400     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 401 #if CONFIG_HEVC_DXVA2_HWACCEL
 402         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 403 #endif
 404 #if CONFIG_HEVC_D3D11VA_HWACCEL
 405         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 406 #endif
 407 #if CONFIG_HEVC_VDPAU_HWACCEL
 408         *fmt++ = AV_PIX_FMT_VDPAU;
 409 #endif
 410     }
 411
 412     *fmt++ = sps->pix_fmt;
 413     *fmt = AV_PIX_FMT_NONE;
 414
 415     ret = ff_get_format(s->avctx, pix_fmts);
 416     if (ret < 0)
 417         goto fail;
 418     s->avctx->pix_fmt = ret;
 419
 420     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 421     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 422     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 423
 424     if (sps->sao_enabled && !s->avctx->hwaccel) {
 425         av_frame_unref(s->tmp_frame);
 426         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 427         if (ret < 0)
 428             goto fail;
 429         s->frame = s->tmp_frame;
 430     }
 431
 432     s->ps.sps = sps;
 433     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 434
 435     return 0;
 436
 437 fail:
 438     pic_arrays_free(s);
 439     s->ps.sps = NULL;
 440     return ret;
 441 }
 442
 443 static int hls_slice_header(HEVCContext *s)
 444 {
 445     GetBitContext *gb = &s->HEVClc.gb;
 446     SliceHeader *sh   = &s->sh;
 447     int i, ret;
 448
 449     // Coded parameters
 450     sh->first_slice_in_pic_flag = get_bits1(gb);
 451     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 452         s->seq_decode = (s->seq_decode + 1) & 0xff;
 453         s->max_ra     = INT_MAX;
 454         if (IS_IDR(s))
 455             ff_hevc_clear_refs(s);
 456     }
 457     if (IS_IRAP(s))
 458         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 459
 460     sh->pps_id = get_ue_golomb_long(gb);
 461     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 462         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 463         return AVERROR_INVALIDDATA;
 464     }
 465     if (!sh->first_slice_in_pic_flag &&
 466         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 467         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 468         return AVERROR_INVALIDDATA;
 469     }
 470     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 471
 472     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 473         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 474
 475         ff_hevc_clear_refs(s);
 476         ret = set_sps(s, s->ps.sps);
 477         if (ret < 0)
 478             return ret;
 479
 480         s->seq_decode = (s->seq_decode + 1) & 0xff;
 481         s->max_ra     = INT_MAX;
 482     }
 483
 484     sh->dependent_slice_segment_flag = 0;
 485     if (!sh->first_slice_in_pic_flag) {
 486         int slice_address_length;
 487
 488         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 489             sh->dependent_slice_segment_flag = get_bits1(gb);
 490
 491         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 492                                             s->ps.sps->ctb_height);
 493         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
 494         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 495             av_log(s->avctx, AV_LOG_ERROR,
 496                    "Invalid slice segment address: %u.\n",
 497                    sh->slice_segment_addr);
 498             return AVERROR_INVALIDDATA;
 499         }
 500
 501         if (!sh->dependent_slice_segment_flag) {
 502             sh->slice_addr = sh->slice_segment_addr;
 503             s->slice_idx++;
 504         }
 505     } else {
 506         sh->slice_segment_addr = sh->slice_addr = 0;
 507         s->slice_idx           = 0;
 508         s->slice_initialized   = 0;
 509     }
 510
 511     if (!sh->dependent_slice_segment_flag) {
 512         s->slice_initialized = 0;
 513
 514         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 515             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 516
 517         sh->slice_type = get_ue_golomb_long(gb);
 518         if (!(sh->slice_type == I_SLICE ||
 519               sh->slice_type == P_SLICE ||
 520               sh->slice_type == B_SLICE)) {
 521             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 522                    sh->slice_type);
 523             return AVERROR_INVALIDDATA;
 524         }
 525         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 526             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 527             return AVERROR_INVALIDDATA;
 528         }
 529
 530         // when flag is not present, picture is inferred to be output
 531         sh->pic_output_flag = 1;
 532         if (s->ps.pps->output_flag_present_flag)
 533             sh->pic_output_flag = get_bits1(gb);
 534
 535         if (s->ps.sps->separate_colour_plane_flag)
 536             sh->colour_plane_id = get_bits(gb, 2);
 537
 538         if (!IS_IDR(s)) {
 539             int poc, pos;
 540
 541             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 542             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 543             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 544                 av_log(s->avctx, AV_LOG_WARNING,
 545                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 546                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 547                     return AVERROR_INVALIDDATA;
 548                 poc = s->poc;
 549             }
 550             s->poc = poc;
 551
 552             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 553             pos = get_bits_left(gb);
 554             if (!sh->short_term_ref_pic_set_sps_flag) {
 555                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 556                 if (ret < 0)
 557                     return ret;
 558
 559                 sh->short_term_rps = &sh->slice_rps;
 560             } else {
 561                 int numbits, rps_idx;
 562
 563                 if (!s->ps.sps->nb_st_rps) {
 564                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 565                     return AVERROR_INVALIDDATA;
 566                 }
 567
 568                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 569                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 570                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 571             }
 572             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 573
 574             pos = get_bits_left(gb);
 575             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 576             if (ret < 0) {
 577                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 578                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 579                     return AVERROR_INVALIDDATA;
 580             }
 581             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 582
 583             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 584                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 585             else
 586                 sh->slice_temporal_mvp_enabled_flag = 0;
 587         } else {
 588             s->sh.short_term_rps = NULL;
 589             s->poc               = 0;
 590         }
 591
 592         /* 8.3.1 */
 593         if (s->temporal_id == 0 &&
 594             s->nal_unit_type != NAL_TRAIL_N &&
 595             s->nal_unit_type != NAL_TSA_N   &&
 596             s->nal_unit_type != NAL_STSA_N  &&
 597             s->nal_unit_type != NAL_RADL_N  &&
 598             s->nal_unit_type != NAL_RADL_R  &&
 599             s->nal_unit_type != NAL_RASL_N  &&
 600             s->nal_unit_type != NAL_RASL_R)
 601             s->pocTid0 = s->poc;
 602
 603         if (s->ps.sps->sao_enabled) {
 604             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 605             sh->slice_sample_adaptive_offset_flag[1] =
 606             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 607         } else {
 608             sh->slice_sample_adaptive_offset_flag[0] = 0;
 609             sh->slice_sample_adaptive_offset_flag[1] = 0;
 610             sh->slice_sample_adaptive_offset_flag[2] = 0;
 611         }
 612
 613         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 614         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 615             int nb_refs;
 616
 617             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 618             if (sh->slice_type == B_SLICE)
 619                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 620
 621             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 622                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 623                 if (sh->slice_type == B_SLICE)
 624                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 625             }
 626             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 627                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 628                        sh->nb_refs[L0], sh->nb_refs[L1]);
 629                 return AVERROR_INVALIDDATA;
 630             }
 631
 632             sh->rpl_modification_flag[0] = 0;
 633             sh->rpl_modification_flag[1] = 0;
 634             nb_refs = ff_hevc_frame_nb_refs(s);
 635             if (!nb_refs) {
 636                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 637                 return AVERROR_INVALIDDATA;
 638             }
 639
 640             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 641                 sh->rpl_modification_flag[0] = get_bits1(gb);
 642                 if (sh->rpl_modification_flag[0]) {
 643                     for (i = 0; i < sh->nb_refs[L0]; i++)
 644                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 645                 }
 646
 647                 if (sh->slice_type == B_SLICE) {
 648                     sh->rpl_modification_flag[1] = get_bits1(gb);
 649                     if (sh->rpl_modification_flag[1] == 1)
 650                         for (i = 0; i < sh->nb_refs[L1]; i++)
 651                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 652                 }
 653             }
 654
 655             if (sh->slice_type == B_SLICE)
 656                 sh->mvd_l1_zero_flag = get_bits1(gb);
 657
 658             if (s->ps.pps->cabac_init_present_flag)
 659                 sh->cabac_init_flag = get_bits1(gb);
 660             else
 661                 sh->cabac_init_flag = 0;
 662
 663             sh->collocated_ref_idx = 0;
 664             if (sh->slice_temporal_mvp_enabled_flag) {
 665                 sh->collocated_list = L0;
 666                 if (sh->slice_type == B_SLICE)
 667                     sh->collocated_list = !get_bits1(gb);
 668
 669                 if (sh->nb_refs[sh->collocated_list] > 1) {
 670                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 671                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 672                         av_log(s->avctx, AV_LOG_ERROR,
 673                                "Invalid collocated_ref_idx: %d.\n",
 674                                sh->collocated_ref_idx);
 675                         return AVERROR_INVALIDDATA;
 676                     }
 677                 }
 678             }
 679
 680             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 681                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 682                 pred_weight_table(s, gb);
 683             }
 684
 685             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 686             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 687                 av_log(s->avctx, AV_LOG_ERROR,
 688                        "Invalid number of merging MVP candidates: %d.\n",
 689                        sh->max_num_merge_cand);
 690                 return AVERROR_INVALIDDATA;
 691             }
 692         }
 693
 694         sh->slice_qp_delta = get_se_golomb(gb);
 695
 696         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 697             sh->slice_cb_qp_offset = get_se_golomb(gb);
 698             sh->slice_cr_qp_offset = get_se_golomb(gb);
 699         } else {
 700             sh->slice_cb_qp_offset = 0;
 701             sh->slice_cr_qp_offset = 0;
 702         }
 703
 704         if (s->ps.pps->deblocking_filter_control_present_flag) {
 705             int deblocking_filter_override_flag = 0;
 706
 707             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 708                 deblocking_filter_override_flag = get_bits1(gb);
 709
 710             if (deblocking_filter_override_flag) {
 711                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 712                 if (!sh->disable_deblocking_filter_flag) {
 713                     sh->beta_offset = get_se_golomb(gb) * 2;
 714                     sh->tc_offset   = get_se_golomb(gb) * 2;
 715                 }
 716             } else {
 717                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 718                 sh->beta_offset                    = s->ps.pps->beta_offset;
 719                 sh->tc_offset                      = s->ps.pps->tc_offset;
 720             }
 721         } else {
 722             sh->disable_deblocking_filter_flag = 0;
 723             sh->beta_offset                    = 0;
 724             sh->tc_offset                      = 0;
 725         }
 726
 727         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 728             (sh->slice_sample_adaptive_offset_flag[0] ||
 729              sh->slice_sample_adaptive_offset_flag[1] ||
 730              !sh->disable_deblocking_filter_flag)) {
 731             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 732         } else {
 733             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 734         }
 735     } else if (!s->slice_initialized) {
 736         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 737         return AVERROR_INVALIDDATA;
 738     }
 739
 740     sh->num_entry_point_offsets = 0;
 741     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 742         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 743         if (sh->num_entry_point_offsets > 0) {
 744             int offset_len = get_ue_golomb_long(gb) + 1;
 745
 746             for (i = 0; i < sh->num_entry_point_offsets; i++)
 747                 skip_bits(gb, offset_len);
 748         }
 749     }
 750
 751     if (s->ps.pps->slice_header_extension_present_flag) {
 752         unsigned int length = get_ue_golomb_long(gb);
 753         for (i = 0; i < length; i++)
 754             skip_bits(gb, 8);  // slice_header_extension_data_byte
 755     }
 756
 757     // Inferred parameters
 758     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 759     if (sh->slice_qp > 51 ||
 760         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 761         av_log(s->avctx, AV_LOG_ERROR,
 762                "The slice_qp %d is outside the valid range "
 763                "[%d, 51].\n",
 764                sh->slice_qp,
 765                -s->ps.sps->qp_bd_offset);
 766         return AVERROR_INVALIDDATA;
 767     }
 768
 769     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 770
 771     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 772         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 773         return AVERROR_INVALIDDATA;
 774     }
 775
 776     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 777
 778     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 779         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
 780                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
 781
 782     s->slice_initialized = 1;
 783
 784     return 0;
 785 }
 786
 787 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 788
 789 #define SET_SAO(elem, value)                            \
 790 do {                                                    \
 791     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 792         sao->elem = value;                              \
 793     else if (sao_merge_left_flag)                       \
 794         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 795     else if (sao_merge_up_flag)                         \
 796         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 797     else                                                \
 798         sao->elem = 0;                                  \
 799 } while (0)
 800
 801 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 802 {
 803     HEVCLocalContext *lc    = &s->HEVClc;
 804     int sao_merge_left_flag = 0;
 805     int sao_merge_up_flag   = 0;
 806     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
 807     SAOParams *sao          = &CTB(s->sao, rx, ry);
 808     int c_idx, i;
 809
 810     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 811         s->sh.slice_sample_adaptive_offset_flag[1]) {
 812         if (rx > 0) {
 813             if (lc->ctb_left_flag)
 814                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 815         }
 816         if (ry > 0 && !sao_merge_left_flag) {
 817             if (lc->ctb_up_flag)
 818                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 819         }
 820     }
 821
 822     for (c_idx = 0; c_idx < 3; c_idx++) {
 823         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 824             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 825             continue;
 826         }
 827
 828         if (c_idx == 2) {
 829             sao->type_idx[2] = sao->type_idx[1];
 830             sao->eo_class[2] = sao->eo_class[1];
 831         } else {
 832             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 833         }
 834
 835         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 836             continue;
 837
 838         for (i = 0; i < 4; i++)
 839             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 840
 841         if (sao->type_idx[c_idx] == SAO_BAND) {
 842             for (i = 0; i < 4; i++) {
 843                 if (sao->offset_abs[c_idx][i]) {
 844                     SET_SAO(offset_sign[c_idx][i],
 845                             ff_hevc_sao_offset_sign_decode(s));
 846                 } else {
 847                     sao->offset_sign[c_idx][i] = 0;
 848                 }
 849             }
 850             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 851         } else if (c_idx != 2) {
 852             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 853         }
 854
 855         // Inferred parameters
 856         sao->offset_val[c_idx][0] = 0;
 857         for (i = 0; i < 4; i++) {
 858             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 859             if (sao->type_idx[c_idx] == SAO_EDGE) {
 860                 if (i > 1)
 861                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 862             } else if (sao->offset_sign[c_idx][i]) {
 863                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 864             }
 865         }
 866     }
 867 }
 868
 869 #undef SET_SAO
 870 #undef CTB
 871
 872 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 873                                 int log2_trafo_size, enum ScanType scan_idx,
 874                                 int c_idx)
 875 {
 876 #define GET_COORD(offset, n)                                    \
 877     do {                                                        \
 878         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 879         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 880     } while (0)
 881     HEVCLocalContext *lc    = &s->HEVClc;
 882     int transform_skip_flag = 0;
 883
 884     int last_significant_coeff_x, last_significant_coeff_y;
 885     int last_scan_pos;
 886     int n_end;
 887     int num_coeff    = 0;
 888     int greater1_ctx = 1;
 889
 890     int num_last_subset;
 891     int x_cg_last_sig, y_cg_last_sig;
 892
 893     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 894
 895     ptrdiff_t stride = s->frame->linesize[c_idx];
 896     int hshift       = s->ps.sps->hshift[c_idx];
 897     int vshift       = s->ps.sps->vshift[c_idx];
 898     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 899                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
 900     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 901     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 902
 903     int trafo_size = 1 << log2_trafo_size;
 904     int i, qp, shift, add, scale, scale_m;
 905     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 906     const uint8_t *scale_matrix;
 907     uint8_t dc_scale;
 908
 909     // Derive QP for dequant
 910     if (!lc->cu.cu_transquant_bypass_flag) {
 911         static const int qp_c[] = {
 912             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 913         };
 914
 915         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 916             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 917             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 918             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 919         };
 920
 921         static const uint8_t div6[51 + 2 * 6 + 1] = {
 922             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 923             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 924             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 925         };
 926         int qp_y = lc->qp_y;
 927
 928         if (c_idx == 0) {
 929             qp = qp_y + s->ps.sps->qp_bd_offset;
 930         } else {
 931             int qp_i, offset;
 932
 933             if (c_idx == 1)
 934                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 935             else
 936                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 937
 938             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
 939             if (qp_i < 30)
 940                 qp = qp_i;
 941             else if (qp_i > 43)
 942                 qp = qp_i - 6;
 943             else
 944                 qp = qp_c[qp_i - 30];
 945
 946             qp += s->ps.sps->qp_bd_offset;
 947         }
 948
 949         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
 950         add      = 1 << (shift - 1);
 951         scale    = level_scale[rem6[qp]] << (div6[qp]);
 952         scale_m  = 16; // default when no custom scaling lists.
 953         dc_scale = 16;
 954
 955         if (s->ps.sps->scaling_list_enable_flag) {
 956             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
 957                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
 958             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 959
 960             if (log2_trafo_size != 5)
 961                 matrix_id = 3 * matrix_id + c_idx;
 962
 963             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 964             if (log2_trafo_size >= 4)
 965                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 966         }
 967     }
 968
 969     if (s->ps.pps->transform_skip_enabled_flag &&
 970         !lc->cu.cu_transquant_bypass_flag   &&
 971         log2_trafo_size == 2) {
 972         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 973     }
 974
 975     last_significant_coeff_x =
 976         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 977     last_significant_coeff_y =
 978         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 979
 980     if (last_significant_coeff_x > 3) {
 981         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 982         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 983                                    (2 + (last_significant_coeff_x & 1)) +
 984                                    suffix;
 985     }
 986
 987     if (last_significant_coeff_y > 3) {
 988         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 989         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 990                                    (2 + (last_significant_coeff_y & 1)) +
 991                                    suffix;
 992     }
 993
 994     if (scan_idx == SCAN_VERT)
 995         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
 996
 997     x_cg_last_sig = last_significant_coeff_x >> 2;
 998     y_cg_last_sig = last_significant_coeff_y >> 2;
 999
1000     switch (scan_idx) {
1001     case SCAN_DIAG: {
1002         int last_x_c = last_significant_coeff_x & 3;
1003         int last_y_c = last_significant_coeff_y & 3;
1004
1005         scan_x_off = ff_hevc_diag_scan4x4_x;
1006         scan_y_off = ff_hevc_diag_scan4x4_y;
1007         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1008         if (trafo_size == 4) {
1009             scan_x_cg = scan_1x1;
1010             scan_y_cg = scan_1x1;
1011         } else if (trafo_size == 8) {
1012             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1013             scan_x_cg  = diag_scan2x2_x;
1014             scan_y_cg  = diag_scan2x2_y;
1015         } else if (trafo_size == 16) {
1016             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1017             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1018             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1019         } else { // trafo_size == 32
1020             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1021             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1022             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1023         }
1024         break;
1025     }
1026     case SCAN_HORIZ:
1027         scan_x_cg  = horiz_scan2x2_x;
1028         scan_y_cg  = horiz_scan2x2_y;
1029         scan_x_off = horiz_scan4x4_x;
1030         scan_y_off = horiz_scan4x4_y;
1031         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1032         break;
1033     default: //SCAN_VERT
1034         scan_x_cg  = horiz_scan2x2_y;
1035         scan_y_cg  = horiz_scan2x2_x;
1036         scan_x_off = horiz_scan4x4_y;
1037         scan_y_off = horiz_scan4x4_x;
1038         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1039         break;
1040     }
1041     num_coeff++;
1042     num_last_subset = (num_coeff - 1) >> 4;
1043
1044     for (i = num_last_subset; i >= 0; i--) {
1045         int n, m;
1046         int x_cg, y_cg, x_c, y_c;
1047         int implicit_non_zero_coeff = 0;
1048         int64_t trans_coeff_level;
1049         int prev_sig = 0;
1050         int offset   = i << 4;
1051
1052         uint8_t significant_coeff_flag_idx[16];
1053         uint8_t nb_significant_coeff_flag = 0;
1054
1055         x_cg = scan_x_cg[i];
1056         y_cg = scan_y_cg[i];
1057
1058         if (i < num_last_subset && i > 0) {
1059             int ctx_cg = 0;
1060             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1061                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1062             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1063                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1064
1065             significant_coeff_group_flag[x_cg][y_cg] =
1066                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1067             implicit_non_zero_coeff = 1;
1068         } else {
1069             significant_coeff_group_flag[x_cg][y_cg] =
1070                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1071                  (x_cg == 0 && y_cg == 0));
1072         }
1073
1074         last_scan_pos = num_coeff - offset - 1;
1075
1076         if (i == num_last_subset) {
1077             n_end                         = last_scan_pos - 1;
1078             significant_coeff_flag_idx[0] = last_scan_pos;
1079             nb_significant_coeff_flag     = 1;
1080         } else {
1081             n_end = 15;
1082         }
1083
1084         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1085             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1086         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1087             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1088
1089         for (n = n_end; n >= 0; n--) {
1090             GET_COORD(offset, n);
1091
1092             if (significant_coeff_group_flag[x_cg][y_cg] &&
1093                 (n > 0 || implicit_non_zero_coeff == 0)) {
1094                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1095                                                           log2_trafo_size,
1096                                                           scan_idx,
1097                                                           prev_sig) == 1) {
1098                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1099                     nb_significant_coeff_flag++;
1100                     implicit_non_zero_coeff = 0;
1101                 }
1102             } else {
1103                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1104                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1105                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1106                     nb_significant_coeff_flag++;
1107                 }
1108             }
1109         }
1110
1111         n_end = nb_significant_coeff_flag;
1112
1113         if (n_end) {
1114             int first_nz_pos_in_cg = 16;
1115             int last_nz_pos_in_cg = -1;
1116             int c_rice_param = 0;
1117             int first_greater1_coeff_idx = -1;
1118             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1119             uint16_t coeff_sign_flag;
1120             int sum_abs = 0;
1121             int sign_hidden = 0;
1122
1123             // initialize first elem of coeff_bas_level_greater1_flag
1124             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1125
1126             if (!(i == num_last_subset) && greater1_ctx == 0)
1127                 ctx_set++;
1128             greater1_ctx      = 1;
1129             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1130
1131             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1132                 int n_idx = significant_coeff_flag_idx[m];
1133                 int inc   = (ctx_set << 2) + greater1_ctx;
1134                 coeff_abs_level_greater1_flag[n_idx] =
1135                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1136                 if (coeff_abs_level_greater1_flag[n_idx]) {
1137                     greater1_ctx = 0;
1138                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1139                     greater1_ctx++;
1140                 }
1141
1142                 if (coeff_abs_level_greater1_flag[n_idx] &&
1143                     first_greater1_coeff_idx == -1)
1144                     first_greater1_coeff_idx = n_idx;
1145             }
1146             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1147             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1148                                  !lc->cu.cu_transquant_bypass_flag;
1149
1150             if (first_greater1_coeff_idx != -1) {
1151                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1152             }
1153             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1154                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1155             } else {
1156                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1157             }
1158
1159             for (m = 0; m < n_end; m++) {
1160                 n = significant_coeff_flag_idx[m];
1161                 GET_COORD(offset, n);
1162                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1163                 if (trans_coeff_level == ((m < 8) ?
1164                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1165                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1166
1167                     trans_coeff_level += last_coeff_abs_level_remaining;
1168                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1169                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1170                 }
1171                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1172                     sum_abs += trans_coeff_level;
1173                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1174                         trans_coeff_level = -trans_coeff_level;
1175                 }
1176                 if (coeff_sign_flag >> 15)
1177                     trans_coeff_level = -trans_coeff_level;
1178                 coeff_sign_flag <<= 1;
1179                 if (!lc->cu.cu_transquant_bypass_flag) {
1180                     if (s->ps.sps->scaling_list_enable_flag) {
1181                         if (y_c || x_c || log2_trafo_size < 4) {
1182                             int pos;
1183                             switch (log2_trafo_size) {
1184                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1185                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1186                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1187                             default: pos = (y_c        << 2) +  x_c;
1188                             }
1189                             scale_m = scale_matrix[pos];
1190                         } else {
1191                             scale_m = dc_scale;
1192                         }
1193                     }
1194                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1195                     if(trans_coeff_level < 0) {
1196                         if((~trans_coeff_level) & 0xFffffffffff8000)
1197                             trans_coeff_level = -32768;
1198                     } else {
1199                         if (trans_coeff_level & 0xffffffffffff8000)
1200                             trans_coeff_level = 32767;
1201                     }
1202                 }
1203                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1204             }
1205         }
1206     }
1207
1208     if (lc->cu.cu_transquant_bypass_flag) {
1209         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1210     } else {
1211         if (transform_skip_flag)
1212             s->hevcdsp.transform_skip(dst, coeffs, stride);
1213         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1214                  log2_trafo_size == 2)
1215             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1216         else
1217             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1218     }
1219 }
1220
1221 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1222                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1223                               int log2_cb_size, int log2_trafo_size,
1224                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1225 {
1226     HEVCLocalContext *lc = &s->HEVClc;
1227
1228     if (lc->cu.pred_mode == MODE_INTRA) {
1229         int trafo_size = 1 << log2_trafo_size;
1230         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1231
1232         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1233         if (log2_trafo_size > 2) {
1234             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1235             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1236             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1237             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1238         } else if (blk_idx == 3) {
1239             trafo_size = trafo_size << s->ps.sps->hshift[1];
1240             ff_hevc_set_neighbour_available(s, xBase, yBase,
1241                                             trafo_size, trafo_size);
1242             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1243             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1244         }
1245     }
1246
1247     if (cbf_luma || cbf_cb || cbf_cr) {
1248         int scan_idx   = SCAN_DIAG;
1249         int scan_idx_c = SCAN_DIAG;
1250
1251         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1252             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1253             if (lc->tu.cu_qp_delta != 0)
1254                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1255                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1256             lc->tu.is_cu_qp_delta_coded = 1;
1257
1258             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1259                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1260                 av_log(s->avctx, AV_LOG_ERROR,
1261                        "The cu_qp_delta %d is outside the valid range "
1262                        "[%d, %d].\n",
1263                        lc->tu.cu_qp_delta,
1264                        -(26 + s->ps.sps->qp_bd_offset / 2),
1265                         (25 + s->ps.sps->qp_bd_offset / 2));
1266                 return AVERROR_INVALIDDATA;
1267             }
1268
1269             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1270         }
1271
1272         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1273             if (lc->tu.cur_intra_pred_mode >= 6 &&
1274                 lc->tu.cur_intra_pred_mode <= 14) {
1275                 scan_idx = SCAN_VERT;
1276             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1277                        lc->tu.cur_intra_pred_mode <= 30) {
1278                 scan_idx = SCAN_HORIZ;
1279             }
1280
1281             if (lc->pu.intra_pred_mode_c >=  6 &&
1282                 lc->pu.intra_pred_mode_c <= 14) {
1283                 scan_idx_c = SCAN_VERT;
1284             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1285                        lc->pu.intra_pred_mode_c <= 30) {
1286                 scan_idx_c = SCAN_HORIZ;
1287             }
1288         }
1289
1290         if (cbf_luma)
1291             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1292         if (log2_trafo_size > 2) {
1293             if (cbf_cb)
1294                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1295             if (cbf_cr)
1296                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1297         } else if (blk_idx == 3) {
1298             if (cbf_cb)
1299                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1300             if (cbf_cr)
1301                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1302         }
1303     }
1304     return 0;
1305 }
1306
1307 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1308 {
1309     int cb_size          = 1 << log2_cb_size;
1310     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1311
1312     int min_pu_width     = s->ps.sps->min_pu_width;
1313     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1314     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1315     int i, j;
1316
1317     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1318         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1319             s->is_pcm[i + j * min_pu_width] = 2;
1320 }
1321
1322 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1323                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1324                               int log2_cb_size, int log2_trafo_size,
1325                               int trafo_depth, int blk_idx,
1326                               int cbf_cb, int cbf_cr)
1327 {
1328     HEVCLocalContext *lc = &s->HEVClc;
1329     uint8_t split_transform_flag;
1330     int ret;
1331
1332     if (lc->cu.intra_split_flag) {
1333         if (trafo_depth == 1)
1334             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1335     } else {
1336         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1337     }
1338
1339     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1340         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1341         trafo_depth     < lc->cu.max_trafo_depth       &&
1342         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1343         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1344     } else {
1345         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1346                           lc->cu.pred_mode == MODE_INTER &&
1347                           lc->cu.part_mode != PART_2Nx2N &&
1348                           trafo_depth == 0;
1349
1350         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1351                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1352                                inter_split;
1353     }
1354
1355     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1356         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1357     else if (log2_trafo_size > 2 || trafo_depth == 0)
1358         cbf_cb = 0;
1359     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1360         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1361     else if (log2_trafo_size > 2 || trafo_depth == 0)
1362         cbf_cr = 0;
1363
1364     if (split_transform_flag) {
1365         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1366         const int x1 = x0 + trafo_size_split;
1367         const int y1 = y0 + trafo_size_split;
1368
1369 #define SUBDIVIDE(x, y, idx)                                                    \
1370 do {                                                                            \
1371     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1372                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1373                              cbf_cb, cbf_cr);                                   \
1374     if (ret < 0)                                                                \
1375         return ret;                                                             \
1376 } while (0)
1377
1378         SUBDIVIDE(x0, y0, 0);
1379         SUBDIVIDE(x1, y0, 1);
1380         SUBDIVIDE(x0, y1, 2);
1381         SUBDIVIDE(x1, y1, 3);
1382
1383 #undef SUBDIVIDE
1384     } else {
1385         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1386         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1387         int min_tu_width     = s->ps.sps->min_tb_width;
1388         int cbf_luma         = 1;
1389
1390         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1391             cbf_cb || cbf_cr)
1392             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1393
1394         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1395                                  log2_cb_size, log2_trafo_size,
1396                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1397         if (ret < 0)
1398             return ret;
1399         // TODO: store cbf_luma somewhere else
1400         if (cbf_luma) {
1401             int i, j;
1402             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1403                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1404                     int x_tu = (x0 + j) >> log2_min_tu_size;
1405                     int y_tu = (y0 + i) >> log2_min_tu_size;
1406                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1407                 }
1408         }
1409         if (!s->sh.disable_deblocking_filter_flag) {
1410             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1411             if (s->ps.pps->transquant_bypass_enable_flag &&
1412                 lc->cu.cu_transquant_bypass_flag)
1413                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1414         }
1415     }
1416     return 0;
1417 }
1418
1419 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1420 {
1421     //TODO: non-4:2:0 support
1422     HEVCLocalContext *lc = &s->HEVClc;
1423     GetBitContext gb;
1424     int cb_size   = 1 << log2_cb_size;
1425     int stride0   = s->frame->linesize[0];
1426     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1427     int   stride1 = s->frame->linesize[1];
1428     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1429     int   stride2 = s->frame->linesize[2];
1430     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1431
1432     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1433     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1434     int ret;
1435
1436     if (!s->sh.disable_deblocking_filter_flag)
1437         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1438
1439     ret = init_get_bits(&gb, pcm, length);
1440     if (ret < 0)
1441         return ret;
1442
1443     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1444     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1445     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1446     return 0;
1447 }
1448
1449 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1450 {
1451     HEVCLocalContext *lc = &s->HEVClc;
1452     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1453     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1454
1455     if (x)
1456         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1457     if (y)
1458         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1459
1460     switch (x) {
1461     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1462     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1463     case 0: lc->pu.mvd.x = 0;                               break;
1464     }
1465
1466     switch (y) {
1467     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1468     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1469     case 0: lc->pu.mvd.y = 0;                               break;
1470     }
1471 }
1472
1473 /**
1474  * 8.5.3.2.2.1 Luma sample interpolation process
1475  *
1476  * @param s HEVC decoding context
1477  * @param dst target buffer for block data at block position
1478  * @param dststride stride of the dst buffer
1479  * @param ref reference picture buffer at origin (0, 0)
1480  * @param mv motion vector (relative to block position) to get pixel data from
1481  * @param x_off horizontal position of block from origin (0, 0)
1482  * @param y_off vertical position of block from origin (0, 0)
1483  * @param block_w width of block
1484  * @param block_h height of block
1485  */
1486 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1487                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1488                     int block_w, int block_h, int pred_idx)
1489 {
1490     HEVCLocalContext *lc = &s->HEVClc;
1491     uint8_t *src         = ref->data[0];
1492     ptrdiff_t srcstride  = ref->linesize[0];
1493     int pic_width        = s->ps.sps->width;
1494     int pic_height       = s->ps.sps->height;
1495
1496     int mx         = mv->x & 3;
1497     int my         = mv->y & 3;
1498     int extra_left = ff_hevc_qpel_extra_before[mx];
1499     int extra_top  = ff_hevc_qpel_extra_before[my];
1500
1501     x_off += mv->x >> 2;
1502     y_off += mv->y >> 2;
1503     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1504
1505     if (x_off < extra_left || y_off < extra_top ||
1506         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1507         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1508         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1509         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1510         int buf_offset = extra_top *
1511                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1512
1513         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1514                                  edge_emu_stride, srcstride,
1515                                  block_w + ff_hevc_qpel_extra[mx],
1516                                  block_h + ff_hevc_qpel_extra[my],
1517                                  x_off - extra_left, y_off - extra_top,
1518                                  pic_width, pic_height);
1519         src = lc->edge_emu_buffer + buf_offset;
1520         srcstride = edge_emu_stride;
1521     }
1522     s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
1523                                                    block_h, mx, my, lc->mc_buffer);
1524 }
1525
1526 /**
1527  * 8.5.3.2.2.2 Chroma sample interpolation process
1528  *
1529  * @param s HEVC decoding context
1530  * @param dst1 target buffer for block data at block position (U plane)
1531  * @param dst2 target buffer for block data at block position (V plane)
1532  * @param dststride stride of the dst1 and dst2 buffers
1533  * @param ref reference picture buffer at origin (0, 0)
1534  * @param mv motion vector (relative to block position) to get pixel data from
1535  * @param x_off horizontal position of block from origin (0, 0)
1536  * @param y_off vertical position of block from origin (0, 0)
1537  * @param block_w width of block
1538  * @param block_h height of block
1539  */
1540 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1541                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1542                       int x_off, int y_off, int block_w, int block_h, int pred_idx)
1543 {
1544     HEVCLocalContext *lc = &s->HEVClc;
1545     uint8_t *src1        = ref->data[1];
1546     uint8_t *src2        = ref->data[2];
1547     ptrdiff_t src1stride = ref->linesize[1];
1548     ptrdiff_t src2stride = ref->linesize[2];
1549     int pic_width        = s->ps.sps->width >> 1;
1550     int pic_height       = s->ps.sps->height >> 1;
1551
1552     int mx = mv->x & 7;
1553     int my = mv->y & 7;
1554
1555     x_off += mv->x >> 3;
1556     y_off += mv->y >> 3;
1557     src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1558     src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1559
1560     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1561         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1562         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1563         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1564         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1565         int buf_offset1 = EPEL_EXTRA_BEFORE *
1566                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1567         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1568         int buf_offset2 = EPEL_EXTRA_BEFORE *
1569                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1570
1571         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1572                                  edge_emu_stride, src1stride,
1573                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1574                                  x_off - EPEL_EXTRA_BEFORE,
1575                                  y_off - EPEL_EXTRA_BEFORE,
1576                                  pic_width, pic_height);
1577
1578         src1 = lc->edge_emu_buffer + buf_offset1;
1579         src1stride = edge_emu_stride;
1580         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1581                                                        block_h, mx, my, lc->mc_buffer);
1582
1583         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1584                                  edge_emu_stride, src2stride,
1585                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1586                                  x_off - EPEL_EXTRA_BEFORE,
1587                                  y_off - EPEL_EXTRA_BEFORE,
1588                                  pic_width, pic_height);
1589         src2 = lc->edge_emu_buffer + buf_offset2;
1590         src2stride = edge_emu_stride;
1591
1592         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1593                                                        block_h, mx, my, lc->mc_buffer);
1594     } else {
1595         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1596                                                        block_h, mx, my, lc->mc_buffer);
1597         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1598                                                        block_h, mx, my, lc->mc_buffer);
1599     }
1600 }
1601
1602 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1603                                 const Mv *mv, int y0, int height)
1604 {
1605     int y = (mv->y >> 2) + y0 + height + 9;
1606     ff_thread_await_progress(&ref->tf, y, 0);
1607 }
1608
1609 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1610                                   int nPbH, int log2_cb_size, int part_idx,
1611                                   int merge_idx, MvField *mv)
1612 {
1613     HEVCLocalContext *lc             = &s->HEVClc;
1614     enum InterPredIdc inter_pred_idc = PRED_L0;
1615     int mvp_flag;
1616
1617     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1618     if (s->sh.slice_type == B_SLICE)
1619         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1620
1621     if (inter_pred_idc != PRED_L1) {
1622         if (s->sh.nb_refs[L0])
1623             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1624
1625         mv->pred_flag[0] = 1;
1626         hls_mvd_coding(s, x0, y0, 0);
1627         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1628         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1629                                  part_idx, merge_idx, mv, mvp_flag, 0);
1630         mv->mv[0].x += lc->pu.mvd.x;
1631         mv->mv[0].y += lc->pu.mvd.y;
1632     }
1633
1634     if (inter_pred_idc != PRED_L0) {
1635         if (s->sh.nb_refs[L1])
1636             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1637
1638         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1639             AV_ZERO32(&lc->pu.mvd);
1640         } else {
1641             hls_mvd_coding(s, x0, y0, 1);
1642         }
1643
1644         mv->pred_flag[1] = 1;
1645         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1646         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1647                                  part_idx, merge_idx, mv, mvp_flag, 1);
1648         mv->mv[1].x += lc->pu.mvd.x;
1649         mv->mv[1].y += lc->pu.mvd.y;
1650     }
1651 }
1652
1653 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1654                                 int nPbW, int nPbH,
1655                                 int log2_cb_size, int partIdx)
1656 {
1657     static const int pred_indices[] = {
1658         [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
1659     };
1660     const int pred_idx = pred_indices[nPbW];
1661
1662 #define POS(c_idx, x, y)                                                              \
1663     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1664                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1665     HEVCLocalContext *lc = &s->HEVClc;
1666     int merge_idx = 0;
1667     struct MvField current_mv = {{{ 0 }}};
1668
1669     int min_pu_width = s->ps.sps->min_pu_width;
1670
1671     MvField *tab_mvf = s->ref->tab_mvf;
1672     RefPicList  *refPicList = s->ref->refPicList;
1673     HEVCFrame *ref0, *ref1;
1674
1675     int tmpstride = MAX_PB_SIZE * sizeof(int16_t);
1676
1677     uint8_t *dst0 = POS(0, x0, y0);
1678     uint8_t *dst1 = POS(1, x0, y0);
1679     uint8_t *dst2 = POS(2, x0, y0);
1680     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1681     int min_cb_width     = s->ps.sps->min_cb_width;
1682     int x_cb             = x0 >> log2_min_cb_size;
1683     int y_cb             = y0 >> log2_min_cb_size;
1684     int x_pu, y_pu;
1685     int i, j;
1686
1687     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1688
1689     if (!skip_flag)
1690         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1691
1692     if (skip_flag || lc->pu.merge_flag) {
1693         if (s->sh.max_num_merge_cand > 1)
1694             merge_idx = ff_hevc_merge_idx_decode(s);
1695         else
1696             merge_idx = 0;
1697
1698         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1699                                    partIdx, merge_idx, &current_mv);
1700     } else {
1701         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1702                               partIdx, merge_idx, &current_mv);
1703     }
1704
1705     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1706     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1707
1708     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1709         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1710             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1711
1712     if (current_mv.pred_flag[0]) {
1713         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1714         if (!ref0)
1715             return;
1716         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1717     }
1718     if (current_mv.pred_flag[1]) {
1719         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1720         if (!ref1)
1721             return;
1722         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1723     }
1724
1725     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1726         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1727         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1728
1729         luma_mc(s, tmp, tmpstride, ref0->frame,
1730                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1731
1732         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1733             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1734             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1735                                                s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1736                                                s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1737                                                dst0, s->frame->linesize[0], tmp,
1738                                                tmpstride, nPbH);
1739         } else {
1740             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1741         }
1742         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1743                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1744
1745         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1746             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1747             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1748                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1749                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1750                                                       dst1, s->frame->linesize[1], tmp, tmpstride,
1751                                                       nPbH / 2);
1752             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1753                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1754                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1755                                                       dst2, s->frame->linesize[2], tmp2, tmpstride,
1756                                                       nPbH / 2);
1757         } else {
1758             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1759             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1760         }
1761     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1762         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1763         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1764
1765         luma_mc(s, tmp, tmpstride, ref1->frame,
1766                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1767
1768         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1769             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1770             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1771                                                s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1772                                                s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1773                                                dst0, s->frame->linesize[0], tmp, tmpstride,
1774                                                nPbH);
1775         } else {
1776             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1777         }
1778
1779         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1780                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1781
1782         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1783             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1784             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1785                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1786                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1787                                                       dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
1788             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1789                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1790                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1791                                                       dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
1792         } else {
1793             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1794             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1795         }
1796     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1797         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1798         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1799         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1800         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1801
1802         luma_mc(s, tmp, tmpstride, ref0->frame,
1803                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1804         luma_mc(s, tmp2, tmpstride, ref1->frame,
1805                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1806
1807         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1808             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1809             s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
1810                                                    s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1811                                                    s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1812                                                    s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1813                                                    s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1814                                                    dst0, s->frame->linesize[0],
1815                                                    tmp, tmp2, tmpstride, nPbH);
1816         } else {
1817             s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
1818                                                          tmp, tmp2, tmpstride, nPbH);
1819         }
1820
1821         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1822                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1823         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1824                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1825
1826         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1827             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1828             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1829                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1830                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1831                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1832                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1833                                                           dst1, s->frame->linesize[1], tmp, tmp3,
1834                                                           tmpstride, nPbH / 2);
1835             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1836                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1837                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1838                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1839                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1840                                                           dst2, s->frame->linesize[2], tmp2, tmp4,
1841                                                           tmpstride, nPbH / 2);
1842         } else {
1843             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3,  tmpstride, nPbH/2);
1844             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
1845         }
1846     }
1847 }
1848
1849 /**
1850  * 8.4.1
1851  */
1852 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1853                                 int prev_intra_luma_pred_flag)
1854 {
1855     HEVCLocalContext *lc = &s->HEVClc;
1856     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1857     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1858     int min_pu_width     = s->ps.sps->min_pu_width;
1859     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1860     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1861     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1862
1863     int cand_up   = (lc->ctb_up_flag || y0b) ?
1864                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1865     int cand_left = (lc->ctb_left_flag || x0b) ?
1866                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1867
1868     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1869
1870     MvField *tab_mvf = s->ref->tab_mvf;
1871     int intra_pred_mode;
1872     int candidate[3];
1873     int i, j;
1874
1875     // intra_pred_mode prediction does not cross vertical CTB boundaries
1876     if ((y0 - 1) < y_ctb)
1877         cand_up = INTRA_DC;
1878
1879     if (cand_left == cand_up) {
1880         if (cand_left < 2) {
1881             candidate[0] = INTRA_PLANAR;
1882             candidate[1] = INTRA_DC;
1883             candidate[2] = INTRA_ANGULAR_26;
1884         } else {
1885             candidate[0] = cand_left;
1886             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1887             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1888         }
1889     } else {
1890         candidate[0] = cand_left;
1891         candidate[1] = cand_up;
1892         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1893             candidate[2] = INTRA_PLANAR;
1894         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1895             candidate[2] = INTRA_DC;
1896         } else {
1897             candidate[2] = INTRA_ANGULAR_26;
1898         }
1899     }
1900
1901     if (prev_intra_luma_pred_flag) {
1902         intra_pred_mode = candidate[lc->pu.mpm_idx];
1903     } else {
1904         if (candidate[0] > candidate[1])
1905             FFSWAP(uint8_t, candidate[0], candidate[1]);
1906         if (candidate[0] > candidate[2])
1907             FFSWAP(uint8_t, candidate[0], candidate[2]);
1908         if (candidate[1] > candidate[2])
1909             FFSWAP(uint8_t, candidate[1], candidate[2]);
1910
1911         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1912         for (i = 0; i < 3; i++)
1913             if (intra_pred_mode >= candidate[i])
1914                 intra_pred_mode++;
1915     }
1916
1917     /* write the intra prediction units into the mv array */
1918     if (!size_in_pus)
1919         size_in_pus = 1;
1920     for (i = 0; i < size_in_pus; i++) {
1921         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1922                intra_pred_mode, size_in_pus);
1923
1924         for (j = 0; j < size_in_pus; j++) {
1925             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1926             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1927             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1928             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1929             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1930             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1931             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1932             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1933             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1934         }
1935     }
1936
1937     return intra_pred_mode;
1938 }
1939
1940 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1941                                           int log2_cb_size, int ct_depth)
1942 {
1943     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1944     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1945     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1946     int y;
1947
1948     for (y = 0; y < length; y++)
1949         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1950                ct_depth, length);
1951 }
1952
1953 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1954                                   int log2_cb_size)
1955 {
1956     HEVCLocalContext *lc = &s->HEVClc;
1957     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1958     uint8_t prev_intra_luma_pred_flag[4];
1959     int split   = lc->cu.part_mode == PART_NxN;
1960     int pb_size = (1 << log2_cb_size) >> split;
1961     int side    = split + 1;
1962     int chroma_mode;
1963     int i, j;
1964
1965     for (i = 0; i < side; i++)
1966         for (j = 0; j < side; j++)
1967             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1968
1969     for (i = 0; i < side; i++) {
1970         for (j = 0; j < side; j++) {
1971             if (prev_intra_luma_pred_flag[2 * i + j])
1972                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1973             else
1974                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1975
1976             lc->pu.intra_pred_mode[2 * i + j] =
1977                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1978                                      prev_intra_luma_pred_flag[2 * i + j]);
1979         }
1980     }
1981
1982     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1983     if (chroma_mode != 4) {
1984         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1985             lc->pu.intra_pred_mode_c = 34;
1986         else
1987             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1988     } else {
1989         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1990     }
1991 }
1992
1993 static void intra_prediction_unit_default_value(HEVCContext *s,
1994                                                 int x0, int y0,
1995                                                 int log2_cb_size)
1996 {
1997     HEVCLocalContext *lc = &s->HEVClc;
1998     int pb_size          = 1 << log2_cb_size;
1999     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2000     int min_pu_width     = s->ps.sps->min_pu_width;
2001     MvField *tab_mvf     = s->ref->tab_mvf;
2002     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2003     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2004     int j, k;
2005
2006     if (size_in_pus == 0)
2007         size_in_pus = 1;
2008     for (j = 0; j < size_in_pus; j++) {
2009         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2010         for (k = 0; k < size_in_pus; k++)
2011             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2012     }
2013 }
2014
2015 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2016 {
2017     int cb_size          = 1 << log2_cb_size;
2018     HEVCLocalContext *lc = &s->HEVClc;
2019     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2020     int length           = cb_size >> log2_min_cb_size;
2021     int min_cb_width     = s->ps.sps->min_cb_width;
2022     int x_cb             = x0 >> log2_min_cb_size;
2023     int y_cb             = y0 >> log2_min_cb_size;
2024     int x, y, ret;
2025
2026     lc->cu.x                = x0;
2027     lc->cu.y                = y0;
2028     lc->cu.pred_mode        = MODE_INTRA;
2029     lc->cu.part_mode        = PART_2Nx2N;
2030     lc->cu.intra_split_flag = 0;
2031
2032     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2033     for (x = 0; x < 4; x++)
2034         lc->pu.intra_pred_mode[x] = 1;
2035     if (s->ps.pps->transquant_bypass_enable_flag) {
2036         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2037         if (lc->cu.cu_transquant_bypass_flag)
2038             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2039     } else
2040         lc->cu.cu_transquant_bypass_flag = 0;
2041
2042     if (s->sh.slice_type != I_SLICE) {
2043         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2044
2045         x = y_cb * min_cb_width + x_cb;
2046         for (y = 0; y < length; y++) {
2047             memset(&s->skip_flag[x], skip_flag, length);
2048             x += min_cb_width;
2049         }
2050         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2051     }
2052
2053     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2054         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2055         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2056
2057         if (!s->sh.disable_deblocking_filter_flag)
2058             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2059     } else {
2060         int pcm_flag = 0;
2061
2062         if (s->sh.slice_type != I_SLICE)
2063             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2064         if (lc->cu.pred_mode != MODE_INTRA ||
2065             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2066             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2067             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2068                                       lc->cu.pred_mode == MODE_INTRA;
2069         }
2070
2071         if (lc->cu.pred_mode == MODE_INTRA) {
2072             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2073                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2074                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2075                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2076             }
2077             if (pcm_flag) {
2078                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2079                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2080                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2081                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2082
2083                 if (ret < 0)
2084                     return ret;
2085             } else {
2086                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2087             }
2088         } else {
2089             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2090             switch (lc->cu.part_mode) {
2091             case PART_2Nx2N:
2092                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2093                 break;
2094             case PART_2NxN:
2095                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2096                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2097                 break;
2098             case PART_Nx2N:
2099                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2100                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2101                 break;
2102             case PART_2NxnU:
2103                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2104                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2105                 break;
2106             case PART_2NxnD:
2107                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2108                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2109                 break;
2110             case PART_nLx2N:
2111                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2112                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2113                 break;
2114             case PART_nRx2N:
2115                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2116                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2117                 break;
2118             case PART_NxN:
2119                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2120                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2121                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2122                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2123                 break;
2124             }
2125         }
2126
2127         if (!pcm_flag) {
2128             int rqt_root_cbf = 1;
2129
2130             if (lc->cu.pred_mode != MODE_INTRA &&
2131                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2132                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2133             }
2134             if (rqt_root_cbf) {
2135                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2136                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2137                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2138                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2139                                          log2_cb_size,
2140                                          log2_cb_size, 0, 0, 0, 0);
2141                 if (ret < 0)
2142                     return ret;
2143             } else {
2144                 if (!s->sh.disable_deblocking_filter_flag)
2145                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2146             }
2147         }
2148     }
2149
2150     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2151         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2152
2153     x = y_cb * min_cb_width + x_cb;
2154     for (y = 0; y < length; y++) {
2155         memset(&s->qp_y_tab[x], lc->qp_y, length);
2156         x += min_cb_width;
2157     }
2158
2159     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2160
2161     return 0;
2162 }
2163
2164 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2165                                int log2_cb_size, int cb_depth)
2166 {
2167     HEVCLocalContext *lc = &s->HEVClc;
2168     const int cb_size    = 1 << log2_cb_size;
2169     int split_cu;
2170
2171     lc->ct.depth = cb_depth;
2172     if (x0 + cb_size <= s->ps.sps->width  &&
2173         y0 + cb_size <= s->ps.sps->height &&
2174         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2175         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2176     } else {
2177         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2178     }
2179     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2180         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2181         lc->tu.is_cu_qp_delta_coded = 0;
2182         lc->tu.cu_qp_delta          = 0;
2183     }
2184
2185     if (split_cu) {
2186         const int cb_size_split = cb_size >> 1;
2187         const int x1 = x0 + cb_size_split;
2188         const int y1 = y0 + cb_size_split;
2189
2190         log2_cb_size--;
2191         cb_depth++;
2192
2193 #define SUBDIVIDE(x, y)                                                \
2194 do {                                                                   \
2195     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2196         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2197         if (ret < 0)                                                   \
2198             return ret;                                                \
2199     }                                                                  \
2200 } while (0)
2201
2202         SUBDIVIDE(x0, y0);
2203         SUBDIVIDE(x1, y0);
2204         SUBDIVIDE(x0, y1);
2205         SUBDIVIDE(x1, y1);
2206     } else {
2207         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2208         if (ret < 0)
2209             return ret;
2210     }
2211
2212     return 0;
2213 }
2214
2215 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2216                                  int ctb_addr_ts)
2217 {
2218     HEVCLocalContext *lc  = &s->HEVClc;
2219     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2220     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2221     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2222
2223     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2224
2225     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2226         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2227             lc->first_qp_group = 1;
2228         lc->end_of_tiles_x = s->ps.sps->width;
2229     } else if (s->ps.pps->tiles_enabled_flag) {
2230         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2231             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2232             lc->start_of_tiles_x = x_ctb;
2233             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2234             lc->first_qp_group   = 1;
2235         }
2236     } else {
2237         lc->end_of_tiles_x = s->ps.sps->width;
2238     }
2239
2240     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2241
2242     lc->boundary_flags = 0;
2243     if (s->ps.pps->tiles_enabled_flag) {
2244         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2245             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2246         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2247             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2248         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2249             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2250         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2251             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2252     } else {
2253         if (!ctb_addr_in_slice)
2254             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2255         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2256             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2257     }
2258
2259     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2260     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2261     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2262     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2263 }
2264
2265 static int hls_slice_data(HEVCContext *s)
2266 {
2267     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2268     int more_data   = 1;
2269     int x_ctb       = 0;
2270     int y_ctb       = 0;
2271     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2272     int ret;
2273
2274     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2275         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2276
2277         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2278         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2279         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2280
2281         ff_hevc_cabac_init(s, ctb_addr_ts);
2282
2283         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2284
2285         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2286         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2287         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2288
2289         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2290         if (ret < 0)
2291             return ret;
2292         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2293
2294         ctb_addr_ts++;
2295         ff_hevc_save_states(s, ctb_addr_ts);
2296         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2297     }
2298
2299     if (x_ctb + ctb_size >= s->ps.sps->width &&
2300         y_ctb + ctb_size >= s->ps.sps->height)
2301         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2302
2303     return ctb_addr_ts;
2304 }
2305
2306 static void restore_tqb_pixels(HEVCContext *s)
2307 {
2308     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2309     int x, y, c_idx;
2310
2311     for (c_idx = 0; c_idx < 3; c_idx++) {
2312         ptrdiff_t stride = s->frame->linesize[c_idx];
2313         int hshift       = s->ps.sps->hshift[c_idx];
2314         int vshift       = s->ps.sps->vshift[c_idx];
2315         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2316             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2317                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2318                     int n;
2319                     int len      = min_pu_size >> hshift;
2320                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2321                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2322                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2323                         memcpy(dst, src, len);
2324                         src += stride;
2325                         dst += stride;
2326                     }
2327                 }
2328             }
2329         }
2330     }
2331 }
2332
2333 static int set_side_data(HEVCContext *s)
2334 {
2335     AVFrame *out = s->ref->frame;
2336
2337     if (s->sei_frame_packing_present &&
2338         s->frame_packing_arrangement_type >= 3 &&
2339         s->frame_packing_arrangement_type <= 5 &&
2340         s->content_interpretation_type > 0 &&
2341         s->content_interpretation_type < 3) {
2342         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2343         if (!stereo)
2344             return AVERROR(ENOMEM);
2345
2346         switch (s->frame_packing_arrangement_type) {
2347         case 3:
2348             if (s->quincunx_subsampling)
2349                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2350             else
2351                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2352             break;
2353         case 4:
2354             stereo->type = AV_STEREO3D_TOPBOTTOM;
2355             break;
2356         case 5:
2357             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2358             break;
2359         }
2360
2361         if (s->content_interpretation_type == 2)
2362             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2363     }
2364
2365     if (s->sei_display_orientation_present &&
2366         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2367         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2368         AVFrameSideData *rotation = av_frame_new_side_data(out,
2369                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2370                                                            sizeof(int32_t) * 9);
2371         if (!rotation)
2372             return AVERROR(ENOMEM);
2373
2374         av_display_rotation_set((int32_t *)rotation->data, angle);
2375         av_display_matrix_flip((int32_t *)rotation->data,
2376                                s->sei_hflip, s->sei_vflip);
2377     }
2378
2379     return 0;
2380 }
2381
2382 static int hevc_frame_start(HEVCContext *s)
2383 {
2384     HEVCLocalContext *lc = &s->HEVClc;
2385     int ret;
2386
2387     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2388     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2389     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2390     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2391
2392     lc->start_of_tiles_x = 0;
2393     s->is_decoded        = 0;
2394     s->first_nal_type    = s->nal_unit_type;
2395
2396     if (s->ps.pps->tiles_enabled_flag)
2397         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2398
2399     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2400                               s->poc);
2401     if (ret < 0)
2402         goto fail;
2403
2404     ret = ff_hevc_frame_rps(s);
2405     if (ret < 0) {
2406         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2407         goto fail;
2408     }
2409
2410     s->ref->frame->key_frame = IS_IRAP(s);
2411
2412     ret = set_side_data(s);
2413     if (ret < 0)
2414         goto fail;
2415
2416     av_frame_unref(s->output_frame);
2417     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2418     if (ret < 0)
2419         goto fail;
2420
2421     ff_thread_finish_setup(s->avctx);
2422
2423     return 0;
2424
2425 fail:
2426     if (s->ref)
2427         ff_hevc_unref_frame(s, s->ref, ~0);
2428     s->ref = NULL;
2429     return ret;
2430 }
2431
2432 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2433 {
2434     HEVCLocalContext *lc = &s->HEVClc;
2435     GetBitContext *gb    = &lc->gb;
2436     int ctb_addr_ts, ret;
2437
2438     *gb              = nal->gb;
2439     s->nal_unit_type = nal->type;
2440     s->temporal_id   = nal->temporal_id;
2441
2442     switch (s->nal_unit_type) {
2443     case NAL_VPS:
2444         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2445         if (ret < 0)
2446             goto fail;
2447         break;
2448     case NAL_SPS:
2449         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2450                                      s->apply_defdispwin);
2451         if (ret < 0)
2452             goto fail;
2453         break;
2454     case NAL_PPS:
2455         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2456         if (ret < 0)
2457             goto fail;
2458         break;
2459     case NAL_SEI_PREFIX:
2460     case NAL_SEI_SUFFIX:
2461         ret = ff_hevc_decode_nal_sei(s);
2462         if (ret < 0)
2463             goto fail;
2464         break;
2465     case NAL_TRAIL_R:
2466     case NAL_TRAIL_N:
2467     case NAL_TSA_N:
2468     case NAL_TSA_R:
2469     case NAL_STSA_N:
2470     case NAL_STSA_R:
2471     case NAL_BLA_W_LP:
2472     case NAL_BLA_W_RADL:
2473     case NAL_BLA_N_LP:
2474     case NAL_IDR_W_RADL:
2475     case NAL_IDR_N_LP:
2476     case NAL_CRA_NUT:
2477     case NAL_RADL_N:
2478     case NAL_RADL_R:
2479     case NAL_RASL_N:
2480     case NAL_RASL_R:
2481         ret = hls_slice_header(s);
2482         if (ret < 0)
2483             return ret;
2484
2485         if (s->max_ra == INT_MAX) {
2486             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2487                 s->max_ra = s->poc;
2488             } else {
2489                 if (IS_IDR(s))
2490                     s->max_ra = INT_MIN;
2491             }
2492         }
2493
2494         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2495             s->poc <= s->max_ra) {
2496             s->is_decoded = 0;
2497             break;
2498         } else {
2499             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2500                 s->max_ra = INT_MIN;
2501         }
2502
2503         if (s->sh.first_slice_in_pic_flag) {
2504             ret = hevc_frame_start(s);
2505             if (ret < 0)
2506                 return ret;
2507         } else if (!s->ref) {
2508             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2509             goto fail;
2510         }
2511
2512         if (s->nal_unit_type != s->first_nal_type) {
2513             av_log(s->avctx, AV_LOG_ERROR,
2514                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2515                    s->first_nal_type, s->nal_unit_type);
2516             return AVERROR_INVALIDDATA;
2517         }
2518
2519         if (!s->sh.dependent_slice_segment_flag &&
2520             s->sh.slice_type != I_SLICE) {
2521             ret = ff_hevc_slice_rpl(s);
2522             if (ret < 0) {
2523                 av_log(s->avctx, AV_LOG_WARNING,
2524                        "Error constructing the reference lists for the current slice.\n");
2525                 goto fail;
2526             }
2527         }
2528
2529         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2530             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2531             if (ret < 0)
2532                 goto fail;
2533         }
2534
2535         if (s->avctx->hwaccel) {
2536             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2537             if (ret < 0)
2538                 goto fail;
2539         } else {
2540             ctb_addr_ts = hls_slice_data(s);
2541             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2542                 s->is_decoded = 1;
2543                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2544                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2545                     s->ps.sps->sao_enabled)
2546                     restore_tqb_pixels(s);
2547             }
2548
2549             if (ctb_addr_ts < 0) {
2550                 ret = ctb_addr_ts;
2551                 goto fail;
2552             }
2553         }
2554         break;
2555     case NAL_EOS_NUT:
2556     case NAL_EOB_NUT:
2557         s->seq_decode = (s->seq_decode + 1) & 0xff;
2558         s->max_ra     = INT_MAX;
2559         break;
2560     case NAL_AUD:
2561     case NAL_FD_NUT:
2562         break;
2563     default:
2564         av_log(s->avctx, AV_LOG_INFO,
2565                "Skipping NAL unit %d\n", s->nal_unit_type);
2566     }
2567
2568     return 0;
2569 fail:
2570     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2571         return ret;
2572     return 0;
2573 }
2574
2575 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2576 {
2577     int i, ret = 0;
2578
2579     s->ref = NULL;
2580     s->eos = 0;
2581
2582     /* split the input packet into NAL units, so we know the upper bound on the
2583      * number of slices in the frame */
2584     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
2585                                 s->nal_length_size);
2586     if (ret < 0) {
2587         av_log(s->avctx, AV_LOG_ERROR,
2588                "Error splitting the input into NAL units.\n");
2589         return ret;
2590     }
2591
2592     for (i = 0; i < s->pkt.nb_nals; i++) {
2593         if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2594             s->pkt.nals[i].type == NAL_EOS_NUT)
2595             s->eos = 1;
2596     }
2597
2598     /* decode the NAL units */
2599     for (i = 0; i < s->pkt.nb_nals; i++) {
2600         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2601         if (ret < 0) {
2602             av_log(s->avctx, AV_LOG_WARNING,
2603                    "Error parsing NAL unit #%d.\n", i);
2604             goto fail;
2605         }
2606     }
2607
2608 fail:
2609     if (s->ref)
2610         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2611
2612     return ret;
2613 }
2614
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx logging context passed to av_log()
 * @param level   log level
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2621
2622 static int verify_md5(HEVCContext *s, AVFrame *frame)
2623 {
2624     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2625     int pixel_shift;
2626     int i, j;
2627
2628     if (!desc)
2629         return AVERROR(EINVAL);
2630
2631     pixel_shift = desc->comp[0].depth > 8;
2632
2633     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2634            s->poc);
2635
2636     /* the checksums are LE, so we have to byteswap for >8bpp formats
2637      * on BE arches */
2638 #if HAVE_BIGENDIAN
2639     if (pixel_shift && !s->checksum_buf) {
2640         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2641                        FFMAX3(frame->linesize[0], frame->linesize[1],
2642                               frame->linesize[2]));
2643         if (!s->checksum_buf)
2644             return AVERROR(ENOMEM);
2645     }
2646 #endif
2647
2648     for (i = 0; frame->data[i]; i++) {
2649         int width  = s->avctx->coded_width;
2650         int height = s->avctx->coded_height;
2651         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2652         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2653         uint8_t md5[16];
2654
2655         av_md5_init(s->md5_ctx);
2656         for (j = 0; j < h; j++) {
2657             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2658 #if HAVE_BIGENDIAN
2659             if (pixel_shift) {
2660                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2661                                     (const uint16_t *) src, w);
2662                 src = s->checksum_buf;
2663             }
2664 #endif
2665             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2666         }
2667         av_md5_final(s->md5_ctx, md5);
2668
2669         if (!memcmp(md5, s->md5[i], 16)) {
2670             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2671             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2672             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2673         } else {
2674             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2675             print_md5(s->avctx, AV_LOG_ERROR, md5);
2676             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2677             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2678             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2679             return AVERROR_INVALIDDATA;
2680         }
2681     }
2682
2683     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2684
2685     return 0;
2686 }
2687
2688 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2689                              AVPacket *avpkt)
2690 {
2691     int ret;
2692     HEVCContext *s = avctx->priv_data;
2693
2694     if (!avpkt->size) {
2695         ret = ff_hevc_output_frame(s, data, 1);
2696         if (ret < 0)
2697             return ret;
2698
2699         *got_output = ret;
2700         return 0;
2701     }
2702
2703     s->ref = NULL;
2704     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2705     if (ret < 0)
2706         return ret;
2707
2708     if (avctx->hwaccel) {
2709         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2710             av_log(avctx, AV_LOG_ERROR,
2711                    "hardware accelerator failed to decode picture\n");
2712     } else {
2713         /* verify the SEI checksum */
2714         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2715             s->is_md5) {
2716             ret = verify_md5(s, s->ref->frame);
2717             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2718                 ff_hevc_unref_frame(s, s->ref, ~0);
2719                 return ret;
2720             }
2721         }
2722     }
2723     s->is_md5 = 0;
2724
2725     if (s->is_decoded) {
2726         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2727         s->is_decoded = 0;
2728     }
2729
2730     if (s->output_frame->buf[0]) {
2731         av_frame_move_ref(data, s->output_frame);
2732         *got_output = 1;
2733     }
2734
2735     return avpkt->size;
2736 }
2737
2738 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2739 {
2740     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2741     if (ret < 0)
2742         return ret;
2743
2744     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2745     if (!dst->tab_mvf_buf)
2746         goto fail;
2747     dst->tab_mvf = src->tab_mvf;
2748
2749     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2750     if (!dst->rpl_tab_buf)
2751         goto fail;
2752     dst->rpl_tab = src->rpl_tab;
2753
2754     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2755     if (!dst->rpl_buf)
2756         goto fail;
2757
2758     dst->poc        = src->poc;
2759     dst->ctb_count  = src->ctb_count;
2760     dst->window     = src->window;
2761     dst->flags      = src->flags;
2762     dst->sequence   = src->sequence;
2763
2764     if (src->hwaccel_picture_private) {
2765         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2766         if (!dst->hwaccel_priv_buf)
2767             goto fail;
2768         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2769     }
2770
2771     return 0;
2772 fail:
2773     ff_hevc_unref_frame(s, dst, ~0);
2774     return AVERROR(ENOMEM);
2775 }
2776
2777 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2778 {
2779     HEVCContext       *s = avctx->priv_data;
2780     int i;
2781
2782     pic_arrays_free(s);
2783
2784     av_freep(&s->md5_ctx);
2785
2786     av_frame_free(&s->tmp_frame);
2787     av_frame_free(&s->output_frame);
2788
2789     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2790         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2791         av_frame_free(&s->DPB[i].frame);
2792     }
2793
2794     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2795         av_buffer_unref(&s->ps.vps_list[i]);
2796     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2797         av_buffer_unref(&s->ps.sps_list[i]);
2798     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2799         av_buffer_unref(&s->ps.pps_list[i]);
2800
2801     ff_h2645_packet_uninit(&s->pkt);
2802
2803     return 0;
2804 }
2805
2806 static av_cold int hevc_init_context(AVCodecContext *avctx)
2807 {
2808     HEVCContext *s = avctx->priv_data;
2809     int i;
2810
2811     s->avctx = avctx;
2812
2813     s->tmp_frame = av_frame_alloc();
2814     if (!s->tmp_frame)
2815         goto fail;
2816
2817     s->output_frame = av_frame_alloc();
2818     if (!s->output_frame)
2819         goto fail;
2820
2821     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2822         s->DPB[i].frame = av_frame_alloc();
2823         if (!s->DPB[i].frame)
2824             goto fail;
2825         s->DPB[i].tf.f = s->DPB[i].frame;
2826     }
2827
2828     s->max_ra = INT_MAX;
2829
2830     s->md5_ctx = av_md5_alloc();
2831     if (!s->md5_ctx)
2832         goto fail;
2833
2834     ff_bswapdsp_init(&s->bdsp);
2835
2836     s->context_initialized = 1;
2837
2838     return 0;
2839
2840 fail:
2841     hevc_decode_free(avctx);
2842     return AVERROR(ENOMEM);
2843 }
2844
2845 static int hevc_update_thread_context(AVCodecContext *dst,
2846                                       const AVCodecContext *src)
2847 {
2848     HEVCContext *s  = dst->priv_data;
2849     HEVCContext *s0 = src->priv_data;
2850     int i, ret;
2851
2852     if (!s->context_initialized) {
2853         ret = hevc_init_context(dst);
2854         if (ret < 0)
2855             return ret;
2856     }
2857
2858     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2859         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2860         if (s0->DPB[i].frame->buf[0]) {
2861             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2862             if (ret < 0)
2863                 return ret;
2864         }
2865     }
2866
2867     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2868         av_buffer_unref(&s->ps.vps_list[i]);
2869         if (s0->ps.vps_list[i]) {
2870             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2871             if (!s->ps.vps_list[i])
2872                 return AVERROR(ENOMEM);
2873         }
2874     }
2875
2876     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2877         av_buffer_unref(&s->ps.sps_list[i]);
2878         if (s0->ps.sps_list[i]) {
2879             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
2880             if (!s->ps.sps_list[i])
2881                 return AVERROR(ENOMEM);
2882         }
2883     }
2884
2885     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
2886         av_buffer_unref(&s->ps.pps_list[i]);
2887         if (s0->ps.pps_list[i]) {
2888             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
2889             if (!s->ps.pps_list[i])
2890                 return AVERROR(ENOMEM);
2891         }
2892     }
2893
2894     if (s->ps.sps != s0->ps.sps)
2895         ret = set_sps(s, s0->ps.sps);
2896
2897     s->seq_decode = s0->seq_decode;
2898     s->seq_output = s0->seq_output;
2899     s->pocTid0    = s0->pocTid0;
2900     s->max_ra     = s0->max_ra;
2901
2902     s->is_nalff        = s0->is_nalff;
2903     s->nal_length_size = s0->nal_length_size;
2904
2905     if (s0->eos) {
2906         s->seq_decode = (s->seq_decode + 1) & 0xff;
2907         s->max_ra = INT_MAX;
2908     }
2909
2910     return 0;
2911 }
2912
2913 static int hevc_decode_extradata(HEVCContext *s)
2914 {
2915     AVCodecContext *avctx = s->avctx;
2916     GetByteContext gb;
2917     int ret, i;
2918
2919     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
2920
2921     if (avctx->extradata_size > 3 &&
2922         (avctx->extradata[0] || avctx->extradata[1] ||
2923          avctx->extradata[2] > 1)) {
2924         /* It seems the extradata is encoded as hvcC format.
2925          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2926          * is finalized. When finalized, configurationVersion will be 1 and we
2927          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2928         int i, j, num_arrays, nal_len_size;
2929
2930         s->is_nalff = 1;
2931
2932         bytestream2_skip(&gb, 21);
2933         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2934         num_arrays   = bytestream2_get_byte(&gb);
2935
2936         /* nal units in the hvcC always have length coded with 2 bytes,
2937          * so put a fake nal_length_size = 2 while parsing them */
2938         s->nal_length_size = 2;
2939
2940         /* Decode nal units from hvcC. */
2941         for (i = 0; i < num_arrays; i++) {
2942             int type = bytestream2_get_byte(&gb) & 0x3f;
2943             int cnt  = bytestream2_get_be16(&gb);
2944
2945             for (j = 0; j < cnt; j++) {
2946                 // +2 for the nal size field
2947                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2948                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2949                     av_log(s->avctx, AV_LOG_ERROR,
2950                            "Invalid NAL unit size in extradata.\n");
2951                     return AVERROR_INVALIDDATA;
2952                 }
2953
2954                 ret = decode_nal_units(s, gb.buffer, nalsize);
2955                 if (ret < 0) {
2956                     av_log(avctx, AV_LOG_ERROR,
2957                            "Decoding nal unit %d %d from hvcC failed\n",
2958                            type, i);
2959                     return ret;
2960                 }
2961                 bytestream2_skip(&gb, nalsize);
2962             }
2963         }
2964
2965         /* Now store right nal length size, that will be used to parse
2966          * all other nals */
2967         s->nal_length_size = nal_len_size;
2968     } else {
2969         s->is_nalff = 0;
2970         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
2971         if (ret < 0)
2972             return ret;
2973     }
2974
2975     /* export stream parameters from the first SPS */
2976     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2977         if (s->ps.sps_list[i]) {
2978             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
2979             export_stream_params(s->avctx, &s->ps, sps);
2980             break;
2981         }
2982     }
2983
2984     return 0;
2985 }
2986
2987 static av_cold int hevc_decode_init(AVCodecContext *avctx)
2988 {
2989     HEVCContext *s = avctx->priv_data;
2990     int ret;
2991
2992     avctx->internal->allocate_progress = 1;
2993
2994     ret = hevc_init_context(avctx);
2995     if (ret < 0)
2996         return ret;
2997
2998     if (avctx->extradata_size > 0 && avctx->extradata) {
2999         ret = hevc_decode_extradata(s);
3000         if (ret < 0) {
3001             hevc_decode_free(avctx);
3002             return ret;
3003         }
3004     }
3005
3006     return 0;
3007 }
3008
3009 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3010 {
3011     HEVCContext *s = avctx->priv_data;
3012     int ret;
3013
3014     memset(s, 0, sizeof(*s));
3015
3016     ret = hevc_init_context(avctx);
3017     if (ret < 0)
3018         return ret;
3019
3020     return 0;
3021 }
3022
3023 static void hevc_decode_flush(AVCodecContext *avctx)
3024 {
3025     HEVCContext *s = avctx->priv_data;
3026     ff_hevc_flush_dpb(s);
3027     s->max_ra = INT_MAX;
3028 }
3029
3030 #define OFFSET(x) offsetof(HEVCContext, x)
3031 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3032
3033 static const AVOption options[] = {
3034     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3035         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3036     { NULL },
3037 };
3038
3039 static const AVClass hevc_decoder_class = {
3040     .class_name = "HEVC decoder",
3041     .item_name  = av_default_item_name,
3042     .option     = options,
3043     .version    = LIBAVUTIL_VERSION_INT,
3044 };
3045
3046 AVCodec ff_hevc_decoder = {
3047     .name                  = "hevc",
3048     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3049     .type                  = AVMEDIA_TYPE_VIDEO,
3050     .id                    = AV_CODEC_ID_HEVC,
3051     .priv_data_size        = sizeof(HEVCContext),
3052     .priv_class            = &hevc_decoder_class,
3053     .init                  = hevc_decode_init,
3054     .close                 = hevc_decode_free,
3055     .decode                = hevc_decode_frame,
3056     .flush                 = hevc_decode_flush,
3057     .update_thread_context = hevc_update_thread_context,
3058     .init_thread_copy      = hevc_init_thread_copy,
3059     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3060                              AV_CODEC_CAP_FRAME_THREADS,
3061     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3062 };