git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40
  41 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  42 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  43 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  44
  45 static const uint8_t scan_1x1[1] = { 0 };
  46
  47 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  48
  49 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  50
  51 static const uint8_t horiz_scan4x4_x[16] = {
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55     0, 1, 2, 3,
  56 };
  57
  58 static const uint8_t horiz_scan4x4_y[16] = {
  59     0, 0, 0, 0,
  60     1, 1, 1, 1,
  61     2, 2, 2, 2,
  62     3, 3, 3, 3,
  63 };
  64
  65 static const uint8_t horiz_scan8x8_inv[8][8] = {
  66     {  0,  1,  2,  3, 16, 17, 18, 19, },
  67     {  4,  5,  6,  7, 20, 21, 22, 23, },
  68     {  8,  9, 10, 11, 24, 25, 26, 27, },
  69     { 12, 13, 14, 15, 28, 29, 30, 31, },
  70     { 32, 33, 34, 35, 48, 49, 50, 51, },
  71     { 36, 37, 38, 39, 52, 53, 54, 55, },
  72     { 40, 41, 42, 43, 56, 57, 58, 59, },
  73     { 44, 45, 46, 47, 60, 61, 62, 63, },
  74 };
  75
  76 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  77
  78 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  79
  80 static const uint8_t diag_scan2x2_inv[2][2] = {
  81     { 0, 2, },
  82     { 1, 3, },
  83 };
  84
  85 static const uint8_t diag_scan4x4_inv[4][4] = {
  86     { 0,  2,  5,  9, },
  87     { 1,  4,  8, 12, },
  88     { 3,  7, 11, 14, },
  89     { 6, 10, 13, 15, },
  90 };
  91
  92 static const uint8_t diag_scan8x8_inv[8][8] = {
  93     {  0,  2,  5,  9, 14, 20, 27, 35, },
  94     {  1,  4,  8, 13, 19, 26, 34, 42, },
  95     {  3,  7, 12, 18, 25, 33, 41, 48, },
  96     {  6, 11, 17, 24, 32, 40, 47, 53, },
  97     { 10, 16, 23, 31, 39, 46, 52, 57, },
  98     { 15, 22, 30, 38, 45, 51, 56, 60, },
  99     { 21, 29, 37, 44, 50, 55, 59, 62, },
 100     { 28, 36, 43, 49, 54, 58, 61, 63, },
 101 };
 102
 103 /**
  104  * NOTE: Each function hls_foo corresponds to the function foo in the
 105  * specification (HLS stands for High Level Syntax).
 106  */
 107
 108 /**
 109  * Section 5.7
 110  */
 111
 112 /* free everything allocated  by pic_arrays_init() */
 113 static void pic_arrays_free(HEVCContext *s)
 114 {
 115     av_freep(&s->sao);
 116     av_freep(&s->deblock);
 117
 118     av_freep(&s->skip_flag);
 119     av_freep(&s->tab_ct_depth);
 120
 121     av_freep(&s->tab_ipm);
 122     av_freep(&s->cbf_luma);
 123     av_freep(&s->is_pcm);
 124
 125     av_freep(&s->qp_y_tab);
 126     av_freep(&s->tab_slice_address);
 127     av_freep(&s->filter_slice_edges);
 128
 129     av_freep(&s->horizontal_bs);
 130     av_freep(&s->vertical_bs);
 131
 132     av_buffer_pool_uninit(&s->tab_mvf_pool);
 133     av_buffer_pool_uninit(&s->rpl_tab_pool);
 134 }
 135
 136 /* allocate arrays that depend on frame dimensions */
 137 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 138 {
 139     int log2_min_cb_size = sps->log2_min_cb_size;
 140     int width            = sps->width;
 141     int height           = sps->height;
 142     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 143                            ((height >> log2_min_cb_size) + 1);
 144     int ctb_count        = sps->ctb_width * sps->ctb_height;
 145     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 146
 147     s->bs_width  = width  >> 3;
 148     s->bs_height = height >> 3;
 149
 150     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 151     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 152     if (!s->sao || !s->deblock)
 153         goto fail;
 154
 155     s->skip_flag    = av_malloc(pic_size_in_ctb);
 156     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 157     if (!s->skip_flag || !s->tab_ct_depth)
 158         goto fail;
 159
 160     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 161     s->tab_ipm  = av_mallocz(min_pu_size);
 162     s->is_pcm   = av_malloc(min_pu_size);
 163     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 164         goto fail;
 165
 166     s->filter_slice_edges = av_malloc(ctb_count);
 167     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 168                                       sizeof(*s->tab_slice_address));
 169     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 170                                       sizeof(*s->qp_y_tab));
 171     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 172         goto fail;
 173
 174     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 175     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 176     if (!s->horizontal_bs || !s->vertical_bs)
 177         goto fail;
 178
 179     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 180                                           av_buffer_alloc);
 181     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 182                                           av_buffer_allocz);
 183     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 184         goto fail;
 185
 186     return 0;
 187
 188 fail:
 189     pic_arrays_free(s);
 190     return AVERROR(ENOMEM);
 191 }
 192
 193 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 194 {
 195     int i = 0;
 196     int j = 0;
 197     uint8_t luma_weight_l0_flag[16];
 198     uint8_t chroma_weight_l0_flag[16];
 199     uint8_t luma_weight_l1_flag[16];
 200     uint8_t chroma_weight_l1_flag[16];
 201
 202     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
 203     if (s->ps.sps->chroma_format_idc != 0) {
 204         int delta = get_se_golomb(gb);
 205         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 206     }
 207
 208     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 209         luma_weight_l0_flag[i] = get_bits1(gb);
 210         if (!luma_weight_l0_flag[i]) {
 211             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 212             s->sh.luma_offset_l0[i] = 0;
 213         }
 214     }
 215     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 216         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 217             chroma_weight_l0_flag[i] = get_bits1(gb);
 218     } else {
 219         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 220             chroma_weight_l0_flag[i] = 0;
 221     }
 222     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 223         if (luma_weight_l0_flag[i]) {
 224             int delta_luma_weight_l0 = get_se_golomb(gb);
 225             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 226             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 227         }
 228         if (chroma_weight_l0_flag[i]) {
 229             for (j = 0; j < 2; j++) {
 230                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 231                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 232                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 233                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 234                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 235             }
 236         } else {
 237             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 238             s->sh.chroma_offset_l0[i][0] = 0;
 239             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 240             s->sh.chroma_offset_l0[i][1] = 0;
 241         }
 242     }
 243     if (s->sh.slice_type == B_SLICE) {
 244         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 245             luma_weight_l1_flag[i] = get_bits1(gb);
 246             if (!luma_weight_l1_flag[i]) {
 247                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 248                 s->sh.luma_offset_l1[i] = 0;
 249             }
 250         }
 251         if (s->ps.sps->chroma_format_idc != 0) {
 252             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 253                 chroma_weight_l1_flag[i] = get_bits1(gb);
 254         } else {
 255             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 256                 chroma_weight_l1_flag[i] = 0;
 257         }
 258         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 259             if (luma_weight_l1_flag[i]) {
 260                 int delta_luma_weight_l1 = get_se_golomb(gb);
 261                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 262                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 263             }
 264             if (chroma_weight_l1_flag[i]) {
 265                 for (j = 0; j < 2; j++) {
 266                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 267                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 268                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 269                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 270                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 271                 }
 272             } else {
 273                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 274                 s->sh.chroma_offset_l1[i][0] = 0;
 275                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 276                 s->sh.chroma_offset_l1[i][1] = 0;
 277             }
 278         }
 279     }
 280 }
 281
 282 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 283 {
 284     const HEVCSPS *sps = s->ps.sps;
 285     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 286     int prev_delta_msb = 0;
 287     unsigned int nb_sps = 0, nb_sh;
 288     int i;
 289
 290     rps->nb_refs = 0;
 291     if (!sps->long_term_ref_pics_present_flag)
 292         return 0;
 293
 294     if (sps->num_long_term_ref_pics_sps > 0)
 295         nb_sps = get_ue_golomb_long(gb);
 296     nb_sh = get_ue_golomb_long(gb);
 297
 298     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 299         return AVERROR_INVALIDDATA;
 300
 301     rps->nb_refs = nb_sh + nb_sps;
 302
 303     for (i = 0; i < rps->nb_refs; i++) {
 304         uint8_t delta_poc_msb_present;
 305
 306         if (i < nb_sps) {
 307             uint8_t lt_idx_sps = 0;
 308
 309             if (sps->num_long_term_ref_pics_sps > 1)
 310                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 311
 312             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 313             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 314         } else {
 315             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 316             rps->used[i] = get_bits1(gb);
 317         }
 318
 319         delta_poc_msb_present = get_bits1(gb);
 320         if (delta_poc_msb_present) {
 321             int delta = get_ue_golomb_long(gb);
 322
 323             if (i && i != nb_sps)
 324                 delta += prev_delta_msb;
 325
 326             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 327             prev_delta_msb = delta;
 328         }
 329     }
 330
 331     return 0;
 332 }
 333
 334 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
 335                                  const HEVCSPS *sps)
 336 {
 337     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 338     unsigned int num = 0, den = 0;
 339
 340     avctx->pix_fmt             = sps->pix_fmt;
 341     avctx->coded_width         = sps->width;
 342     avctx->coded_height        = sps->height;
 343     avctx->width               = sps->output_width;
 344     avctx->height              = sps->output_height;
 345     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 346     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 347     avctx->level               = sps->ptl.general_ptl.level_idc;
 348
 349     ff_set_sar(avctx, sps->vui.sar);
 350
 351     if (sps->vui.video_signal_type_present_flag)
 352         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 353                                                             : AVCOL_RANGE_MPEG;
 354     else
 355         avctx->color_range = AVCOL_RANGE_MPEG;
 356
 357     if (sps->vui.colour_description_present_flag) {
 358         avctx->color_primaries = sps->vui.colour_primaries;
 359         avctx->color_trc       = sps->vui.transfer_characteristic;
 360         avctx->colorspace      = sps->vui.matrix_coeffs;
 361     } else {
 362         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 363         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 364         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 365     }
 366
 367     if (vps->vps_timing_info_present_flag) {
 368         num = vps->vps_num_units_in_tick;
 369         den = vps->vps_time_scale;
 370     } else if (sps->vui.vui_timing_info_present_flag) {
 371         num = sps->vui.vui_num_units_in_tick;
 372         den = sps->vui.vui_time_scale;
 373     }
 374
 375     if (num != 0 && den != 0)
 376         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 377                   num, den, 1 << 30);
 378 }
 379
 380 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 381 {
 382     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
 383     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 384     int ret;
 385
 386     pic_arrays_free(s);
 387     s->ps.sps = NULL;
 388     s->ps.vps = NULL;
 389
 390     if (!sps)
 391         return 0;
 392
 393     ret = pic_arrays_init(s, sps);
 394     if (ret < 0)
 395         goto fail;
 396
 397     export_stream_params(s->avctx, &s->ps, sps);
 398
 399     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 400 #if CONFIG_HEVC_DXVA2_HWACCEL
 401         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 402 #endif
 403 #if CONFIG_HEVC_D3D11VA_HWACCEL
 404         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 405 #endif
 406     }
 407
 408     *fmt++ = sps->pix_fmt;
 409     *fmt = AV_PIX_FMT_NONE;
 410
 411     ret = ff_get_format(s->avctx, pix_fmts);
 412     if (ret < 0)
 413         goto fail;
 414     s->avctx->pix_fmt = ret;
 415
 416     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 417     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 418     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 419
 420     if (sps->sao_enabled && !s->avctx->hwaccel) {
 421         av_frame_unref(s->tmp_frame);
 422         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 423         if (ret < 0)
 424             goto fail;
 425         s->frame = s->tmp_frame;
 426     }
 427
 428     s->ps.sps = sps;
 429     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 430
 431     return 0;
 432
 433 fail:
 434     pic_arrays_free(s);
 435     s->ps.sps = NULL;
 436     return ret;
 437 }
 438
 439 static int hls_slice_header(HEVCContext *s)
 440 {
 441     GetBitContext *gb = &s->HEVClc.gb;
 442     SliceHeader *sh   = &s->sh;
 443     int i, ret;
 444
 445     // Coded parameters
 446     sh->first_slice_in_pic_flag = get_bits1(gb);
 447     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 448         s->seq_decode = (s->seq_decode + 1) & 0xff;
 449         s->max_ra     = INT_MAX;
 450         if (IS_IDR(s))
 451             ff_hevc_clear_refs(s);
 452     }
 453     if (IS_IRAP(s))
 454         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 455
 456     sh->pps_id = get_ue_golomb_long(gb);
 457     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 458         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 459         return AVERROR_INVALIDDATA;
 460     }
 461     if (!sh->first_slice_in_pic_flag &&
 462         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 463         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 464         return AVERROR_INVALIDDATA;
 465     }
 466     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 467
 468     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 469         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 470
 471         ff_hevc_clear_refs(s);
 472         ret = set_sps(s, s->ps.sps);
 473         if (ret < 0)
 474             return ret;
 475
 476         s->seq_decode = (s->seq_decode + 1) & 0xff;
 477         s->max_ra     = INT_MAX;
 478     }
 479
 480     sh->dependent_slice_segment_flag = 0;
 481     if (!sh->first_slice_in_pic_flag) {
 482         int slice_address_length;
 483
 484         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 485             sh->dependent_slice_segment_flag = get_bits1(gb);
 486
 487         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 488                                             s->ps.sps->ctb_height);
 489         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
 490         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 491             av_log(s->avctx, AV_LOG_ERROR,
 492                    "Invalid slice segment address: %u.\n",
 493                    sh->slice_segment_addr);
 494             return AVERROR_INVALIDDATA;
 495         }
 496
 497         if (!sh->dependent_slice_segment_flag) {
 498             sh->slice_addr = sh->slice_segment_addr;
 499             s->slice_idx++;
 500         }
 501     } else {
 502         sh->slice_segment_addr = sh->slice_addr = 0;
 503         s->slice_idx           = 0;
 504         s->slice_initialized   = 0;
 505     }
 506
 507     if (!sh->dependent_slice_segment_flag) {
 508         s->slice_initialized = 0;
 509
 510         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 511             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 512
 513         sh->slice_type = get_ue_golomb_long(gb);
 514         if (!(sh->slice_type == I_SLICE ||
 515               sh->slice_type == P_SLICE ||
 516               sh->slice_type == B_SLICE)) {
 517             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 518                    sh->slice_type);
 519             return AVERROR_INVALIDDATA;
 520         }
 521         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 522             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 523             return AVERROR_INVALIDDATA;
 524         }
 525
 526         // when flag is not present, picture is inferred to be output
 527         sh->pic_output_flag = 1;
 528         if (s->ps.pps->output_flag_present_flag)
 529             sh->pic_output_flag = get_bits1(gb);
 530
 531         if (s->ps.sps->separate_colour_plane_flag)
 532             sh->colour_plane_id = get_bits(gb, 2);
 533
 534         if (!IS_IDR(s)) {
 535             int poc;
 536
 537             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 538             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 539             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 540                 av_log(s->avctx, AV_LOG_WARNING,
 541                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 542                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 543                     return AVERROR_INVALIDDATA;
 544                 poc = s->poc;
 545             }
 546             s->poc = poc;
 547
 548             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 549             if (!sh->short_term_ref_pic_set_sps_flag) {
 550                 int pos = get_bits_left(gb);
 551                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 552                 if (ret < 0)
 553                     return ret;
 554
 555                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 556                 sh->short_term_rps = &sh->slice_rps;
 557             } else {
 558                 int numbits, rps_idx;
 559
 560                 if (!s->ps.sps->nb_st_rps) {
 561                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 562                     return AVERROR_INVALIDDATA;
 563                 }
 564
 565                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 566                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 567                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 568             }
 569
 570             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 571             if (ret < 0) {
 572                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 573                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 574                     return AVERROR_INVALIDDATA;
 575             }
 576
 577             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 578                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 579             else
 580                 sh->slice_temporal_mvp_enabled_flag = 0;
 581         } else {
 582             s->sh.short_term_rps = NULL;
 583             s->poc               = 0;
 584         }
 585
 586         /* 8.3.1 */
 587         if (s->temporal_id == 0 &&
 588             s->nal_unit_type != NAL_TRAIL_N &&
 589             s->nal_unit_type != NAL_TSA_N   &&
 590             s->nal_unit_type != NAL_STSA_N  &&
 591             s->nal_unit_type != NAL_RADL_N  &&
 592             s->nal_unit_type != NAL_RADL_R  &&
 593             s->nal_unit_type != NAL_RASL_N  &&
 594             s->nal_unit_type != NAL_RASL_R)
 595             s->pocTid0 = s->poc;
 596
 597         if (s->ps.sps->sao_enabled) {
 598             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 599             sh->slice_sample_adaptive_offset_flag[1] =
 600             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 601         } else {
 602             sh->slice_sample_adaptive_offset_flag[0] = 0;
 603             sh->slice_sample_adaptive_offset_flag[1] = 0;
 604             sh->slice_sample_adaptive_offset_flag[2] = 0;
 605         }
 606
 607         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 608         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 609             int nb_refs;
 610
 611             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 612             if (sh->slice_type == B_SLICE)
 613                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 614
 615             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 616                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 617                 if (sh->slice_type == B_SLICE)
 618                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 619             }
 620             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 621                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 622                        sh->nb_refs[L0], sh->nb_refs[L1]);
 623                 return AVERROR_INVALIDDATA;
 624             }
 625
 626             sh->rpl_modification_flag[0] = 0;
 627             sh->rpl_modification_flag[1] = 0;
 628             nb_refs = ff_hevc_frame_nb_refs(s);
 629             if (!nb_refs) {
 630                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 631                 return AVERROR_INVALIDDATA;
 632             }
 633
 634             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 635                 sh->rpl_modification_flag[0] = get_bits1(gb);
 636                 if (sh->rpl_modification_flag[0]) {
 637                     for (i = 0; i < sh->nb_refs[L0]; i++)
 638                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 639                 }
 640
 641                 if (sh->slice_type == B_SLICE) {
 642                     sh->rpl_modification_flag[1] = get_bits1(gb);
 643                     if (sh->rpl_modification_flag[1] == 1)
 644                         for (i = 0; i < sh->nb_refs[L1]; i++)
 645                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 646                 }
 647             }
 648
 649             if (sh->slice_type == B_SLICE)
 650                 sh->mvd_l1_zero_flag = get_bits1(gb);
 651
 652             if (s->ps.pps->cabac_init_present_flag)
 653                 sh->cabac_init_flag = get_bits1(gb);
 654             else
 655                 sh->cabac_init_flag = 0;
 656
 657             sh->collocated_ref_idx = 0;
 658             if (sh->slice_temporal_mvp_enabled_flag) {
 659                 sh->collocated_list = L0;
 660                 if (sh->slice_type == B_SLICE)
 661                     sh->collocated_list = !get_bits1(gb);
 662
 663                 if (sh->nb_refs[sh->collocated_list] > 1) {
 664                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 665                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 666                         av_log(s->avctx, AV_LOG_ERROR,
 667                                "Invalid collocated_ref_idx: %d.\n",
 668                                sh->collocated_ref_idx);
 669                         return AVERROR_INVALIDDATA;
 670                     }
 671                 }
 672             }
 673
 674             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 675                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 676                 pred_weight_table(s, gb);
 677             }
 678
 679             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 680             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 681                 av_log(s->avctx, AV_LOG_ERROR,
 682                        "Invalid number of merging MVP candidates: %d.\n",
 683                        sh->max_num_merge_cand);
 684                 return AVERROR_INVALIDDATA;
 685             }
 686         }
 687
 688         sh->slice_qp_delta = get_se_golomb(gb);
 689
 690         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 691             sh->slice_cb_qp_offset = get_se_golomb(gb);
 692             sh->slice_cr_qp_offset = get_se_golomb(gb);
 693         } else {
 694             sh->slice_cb_qp_offset = 0;
 695             sh->slice_cr_qp_offset = 0;
 696         }
 697
 698         if (s->ps.pps->deblocking_filter_control_present_flag) {
 699             int deblocking_filter_override_flag = 0;
 700
 701             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 702                 deblocking_filter_override_flag = get_bits1(gb);
 703
 704             if (deblocking_filter_override_flag) {
 705                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 706                 if (!sh->disable_deblocking_filter_flag) {
 707                     sh->beta_offset = get_se_golomb(gb) * 2;
 708                     sh->tc_offset   = get_se_golomb(gb) * 2;
 709                 }
 710             } else {
 711                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 712                 sh->beta_offset                    = s->ps.pps->beta_offset;
 713                 sh->tc_offset                      = s->ps.pps->tc_offset;
 714             }
 715         } else {
 716             sh->disable_deblocking_filter_flag = 0;
 717             sh->beta_offset                    = 0;
 718             sh->tc_offset                      = 0;
 719         }
 720
 721         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 722             (sh->slice_sample_adaptive_offset_flag[0] ||
 723              sh->slice_sample_adaptive_offset_flag[1] ||
 724              !sh->disable_deblocking_filter_flag)) {
 725             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 726         } else {
 727             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 728         }
 729     } else if (!s->slice_initialized) {
 730         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 731         return AVERROR_INVALIDDATA;
 732     }
 733
 734     sh->num_entry_point_offsets = 0;
 735     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 736         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 737         if (sh->num_entry_point_offsets > 0) {
 738             int offset_len = get_ue_golomb_long(gb) + 1;
 739
 740             for (i = 0; i < sh->num_entry_point_offsets; i++)
 741                 skip_bits(gb, offset_len);
 742         }
 743     }
 744
 745     if (s->ps.pps->slice_header_extension_present_flag) {
 746         unsigned int length = get_ue_golomb_long(gb);
 747         for (i = 0; i < length; i++)
 748             skip_bits(gb, 8);  // slice_header_extension_data_byte
 749     }
 750
 751     // Inferred parameters
 752     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 753     if (sh->slice_qp > 51 ||
 754         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 755         av_log(s->avctx, AV_LOG_ERROR,
 756                "The slice_qp %d is outside the valid range "
 757                "[%d, 51].\n",
 758                sh->slice_qp,
 759                -s->ps.sps->qp_bd_offset);
 760         return AVERROR_INVALIDDATA;
 761     }
 762
 763     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 764
 765     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 766         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 767         return AVERROR_INVALIDDATA;
 768     }
 769
 770     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 771
 772     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 773         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
 774                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
 775
 776     s->slice_initialized = 1;
 777
 778     return 0;
 779 }
 780
 781 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 782
 783 #define SET_SAO(elem, value)                            \
 784 do {                                                    \
 785     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 786         sao->elem = value;                              \
 787     else if (sao_merge_left_flag)                       \
 788         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 789     else if (sao_merge_up_flag)                         \
 790         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 791     else                                                \
 792         sao->elem = 0;                                  \
 793 } while (0)
 794
 795 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 796 {
 797     HEVCLocalContext *lc    = &s->HEVClc;
 798     int sao_merge_left_flag = 0;
 799     int sao_merge_up_flag   = 0;
 800     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
 801     SAOParams *sao          = &CTB(s->sao, rx, ry);
 802     int c_idx, i;
 803
 804     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 805         s->sh.slice_sample_adaptive_offset_flag[1]) {
 806         if (rx > 0) {
 807             if (lc->ctb_left_flag)
 808                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 809         }
 810         if (ry > 0 && !sao_merge_left_flag) {
 811             if (lc->ctb_up_flag)
 812                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 813         }
 814     }
 815
 816     for (c_idx = 0; c_idx < 3; c_idx++) {
 817         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 818             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 819             continue;
 820         }
 821
 822         if (c_idx == 2) {
 823             sao->type_idx[2] = sao->type_idx[1];
 824             sao->eo_class[2] = sao->eo_class[1];
 825         } else {
 826             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 827         }
 828
 829         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 830             continue;
 831
 832         for (i = 0; i < 4; i++)
 833             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 834
 835         if (sao->type_idx[c_idx] == SAO_BAND) {
 836             for (i = 0; i < 4; i++) {
 837                 if (sao->offset_abs[c_idx][i]) {
 838                     SET_SAO(offset_sign[c_idx][i],
 839                             ff_hevc_sao_offset_sign_decode(s));
 840                 } else {
 841                     sao->offset_sign[c_idx][i] = 0;
 842                 }
 843             }
 844             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 845         } else if (c_idx != 2) {
 846             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 847         }
 848
 849         // Inferred parameters
 850         sao->offset_val[c_idx][0] = 0;
 851         for (i = 0; i < 4; i++) {
 852             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 853             if (sao->type_idx[c_idx] == SAO_EDGE) {
 854                 if (i > 1)
 855                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 856             } else if (sao->offset_sign[c_idx][i]) {
 857                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 858             }
 859         }
 860     }
 861 }
 862
 863 #undef SET_SAO
 864 #undef CTB
 865
 866 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 867                                 int log2_trafo_size, enum ScanType scan_idx,
 868                                 int c_idx)
 869 {
 870 #define GET_COORD(offset, n)                                    \
 871     do {                                                        \
 872         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 873         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 874     } while (0)
 875     HEVCLocalContext *lc    = &s->HEVClc;
 876     int transform_skip_flag = 0;
 877
 878     int last_significant_coeff_x, last_significant_coeff_y;
 879     int last_scan_pos;
 880     int n_end;
 881     int num_coeff    = 0;
 882     int greater1_ctx = 1;
 883
 884     int num_last_subset;
 885     int x_cg_last_sig, y_cg_last_sig;
 886
 887     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 888
 889     ptrdiff_t stride = s->frame->linesize[c_idx];
 890     int hshift       = s->ps.sps->hshift[c_idx];
 891     int vshift       = s->ps.sps->vshift[c_idx];
 892     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 893                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
 894     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 895     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 896
 897     int trafo_size = 1 << log2_trafo_size;
 898     int i, qp, shift, add, scale, scale_m;
 899     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 900     const uint8_t *scale_matrix;
 901     uint8_t dc_scale;
 902
 903     // Derive QP for dequant
 904     if (!lc->cu.cu_transquant_bypass_flag) {
 905         static const int qp_c[] = {
 906             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 907         };
 908
 909         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 910             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 911             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 912             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 913         };
 914
 915         static const uint8_t div6[51 + 2 * 6 + 1] = {
 916             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 917             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 918             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 919         };
 920         int qp_y = lc->qp_y;
 921
 922         if (c_idx == 0) {
 923             qp = qp_y + s->ps.sps->qp_bd_offset;
 924         } else {
 925             int qp_i, offset;
 926
 927             if (c_idx == 1)
 928                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 929             else
 930                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 931
 932             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
 933             if (qp_i < 30)
 934                 qp = qp_i;
 935             else if (qp_i > 43)
 936                 qp = qp_i - 6;
 937             else
 938                 qp = qp_c[qp_i - 30];
 939
 940             qp += s->ps.sps->qp_bd_offset;
 941         }
 942
 943         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
 944         add      = 1 << (shift - 1);
 945         scale    = level_scale[rem6[qp]] << (div6[qp]);
 946         scale_m  = 16; // default when no custom scaling lists.
 947         dc_scale = 16;
 948
 949         if (s->ps.sps->scaling_list_enable_flag) {
 950             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
 951                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
 952             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 953
 954             if (log2_trafo_size != 5)
 955                 matrix_id = 3 * matrix_id + c_idx;
 956
 957             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 958             if (log2_trafo_size >= 4)
 959                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 960         }
 961     }
 962
 963     if (s->ps.pps->transform_skip_enabled_flag &&
 964         !lc->cu.cu_transquant_bypass_flag   &&
 965         log2_trafo_size == 2) {
 966         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 967     }
 968
 969     last_significant_coeff_x =
 970         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 971     last_significant_coeff_y =
 972         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 973
 974     if (last_significant_coeff_x > 3) {
 975         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 976         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 977                                    (2 + (last_significant_coeff_x & 1)) +
 978                                    suffix;
 979     }
 980
 981     if (last_significant_coeff_y > 3) {
 982         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 983         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 984                                    (2 + (last_significant_coeff_y & 1)) +
 985                                    suffix;
 986     }
 987
 988     if (scan_idx == SCAN_VERT)
 989         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
 990
 991     x_cg_last_sig = last_significant_coeff_x >> 2;
 992     y_cg_last_sig = last_significant_coeff_y >> 2;
 993
 994     switch (scan_idx) {
 995     case SCAN_DIAG: {
 996         int last_x_c = last_significant_coeff_x & 3;
 997         int last_y_c = last_significant_coeff_y & 3;
 998
 999         scan_x_off = ff_hevc_diag_scan4x4_x;
1000         scan_y_off = ff_hevc_diag_scan4x4_y;
1001         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1002         if (trafo_size == 4) {
1003             scan_x_cg = scan_1x1;
1004             scan_y_cg = scan_1x1;
1005         } else if (trafo_size == 8) {
1006             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1007             scan_x_cg  = diag_scan2x2_x;
1008             scan_y_cg  = diag_scan2x2_y;
1009         } else if (trafo_size == 16) {
1010             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1011             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1012             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1013         } else { // trafo_size == 32
1014             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1015             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1016             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1017         }
1018         break;
1019     }
1020     case SCAN_HORIZ:
1021         scan_x_cg  = horiz_scan2x2_x;
1022         scan_y_cg  = horiz_scan2x2_y;
1023         scan_x_off = horiz_scan4x4_x;
1024         scan_y_off = horiz_scan4x4_y;
1025         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1026         break;
1027     default: //SCAN_VERT
1028         scan_x_cg  = horiz_scan2x2_y;
1029         scan_y_cg  = horiz_scan2x2_x;
1030         scan_x_off = horiz_scan4x4_y;
1031         scan_y_off = horiz_scan4x4_x;
1032         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1033         break;
1034     }
1035     num_coeff++;
1036     num_last_subset = (num_coeff - 1) >> 4;
1037
1038     for (i = num_last_subset; i >= 0; i--) {
1039         int n, m;
1040         int x_cg, y_cg, x_c, y_c;
1041         int implicit_non_zero_coeff = 0;
1042         int64_t trans_coeff_level;
1043         int prev_sig = 0;
1044         int offset   = i << 4;
1045
1046         uint8_t significant_coeff_flag_idx[16];
1047         uint8_t nb_significant_coeff_flag = 0;
1048
1049         x_cg = scan_x_cg[i];
1050         y_cg = scan_y_cg[i];
1051
1052         if (i < num_last_subset && i > 0) {
1053             int ctx_cg = 0;
1054             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1055                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1056             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1057                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1058
1059             significant_coeff_group_flag[x_cg][y_cg] =
1060                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1061             implicit_non_zero_coeff = 1;
1062         } else {
1063             significant_coeff_group_flag[x_cg][y_cg] =
1064                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1065                  (x_cg == 0 && y_cg == 0));
1066         }
1067
1068         last_scan_pos = num_coeff - offset - 1;
1069
1070         if (i == num_last_subset) {
1071             n_end                         = last_scan_pos - 1;
1072             significant_coeff_flag_idx[0] = last_scan_pos;
1073             nb_significant_coeff_flag     = 1;
1074         } else {
1075             n_end = 15;
1076         }
1077
1078         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1079             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1080         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1081             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1082
1083         for (n = n_end; n >= 0; n--) {
1084             GET_COORD(offset, n);
1085
1086             if (significant_coeff_group_flag[x_cg][y_cg] &&
1087                 (n > 0 || implicit_non_zero_coeff == 0)) {
1088                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1089                                                           log2_trafo_size,
1090                                                           scan_idx,
1091                                                           prev_sig) == 1) {
1092                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1093                     nb_significant_coeff_flag++;
1094                     implicit_non_zero_coeff = 0;
1095                 }
1096             } else {
1097                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1098                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1099                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1100                     nb_significant_coeff_flag++;
1101                 }
1102             }
1103         }
1104
1105         n_end = nb_significant_coeff_flag;
1106
1107         if (n_end) {
1108             int first_nz_pos_in_cg = 16;
1109             int last_nz_pos_in_cg = -1;
1110             int c_rice_param = 0;
1111             int first_greater1_coeff_idx = -1;
1112             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1113             uint16_t coeff_sign_flag;
1114             int sum_abs = 0;
1115             int sign_hidden = 0;
1116
1117             // initialize first elem of coeff_bas_level_greater1_flag
1118             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1119
1120             if (!(i == num_last_subset) && greater1_ctx == 0)
1121                 ctx_set++;
1122             greater1_ctx      = 1;
1123             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1124
1125             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1126                 int n_idx = significant_coeff_flag_idx[m];
1127                 int inc   = (ctx_set << 2) + greater1_ctx;
1128                 coeff_abs_level_greater1_flag[n_idx] =
1129                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1130                 if (coeff_abs_level_greater1_flag[n_idx]) {
1131                     greater1_ctx = 0;
1132                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1133                     greater1_ctx++;
1134                 }
1135
1136                 if (coeff_abs_level_greater1_flag[n_idx] &&
1137                     first_greater1_coeff_idx == -1)
1138                     first_greater1_coeff_idx = n_idx;
1139             }
1140             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1141             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1142                                  !lc->cu.cu_transquant_bypass_flag;
1143
1144             if (first_greater1_coeff_idx != -1) {
1145                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1146             }
1147             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1148                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1149             } else {
1150                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1151             }
1152
1153             for (m = 0; m < n_end; m++) {
1154                 n = significant_coeff_flag_idx[m];
1155                 GET_COORD(offset, n);
1156                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1157                 if (trans_coeff_level == ((m < 8) ?
1158                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1159                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1160
1161                     trans_coeff_level += last_coeff_abs_level_remaining;
1162                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1163                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1164                 }
1165                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1166                     sum_abs += trans_coeff_level;
1167                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1168                         trans_coeff_level = -trans_coeff_level;
1169                 }
1170                 if (coeff_sign_flag >> 15)
1171                     trans_coeff_level = -trans_coeff_level;
1172                 coeff_sign_flag <<= 1;
1173                 if (!lc->cu.cu_transquant_bypass_flag) {
1174                     if (s->ps.sps->scaling_list_enable_flag) {
1175                         if (y_c || x_c || log2_trafo_size < 4) {
1176                             int pos;
1177                             switch (log2_trafo_size) {
1178                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1179                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1180                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1181                             default: pos = (y_c        << 2) +  x_c;
1182                             }
1183                             scale_m = scale_matrix[pos];
1184                         } else {
1185                             scale_m = dc_scale;
1186                         }
1187                     }
1188                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1189                     if(trans_coeff_level < 0) {
1190                         if((~trans_coeff_level) & 0xFffffffffff8000)
1191                             trans_coeff_level = -32768;
1192                     } else {
1193                         if (trans_coeff_level & 0xffffffffffff8000)
1194                             trans_coeff_level = 32767;
1195                     }
1196                 }
1197                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1198             }
1199         }
1200     }
1201
1202     if (lc->cu.cu_transquant_bypass_flag) {
1203         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1204     } else {
1205         if (transform_skip_flag)
1206             s->hevcdsp.transform_skip(dst, coeffs, stride);
1207         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1208                  log2_trafo_size == 2)
1209             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1210         else
1211             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1212     }
1213 }
1214
1215 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1216                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1217                               int log2_cb_size, int log2_trafo_size,
1218                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1219 {
1220     HEVCLocalContext *lc = &s->HEVClc;
1221
1222     if (lc->cu.pred_mode == MODE_INTRA) {
1223         int trafo_size = 1 << log2_trafo_size;
1224         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1225
1226         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1227         if (log2_trafo_size > 2) {
1228             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1229             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1230             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1231             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1232         } else if (blk_idx == 3) {
1233             trafo_size = trafo_size << s->ps.sps->hshift[1];
1234             ff_hevc_set_neighbour_available(s, xBase, yBase,
1235                                             trafo_size, trafo_size);
1236             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1237             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1238         }
1239     }
1240
1241     if (cbf_luma || cbf_cb || cbf_cr) {
1242         int scan_idx   = SCAN_DIAG;
1243         int scan_idx_c = SCAN_DIAG;
1244
1245         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1246             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1247             if (lc->tu.cu_qp_delta != 0)
1248                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1249                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1250             lc->tu.is_cu_qp_delta_coded = 1;
1251
1252             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1253                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1254                 av_log(s->avctx, AV_LOG_ERROR,
1255                        "The cu_qp_delta %d is outside the valid range "
1256                        "[%d, %d].\n",
1257                        lc->tu.cu_qp_delta,
1258                        -(26 + s->ps.sps->qp_bd_offset / 2),
1259                         (25 + s->ps.sps->qp_bd_offset / 2));
1260                 return AVERROR_INVALIDDATA;
1261             }
1262
1263             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1264         }
1265
1266         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1267             if (lc->tu.cur_intra_pred_mode >= 6 &&
1268                 lc->tu.cur_intra_pred_mode <= 14) {
1269                 scan_idx = SCAN_VERT;
1270             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1271                        lc->tu.cur_intra_pred_mode <= 30) {
1272                 scan_idx = SCAN_HORIZ;
1273             }
1274
1275             if (lc->pu.intra_pred_mode_c >=  6 &&
1276                 lc->pu.intra_pred_mode_c <= 14) {
1277                 scan_idx_c = SCAN_VERT;
1278             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1279                        lc->pu.intra_pred_mode_c <= 30) {
1280                 scan_idx_c = SCAN_HORIZ;
1281             }
1282         }
1283
1284         if (cbf_luma)
1285             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1286         if (log2_trafo_size > 2) {
1287             if (cbf_cb)
1288                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1289             if (cbf_cr)
1290                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1291         } else if (blk_idx == 3) {
1292             if (cbf_cb)
1293                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1294             if (cbf_cr)
1295                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1296         }
1297     }
1298     return 0;
1299 }
1300
1301 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1302 {
1303     int cb_size          = 1 << log2_cb_size;
1304     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1305
1306     int min_pu_width     = s->ps.sps->min_pu_width;
1307     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1308     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1309     int i, j;
1310
1311     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1312         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1313             s->is_pcm[i + j * min_pu_width] = 2;
1314 }
1315
1316 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1317                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1318                               int log2_cb_size, int log2_trafo_size,
1319                               int trafo_depth, int blk_idx,
1320                               int cbf_cb, int cbf_cr)
1321 {
1322     HEVCLocalContext *lc = &s->HEVClc;
1323     uint8_t split_transform_flag;
1324     int ret;
1325
1326     if (lc->cu.intra_split_flag) {
1327         if (trafo_depth == 1)
1328             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1329     } else {
1330         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1331     }
1332
1333     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1334         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1335         trafo_depth     < lc->cu.max_trafo_depth       &&
1336         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1337         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1338     } else {
1339         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1340                           lc->cu.pred_mode == MODE_INTER &&
1341                           lc->cu.part_mode != PART_2Nx2N &&
1342                           trafo_depth == 0;
1343
1344         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1345                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1346                                inter_split;
1347     }
1348
1349     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1350         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1351     else if (log2_trafo_size > 2 || trafo_depth == 0)
1352         cbf_cb = 0;
1353     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1354         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1355     else if (log2_trafo_size > 2 || trafo_depth == 0)
1356         cbf_cr = 0;
1357
1358     if (split_transform_flag) {
1359         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1360         const int x1 = x0 + trafo_size_split;
1361         const int y1 = y0 + trafo_size_split;
1362
1363 #define SUBDIVIDE(x, y, idx)                                                    \
1364 do {                                                                            \
1365     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1366                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1367                              cbf_cb, cbf_cr);                                   \
1368     if (ret < 0)                                                                \
1369         return ret;                                                             \
1370 } while (0)
1371
1372         SUBDIVIDE(x0, y0, 0);
1373         SUBDIVIDE(x1, y0, 1);
1374         SUBDIVIDE(x0, y1, 2);
1375         SUBDIVIDE(x1, y1, 3);
1376
1377 #undef SUBDIVIDE
1378     } else {
1379         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1380         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1381         int min_tu_width     = s->ps.sps->min_tb_width;
1382         int cbf_luma         = 1;
1383
1384         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1385             cbf_cb || cbf_cr)
1386             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1387
1388         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1389                                  log2_cb_size, log2_trafo_size,
1390                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1391         if (ret < 0)
1392             return ret;
1393         // TODO: store cbf_luma somewhere else
1394         if (cbf_luma) {
1395             int i, j;
1396             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1397                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1398                     int x_tu = (x0 + j) >> log2_min_tu_size;
1399                     int y_tu = (y0 + i) >> log2_min_tu_size;
1400                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1401                 }
1402         }
1403         if (!s->sh.disable_deblocking_filter_flag) {
1404             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1405             if (s->ps.pps->transquant_bypass_enable_flag &&
1406                 lc->cu.cu_transquant_bypass_flag)
1407                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1408         }
1409     }
1410     return 0;
1411 }
1412
1413 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1414 {
1415     //TODO: non-4:2:0 support
1416     HEVCLocalContext *lc = &s->HEVClc;
1417     GetBitContext gb;
1418     int cb_size   = 1 << log2_cb_size;
1419     int stride0   = s->frame->linesize[0];
1420     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1421     int   stride1 = s->frame->linesize[1];
1422     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1423     int   stride2 = s->frame->linesize[2];
1424     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1425
1426     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1427     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1428     int ret;
1429
1430     if (!s->sh.disable_deblocking_filter_flag)
1431         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1432
1433     ret = init_get_bits(&gb, pcm, length);
1434     if (ret < 0)
1435         return ret;
1436
1437     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1438     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1439     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1440     return 0;
1441 }
1442
1443 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1444 {
1445     HEVCLocalContext *lc = &s->HEVClc;
1446     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1447     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1448
1449     if (x)
1450         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1451     if (y)
1452         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1453
1454     switch (x) {
1455     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1456     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1457     case 0: lc->pu.mvd.x = 0;                               break;
1458     }
1459
1460     switch (y) {
1461     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1462     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1463     case 0: lc->pu.mvd.y = 0;                               break;
1464     }
1465 }
1466
1467 /**
1468  * 8.5.3.2.2.1 Luma sample interpolation process
1469  *
1470  * @param s HEVC decoding context
1471  * @param dst target buffer for block data at block position
1472  * @param dststride stride of the dst buffer
1473  * @param ref reference picture buffer at origin (0, 0)
1474  * @param mv motion vector (relative to block position) to get pixel data from
1475  * @param x_off horizontal position of block from origin (0, 0)
1476  * @param y_off vertical position of block from origin (0, 0)
1477  * @param block_w width of block
1478  * @param block_h height of block
1479  */
1480 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1481                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1482                     int block_w, int block_h)
1483 {
1484     HEVCLocalContext *lc = &s->HEVClc;
1485     uint8_t *src         = ref->data[0];
1486     ptrdiff_t srcstride  = ref->linesize[0];
1487     int pic_width        = s->ps.sps->width;
1488     int pic_height       = s->ps.sps->height;
1489
1490     int mx         = mv->x & 3;
1491     int my         = mv->y & 3;
1492     int extra_left = ff_hevc_qpel_extra_before[mx];
1493     int extra_top  = ff_hevc_qpel_extra_before[my];
1494
1495     x_off += mv->x >> 2;
1496     y_off += mv->y >> 2;
1497     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1498
1499     if (x_off < extra_left || y_off < extra_top ||
1500         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1501         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1502         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1503         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1504         int buf_offset = extra_top *
1505                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1506
1507         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1508                                  edge_emu_stride, srcstride,
1509                                  block_w + ff_hevc_qpel_extra[mx],
1510                                  block_h + ff_hevc_qpel_extra[my],
1511                                  x_off - extra_left, y_off - extra_top,
1512                                  pic_width, pic_height);
1513         src = lc->edge_emu_buffer + buf_offset;
1514         srcstride = edge_emu_stride;
1515     }
1516     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1517                                      block_h, lc->mc_buffer);
1518 }
1519
1520 /**
1521  * 8.5.3.2.2.2 Chroma sample interpolation process
1522  *
1523  * @param s HEVC decoding context
1524  * @param dst1 target buffer for block data at block position (U plane)
1525  * @param dst2 target buffer for block data at block position (V plane)
1526  * @param dststride stride of the dst1 and dst2 buffers
1527  * @param ref reference picture buffer at origin (0, 0)
1528  * @param mv motion vector (relative to block position) to get pixel data from
1529  * @param x_off horizontal position of block from origin (0, 0)
1530  * @param y_off vertical position of block from origin (0, 0)
1531  * @param block_w width of block
1532  * @param block_h height of block
1533  */
1534 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1535                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1536                       int x_off, int y_off, int block_w, int block_h)
1537 {
1538     HEVCLocalContext *lc = &s->HEVClc;
1539     uint8_t *src1        = ref->data[1];
1540     uint8_t *src2        = ref->data[2];
1541     ptrdiff_t src1stride = ref->linesize[1];
1542     ptrdiff_t src2stride = ref->linesize[2];
1543     int pic_width        = s->ps.sps->width >> 1;
1544     int pic_height       = s->ps.sps->height >> 1;
1545
1546     int mx = mv->x & 7;
1547     int my = mv->y & 7;
1548
1549     x_off += mv->x >> 3;
1550     y_off += mv->y >> 3;
1551     src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1552     src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1553
1554     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1555         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1556         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1557         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1558         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1559         int buf_offset1 = EPEL_EXTRA_BEFORE *
1560                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1561         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1562         int buf_offset2 = EPEL_EXTRA_BEFORE *
1563                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1564
1565         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1566                                  edge_emu_stride, src1stride,
1567                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1568                                  x_off - EPEL_EXTRA_BEFORE,
1569                                  y_off - EPEL_EXTRA_BEFORE,
1570                                  pic_width, pic_height);
1571
1572         src1 = lc->edge_emu_buffer + buf_offset1;
1573         src1stride = edge_emu_stride;
1574         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1575                                              block_w, block_h, mx, my, lc->mc_buffer);
1576
1577         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1578                                  edge_emu_stride, src2stride,
1579                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1580                                  x_off - EPEL_EXTRA_BEFORE,
1581                                  y_off - EPEL_EXTRA_BEFORE,
1582                                  pic_width, pic_height);
1583         src2 = lc->edge_emu_buffer + buf_offset2;
1584         src2stride = edge_emu_stride;
1585
1586         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1587                                              block_w, block_h, mx, my,
1588                                              lc->mc_buffer);
1589     } else {
1590         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1591                                              block_w, block_h, mx, my,
1592                                              lc->mc_buffer);
1593         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1594                                              block_w, block_h, mx, my,
1595                                              lc->mc_buffer);
1596     }
1597 }
1598
1599 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1600                                 const Mv *mv, int y0, int height)
1601 {
1602     int y = (mv->y >> 2) + y0 + height + 9;
1603     ff_thread_await_progress(&ref->tf, y, 0);
1604 }
1605
1606 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1607                                   int nPbH, int log2_cb_size, int part_idx,
1608                                   int merge_idx, MvField *mv)
1609 {
1610     HEVCLocalContext *lc             = &s->HEVClc;
1611     enum InterPredIdc inter_pred_idc = PRED_L0;
1612     int mvp_flag;
1613
1614     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1615     if (s->sh.slice_type == B_SLICE)
1616         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1617
1618     if (inter_pred_idc != PRED_L1) {
1619         if (s->sh.nb_refs[L0])
1620             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1621
1622         mv->pred_flag[0] = 1;
1623         hls_mvd_coding(s, x0, y0, 0);
1624         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1625         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1626                                  part_idx, merge_idx, mv, mvp_flag, 0);
1627         mv->mv[0].x += lc->pu.mvd.x;
1628         mv->mv[0].y += lc->pu.mvd.y;
1629     }
1630
1631     if (inter_pred_idc != PRED_L0) {
1632         if (s->sh.nb_refs[L1])
1633             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1634
1635         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1636             AV_ZERO32(&lc->pu.mvd);
1637         } else {
1638             hls_mvd_coding(s, x0, y0, 1);
1639         }
1640
1641         mv->pred_flag[1] = 1;
1642         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1643         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1644                                  part_idx, merge_idx, mv, mvp_flag, 1);
1645         mv->mv[1].x += lc->pu.mvd.x;
1646         mv->mv[1].y += lc->pu.mvd.y;
1647     }
1648 }
1649
1650 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1651                                 int nPbW, int nPbH,
1652                                 int log2_cb_size, int partIdx)
1653 {
1654 #define POS(c_idx, x, y)                                                              \
1655     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1656                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1657     HEVCLocalContext *lc = &s->HEVClc;
1658     int merge_idx = 0;
1659     struct MvField current_mv = {{{ 0 }}};
1660
1661     int min_pu_width = s->ps.sps->min_pu_width;
1662
1663     MvField *tab_mvf = s->ref->tab_mvf;
1664     RefPicList  *refPicList = s->ref->refPicList;
1665     HEVCFrame *ref0, *ref1;
1666
1667     int tmpstride = MAX_PB_SIZE;
1668
1669     uint8_t *dst0 = POS(0, x0, y0);
1670     uint8_t *dst1 = POS(1, x0, y0);
1671     uint8_t *dst2 = POS(2, x0, y0);
1672     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1673     int min_cb_width     = s->ps.sps->min_cb_width;
1674     int x_cb             = x0 >> log2_min_cb_size;
1675     int y_cb             = y0 >> log2_min_cb_size;
1676     int x_pu, y_pu;
1677     int i, j;
1678
1679     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1680
1681     if (!skip_flag)
1682         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1683
1684     if (skip_flag || lc->pu.merge_flag) {
1685         if (s->sh.max_num_merge_cand > 1)
1686             merge_idx = ff_hevc_merge_idx_decode(s);
1687         else
1688             merge_idx = 0;
1689
1690         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1691                                    partIdx, merge_idx, &current_mv);
1692     } else {
1693         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1694                               partIdx, merge_idx, &current_mv);
1695     }
1696
1697     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1698     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1699
1700     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1701         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1702             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1703
1704     if (current_mv.pred_flag[0]) {
1705         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1706         if (!ref0)
1707             return;
1708         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1709     }
1710     if (current_mv.pred_flag[1]) {
1711         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1712         if (!ref1)
1713             return;
1714         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1715     }
1716
1717     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1718         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1719         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1720
1721         luma_mc(s, tmp, tmpstride, ref0->frame,
1722                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1723
1724         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1725             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1726             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1727                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1728                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1729                                      dst0, s->frame->linesize[0], tmp,
1730                                      tmpstride, nPbW, nPbH);
1731         } else {
1732             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1733         }
1734         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1735                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1736
1737         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1738             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1739             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1740                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1741                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1742                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1743                                      nPbW / 2, nPbH / 2);
1744             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1745                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1746                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1747                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1748                                      nPbW / 2, nPbH / 2);
1749         } else {
1750             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1751             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1752         }
1753     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1754         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1755         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1756
1757         luma_mc(s, tmp, tmpstride, ref1->frame,
1758                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1759
1760         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1761             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1762             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1763                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1764                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1765                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1766                                       nPbW, nPbH);
1767         } else {
1768             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1769         }
1770
1771         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1772                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1773
1774         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1775             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1776             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1777                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1778                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1779                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1780             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1781                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1782                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1783                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1784         } else {
1785             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1786             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1787         }
1788     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1789         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1790         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1791         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1792         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1793
1794         luma_mc(s, tmp, tmpstride, ref0->frame,
1795                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1796         luma_mc(s, tmp2, tmpstride, ref1->frame,
1797                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1798
1799         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1800             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1801             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1802                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1803                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1804                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1805                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1806                                          dst0, s->frame->linesize[0],
1807                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1808         } else {
1809             s->hevcdsp.put_unweighted_pred_avg(dst0, s->frame->linesize[0],
1810                                                tmp, tmp2, tmpstride, nPbW, nPbH);
1811         }
1812
1813         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1814                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1815         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1816                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1817
1818         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1819             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1820             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1821                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1822                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1823                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1824                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1825                                          dst1, s->frame->linesize[1], tmp, tmp3,
1826                                          tmpstride, nPbW / 2, nPbH / 2);
1827             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1828                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1829                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1830                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1831                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1832                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1833                                          tmpstride, nPbW / 2, nPbH / 2);
1834         } else {
1835             s->hevcdsp.put_unweighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1836             s->hevcdsp.put_unweighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1837         }
1838     }
1839 }
1840
1841 /**
1842  * 8.4.1
1843  */
1844 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1845                                 int prev_intra_luma_pred_flag)
1846 {
1847     HEVCLocalContext *lc = &s->HEVClc;
1848     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1849     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1850     int min_pu_width     = s->ps.sps->min_pu_width;
1851     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1852     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1853     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1854
1855     int cand_up   = (lc->ctb_up_flag || y0b) ?
1856                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1857     int cand_left = (lc->ctb_left_flag || x0b) ?
1858                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1859
1860     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1861
1862     MvField *tab_mvf = s->ref->tab_mvf;
1863     int intra_pred_mode;
1864     int candidate[3];
1865     int i, j;
1866
1867     // intra_pred_mode prediction does not cross vertical CTB boundaries
1868     if ((y0 - 1) < y_ctb)
1869         cand_up = INTRA_DC;
1870
1871     if (cand_left == cand_up) {
1872         if (cand_left < 2) {
1873             candidate[0] = INTRA_PLANAR;
1874             candidate[1] = INTRA_DC;
1875             candidate[2] = INTRA_ANGULAR_26;
1876         } else {
1877             candidate[0] = cand_left;
1878             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1879             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1880         }
1881     } else {
1882         candidate[0] = cand_left;
1883         candidate[1] = cand_up;
1884         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1885             candidate[2] = INTRA_PLANAR;
1886         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1887             candidate[2] = INTRA_DC;
1888         } else {
1889             candidate[2] = INTRA_ANGULAR_26;
1890         }
1891     }
1892
1893     if (prev_intra_luma_pred_flag) {
1894         intra_pred_mode = candidate[lc->pu.mpm_idx];
1895     } else {
1896         if (candidate[0] > candidate[1])
1897             FFSWAP(uint8_t, candidate[0], candidate[1]);
1898         if (candidate[0] > candidate[2])
1899             FFSWAP(uint8_t, candidate[0], candidate[2]);
1900         if (candidate[1] > candidate[2])
1901             FFSWAP(uint8_t, candidate[1], candidate[2]);
1902
1903         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1904         for (i = 0; i < 3; i++)
1905             if (intra_pred_mode >= candidate[i])
1906                 intra_pred_mode++;
1907     }
1908
1909     /* write the intra prediction units into the mv array */
1910     if (!size_in_pus)
1911         size_in_pus = 1;
1912     for (i = 0; i < size_in_pus; i++) {
1913         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1914                intra_pred_mode, size_in_pus);
1915
1916         for (j = 0; j < size_in_pus; j++) {
1917             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1918             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1919             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1920             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1921             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1922             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1923             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1924             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1925             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1926         }
1927     }
1928
1929     return intra_pred_mode;
1930 }
1931
1932 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1933                                           int log2_cb_size, int ct_depth)
1934 {
1935     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1936     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1937     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1938     int y;
1939
1940     for (y = 0; y < length; y++)
1941         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1942                ct_depth, length);
1943 }
1944
1945 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1946                                   int log2_cb_size)
1947 {
1948     HEVCLocalContext *lc = &s->HEVClc;
1949     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1950     uint8_t prev_intra_luma_pred_flag[4];
1951     int split   = lc->cu.part_mode == PART_NxN;
1952     int pb_size = (1 << log2_cb_size) >> split;
1953     int side    = split + 1;
1954     int chroma_mode;
1955     int i, j;
1956
1957     for (i = 0; i < side; i++)
1958         for (j = 0; j < side; j++)
1959             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1960
1961     for (i = 0; i < side; i++) {
1962         for (j = 0; j < side; j++) {
1963             if (prev_intra_luma_pred_flag[2 * i + j])
1964                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1965             else
1966                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1967
1968             lc->pu.intra_pred_mode[2 * i + j] =
1969                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1970                                      prev_intra_luma_pred_flag[2 * i + j]);
1971         }
1972     }
1973
1974     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1975     if (chroma_mode != 4) {
1976         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1977             lc->pu.intra_pred_mode_c = 34;
1978         else
1979             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1980     } else {
1981         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1982     }
1983 }
1984
1985 static void intra_prediction_unit_default_value(HEVCContext *s,
1986                                                 int x0, int y0,
1987                                                 int log2_cb_size)
1988 {
1989     HEVCLocalContext *lc = &s->HEVClc;
1990     int pb_size          = 1 << log2_cb_size;
1991     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
1992     int min_pu_width     = s->ps.sps->min_pu_width;
1993     MvField *tab_mvf     = s->ref->tab_mvf;
1994     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1995     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1996     int j, k;
1997
1998     if (size_in_pus == 0)
1999         size_in_pus = 1;
2000     for (j = 0; j < size_in_pus; j++) {
2001         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2002         for (k = 0; k < size_in_pus; k++)
2003             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2004     }
2005 }
2006
2007 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2008 {
2009     int cb_size          = 1 << log2_cb_size;
2010     HEVCLocalContext *lc = &s->HEVClc;
2011     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2012     int length           = cb_size >> log2_min_cb_size;
2013     int min_cb_width     = s->ps.sps->min_cb_width;
2014     int x_cb             = x0 >> log2_min_cb_size;
2015     int y_cb             = y0 >> log2_min_cb_size;
2016     int x, y, ret;
2017
2018     lc->cu.x                = x0;
2019     lc->cu.y                = y0;
2020     lc->cu.pred_mode        = MODE_INTRA;
2021     lc->cu.part_mode        = PART_2Nx2N;
2022     lc->cu.intra_split_flag = 0;
2023
2024     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2025     for (x = 0; x < 4; x++)
2026         lc->pu.intra_pred_mode[x] = 1;
2027     if (s->ps.pps->transquant_bypass_enable_flag) {
2028         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2029         if (lc->cu.cu_transquant_bypass_flag)
2030             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2031     } else
2032         lc->cu.cu_transquant_bypass_flag = 0;
2033
2034     if (s->sh.slice_type != I_SLICE) {
2035         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2036
2037         x = y_cb * min_cb_width + x_cb;
2038         for (y = 0; y < length; y++) {
2039             memset(&s->skip_flag[x], skip_flag, length);
2040             x += min_cb_width;
2041         }
2042         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2043     }
2044
2045     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2046         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2047         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2048
2049         if (!s->sh.disable_deblocking_filter_flag)
2050             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2051     } else {
2052         int pcm_flag = 0;
2053
2054         if (s->sh.slice_type != I_SLICE)
2055             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2056         if (lc->cu.pred_mode != MODE_INTRA ||
2057             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2058             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2059             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2060                                       lc->cu.pred_mode == MODE_INTRA;
2061         }
2062
2063         if (lc->cu.pred_mode == MODE_INTRA) {
2064             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2065                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2066                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2067                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2068             }
2069             if (pcm_flag) {
2070                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2071                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2072                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2073                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2074
2075                 if (ret < 0)
2076                     return ret;
2077             } else {
2078                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2079             }
2080         } else {
2081             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2082             switch (lc->cu.part_mode) {
2083             case PART_2Nx2N:
2084                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2085                 break;
2086             case PART_2NxN:
2087                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2088                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2089                 break;
2090             case PART_Nx2N:
2091                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2092                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2093                 break;
2094             case PART_2NxnU:
2095                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2096                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2097                 break;
2098             case PART_2NxnD:
2099                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2100                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2101                 break;
2102             case PART_nLx2N:
2103                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2104                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2105                 break;
2106             case PART_nRx2N:
2107                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2108                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2109                 break;
2110             case PART_NxN:
2111                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2112                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2113                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2114                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2115                 break;
2116             }
2117         }
2118
2119         if (!pcm_flag) {
2120             int rqt_root_cbf = 1;
2121
2122             if (lc->cu.pred_mode != MODE_INTRA &&
2123                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2124                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2125             }
2126             if (rqt_root_cbf) {
2127                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2128                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2129                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2130                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2131                                          log2_cb_size,
2132                                          log2_cb_size, 0, 0, 0, 0);
2133                 if (ret < 0)
2134                     return ret;
2135             } else {
2136                 if (!s->sh.disable_deblocking_filter_flag)
2137                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2138             }
2139         }
2140     }
2141
2142     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2143         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2144
2145     x = y_cb * min_cb_width + x_cb;
2146     for (y = 0; y < length; y++) {
2147         memset(&s->qp_y_tab[x], lc->qp_y, length);
2148         x += min_cb_width;
2149     }
2150
2151     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2152
2153     return 0;
2154 }
2155
2156 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2157                                int log2_cb_size, int cb_depth)
2158 {
2159     HEVCLocalContext *lc = &s->HEVClc;
2160     const int cb_size    = 1 << log2_cb_size;
2161     int split_cu;
2162
2163     lc->ct.depth = cb_depth;
2164     if (x0 + cb_size <= s->ps.sps->width  &&
2165         y0 + cb_size <= s->ps.sps->height &&
2166         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2167         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2168     } else {
2169         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2170     }
2171     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2172         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2173         lc->tu.is_cu_qp_delta_coded = 0;
2174         lc->tu.cu_qp_delta          = 0;
2175     }
2176
2177     if (split_cu) {
2178         const int cb_size_split = cb_size >> 1;
2179         const int x1 = x0 + cb_size_split;
2180         const int y1 = y0 + cb_size_split;
2181
2182         log2_cb_size--;
2183         cb_depth++;
2184
2185 #define SUBDIVIDE(x, y)                                                \
2186 do {                                                                   \
2187     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2188         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2189         if (ret < 0)                                                   \
2190             return ret;                                                \
2191     }                                                                  \
2192 } while (0)
2193
2194         SUBDIVIDE(x0, y0);
2195         SUBDIVIDE(x1, y0);
2196         SUBDIVIDE(x0, y1);
2197         SUBDIVIDE(x1, y1);
2198     } else {
2199         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2200         if (ret < 0)
2201             return ret;
2202     }
2203
2204     return 0;
2205 }
2206
2207 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2208                                  int ctb_addr_ts)
2209 {
2210     HEVCLocalContext *lc  = &s->HEVClc;
2211     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2212     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2213     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2214
2215     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2216
2217     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2218         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2219             lc->first_qp_group = 1;
2220         lc->end_of_tiles_x = s->ps.sps->width;
2221     } else if (s->ps.pps->tiles_enabled_flag) {
2222         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2223             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2224             lc->start_of_tiles_x = x_ctb;
2225             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2226             lc->first_qp_group   = 1;
2227         }
2228     } else {
2229         lc->end_of_tiles_x = s->ps.sps->width;
2230     }
2231
2232     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2233
2234     lc->boundary_flags = 0;
2235     if (s->ps.pps->tiles_enabled_flag) {
2236         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2237             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2238         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2239             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2240         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2241             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2242         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2243             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2244     } else {
2245         if (!ctb_addr_in_slice > 0)
2246             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2247         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2248             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2249     }
2250
2251     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2252     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2253     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2254     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2255 }
2256
2257 static int hls_slice_data(HEVCContext *s)
2258 {
2259     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2260     int more_data   = 1;
2261     int x_ctb       = 0;
2262     int y_ctb       = 0;
2263     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2264     int ret;
2265
2266     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2267         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2268
2269         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2270         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2271         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2272
2273         ff_hevc_cabac_init(s, ctb_addr_ts);
2274
2275         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2276
2277         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2278         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2279         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2280
2281         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2282         if (ret < 0)
2283             return ret;
2284         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2285
2286         ctb_addr_ts++;
2287         ff_hevc_save_states(s, ctb_addr_ts);
2288         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2289     }
2290
2291     if (x_ctb + ctb_size >= s->ps.sps->width &&
2292         y_ctb + ctb_size >= s->ps.sps->height)
2293         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2294
2295     return ctb_addr_ts;
2296 }
2297
2298 static void restore_tqb_pixels(HEVCContext *s)
2299 {
2300     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2301     int x, y, c_idx;
2302
2303     for (c_idx = 0; c_idx < 3; c_idx++) {
2304         ptrdiff_t stride = s->frame->linesize[c_idx];
2305         int hshift       = s->ps.sps->hshift[c_idx];
2306         int vshift       = s->ps.sps->vshift[c_idx];
2307         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2308             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2309                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2310                     int n;
2311                     int len      = min_pu_size >> hshift;
2312                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2313                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2314                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2315                         memcpy(dst, src, len);
2316                         src += stride;
2317                         dst += stride;
2318                     }
2319                 }
2320             }
2321         }
2322     }
2323 }
2324
2325 static int set_side_data(HEVCContext *s)
2326 {
2327     AVFrame *out = s->ref->frame;
2328
2329     if (s->sei_frame_packing_present &&
2330         s->frame_packing_arrangement_type >= 3 &&
2331         s->frame_packing_arrangement_type <= 5 &&
2332         s->content_interpretation_type > 0 &&
2333         s->content_interpretation_type < 3) {
2334         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2335         if (!stereo)
2336             return AVERROR(ENOMEM);
2337
2338         switch (s->frame_packing_arrangement_type) {
2339         case 3:
2340             if (s->quincunx_subsampling)
2341                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2342             else
2343                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2344             break;
2345         case 4:
2346             stereo->type = AV_STEREO3D_TOPBOTTOM;
2347             break;
2348         case 5:
2349             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2350             break;
2351         }
2352
2353         if (s->content_interpretation_type == 2)
2354             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2355     }
2356
2357     if (s->sei_display_orientation_present &&
2358         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2359         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2360         AVFrameSideData *rotation = av_frame_new_side_data(out,
2361                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2362                                                            sizeof(int32_t) * 9);
2363         if (!rotation)
2364             return AVERROR(ENOMEM);
2365
2366         av_display_rotation_set((int32_t *)rotation->data, angle);
2367         av_display_matrix_flip((int32_t *)rotation->data,
2368                                s->sei_hflip, s->sei_vflip);
2369     }
2370
2371     return 0;
2372 }
2373
2374 static int hevc_frame_start(HEVCContext *s)
2375 {
2376     HEVCLocalContext *lc = &s->HEVClc;
2377     int ret;
2378
2379     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2380     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2381     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2382     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2383
2384     lc->start_of_tiles_x = 0;
2385     s->is_decoded        = 0;
2386     s->first_nal_type    = s->nal_unit_type;
2387
2388     if (s->ps.pps->tiles_enabled_flag)
2389         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2390
2391     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2392                               s->poc);
2393     if (ret < 0)
2394         goto fail;
2395
2396     ret = ff_hevc_frame_rps(s);
2397     if (ret < 0) {
2398         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2399         goto fail;
2400     }
2401
2402     s->ref->frame->key_frame = IS_IRAP(s);
2403
2404     ret = set_side_data(s);
2405     if (ret < 0)
2406         goto fail;
2407
2408     av_frame_unref(s->output_frame);
2409     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2410     if (ret < 0)
2411         goto fail;
2412
2413     ff_thread_finish_setup(s->avctx);
2414
2415     return 0;
2416
2417 fail:
2418     if (s->ref)
2419         ff_hevc_unref_frame(s, s->ref, ~0);
2420     s->ref = NULL;
2421     return ret;
2422 }
2423
2424 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2425 {
2426     HEVCLocalContext *lc = &s->HEVClc;
2427     GetBitContext *gb    = &lc->gb;
2428     int ctb_addr_ts, ret;
2429
2430     *gb              = nal->gb;
2431     s->nal_unit_type = nal->type;
2432     s->temporal_id   = nal->temporal_id;
2433
2434     switch (s->nal_unit_type) {
2435     case NAL_VPS:
2436         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2437         if (ret < 0)
2438             goto fail;
2439         break;
2440     case NAL_SPS:
2441         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2442                                      s->apply_defdispwin);
2443         if (ret < 0)
2444             goto fail;
2445         break;
2446     case NAL_PPS:
2447         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2448         if (ret < 0)
2449             goto fail;
2450         break;
2451     case NAL_SEI_PREFIX:
2452     case NAL_SEI_SUFFIX:
2453         ret = ff_hevc_decode_nal_sei(s);
2454         if (ret < 0)
2455             goto fail;
2456         break;
2457     case NAL_TRAIL_R:
2458     case NAL_TRAIL_N:
2459     case NAL_TSA_N:
2460     case NAL_TSA_R:
2461     case NAL_STSA_N:
2462     case NAL_STSA_R:
2463     case NAL_BLA_W_LP:
2464     case NAL_BLA_W_RADL:
2465     case NAL_BLA_N_LP:
2466     case NAL_IDR_W_RADL:
2467     case NAL_IDR_N_LP:
2468     case NAL_CRA_NUT:
2469     case NAL_RADL_N:
2470     case NAL_RADL_R:
2471     case NAL_RASL_N:
2472     case NAL_RASL_R:
2473         ret = hls_slice_header(s);
2474         if (ret < 0)
2475             return ret;
2476
2477         if (s->max_ra == INT_MAX) {
2478             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2479                 s->max_ra = s->poc;
2480             } else {
2481                 if (IS_IDR(s))
2482                     s->max_ra = INT_MIN;
2483             }
2484         }
2485
2486         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2487             s->poc <= s->max_ra) {
2488             s->is_decoded = 0;
2489             break;
2490         } else {
2491             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2492                 s->max_ra = INT_MIN;
2493         }
2494
2495         if (s->sh.first_slice_in_pic_flag) {
2496             ret = hevc_frame_start(s);
2497             if (ret < 0)
2498                 return ret;
2499         } else if (!s->ref) {
2500             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2501             goto fail;
2502         }
2503
2504         if (s->nal_unit_type != s->first_nal_type) {
2505             av_log(s->avctx, AV_LOG_ERROR,
2506                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2507                    s->first_nal_type, s->nal_unit_type);
2508             return AVERROR_INVALIDDATA;
2509         }
2510
2511         if (!s->sh.dependent_slice_segment_flag &&
2512             s->sh.slice_type != I_SLICE) {
2513             ret = ff_hevc_slice_rpl(s);
2514             if (ret < 0) {
2515                 av_log(s->avctx, AV_LOG_WARNING,
2516                        "Error constructing the reference lists for the current slice.\n");
2517                 goto fail;
2518             }
2519         }
2520
2521         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2522             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2523             if (ret < 0)
2524                 goto fail;
2525         }
2526
2527         if (s->avctx->hwaccel) {
2528             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2529             if (ret < 0)
2530                 goto fail;
2531         } else {
2532             ctb_addr_ts = hls_slice_data(s);
2533             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2534                 s->is_decoded = 1;
2535                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2536                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2537                     s->ps.sps->sao_enabled)
2538                     restore_tqb_pixels(s);
2539             }
2540
2541             if (ctb_addr_ts < 0) {
2542                 ret = ctb_addr_ts;
2543                 goto fail;
2544             }
2545         }
2546         break;
2547     case NAL_EOS_NUT:
2548     case NAL_EOB_NUT:
2549         s->seq_decode = (s->seq_decode + 1) & 0xff;
2550         s->max_ra     = INT_MAX;
2551         break;
2552     case NAL_AUD:
2553     case NAL_FD_NUT:
2554         break;
2555     default:
2556         av_log(s->avctx, AV_LOG_INFO,
2557                "Skipping NAL unit %d\n", s->nal_unit_type);
2558     }
2559
2560     return 0;
2561 fail:
2562     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2563         return ret;
2564     return 0;
2565 }
2566
2567 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2568 {
2569     int i, ret = 0;
2570
2571     s->ref = NULL;
2572     s->eos = 0;
2573
2574     /* split the input packet into NAL units, so we know the upper bound on the
2575      * number of slices in the frame */
2576     ret = ff_hevc_split_packet(&s->pkt, buf, length, s->avctx, s->is_nalff,
2577                                s->nal_length_size);
2578     if (ret < 0) {
2579         av_log(s->avctx, AV_LOG_ERROR,
2580                "Error splitting the input into NAL units.\n");
2581         return ret;
2582     }
2583
2584     for (i = 0; i < s->pkt.nb_nals; i++) {
2585         if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2586             s->pkt.nals[i].type == NAL_EOS_NUT)
2587             s->eos = 1;
2588     }
2589
2590     /* decode the NAL units */
2591     for (i = 0; i < s->pkt.nb_nals; i++) {
2592         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2593         if (ret < 0) {
2594             av_log(s->avctx, AV_LOG_WARNING,
2595                    "Error parsing NAL unit #%d.\n", i);
2596             goto fail;
2597         }
2598     }
2599
2600 fail:
2601     if (s->ref)
2602         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2603
2604     return ret;
2605 }
2606
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx context passed to av_log()
 * @param level   log level passed to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2613
2614 static int verify_md5(HEVCContext *s, AVFrame *frame)
2615 {
2616     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2617     int pixel_shift;
2618     int i, j;
2619
2620     if (!desc)
2621         return AVERROR(EINVAL);
2622
2623     pixel_shift = desc->comp[0].depth_minus1 > 7;
2624
2625     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2626            s->poc);
2627
2628     /* the checksums are LE, so we have to byteswap for >8bpp formats
2629      * on BE arches */
2630 #if HAVE_BIGENDIAN
2631     if (pixel_shift && !s->checksum_buf) {
2632         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2633                        FFMAX3(frame->linesize[0], frame->linesize[1],
2634                               frame->linesize[2]));
2635         if (!s->checksum_buf)
2636             return AVERROR(ENOMEM);
2637     }
2638 #endif
2639
2640     for (i = 0; frame->data[i]; i++) {
2641         int width  = s->avctx->coded_width;
2642         int height = s->avctx->coded_height;
2643         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2644         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2645         uint8_t md5[16];
2646
2647         av_md5_init(s->md5_ctx);
2648         for (j = 0; j < h; j++) {
2649             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2650 #if HAVE_BIGENDIAN
2651             if (pixel_shift) {
2652                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2653                                     (const uint16_t *) src, w);
2654                 src = s->checksum_buf;
2655             }
2656 #endif
2657             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2658         }
2659         av_md5_final(s->md5_ctx, md5);
2660
2661         if (!memcmp(md5, s->md5[i], 16)) {
2662             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2663             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2664             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2665         } else {
2666             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2667             print_md5(s->avctx, AV_LOG_ERROR, md5);
2668             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2669             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2670             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2671             return AVERROR_INVALIDDATA;
2672         }
2673     }
2674
2675     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2676
2677     return 0;
2678 }
2679
2680 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2681                              AVPacket *avpkt)
2682 {
2683     int ret;
2684     HEVCContext *s = avctx->priv_data;
2685
2686     if (!avpkt->size) {
2687         ret = ff_hevc_output_frame(s, data, 1);
2688         if (ret < 0)
2689             return ret;
2690
2691         *got_output = ret;
2692         return 0;
2693     }
2694
2695     s->ref = NULL;
2696     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2697     if (ret < 0)
2698         return ret;
2699
2700     if (avctx->hwaccel) {
2701         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2702             av_log(avctx, AV_LOG_ERROR,
2703                    "hardware accelerator failed to decode picture\n");
2704     } else {
2705         /* verify the SEI checksum */
2706         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2707             s->is_md5) {
2708             ret = verify_md5(s, s->ref->frame);
2709             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2710                 ff_hevc_unref_frame(s, s->ref, ~0);
2711                 return ret;
2712             }
2713         }
2714     }
2715     s->is_md5 = 0;
2716
2717     if (s->is_decoded) {
2718         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2719         s->is_decoded = 0;
2720     }
2721
2722     if (s->output_frame->buf[0]) {
2723         av_frame_move_ref(data, s->output_frame);
2724         *got_output = 1;
2725     }
2726
2727     return avpkt->size;
2728 }
2729
2730 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2731 {
2732     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2733     if (ret < 0)
2734         return ret;
2735
2736     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2737     if (!dst->tab_mvf_buf)
2738         goto fail;
2739     dst->tab_mvf = src->tab_mvf;
2740
2741     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2742     if (!dst->rpl_tab_buf)
2743         goto fail;
2744     dst->rpl_tab = src->rpl_tab;
2745
2746     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2747     if (!dst->rpl_buf)
2748         goto fail;
2749
2750     dst->poc        = src->poc;
2751     dst->ctb_count  = src->ctb_count;
2752     dst->window     = src->window;
2753     dst->flags      = src->flags;
2754     dst->sequence   = src->sequence;
2755
2756     if (src->hwaccel_picture_private) {
2757         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2758         if (!dst->hwaccel_priv_buf)
2759             goto fail;
2760         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2761     }
2762
2763     return 0;
2764 fail:
2765     ff_hevc_unref_frame(s, dst, ~0);
2766     return AVERROR(ENOMEM);
2767 }
2768
2769 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2770 {
2771     HEVCContext       *s = avctx->priv_data;
2772     int i;
2773
2774     pic_arrays_free(s);
2775
2776     av_freep(&s->md5_ctx);
2777
2778     av_frame_free(&s->tmp_frame);
2779     av_frame_free(&s->output_frame);
2780
2781     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2782         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2783         av_frame_free(&s->DPB[i].frame);
2784     }
2785
2786     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2787         av_buffer_unref(&s->ps.vps_list[i]);
2788     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2789         av_buffer_unref(&s->ps.sps_list[i]);
2790     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2791         av_buffer_unref(&s->ps.pps_list[i]);
2792
2793     for (i = 0; i < s->pkt.nals_allocated; i++)
2794         av_freep(&s->pkt.nals[i].rbsp_buffer);
2795     av_freep(&s->pkt.nals);
2796     s->pkt.nals_allocated = 0;
2797
2798     return 0;
2799 }
2800
2801 static av_cold int hevc_init_context(AVCodecContext *avctx)
2802 {
2803     HEVCContext *s = avctx->priv_data;
2804     int i;
2805
2806     s->avctx = avctx;
2807
2808     s->tmp_frame = av_frame_alloc();
2809     if (!s->tmp_frame)
2810         goto fail;
2811
2812     s->output_frame = av_frame_alloc();
2813     if (!s->output_frame)
2814         goto fail;
2815
2816     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2817         s->DPB[i].frame = av_frame_alloc();
2818         if (!s->DPB[i].frame)
2819             goto fail;
2820         s->DPB[i].tf.f = s->DPB[i].frame;
2821     }
2822
2823     s->max_ra = INT_MAX;
2824
2825     s->md5_ctx = av_md5_alloc();
2826     if (!s->md5_ctx)
2827         goto fail;
2828
2829     ff_bswapdsp_init(&s->bdsp);
2830
2831     s->context_initialized = 1;
2832
2833     return 0;
2834
2835 fail:
2836     hevc_decode_free(avctx);
2837     return AVERROR(ENOMEM);
2838 }
2839
2840 static int hevc_update_thread_context(AVCodecContext *dst,
2841                                       const AVCodecContext *src)
2842 {
2843     HEVCContext *s  = dst->priv_data;
2844     HEVCContext *s0 = src->priv_data;
2845     int i, ret;
2846
2847     if (!s->context_initialized) {
2848         ret = hevc_init_context(dst);
2849         if (ret < 0)
2850             return ret;
2851     }
2852
2853     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2854         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2855         if (s0->DPB[i].frame->buf[0]) {
2856             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2857             if (ret < 0)
2858                 return ret;
2859         }
2860     }
2861
2862     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2863         av_buffer_unref(&s->ps.vps_list[i]);
2864         if (s0->ps.vps_list[i]) {
2865             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2866             if (!s->ps.vps_list[i])
2867                 return AVERROR(ENOMEM);
2868         }
2869     }
2870
2871     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2872         av_buffer_unref(&s->ps.sps_list[i]);
2873         if (s0->ps.sps_list[i]) {
2874             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
2875             if (!s->ps.sps_list[i])
2876                 return AVERROR(ENOMEM);
2877         }
2878     }
2879
2880     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
2881         av_buffer_unref(&s->ps.pps_list[i]);
2882         if (s0->ps.pps_list[i]) {
2883             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
2884             if (!s->ps.pps_list[i])
2885                 return AVERROR(ENOMEM);
2886         }
2887     }
2888
2889     if (s->ps.sps != s0->ps.sps)
2890         ret = set_sps(s, s0->ps.sps);
2891
2892     s->seq_decode = s0->seq_decode;
2893     s->seq_output = s0->seq_output;
2894     s->pocTid0    = s0->pocTid0;
2895     s->max_ra     = s0->max_ra;
2896
2897     s->is_nalff        = s0->is_nalff;
2898     s->nal_length_size = s0->nal_length_size;
2899
2900     if (s0->eos) {
2901         s->seq_decode = (s->seq_decode + 1) & 0xff;
2902         s->max_ra = INT_MAX;
2903     }
2904
2905     return 0;
2906 }
2907
2908 static int hevc_decode_extradata(HEVCContext *s)
2909 {
2910     AVCodecContext *avctx = s->avctx;
2911     GetByteContext gb;
2912     int ret, i;
2913
2914     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
2915
2916     if (avctx->extradata_size > 3 &&
2917         (avctx->extradata[0] || avctx->extradata[1] ||
2918          avctx->extradata[2] > 1)) {
2919         /* It seems the extradata is encoded as hvcC format.
2920          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2921          * is finalized. When finalized, configurationVersion will be 1 and we
2922          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2923         int i, j, num_arrays, nal_len_size;
2924
2925         s->is_nalff = 1;
2926
2927         bytestream2_skip(&gb, 21);
2928         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2929         num_arrays   = bytestream2_get_byte(&gb);
2930
2931         /* nal units in the hvcC always have length coded with 2 bytes,
2932          * so put a fake nal_length_size = 2 while parsing them */
2933         s->nal_length_size = 2;
2934
2935         /* Decode nal units from hvcC. */
2936         for (i = 0; i < num_arrays; i++) {
2937             int type = bytestream2_get_byte(&gb) & 0x3f;
2938             int cnt  = bytestream2_get_be16(&gb);
2939
2940             for (j = 0; j < cnt; j++) {
2941                 // +2 for the nal size field
2942                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2943                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2944                     av_log(s->avctx, AV_LOG_ERROR,
2945                            "Invalid NAL unit size in extradata.\n");
2946                     return AVERROR_INVALIDDATA;
2947                 }
2948
2949                 ret = decode_nal_units(s, gb.buffer, nalsize);
2950                 if (ret < 0) {
2951                     av_log(avctx, AV_LOG_ERROR,
2952                            "Decoding nal unit %d %d from hvcC failed\n",
2953                            type, i);
2954                     return ret;
2955                 }
2956                 bytestream2_skip(&gb, nalsize);
2957             }
2958         }
2959
2960         /* Now store right nal length size, that will be used to parse
2961          * all other nals */
2962         s->nal_length_size = nal_len_size;
2963     } else {
2964         s->is_nalff = 0;
2965         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
2966         if (ret < 0)
2967             return ret;
2968     }
2969
2970     /* export stream parameters from the first SPS */
2971     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2972         if (s->ps.sps_list[i]) {
2973             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
2974             export_stream_params(s->avctx, &s->ps, sps);
2975             break;
2976         }
2977     }
2978
2979     return 0;
2980 }
2981
2982 static av_cold int hevc_decode_init(AVCodecContext *avctx)
2983 {
2984     HEVCContext *s = avctx->priv_data;
2985     int ret;
2986
2987     ff_init_cabac_states();
2988
2989     avctx->internal->allocate_progress = 1;
2990
2991     ret = hevc_init_context(avctx);
2992     if (ret < 0)
2993         return ret;
2994
2995     if (avctx->extradata_size > 0 && avctx->extradata) {
2996         ret = hevc_decode_extradata(s);
2997         if (ret < 0) {
2998             hevc_decode_free(avctx);
2999             return ret;
3000         }
3001     }
3002
3003     return 0;
3004 }
3005
3006 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3007 {
3008     HEVCContext *s = avctx->priv_data;
3009     int ret;
3010
3011     memset(s, 0, sizeof(*s));
3012
3013     ret = hevc_init_context(avctx);
3014     if (ret < 0)
3015         return ret;
3016
3017     return 0;
3018 }
3019
3020 static void hevc_decode_flush(AVCodecContext *avctx)
3021 {
3022     HEVCContext *s = avctx->priv_data;
3023     ff_hevc_flush_dpb(s);
3024     s->max_ra = INT_MAX;
3025 }
3026
/* Byte offset of a field inside HEVCContext, for the AVOption table. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flags shared by the options below: decoding + video parameter. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3029
3030 static const AVProfile profiles[] = {
3031     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3032     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3033     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3034     { FF_PROFILE_UNKNOWN },
3035 };
3036
3037 static const AVOption options[] = {
3038     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3039         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3040     { NULL },
3041 };
3042
3043 static const AVClass hevc_decoder_class = {
3044     .class_name = "HEVC decoder",
3045     .item_name  = av_default_item_name,
3046     .option     = options,
3047     .version    = LIBAVUTIL_VERSION_INT,
3048 };
3049
3050 AVCodec ff_hevc_decoder = {
3051     .name                  = "hevc",
3052     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3053     .type                  = AVMEDIA_TYPE_VIDEO,
3054     .id                    = AV_CODEC_ID_HEVC,
3055     .priv_data_size        = sizeof(HEVCContext),
3056     .priv_class            = &hevc_decoder_class,
3057     .init                  = hevc_decode_init,
3058     .close                 = hevc_decode_free,
3059     .decode                = hevc_decode_frame,
3060     .flush                 = hevc_decode_flush,
3061     .update_thread_context = hevc_update_thread_context,
3062     .init_thread_copy      = hevc_init_thread_copy,
3063     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3064                              AV_CODEC_CAP_FRAME_THREADS,
3065     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3066 };