git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/internal.h"
  29 #include "libavutil/md5.h"
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32 #include "libavutil/stereo3d.h"
  33
  34 #include "bytestream.h"
  35 #include "cabac_functions.h"
  36 #include "dsputil.h"
  37 #include "golomb.h"
  38 #include "hevc.h"
  39
  40 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  41 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  42 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  43
  44 static const uint8_t scan_1x1[1] = { 0 };
  45
  46 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  47
  48 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  49
  50 static const uint8_t horiz_scan4x4_x[16] = {
  51     0, 1, 2, 3,
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55 };
  56
  57 static const uint8_t horiz_scan4x4_y[16] = {
  58     0, 0, 0, 0,
  59     1, 1, 1, 1,
  60     2, 2, 2, 2,
  61     3, 3, 3, 3,
  62 };
  63
  64 static const uint8_t horiz_scan8x8_inv[8][8] = {
  65     {  0,  1,  2,  3, 16, 17, 18, 19, },
  66     {  4,  5,  6,  7, 20, 21, 22, 23, },
  67     {  8,  9, 10, 11, 24, 25, 26, 27, },
  68     { 12, 13, 14, 15, 28, 29, 30, 31, },
  69     { 32, 33, 34, 35, 48, 49, 50, 51, },
  70     { 36, 37, 38, 39, 52, 53, 54, 55, },
  71     { 40, 41, 42, 43, 56, 57, 58, 59, },
  72     { 44, 45, 46, 47, 60, 61, 62, 63, },
  73 };
  74
  75 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  76
  77 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  78
  79 static const uint8_t diag_scan2x2_inv[2][2] = {
  80     { 0, 2, },
  81     { 1, 3, },
  82 };
  83
  84 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  85     0, 0, 1, 0,
  86     1, 2, 0, 1,
  87     2, 3, 1, 2,
  88     3, 2, 3, 3,
  89 };
  90
  91 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  92     0, 1, 0, 2,
  93     1, 0, 3, 2,
  94     1, 0, 3, 2,
  95     1, 3, 2, 3,
  96 };
  97
  98 static const uint8_t diag_scan4x4_inv[4][4] = {
  99     { 0,  2,  5,  9, },
 100     { 1,  4,  8, 12, },
 101     { 3,  7, 11, 14, },
 102     { 6, 10, 13, 15, },
 103 };
 104
 105 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 106     0, 0, 1, 0,
 107     1, 2, 0, 1,
 108     2, 3, 0, 1,
 109     2, 3, 4, 0,
 110     1, 2, 3, 4,
 111     5, 0, 1, 2,
 112     3, 4, 5, 6,
 113     0, 1, 2, 3,
 114     4, 5, 6, 7,
 115     1, 2, 3, 4,
 116     5, 6, 7, 2,
 117     3, 4, 5, 6,
 118     7, 3, 4, 5,
 119     6, 7, 4, 5,
 120     6, 7, 5, 6,
 121     7, 6, 7, 7,
 122 };
 123
 124 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 125     0, 1, 0, 2,
 126     1, 0, 3, 2,
 127     1, 0, 4, 3,
 128     2, 1, 0, 5,
 129     4, 3, 2, 1,
 130     0, 6, 5, 4,
 131     3, 2, 1, 0,
 132     7, 6, 5, 4,
 133     3, 2, 1, 0,
 134     7, 6, 5, 4,
 135     3, 2, 1, 7,
 136     6, 5, 4, 3,
 137     2, 7, 6, 5,
 138     4, 3, 7, 6,
 139     5, 4, 7, 6,
 140     5, 7, 6, 7,
 141 };
 142
 143 static const uint8_t diag_scan8x8_inv[8][8] = {
 144     {  0,  2,  5,  9, 14, 20, 27, 35, },
 145     {  1,  4,  8, 13, 19, 26, 34, 42, },
 146     {  3,  7, 12, 18, 25, 33, 41, 48, },
 147     {  6, 11, 17, 24, 32, 40, 47, 53, },
 148     { 10, 16, 23, 31, 39, 46, 52, 57, },
 149     { 15, 22, 30, 38, 45, 51, 56, 60, },
 150     { 21, 29, 37, 44, 50, 55, 59, 62, },
 151     { 28, 36, 43, 49, 54, 58, 61, 63, },
 152 };
 153
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
 158
 159 /**
 160  * Section 5.7
 161  */
 162
 163 /* free everything allocated  by pic_arrays_init() */
 164 static void pic_arrays_free(HEVCContext *s)
 165 {
 166     av_freep(&s->sao);
 167     av_freep(&s->deblock);
 168     av_freep(&s->split_cu_flag);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size         = width * height;
 195     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 196                            ((height >> log2_min_cb_size) + 1);
 197     int ctb_count        = sps->ctb_width * sps->ctb_height;
 198     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 199
 200     s->bs_width  = width  >> 3;
 201     s->bs_height = height >> 3;
 202
 203     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 204     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 205     s->split_cu_flag = av_malloc(pic_size);
 206     if (!s->sao || !s->deblock || !s->split_cu_flag)
 207         goto fail;
 208
 209     s->skip_flag    = av_malloc(pic_size_in_ctb);
 210     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 211     if (!s->skip_flag || !s->tab_ct_depth)
 212         goto fail;
 213
 214     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 215     s->tab_ipm  = av_malloc(min_pu_size);
 216     s->is_pcm   = av_malloc(min_pu_size);
 217     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 218         goto fail;
 219
 220     s->filter_slice_edges = av_malloc(ctb_count);
 221     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->tab_slice_address));
 223     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 224                                       sizeof(*s->qp_y_tab));
 225     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 226         goto fail;
 227
 228     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 229     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 230     if (!s->horizontal_bs || !s->vertical_bs)
 231         goto fail;
 232
 233     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 234                                           av_buffer_alloc);
 235     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 236                                           av_buffer_allocz);
 237     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 238         goto fail;
 239
 240     return 0;
 241
 242 fail:
 243     pic_arrays_free(s);
 244     return AVERROR(ENOMEM);
 245 }
 246
 247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 248 {
 249     int i = 0;
 250     int j = 0;
 251     uint8_t luma_weight_l0_flag[16];
 252     uint8_t chroma_weight_l0_flag[16];
 253     uint8_t luma_weight_l1_flag[16];
 254     uint8_t chroma_weight_l1_flag[16];
 255
 256     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 257     if (s->sps->chroma_format_idc != 0) {
 258         int delta = get_se_golomb(gb);
 259         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 260     }
 261
 262     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 263         luma_weight_l0_flag[i] = get_bits1(gb);
 264         if (!luma_weight_l0_flag[i]) {
 265             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 266             s->sh.luma_offset_l0[i] = 0;
 267         }
 268     }
 269     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 270         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 271             chroma_weight_l0_flag[i] = get_bits1(gb);
 272     } else {
 273         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 274             chroma_weight_l0_flag[i] = 0;
 275     }
 276     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 277         if (luma_weight_l0_flag[i]) {
 278             int delta_luma_weight_l0 = get_se_golomb(gb);
 279             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 280             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 281         }
 282         if (chroma_weight_l0_flag[i]) {
 283             for (j = 0; j < 2; j++) {
 284                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 285                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 286                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 287                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 288                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 289             }
 290         } else {
 291             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][0] = 0;
 293             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 294             s->sh.chroma_offset_l0[i][1] = 0;
 295         }
 296     }
 297     if (s->sh.slice_type == B_SLICE) {
 298         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 299             luma_weight_l1_flag[i] = get_bits1(gb);
 300             if (!luma_weight_l1_flag[i]) {
 301                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 302                 s->sh.luma_offset_l1[i] = 0;
 303             }
 304         }
 305         if (s->sps->chroma_format_idc != 0) {
 306             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 307                 chroma_weight_l1_flag[i] = get_bits1(gb);
 308         } else {
 309             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 310                 chroma_weight_l1_flag[i] = 0;
 311         }
 312         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 313             if (luma_weight_l1_flag[i]) {
 314                 int delta_luma_weight_l1 = get_se_golomb(gb);
 315                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 316                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 317             }
 318             if (chroma_weight_l1_flag[i]) {
 319                 for (j = 0; j < 2; j++) {
 320                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 321                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 322                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 323                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 324                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 325                 }
 326             } else {
 327                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][0] = 0;
 329                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 330                 s->sh.chroma_offset_l1[i][1] = 0;
 331             }
 332         }
 333     }
 334 }
 335
 336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 337 {
 338     const HEVCSPS *sps = s->sps;
 339     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 340     int prev_delta_msb = 0;
 341     int nb_sps = 0, nb_sh;
 342     int i;
 343
 344     rps->nb_refs = 0;
 345     if (!sps->long_term_ref_pics_present_flag)
 346         return 0;
 347
 348     if (sps->num_long_term_ref_pics_sps > 0)
 349         nb_sps = get_ue_golomb_long(gb);
 350     nb_sh = get_ue_golomb_long(gb);
 351
 352     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 353         return AVERROR_INVALIDDATA;
 354
 355     rps->nb_refs = nb_sh + nb_sps;
 356
 357     for (i = 0; i < rps->nb_refs; i++) {
 358         uint8_t delta_poc_msb_present;
 359
 360         if (i < nb_sps) {
 361             uint8_t lt_idx_sps = 0;
 362
 363             if (sps->num_long_term_ref_pics_sps > 1)
 364                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 365
 366             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 367             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 368         } else {
 369             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 370             rps->used[i] = get_bits1(gb);
 371         }
 372
 373         delta_poc_msb_present = get_bits1(gb);
 374         if (delta_poc_msb_present) {
 375             int delta = get_ue_golomb_long(gb);
 376
 377             if (i && i != nb_sps)
 378                 delta += prev_delta_msb;
 379
 380             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 381             prev_delta_msb = delta;
 382         }
 383     }
 384
 385     return 0;
 386 }
 387
 388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 389 {
 390     int ret;
 391
 392     pic_arrays_free(s);
 393     ret = pic_arrays_init(s, sps);
 394     if (ret < 0)
 395         goto fail;
 396
 397     s->avctx->coded_width         = sps->width;
 398     s->avctx->coded_height        = sps->height;
 399     s->avctx->width               = sps->output_width;
 400     s->avctx->height              = sps->output_height;
 401     s->avctx->pix_fmt             = sps->pix_fmt;
 402     s->avctx->sample_aspect_ratio = sps->vui.sar;
 403     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 404
 405     if (sps->vui.video_signal_type_present_flag)
 406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 407                                                                : AVCOL_RANGE_MPEG;
 408     else
 409         s->avctx->color_range = AVCOL_RANGE_MPEG;
 410
 411     if (sps->vui.colour_description_present_flag) {
 412         s->avctx->color_primaries = sps->vui.colour_primaries;
 413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 415     } else {
 416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 419     }
 420
 421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 424
 425     if (sps->sao_enabled) {
 426         av_frame_unref(s->tmp_frame);
 427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 428         if (ret < 0)
 429             goto fail;
 430         s->frame = s->tmp_frame;
 431     }
 432
 433     s->sps = sps;
 434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 435     return 0;
 436
 437 fail:
 438     pic_arrays_free(s);
 439     s->sps = NULL;
 440     return ret;
 441 }
 442
 443 static int hls_slice_header(HEVCContext *s)
 444 {
 445     GetBitContext *gb = &s->HEVClc.gb;
 446     SliceHeader *sh   = &s->sh;
 447     int i, ret;
 448
 449     // Coded parameters
 450     sh->first_slice_in_pic_flag = get_bits1(gb);
 451     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 452         s->seq_decode = (s->seq_decode + 1) & 0xff;
 453         s->max_ra     = INT_MAX;
 454         if (IS_IDR(s))
 455             ff_hevc_clear_refs(s);
 456     }
 457     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
 458         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 459
 460     sh->pps_id = get_ue_golomb_long(gb);
 461     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 462         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 463         return AVERROR_INVALIDDATA;
 464     }
 465     if (!sh->first_slice_in_pic_flag &&
 466         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 467         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 468         return AVERROR_INVALIDDATA;
 469     }
 470     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 471
 472     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 473         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 474
 475         ff_hevc_clear_refs(s);
 476         ret = set_sps(s, s->sps);
 477         if (ret < 0)
 478             return ret;
 479
 480         s->seq_decode = (s->seq_decode + 1) & 0xff;
 481         s->max_ra     = INT_MAX;
 482     }
 483
 484     sh->dependent_slice_segment_flag = 0;
 485     if (!sh->first_slice_in_pic_flag) {
 486         int slice_address_length;
 487
 488         if (s->pps->dependent_slice_segments_enabled_flag)
 489             sh->dependent_slice_segment_flag = get_bits1(gb);
 490
 491         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 492                                             s->sps->ctb_height);
 493         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 494         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 495             av_log(s->avctx, AV_LOG_ERROR,
 496                    "Invalid slice segment address: %u.\n",
 497                    sh->slice_segment_addr);
 498             return AVERROR_INVALIDDATA;
 499         }
 500
 501         if (!sh->dependent_slice_segment_flag) {
 502             sh->slice_addr = sh->slice_segment_addr;
 503             s->slice_idx++;
 504         }
 505     } else {
 506         sh->slice_segment_addr = sh->slice_addr = 0;
 507         s->slice_idx           = 0;
 508         s->slice_initialized   = 0;
 509     }
 510
 511     if (!sh->dependent_slice_segment_flag) {
 512         s->slice_initialized = 0;
 513
 514         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 515             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 516
 517         sh->slice_type = get_ue_golomb_long(gb);
 518         if (!(sh->slice_type == I_SLICE ||
 519               sh->slice_type == P_SLICE ||
 520               sh->slice_type == B_SLICE)) {
 521             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 522                    sh->slice_type);
 523             return AVERROR_INVALIDDATA;
 524         }
 525         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 526             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 527             return AVERROR_INVALIDDATA;
 528         }
 529
 530         if (s->pps->output_flag_present_flag)
 531             sh->pic_output_flag = get_bits1(gb);
 532
 533         if (s->sps->separate_colour_plane_flag)
 534             sh->colour_plane_id = get_bits(gb, 2);
 535
 536         if (!IS_IDR(s)) {
 537             int short_term_ref_pic_set_sps_flag, poc;
 538
 539             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 540             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 541             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 542                 av_log(s->avctx, AV_LOG_WARNING,
 543                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 544                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 545                     return AVERROR_INVALIDDATA;
 546                 poc = s->poc;
 547             }
 548             s->poc = poc;
 549
 550             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 551             if (!short_term_ref_pic_set_sps_flag) {
 552                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 553                 if (ret < 0)
 554                     return ret;
 555
 556                 sh->short_term_rps = &sh->slice_rps;
 557             } else {
 558                 int numbits, rps_idx;
 559
 560                 if (!s->sps->nb_st_rps) {
 561                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 562                     return AVERROR_INVALIDDATA;
 563                 }
 564
 565                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 566                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 567                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 568             }
 569
 570             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 571             if (ret < 0) {
 572                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 573                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 574                     return AVERROR_INVALIDDATA;
 575             }
 576
 577             if (s->sps->sps_temporal_mvp_enabled_flag)
 578                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 579             else
 580                 sh->slice_temporal_mvp_enabled_flag = 0;
 581         } else {
 582             s->sh.short_term_rps = NULL;
 583             s->poc               = 0;
 584         }
 585
 586         /* 8.3.1 */
 587         if (s->temporal_id == 0 &&
 588             s->nal_unit_type != NAL_TRAIL_N &&
 589             s->nal_unit_type != NAL_TSA_N   &&
 590             s->nal_unit_type != NAL_STSA_N  &&
 591             s->nal_unit_type != NAL_RADL_N  &&
 592             s->nal_unit_type != NAL_RADL_R  &&
 593             s->nal_unit_type != NAL_RASL_N  &&
 594             s->nal_unit_type != NAL_RASL_R)
 595             s->pocTid0 = s->poc;
 596
 597         if (s->sps->sao_enabled) {
 598             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 599             sh->slice_sample_adaptive_offset_flag[1] =
 600             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 601         } else {
 602             sh->slice_sample_adaptive_offset_flag[0] = 0;
 603             sh->slice_sample_adaptive_offset_flag[1] = 0;
 604             sh->slice_sample_adaptive_offset_flag[2] = 0;
 605         }
 606
 607         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 608         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 609             int nb_refs;
 610
 611             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 612             if (sh->slice_type == B_SLICE)
 613                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 614
 615             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 616                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 617                 if (sh->slice_type == B_SLICE)
 618                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 619             }
 620             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 621                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 622                        sh->nb_refs[L0], sh->nb_refs[L1]);
 623                 return AVERROR_INVALIDDATA;
 624             }
 625
 626             sh->rpl_modification_flag[0] = 0;
 627             sh->rpl_modification_flag[1] = 0;
 628             nb_refs = ff_hevc_frame_nb_refs(s);
 629             if (!nb_refs) {
 630                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 631                 return AVERROR_INVALIDDATA;
 632             }
 633
 634             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 635                 sh->rpl_modification_flag[0] = get_bits1(gb);
 636                 if (sh->rpl_modification_flag[0]) {
 637                     for (i = 0; i < sh->nb_refs[L0]; i++)
 638                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 639                 }
 640
 641                 if (sh->slice_type == B_SLICE) {
 642                     sh->rpl_modification_flag[1] = get_bits1(gb);
 643                     if (sh->rpl_modification_flag[1] == 1)
 644                         for (i = 0; i < sh->nb_refs[L1]; i++)
 645                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 646                 }
 647             }
 648
 649             if (sh->slice_type == B_SLICE)
 650                 sh->mvd_l1_zero_flag = get_bits1(gb);
 651
 652             if (s->pps->cabac_init_present_flag)
 653                 sh->cabac_init_flag = get_bits1(gb);
 654             else
 655                 sh->cabac_init_flag = 0;
 656
 657             sh->collocated_ref_idx = 0;
 658             if (sh->slice_temporal_mvp_enabled_flag) {
 659                 sh->collocated_list = L0;
 660                 if (sh->slice_type == B_SLICE)
 661                     sh->collocated_list = !get_bits1(gb);
 662
 663                 if (sh->nb_refs[sh->collocated_list] > 1) {
 664                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 665                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 666                         av_log(s->avctx, AV_LOG_ERROR,
 667                                "Invalid collocated_ref_idx: %d.\n",
 668                                sh->collocated_ref_idx);
 669                         return AVERROR_INVALIDDATA;
 670                     }
 671                 }
 672             }
 673
 674             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 675                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 676                 pred_weight_table(s, gb);
 677             }
 678
 679             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 680             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 681                 av_log(s->avctx, AV_LOG_ERROR,
 682                        "Invalid number of merging MVP candidates: %d.\n",
 683                        sh->max_num_merge_cand);
 684                 return AVERROR_INVALIDDATA;
 685             }
 686         }
 687
 688         sh->slice_qp_delta = get_se_golomb(gb);
 689         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 690             sh->slice_cb_qp_offset = get_se_golomb(gb);
 691             sh->slice_cr_qp_offset = get_se_golomb(gb);
 692         } else {
 693             sh->slice_cb_qp_offset = 0;
 694             sh->slice_cr_qp_offset = 0;
 695         }
 696
 697         if (s->pps->deblocking_filter_control_present_flag) {
 698             int deblocking_filter_override_flag = 0;
 699
 700             if (s->pps->deblocking_filter_override_enabled_flag)
 701                 deblocking_filter_override_flag = get_bits1(gb);
 702
 703             if (deblocking_filter_override_flag) {
 704                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 705                 if (!sh->disable_deblocking_filter_flag) {
 706                     sh->beta_offset = get_se_golomb(gb) * 2;
 707                     sh->tc_offset   = get_se_golomb(gb) * 2;
 708                 }
 709             } else {
 710                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 711                 sh->beta_offset                    = s->pps->beta_offset;
 712                 sh->tc_offset                      = s->pps->tc_offset;
 713             }
 714         } else {
 715             sh->disable_deblocking_filter_flag = 0;
 716             sh->beta_offset                    = 0;
 717             sh->tc_offset                      = 0;
 718         }
 719
 720         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 721             (sh->slice_sample_adaptive_offset_flag[0] ||
 722              sh->slice_sample_adaptive_offset_flag[1] ||
 723              !sh->disable_deblocking_filter_flag)) {
 724             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 725         } else {
 726             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 727         }
 728     } else if (!s->slice_initialized) {
 729         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 730         return AVERROR_INVALIDDATA;
 731     }
 732
 733     sh->num_entry_point_offsets = 0;
 734     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 735         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 736         if (sh->num_entry_point_offsets > 0) {
 737             int offset_len = get_ue_golomb_long(gb) + 1;
 738
 739             for (i = 0; i < sh->num_entry_point_offsets; i++)
 740                 skip_bits(gb, offset_len);
 741         }
 742     }
 743
 744     if (s->pps->slice_header_extension_present_flag) {
 745         int length = get_ue_golomb_long(gb);
 746         for (i = 0; i < length; i++)
 747             skip_bits(gb, 8);  // slice_header_extension_data_byte
 748     }
 749
 750     // Inferred parameters
 751     sh->slice_qp          = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 752     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 753
 754     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 755
 756     if (!s->pps->cu_qp_delta_enabled_flag)
 757         s->HEVClc.qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) %
 758                           (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset;
 759
 760     s->slice_initialized = 1;
 761
 762     return 0;
 763 }
 764
 765 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 766
 767 #define SET_SAO(elem, value)                            \
 768 do {                                                    \
 769     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 770         sao->elem = value;                              \
 771     else if (sao_merge_left_flag)                       \
 772         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 773     else if (sao_merge_up_flag)                         \
 774         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 775     else                                                \
 776         sao->elem = 0;                                  \
 777 } while (0)
 778
 779 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 780 {
 781     HEVCLocalContext *lc    = &s->HEVClc;
 782     int sao_merge_left_flag = 0;
 783     int sao_merge_up_flag   = 0;
 784     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 785     SAOParams *sao          = &CTB(s->sao, rx, ry);
 786     int c_idx, i;
 787
 788     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 789         s->sh.slice_sample_adaptive_offset_flag[1]) {
 790         if (rx > 0) {
 791             if (lc->ctb_left_flag)
 792                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 793         }
 794         if (ry > 0 && !sao_merge_left_flag) {
 795             if (lc->ctb_up_flag)
 796                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 797         }
 798     }
 799
 800     for (c_idx = 0; c_idx < 3; c_idx++) {
 801         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 802             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 803             continue;
 804         }
 805
 806         if (c_idx == 2) {
 807             sao->type_idx[2] = sao->type_idx[1];
 808             sao->eo_class[2] = sao->eo_class[1];
 809         } else {
 810             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 811         }
 812
 813         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 814             continue;
 815
 816         for (i = 0; i < 4; i++)
 817             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 818
 819         if (sao->type_idx[c_idx] == SAO_BAND) {
 820             for (i = 0; i < 4; i++) {
 821                 if (sao->offset_abs[c_idx][i]) {
 822                     SET_SAO(offset_sign[c_idx][i],
 823                             ff_hevc_sao_offset_sign_decode(s));
 824                 } else {
 825                     sao->offset_sign[c_idx][i] = 0;
 826                 }
 827             }
 828             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 829         } else if (c_idx != 2) {
 830             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 831         }
 832
 833         // Inferred parameters
 834         sao->offset_val[c_idx][0] = 0;
 835         for (i = 0; i < 4; i++) {
 836             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 837             if (sao->type_idx[c_idx] == SAO_EDGE) {
 838                 if (i > 1)
 839                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 840             } else if (sao->offset_sign[c_idx][i]) {
 841                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 842             }
 843         }
 844     }
 845 }
 846
 847 #undef SET_SAO
 848 #undef CTB
 849
 850 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 851                                 int log2_trafo_size, enum ScanType scan_idx,
 852                                 int c_idx)
 853 {
 854 #define GET_COORD(offset, n)                                    \
 855     do {                                                        \
 856         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 857         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 858     } while (0)
 859     HEVCLocalContext *lc    = &s->HEVClc;
 860     int transform_skip_flag = 0;
 861
 862     int last_significant_coeff_x, last_significant_coeff_y;
 863     int last_scan_pos;
 864     int n_end;
 865     int num_coeff    = 0;
 866     int greater1_ctx = 1;
 867
 868     int num_last_subset;
 869     int x_cg_last_sig, y_cg_last_sig;
 870
 871     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 872
 873     ptrdiff_t stride = s->frame->linesize[c_idx];
 874     int hshift       = s->sps->hshift[c_idx];
 875     int vshift       = s->sps->vshift[c_idx];
 876     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 877                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 878     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 879     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 880
 881     int trafo_size = 1 << log2_trafo_size;
 882     int i, qp, shift, add, scale, scale_m;
 883     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 884     const uint8_t *scale_matrix;
 885     uint8_t dc_scale;
 886
 887     // Derive QP for dequant
 888     if (!lc->cu.cu_transquant_bypass_flag) {
 889         static const int qp_c[] = {
 890             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 891         };
 892
 893         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 894             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 895             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 896             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 897         };
 898
 899         static const uint8_t div6[51 + 2 * 6 + 1] = {
 900             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 901             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 902             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 903         };
 904         int qp_y = lc->qp_y;
 905
 906         if (c_idx == 0) {
 907             qp = qp_y + s->sps->qp_bd_offset;
 908         } else {
 909             int qp_i, offset;
 910
 911             if (c_idx == 1)
 912                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 913             else
 914                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 915
 916             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 917             if (qp_i < 30)
 918                 qp = qp_i;
 919             else if (qp_i > 43)
 920                 qp = qp_i - 6;
 921             else
 922                 qp = qp_c[qp_i - 30];
 923
 924             qp += s->sps->qp_bd_offset;
 925         }
 926
 927         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 928         add      = 1 << (shift - 1);
 929         scale    = level_scale[rem6[qp]] << (div6[qp]);
 930         scale_m  = 16; // default when no custom scaling lists.
 931         dc_scale = 16;
 932
 933         if (s->sps->scaling_list_enable_flag) {
 934             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 935                                     &s->pps->scaling_list : &s->sps->scaling_list;
 936             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 937
 938             if (log2_trafo_size != 5)
 939                 matrix_id = 3 * matrix_id + c_idx;
 940
 941             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 942             if (log2_trafo_size >= 4)
 943                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 944         }
 945     }
 946
 947     if (s->pps->transform_skip_enabled_flag &&
 948         !lc->cu.cu_transquant_bypass_flag   &&
 949         log2_trafo_size == 2) {
 950         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 951     }
 952
 953     last_significant_coeff_x =
 954         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 955     last_significant_coeff_y =
 956         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 957
 958     if (last_significant_coeff_x > 3) {
 959         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 960         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 961                                    (2 + (last_significant_coeff_x & 1)) +
 962                                    suffix;
 963     }
 964
 965     if (last_significant_coeff_y > 3) {
 966         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 967         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 968                                    (2 + (last_significant_coeff_y & 1)) +
 969                                    suffix;
 970     }
 971
 972     if (scan_idx == SCAN_VERT)
 973         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
 974
 975     x_cg_last_sig = last_significant_coeff_x >> 2;
 976     y_cg_last_sig = last_significant_coeff_y >> 2;
 977
 978     switch (scan_idx) {
 979     case SCAN_DIAG: {
 980         int last_x_c = last_significant_coeff_x & 3;
 981         int last_y_c = last_significant_coeff_y & 3;
 982
 983         scan_x_off = ff_hevc_diag_scan4x4_x;
 984         scan_y_off = ff_hevc_diag_scan4x4_y;
 985         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
 986         if (trafo_size == 4) {
 987             scan_x_cg = scan_1x1;
 988             scan_y_cg = scan_1x1;
 989         } else if (trafo_size == 8) {
 990             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 991             scan_x_cg  = diag_scan2x2_x;
 992             scan_y_cg  = diag_scan2x2_y;
 993         } else if (trafo_size == 16) {
 994             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 995             scan_x_cg  = ff_hevc_diag_scan4x4_x;
 996             scan_y_cg  = ff_hevc_diag_scan4x4_y;
 997         } else { // trafo_size == 32
 998             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 999             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1000             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1001         }
1002         break;
1003     }
1004     case SCAN_HORIZ:
1005         scan_x_cg  = horiz_scan2x2_x;
1006         scan_y_cg  = horiz_scan2x2_y;
1007         scan_x_off = horiz_scan4x4_x;
1008         scan_y_off = horiz_scan4x4_y;
1009         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1010         break;
1011     default: //SCAN_VERT
1012         scan_x_cg  = horiz_scan2x2_y;
1013         scan_y_cg  = horiz_scan2x2_x;
1014         scan_x_off = horiz_scan4x4_y;
1015         scan_y_off = horiz_scan4x4_x;
1016         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1017         break;
1018     }
1019     num_coeff++;
1020     num_last_subset = (num_coeff - 1) >> 4;
1021
1022     for (i = num_last_subset; i >= 0; i--) {
1023         int n, m;
1024         int x_cg, y_cg, x_c, y_c;
1025         int implicit_non_zero_coeff = 0;
1026         int64_t trans_coeff_level;
1027         int prev_sig = 0;
1028         int offset   = i << 4;
1029
1030         uint8_t significant_coeff_flag_idx[16];
1031         uint8_t nb_significant_coeff_flag = 0;
1032
1033         x_cg = scan_x_cg[i];
1034         y_cg = scan_y_cg[i];
1035
1036         if (i < num_last_subset && i > 0) {
1037             int ctx_cg = 0;
1038             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1039                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1040             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1041                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1042
1043             significant_coeff_group_flag[x_cg][y_cg] =
1044                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1045             implicit_non_zero_coeff = 1;
1046         } else {
1047             significant_coeff_group_flag[x_cg][y_cg] =
1048                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1049                  (x_cg == 0 && y_cg == 0));
1050         }
1051
1052         last_scan_pos = num_coeff - offset - 1;
1053
1054         if (i == num_last_subset) {
1055             n_end                         = last_scan_pos - 1;
1056             significant_coeff_flag_idx[0] = last_scan_pos;
1057             nb_significant_coeff_flag     = 1;
1058         } else {
1059             n_end = 15;
1060         }
1061
1062         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1063             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1064         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1065             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1066
1067         for (n = n_end; n >= 0; n--) {
1068             GET_COORD(offset, n);
1069
1070             if (significant_coeff_group_flag[x_cg][y_cg] &&
1071                 (n > 0 || implicit_non_zero_coeff == 0)) {
1072                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1073                                                           log2_trafo_size,
1074                                                           scan_idx,
1075                                                           prev_sig) == 1) {
1076                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1077                     nb_significant_coeff_flag++;
1078                     implicit_non_zero_coeff = 0;
1079                 }
1080             } else {
1081                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1082                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1083                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1084                     nb_significant_coeff_flag++;
1085                 }
1086             }
1087         }
1088
1089         n_end = nb_significant_coeff_flag;
1090
1091         if (n_end) {
1092             int first_nz_pos_in_cg = 16;
1093             int last_nz_pos_in_cg = -1;
1094             int c_rice_param = 0;
1095             int first_greater1_coeff_idx = -1;
1096             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1097             uint16_t coeff_sign_flag;
1098             int sum_abs = 0;
1099             int sign_hidden = 0;
1100
1101             // initialize first elem of coeff_bas_level_greater1_flag
1102             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1103
1104             if (!(i == num_last_subset) && greater1_ctx == 0)
1105                 ctx_set++;
1106             greater1_ctx      = 1;
1107             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1108
1109             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1110                 int n_idx = significant_coeff_flag_idx[m];
1111                 int inc   = (ctx_set << 2) + greater1_ctx;
1112                 coeff_abs_level_greater1_flag[n_idx] =
1113                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1114                 if (coeff_abs_level_greater1_flag[n_idx]) {
1115                     greater1_ctx = 0;
1116                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1117                     greater1_ctx++;
1118                 }
1119
1120                 if (coeff_abs_level_greater1_flag[n_idx] &&
1121                     first_greater1_coeff_idx == -1)
1122                     first_greater1_coeff_idx = n_idx;
1123             }
1124             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1125             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1126                                  !lc->cu.cu_transquant_bypass_flag;
1127
1128             if (first_greater1_coeff_idx != -1) {
1129                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1130             }
1131             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1132                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1133             } else {
1134                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1135             }
1136
1137             for (m = 0; m < n_end; m++) {
1138                 n = significant_coeff_flag_idx[m];
1139                 GET_COORD(offset, n);
1140                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1141                 if (trans_coeff_level == ((m < 8) ?
1142                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1143                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1144
1145                     trans_coeff_level += last_coeff_abs_level_remaining;
1146                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1147                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1148                 }
1149                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1150                     sum_abs += trans_coeff_level;
1151                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1152                         trans_coeff_level = -trans_coeff_level;
1153                 }
1154                 if (coeff_sign_flag >> 15)
1155                     trans_coeff_level = -trans_coeff_level;
1156                 coeff_sign_flag <<= 1;
1157                 if (!lc->cu.cu_transquant_bypass_flag) {
1158                     if (s->sps->scaling_list_enable_flag) {
1159                         if (y_c || x_c || log2_trafo_size < 4) {
1160                             int pos;
1161                             switch (log2_trafo_size) {
1162                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1163                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1164                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1165                             default: pos = (y_c        << 2) +  x_c;
1166                             }
1167                             scale_m = scale_matrix[pos];
1168                         } else {
1169                             scale_m = dc_scale;
1170                         }
1171                     }
1172                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1173                     if(trans_coeff_level < 0) {
1174                         if((~trans_coeff_level) & 0xFffffffffff8000)
1175                             trans_coeff_level = -32768;
1176                     } else {
1177                         if (trans_coeff_level & 0xffffffffffff8000)
1178                             trans_coeff_level = 32767;
1179                     }
1180                 }
1181                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1182             }
1183         }
1184     }
1185
1186     if (lc->cu.cu_transquant_bypass_flag) {
1187         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1188     } else {
1189         if (transform_skip_flag)
1190             s->hevcdsp.transform_skip(dst, coeffs, stride);
1191         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1192                  log2_trafo_size == 2)
1193             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1194         else
1195             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1196     }
1197 }
1198
1199 static void hls_transform_unit(HEVCContext *s, int x0, int y0,
1200                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1201                                int log2_cb_size, int log2_trafo_size,
1202                                int trafo_depth, int blk_idx)
1203 {
1204     HEVCLocalContext *lc = &s->HEVClc;
1205
1206     if (lc->cu.pred_mode == MODE_INTRA) {
1207         int trafo_size = 1 << log2_trafo_size;
1208         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1209
1210         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1211         if (log2_trafo_size > 2) {
1212             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1213             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1214             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1215             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1216         } else if (blk_idx == 3) {
1217             trafo_size = trafo_size << s->sps->hshift[1];
1218             ff_hevc_set_neighbour_available(s, xBase, yBase,
1219                                             trafo_size, trafo_size);
1220             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1221             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1222         }
1223     }
1224
1225     if (lc->tt.cbf_luma ||
1226         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1227         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1228         int scan_idx   = SCAN_DIAG;
1229         int scan_idx_c = SCAN_DIAG;
1230
1231         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1232             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1233             if (lc->tu.cu_qp_delta != 0)
1234                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1235                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1236             lc->tu.is_cu_qp_delta_coded = 1;
1237             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1238         }
1239
1240         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1241             if (lc->tu.cur_intra_pred_mode >= 6 &&
1242                 lc->tu.cur_intra_pred_mode <= 14) {
1243                 scan_idx = SCAN_VERT;
1244             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1245                        lc->tu.cur_intra_pred_mode <= 30) {
1246                 scan_idx = SCAN_HORIZ;
1247             }
1248
1249             if (lc->pu.intra_pred_mode_c >=  6 &&
1250                 lc->pu.intra_pred_mode_c <= 14) {
1251                 scan_idx_c = SCAN_VERT;
1252             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1253                        lc->pu.intra_pred_mode_c <= 30) {
1254                 scan_idx_c = SCAN_HORIZ;
1255             }
1256         }
1257
1258         if (lc->tt.cbf_luma)
1259             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1260         if (log2_trafo_size > 2) {
1261             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1262                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1263             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1264                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1265         } else if (blk_idx == 3) {
1266             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1267                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1268             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1269                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1270         }
1271     }
1272 }
1273
1274 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1275 {
1276     int cb_size          = 1 << log2_cb_size;
1277     int log2_min_pu_size = s->sps->log2_min_pu_size;
1278
1279     int min_pu_width     = s->sps->min_pu_width;
1280     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1281     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1282     int i, j;
1283
1284     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1285         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1286             s->is_pcm[i + j * min_pu_width] = 2;
1287 }
1288
1289 static void hls_transform_tree(HEVCContext *s, int x0, int y0,
1290                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1291                                int log2_cb_size, int log2_trafo_size,
1292                                int trafo_depth, int blk_idx)
1293 {
1294     HEVCLocalContext *lc = &s->HEVClc;
1295     uint8_t split_transform_flag;
1296
1297     if (trafo_depth > 0 && log2_trafo_size == 2) {
1298         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1299             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1300         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1301             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1302     } else {
1303         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1304         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1305     }
1306
1307     if (lc->cu.intra_split_flag) {
1308         if (trafo_depth == 1)
1309             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1310     } else {
1311         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1312     }
1313
1314     lc->tt.cbf_luma = 1;
1315
1316     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1317                               lc->cu.pred_mode == MODE_INTER &&
1318                               lc->cu.part_mode != PART_2Nx2N &&
1319                               trafo_depth == 0;
1320
1321     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1322         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1323         trafo_depth     < lc->cu.max_trafo_depth       &&
1324         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1325         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1326     } else {
1327         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1328                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1329                                lc->tt.inter_split_flag;
1330     }
1331
1332     if (log2_trafo_size > 2) {
1333         if (trafo_depth == 0 ||
1334             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1335             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1336                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1337         }
1338
1339         if (trafo_depth == 0 ||
1340             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1341             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1342                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1343         }
1344     }
1345
1346     if (split_transform_flag) {
1347         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1348         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1349
1350         hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1351                            log2_trafo_size - 1, trafo_depth + 1, 0);
1352         hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1353                            log2_trafo_size - 1, trafo_depth + 1, 1);
1354         hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1355                            log2_trafo_size - 1, trafo_depth + 1, 2);
1356         hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1357                            log2_trafo_size - 1, trafo_depth + 1, 3);
1358     } else {
1359         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1360         int log2_min_tu_size = s->sps->log2_min_tb_size;
1361         int min_tu_width     = s->sps->min_tb_width;
1362
1363         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1364             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1365             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1366             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1367         }
1368
1369         hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1370                            log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
1371
1372         // TODO: store cbf_luma somewhere else
1373         if (lc->tt.cbf_luma) {
1374             int i, j;
1375             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1376                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1377                     int x_tu = (x0 + j) >> log2_min_tu_size;
1378                     int y_tu = (y0 + i) >> log2_min_tu_size;
1379                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1380                 }
1381         }
1382         if (!s->sh.disable_deblocking_filter_flag) {
1383             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1384                                                   lc->slice_or_tiles_up_boundary,
1385                                                   lc->slice_or_tiles_left_boundary);
1386             if (s->pps->transquant_bypass_enable_flag &&
1387                 lc->cu.cu_transquant_bypass_flag)
1388                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1389         }
1390     }
1391 }
1392
1393 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1394 {
1395     //TODO: non-4:2:0 support
1396     HEVCLocalContext *lc = &s->HEVClc;
1397     GetBitContext gb;
1398     int cb_size   = 1 << log2_cb_size;
1399     int stride0   = s->frame->linesize[0];
1400     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1401     int   stride1 = s->frame->linesize[1];
1402     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1403     int   stride2 = s->frame->linesize[2];
1404     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1405
1406     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth;
1407     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1408     int ret;
1409
1410     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1411                                           lc->slice_or_tiles_up_boundary,
1412                                           lc->slice_or_tiles_left_boundary);
1413
1414     ret = init_get_bits(&gb, pcm, length);
1415     if (ret < 0)
1416         return ret;
1417
1418     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1419     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1420     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1421     return 0;
1422 }
1423
1424 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1425 {
1426     HEVCLocalContext *lc = &s->HEVClc;
1427     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1428     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1429
1430     if (x)
1431         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1432     if (y)
1433         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1434
1435     switch (x) {
1436     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1437     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1438     case 0: lc->pu.mvd.x = 0;                               break;
1439     }
1440
1441     switch (y) {
1442     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1443     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1444     case 0: lc->pu.mvd.y = 0;                               break;
1445     }
1446 }
1447
1448 /**
1449  * 8.5.3.2.2.1 Luma sample interpolation process
1450  *
1451  * @param s HEVC decoding context
1452  * @param dst target buffer for block data at block position
1453  * @param dststride stride of the dst buffer
1454  * @param ref reference picture buffer at origin (0, 0)
1455  * @param mv motion vector (relative to block position) to get pixel data from
1456  * @param x_off horizontal position of block from origin (0, 0)
1457  * @param y_off vertical position of block from origin (0, 0)
1458  * @param block_w width of block
1459  * @param block_h height of block
1460  */
1461 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1462                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1463                     int block_w, int block_h)
1464 {
1465     HEVCLocalContext *lc = &s->HEVClc;
1466     uint8_t *src         = ref->data[0];
1467     ptrdiff_t srcstride  = ref->linesize[0];
1468     int pic_width        = s->sps->width;
1469     int pic_height       = s->sps->height;
1470
1471     int mx         = mv->x & 3;
1472     int my         = mv->y & 3;
1473     int extra_left = ff_hevc_qpel_extra_before[mx];
1474     int extra_top  = ff_hevc_qpel_extra_before[my];
1475
1476     x_off += mv->x >> 2;
1477     y_off += mv->y >> 2;
1478     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1479
1480     if (x_off < extra_left || y_off < extra_top ||
1481         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1482         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1483         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1484
1485         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1486                                  srcstride, srcstride,
1487                                  block_w + ff_hevc_qpel_extra[mx],
1488                                  block_h + ff_hevc_qpel_extra[my],
1489                                  x_off - extra_left, y_off - extra_top,
1490                                  pic_width, pic_height);
1491         src = lc->edge_emu_buffer + offset;
1492     }
1493     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1494                                      block_h, lc->mc_buffer);
1495 }
1496
1497 /**
1498  * 8.5.3.2.2.2 Chroma sample interpolation process
1499  *
1500  * @param s HEVC decoding context
1501  * @param dst1 target buffer for block data at block position (U plane)
1502  * @param dst2 target buffer for block data at block position (V plane)
1503  * @param dststride stride of the dst1 and dst2 buffers
1504  * @param ref reference picture buffer at origin (0, 0)
1505  * @param mv motion vector (relative to block position) to get pixel data from
1506  * @param x_off horizontal position of block from origin (0, 0)
1507  * @param y_off vertical position of block from origin (0, 0)
1508  * @param block_w width of block
1509  * @param block_h height of block
1510  */
1511 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1512                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1513                       int x_off, int y_off, int block_w, int block_h)
1514 {
1515     HEVCLocalContext *lc = &s->HEVClc;
1516     uint8_t *src1        = ref->data[1];
1517     uint8_t *src2        = ref->data[2];
1518     ptrdiff_t src1stride = ref->linesize[1];
1519     ptrdiff_t src2stride = ref->linesize[2];
1520     int pic_width        = s->sps->width >> 1;
1521     int pic_height       = s->sps->height >> 1;
1522
1523     int mx = mv->x & 7;
1524     int my = mv->y & 7;
1525
1526     x_off += mv->x >> 3;
1527     y_off += mv->y >> 3;
1528     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1529     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1530
1531     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1532         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1533         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1534         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1535         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1536
1537         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1538                                  src1stride, src1stride,
1539                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1540                                  x_off - EPEL_EXTRA_BEFORE,
1541                                  y_off - EPEL_EXTRA_BEFORE,
1542                                  pic_width, pic_height);
1543
1544         src1 = lc->edge_emu_buffer + offset1;
1545         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1546                                              block_w, block_h, mx, my, lc->mc_buffer);
1547
1548         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1549                                  src2stride, src2stride,
1550                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1551                                  x_off - EPEL_EXTRA_BEFORE,
1552                                  y_off - EPEL_EXTRA_BEFORE,
1553                                  pic_width, pic_height);
1554         src2 = lc->edge_emu_buffer + offset2;
1555         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1556                                              block_w, block_h, mx, my,
1557                                              lc->mc_buffer);
1558     } else {
1559         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1560                                              block_w, block_h, mx, my,
1561                                              lc->mc_buffer);
1562         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1563                                              block_w, block_h, mx, my,
1564                                              lc->mc_buffer);
1565     }
1566 }
1567
1568 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1569                                 const Mv *mv, int y0, int height)
1570 {
1571     int y = (mv->y >> 2) + y0 + height + 9;
1572     ff_thread_await_progress(&ref->tf, y, 0);
1573 }
1574
1575 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1576                                 int nPbW, int nPbH,
1577                                 int log2_cb_size, int partIdx)
1578 {
1579 #define POS(c_idx, x, y)                                                              \
1580     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1581                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1582     HEVCLocalContext *lc = &s->HEVClc;
1583     int merge_idx = 0;
1584     struct MvField current_mv = {{{ 0 }}};
1585
1586     int min_pu_width = s->sps->min_pu_width;
1587
1588     MvField *tab_mvf = s->ref->tab_mvf;
1589     RefPicList  *refPicList = s->ref->refPicList;
1590     HEVCFrame *ref0, *ref1;
1591
1592     int tmpstride = MAX_PB_SIZE;
1593
1594     uint8_t *dst0 = POS(0, x0, y0);
1595     uint8_t *dst1 = POS(1, x0, y0);
1596     uint8_t *dst2 = POS(2, x0, y0);
1597     int log2_min_cb_size = s->sps->log2_min_cb_size;
1598     int min_cb_width     = s->sps->min_cb_width;
1599     int x_cb             = x0 >> log2_min_cb_size;
1600     int y_cb             = y0 >> log2_min_cb_size;
1601     int ref_idx[2];
1602     int mvp_flag[2];
1603     int x_pu, y_pu;
1604     int i, j;
1605
1606     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1607         if (s->sh.max_num_merge_cand > 1)
1608             merge_idx = ff_hevc_merge_idx_decode(s);
1609         else
1610             merge_idx = 0;
1611
1612         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1613                                    1 << log2_cb_size,
1614                                    1 << log2_cb_size,
1615                                    log2_cb_size, partIdx,
1616                                    merge_idx, &current_mv);
1617         x_pu = x0 >> s->sps->log2_min_pu_size;
1618         y_pu = y0 >> s->sps->log2_min_pu_size;
1619
1620         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1621             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1622                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1623     } else { /* MODE_INTER */
1624         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1625         if (lc->pu.merge_flag) {
1626             if (s->sh.max_num_merge_cand > 1)
1627                 merge_idx = ff_hevc_merge_idx_decode(s);
1628             else
1629                 merge_idx = 0;
1630
1631             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1632                                        partIdx, merge_idx, &current_mv);
1633             x_pu = x0 >> s->sps->log2_min_pu_size;
1634             y_pu = y0 >> s->sps->log2_min_pu_size;
1635
1636             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1637                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1638                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1639         } else {
1640             enum InterPredIdc inter_pred_idc = PRED_L0;
1641             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1642             if (s->sh.slice_type == B_SLICE)
1643                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1644
1645             if (inter_pred_idc != PRED_L1) {
1646                 if (s->sh.nb_refs[L0]) {
1647                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1648                     current_mv.ref_idx[0] = ref_idx[0];
1649                 }
1650                 current_mv.pred_flag[0] = 1;
1651                 hls_mvd_coding(s, x0, y0, 0);
1652                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1653                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1654                                          partIdx, merge_idx, &current_mv,
1655                                          mvp_flag[0], 0);
1656                 current_mv.mv[0].x += lc->pu.mvd.x;
1657                 current_mv.mv[0].y += lc->pu.mvd.y;
1658             }
1659
1660             if (inter_pred_idc != PRED_L0) {
1661                 if (s->sh.nb_refs[L1]) {
1662                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1663                     current_mv.ref_idx[1] = ref_idx[1];
1664                 }
1665
1666                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1667                     lc->pu.mvd.x = 0;
1668                     lc->pu.mvd.y = 0;
1669                 } else {
1670                     hls_mvd_coding(s, x0, y0, 1);
1671                 }
1672
1673                 current_mv.pred_flag[1] = 1;
1674                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1675                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1676                                          partIdx, merge_idx, &current_mv,
1677                                          mvp_flag[1], 1);
1678                 current_mv.mv[1].x += lc->pu.mvd.x;
1679                 current_mv.mv[1].y += lc->pu.mvd.y;
1680             }
1681
1682             x_pu = x0 >> s->sps->log2_min_pu_size;
1683             y_pu = y0 >> s->sps->log2_min_pu_size;
1684
1685             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1686                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1687                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1688         }
1689     }
1690
1691     if (current_mv.pred_flag[0]) {
1692         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1693         if (!ref0)
1694             return;
1695         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1696     }
1697     if (current_mv.pred_flag[1]) {
1698         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1699         if (!ref1)
1700             return;
1701         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1702     }
1703
1704     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1705         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1706         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1707
1708         luma_mc(s, tmp, tmpstride, ref0->frame,
1709                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1710
1711         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1712             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1713             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1714                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1715                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1716                                      dst0, s->frame->linesize[0], tmp,
1717                                      tmpstride, nPbW, nPbH);
1718         } else {
1719             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1720         }
1721         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1722                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1723
1724         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1725             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1726             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1727                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1728                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1729                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1730                                      nPbW / 2, nPbH / 2);
1731             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1732                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1733                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1734                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1735                                      nPbW / 2, nPbH / 2);
1736         } else {
1737             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1738             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1739         }
1740     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1741         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1742         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1743
1744         if (!ref1)
1745             return;
1746
1747         luma_mc(s, tmp, tmpstride, ref1->frame,
1748                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1749
1750         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1751             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1752             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1753                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1754                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1755                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1756                                       nPbW, nPbH);
1757         } else {
1758             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1759         }
1760
1761         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1762                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1763
1764         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1765             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1766             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1767                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1768                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1769                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1770             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1771                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1772                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1773                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1774         } else {
1775             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1776             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1777         }
1778     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1779         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1780         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1781         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1782         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1783         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1784         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1785
1786         if (!ref0 || !ref1)
1787             return;
1788
1789         luma_mc(s, tmp, tmpstride, ref0->frame,
1790                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1791         luma_mc(s, tmp2, tmpstride, ref1->frame,
1792                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1793
1794         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1795             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1796             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1797                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1798                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1799                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1800                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1801                                          dst0, s->frame->linesize[0],
1802                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1803         } else {
1804             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1805                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1806         }
1807
1808         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1809                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1810         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1811                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1812
1813         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1814             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1815             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1816                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1817                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1818                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1819                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1820                                          dst1, s->frame->linesize[1], tmp, tmp3,
1821                                          tmpstride, nPbW / 2, nPbH / 2);
1822             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1823                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1824                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1825                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1826                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1827                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1828                                          tmpstride, nPbW / 2, nPbH / 2);
1829         } else {
1830             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1831             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1832         }
1833     }
1834 }
1835
1836 /**
1837  * 8.4.1
1838  */
1839 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1840                                 int prev_intra_luma_pred_flag)
1841 {
1842     HEVCLocalContext *lc = &s->HEVClc;
1843     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1844     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1845     int min_pu_width     = s->sps->min_pu_width;
1846     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1847     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1848     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1849
1850     int cand_up   = (lc->ctb_up_flag || y0b) ?
1851                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1852     int cand_left = (lc->ctb_left_flag || x0b) ?
1853                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1854
1855     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1856
1857     MvField *tab_mvf = s->ref->tab_mvf;
1858     int intra_pred_mode;
1859     int candidate[3];
1860     int i, j;
1861
1862     // intra_pred_mode prediction does not cross vertical CTB boundaries
1863     if ((y0 - 1) < y_ctb)
1864         cand_up = INTRA_DC;
1865
1866     if (cand_left == cand_up) {
1867         if (cand_left < 2) {
1868             candidate[0] = INTRA_PLANAR;
1869             candidate[1] = INTRA_DC;
1870             candidate[2] = INTRA_ANGULAR_26;
1871         } else {
1872             candidate[0] = cand_left;
1873             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1874             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1875         }
1876     } else {
1877         candidate[0] = cand_left;
1878         candidate[1] = cand_up;
1879         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1880             candidate[2] = INTRA_PLANAR;
1881         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1882             candidate[2] = INTRA_DC;
1883         } else {
1884             candidate[2] = INTRA_ANGULAR_26;
1885         }
1886     }
1887
1888     if (prev_intra_luma_pred_flag) {
1889         intra_pred_mode = candidate[lc->pu.mpm_idx];
1890     } else {
1891         if (candidate[0] > candidate[1])
1892             FFSWAP(uint8_t, candidate[0], candidate[1]);
1893         if (candidate[0] > candidate[2])
1894             FFSWAP(uint8_t, candidate[0], candidate[2]);
1895         if (candidate[1] > candidate[2])
1896             FFSWAP(uint8_t, candidate[1], candidate[2]);
1897
1898         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1899         for (i = 0; i < 3; i++)
1900             if (intra_pred_mode >= candidate[i])
1901                 intra_pred_mode++;
1902     }
1903
1904     /* write the intra prediction units into the mv array */
1905     if (!size_in_pus)
1906         size_in_pus = 1;
1907     for (i = 0; i < size_in_pus; i++) {
1908         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1909                intra_pred_mode, size_in_pus);
1910
1911         for (j = 0; j < size_in_pus; j++) {
1912             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1913             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1914             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1915             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1916             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1917             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1918             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1919             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1920             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1921         }
1922     }
1923
1924     return intra_pred_mode;
1925 }
1926
1927 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1928                                           int log2_cb_size, int ct_depth)
1929 {
1930     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1931     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1932     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1933     int y;
1934
1935     for (y = 0; y < length; y++)
1936         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1937                ct_depth, length);
1938 }
1939
1940 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1941                                   int log2_cb_size)
1942 {
1943     HEVCLocalContext *lc = &s->HEVClc;
1944     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1945     uint8_t prev_intra_luma_pred_flag[4];
1946     int split   = lc->cu.part_mode == PART_NxN;
1947     int pb_size = (1 << log2_cb_size) >> split;
1948     int side    = split + 1;
1949     int chroma_mode;
1950     int i, j;
1951
1952     for (i = 0; i < side; i++)
1953         for (j = 0; j < side; j++)
1954             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1955
1956     for (i = 0; i < side; i++) {
1957         for (j = 0; j < side; j++) {
1958             if (prev_intra_luma_pred_flag[2 * i + j])
1959                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1960             else
1961                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1962
1963             lc->pu.intra_pred_mode[2 * i + j] =
1964                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1965                                      prev_intra_luma_pred_flag[2 * i + j]);
1966         }
1967     }
1968
1969     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1970     if (chroma_mode != 4) {
1971         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1972             lc->pu.intra_pred_mode_c = 34;
1973         else
1974             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1975     } else {
1976         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1977     }
1978 }
1979
1980 static void intra_prediction_unit_default_value(HEVCContext *s,
1981                                                 int x0, int y0,
1982                                                 int log2_cb_size)
1983 {
1984     HEVCLocalContext *lc = &s->HEVClc;
1985     int pb_size          = 1 << log2_cb_size;
1986     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1987     int min_pu_width     = s->sps->min_pu_width;
1988     MvField *tab_mvf     = s->ref->tab_mvf;
1989     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1990     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1991     int j, k;
1992
1993     if (size_in_pus == 0)
1994         size_in_pus = 1;
1995     for (j = 0; j < size_in_pus; j++) {
1996         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1997         for (k = 0; k < size_in_pus; k++)
1998             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
1999     }
2000 }
2001
2002 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2003 {
2004     int cb_size          = 1 << log2_cb_size;
2005     HEVCLocalContext *lc = &s->HEVClc;
2006     int log2_min_cb_size = s->sps->log2_min_cb_size;
2007     int length           = cb_size >> log2_min_cb_size;
2008     int min_cb_width     = s->sps->min_cb_width;
2009     int x_cb             = x0 >> log2_min_cb_size;
2010     int y_cb             = y0 >> log2_min_cb_size;
2011     int x, y;
2012
2013     lc->cu.x                = x0;
2014     lc->cu.y                = y0;
2015     lc->cu.rqt_root_cbf     = 1;
2016     lc->cu.pred_mode        = MODE_INTRA;
2017     lc->cu.part_mode        = PART_2Nx2N;
2018     lc->cu.intra_split_flag = 0;
2019     lc->cu.pcm_flag         = 0;
2020
2021     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2022     for (x = 0; x < 4; x++)
2023         lc->pu.intra_pred_mode[x] = 1;
2024     if (s->pps->transquant_bypass_enable_flag) {
2025         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2026         if (lc->cu.cu_transquant_bypass_flag)
2027             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2028     } else
2029         lc->cu.cu_transquant_bypass_flag = 0;
2030
2031     if (s->sh.slice_type != I_SLICE) {
2032         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2033
2034         lc->cu.pred_mode = MODE_SKIP;
2035         x = y_cb * min_cb_width + x_cb;
2036         for (y = 0; y < length; y++) {
2037             memset(&s->skip_flag[x], skip_flag, length);
2038             x += min_cb_width;
2039         }
2040         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2041     }
2042
2043     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2044         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2045         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2046
2047         if (!s->sh.disable_deblocking_filter_flag)
2048             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2049                                                   lc->slice_or_tiles_up_boundary,
2050                                                   lc->slice_or_tiles_left_boundary);
2051     } else {
2052         if (s->sh.slice_type != I_SLICE)
2053             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2054         if (lc->cu.pred_mode != MODE_INTRA ||
2055             log2_cb_size == s->sps->log2_min_cb_size) {
2056             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2057             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2058                                       lc->cu.pred_mode == MODE_INTRA;
2059         }
2060
2061         if (lc->cu.pred_mode == MODE_INTRA) {
2062             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2063                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2064                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2065                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2066             }
2067             if (lc->cu.pcm_flag) {
2068                 int ret;
2069                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2070                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2071                 if (s->sps->pcm.loop_filter_disable_flag)
2072                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2073
2074                 if (ret < 0)
2075                     return ret;
2076             } else {
2077                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2078             }
2079         } else {
2080             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2081             switch (lc->cu.part_mode) {
2082             case PART_2Nx2N:
2083                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2084                 break;
2085             case PART_2NxN:
2086                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2087                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2088                 break;
2089             case PART_Nx2N:
2090                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2091                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2092                 break;
2093             case PART_2NxnU:
2094                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2095                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2096                 break;
2097             case PART_2NxnD:
2098                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2099                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2100                 break;
2101             case PART_nLx2N:
2102                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2103                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2104                 break;
2105             case PART_nRx2N:
2106                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2107                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2108                 break;
2109             case PART_NxN:
2110                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2111                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2112                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2113                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2114                 break;
2115             }
2116         }
2117
2118         if (!lc->cu.pcm_flag) {
2119             if (lc->cu.pred_mode != MODE_INTRA &&
2120                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2121                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2122             }
2123             if (lc->cu.rqt_root_cbf) {
2124                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2125                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2126                                          s->sps->max_transform_hierarchy_depth_inter;
2127                 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
2128                                    log2_cb_size, 0, 0);
2129             } else {
2130                 if (!s->sh.disable_deblocking_filter_flag)
2131                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2132                                                           lc->slice_or_tiles_up_boundary,
2133                                                           lc->slice_or_tiles_left_boundary);
2134             }
2135         }
2136     }
2137
2138     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2139         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2140
2141     x = y_cb * min_cb_width + x_cb;
2142     for (y = 0; y < length; y++) {
2143         memset(&s->qp_y_tab[x], lc->qp_y, length);
2144         x += min_cb_width;
2145     }
2146
2147     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2148
2149     return 0;
2150 }
2151
2152 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2153                                int log2_cb_size, int cb_depth)
2154 {
2155     HEVCLocalContext *lc = &s->HEVClc;
2156     const int cb_size    = 1 << log2_cb_size;
2157
2158     lc->ct.depth = cb_depth;
2159     if (x0 + cb_size <= s->sps->width  &&
2160         y0 + cb_size <= s->sps->height &&
2161         log2_cb_size > s->sps->log2_min_cb_size) {
2162         SAMPLE(s->split_cu_flag, x0, y0) =
2163             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2164     } else {
2165         SAMPLE(s->split_cu_flag, x0, y0) =
2166             (log2_cb_size > s->sps->log2_min_cb_size);
2167     }
2168     if (s->pps->cu_qp_delta_enabled_flag &&
2169         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2170         lc->tu.is_cu_qp_delta_coded = 0;
2171         lc->tu.cu_qp_delta          = 0;
2172     }
2173
2174     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2175         const int cb_size_split = cb_size >> 1;
2176         const int x1 = x0 + cb_size_split;
2177         const int y1 = y0 + cb_size_split;
2178
2179         log2_cb_size--;
2180         cb_depth++;
2181
2182 #define SUBDIVIDE(x, y)                                                \
2183 do {                                                                   \
2184     if (x < s->sps->width && y < s->sps->height) {                     \
2185         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2186         if (ret < 0)                                                   \
2187             return ret;                                                \
2188     }                                                                  \
2189 } while (0)
2190
2191         SUBDIVIDE(x0, y0);
2192         SUBDIVIDE(x1, y0);
2193         SUBDIVIDE(x0, y1);
2194         SUBDIVIDE(x1, y1);
2195     } else {
2196         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2197         if (ret < 0)
2198             return ret;
2199     }
2200
2201     return 0;
2202 }
2203
2204 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2205                                  int ctb_addr_ts)
2206 {
2207     HEVCLocalContext *lc  = &s->HEVClc;
2208     int ctb_size          = 1 << s->sps->log2_ctb_size;
2209     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2210     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2211
2212     int tile_left_boundary, tile_up_boundary;
2213     int slice_left_boundary, slice_up_boundary;
2214
2215     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2216
2217     if (s->pps->entropy_coding_sync_enabled_flag) {
2218         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2219             lc->first_qp_group = 1;
2220         lc->end_of_tiles_x = s->sps->width;
2221     } else if (s->pps->tiles_enabled_flag) {
2222         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2223             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2224             lc->start_of_tiles_x = x_ctb;
2225             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2226             lc->first_qp_group   = 1;
2227         }
2228     } else {
2229         lc->end_of_tiles_x = s->sps->width;
2230     }
2231
2232     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2233
2234     if (s->pps->tiles_enabled_flag) {
2235         tile_left_boundary  = x_ctb > 0 &&
2236                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2237         slice_left_boundary = x_ctb > 0 &&
2238                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2239         tile_up_boundary  = y_ctb > 0 &&
2240                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2241         slice_up_boundary = y_ctb > 0 &&
2242                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2243     } else {
2244         tile_left_boundary  =
2245         tile_up_boundary    = 1;
2246         slice_left_boundary = ctb_addr_in_slice > 0;
2247         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2248     }
2249     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2250     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2251     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2252     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2253     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2254     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2255 }
2256
2257 static int hls_slice_data(HEVCContext *s)
2258 {
2259     int ctb_size    = 1 << s->sps->log2_ctb_size;
2260     int more_data   = 1;
2261     int x_ctb       = 0;
2262     int y_ctb       = 0;
2263     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2264     int ret;
2265
2266     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2267         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2268
2269         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2270         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2271         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2272
2273         ff_hevc_cabac_init(s, ctb_addr_ts);
2274
2275         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2276
2277         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2278         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2279         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2280
2281         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2282         if (ret < 0)
2283             return ret;
2284         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2285
2286         ctb_addr_ts++;
2287         ff_hevc_save_states(s, ctb_addr_ts);
2288         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2289     }
2290
2291     if (x_ctb + ctb_size >= s->sps->width &&
2292         y_ctb + ctb_size >= s->sps->height)
2293         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2294
2295     return ctb_addr_ts;
2296 }
2297
2298 /**
2299  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2300  * 0 if the unit should be skipped, 1 otherwise
2301  */
2302 static int hls_nal_unit(HEVCContext *s)
2303 {
2304     GetBitContext *gb = &s->HEVClc.gb;
2305     int nuh_layer_id;
2306
2307     if (get_bits1(gb) != 0)
2308         return AVERROR_INVALIDDATA;
2309
2310     s->nal_unit_type = get_bits(gb, 6);
2311
2312     nuh_layer_id   = get_bits(gb, 6);
2313     s->temporal_id = get_bits(gb, 3) - 1;
2314     if (s->temporal_id < 0)
2315         return AVERROR_INVALIDDATA;
2316
2317     av_log(s->avctx, AV_LOG_DEBUG,
2318            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2319            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2320
2321     return nuh_layer_id == 0;
2322 }
2323
2324 static void restore_tqb_pixels(HEVCContext *s)
2325 {
2326     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2327     int x, y, c_idx;
2328
2329     for (c_idx = 0; c_idx < 3; c_idx++) {
2330         ptrdiff_t stride = s->frame->linesize[c_idx];
2331         int hshift       = s->sps->hshift[c_idx];
2332         int vshift       = s->sps->vshift[c_idx];
2333         for (y = 0; y < s->sps->min_pu_height; y++) {
2334             for (x = 0; x < s->sps->min_pu_width; x++) {
2335                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2336                     int n;
2337                     int len      = min_pu_size >> hshift;
2338                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2339                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2340                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2341                         memcpy(dst, src, len);
2342                         src += stride;
2343                         dst += stride;
2344                     }
2345                 }
2346             }
2347         }
2348     }
2349 }
2350
2351 static int set_side_data(HEVCContext *s)
2352 {
2353     AVFrame *out = s->ref->frame;
2354
2355     if (s->sei_frame_packing_present &&
2356         s->frame_packing_arrangement_type >= 3 &&
2357         s->frame_packing_arrangement_type <= 5 &&
2358         s->content_interpretation_type > 0 &&
2359         s->content_interpretation_type < 3) {
2360         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2361         if (!stereo)
2362             return AVERROR(ENOMEM);
2363
2364         switch (s->frame_packing_arrangement_type) {
2365         case 3:
2366             if (s->quincunx_subsampling)
2367                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2368             else
2369                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2370             break;
2371         case 4:
2372             stereo->type = AV_STEREO3D_TOPBOTTOM;
2373             break;
2374         case 5:
2375             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2376             break;
2377         }
2378
2379         if (s->content_interpretation_type == 2)
2380             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2381     }
2382
2383     return 0;
2384 }
2385
2386 static int hevc_frame_start(HEVCContext *s)
2387 {
2388     HEVCLocalContext *lc = &s->HEVClc;
2389     int ret;
2390
2391     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2392     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2393     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2394     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2395
2396     lc->start_of_tiles_x = 0;
2397     s->is_decoded        = 0;
2398
2399     if (s->pps->tiles_enabled_flag)
2400         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2401
2402     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2403                               s->poc);
2404     if (ret < 0)
2405         goto fail;
2406
2407     av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size,
2408                    (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]);
2409     if (!lc->edge_emu_buffer) {
2410         ret = AVERROR(ENOMEM);
2411         goto fail;
2412     }
2413
2414     ret = ff_hevc_frame_rps(s);
2415     if (ret < 0) {
2416         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2417         goto fail;
2418     }
2419
2420     ret = set_side_data(s);
2421     if (ret < 0)
2422         goto fail;
2423
2424     av_frame_unref(s->output_frame);
2425     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2426     if (ret < 0)
2427         goto fail;
2428
2429     ff_thread_finish_setup(s->avctx);
2430
2431     return 0;
2432
2433 fail:
2434     if (s->ref)
2435         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2436     s->ref = NULL;
2437     return ret;
2438 }
2439
2440 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2441 {
2442     HEVCLocalContext *lc = &s->HEVClc;
2443     GetBitContext *gb    = &lc->gb;
2444     int ctb_addr_ts, ret;
2445
2446     ret = init_get_bits8(gb, nal, length);
2447     if (ret < 0)
2448         return ret;
2449
2450     ret = hls_nal_unit(s);
2451     if (ret < 0) {
2452         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2453                s->nal_unit_type);
2454         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2455             return ret;
2456         return 0;
2457     } else if (!ret)
2458         return 0;
2459
2460     switch (s->nal_unit_type) {
2461     case NAL_VPS:
2462         ret = ff_hevc_decode_nal_vps(s);
2463         if (ret < 0)
2464             return ret;
2465         break;
2466     case NAL_SPS:
2467         ret = ff_hevc_decode_nal_sps(s);
2468         if (ret < 0)
2469             return ret;
2470         break;
2471     case NAL_PPS:
2472         ret = ff_hevc_decode_nal_pps(s);
2473         if (ret < 0)
2474             return ret;
2475         break;
2476     case NAL_SEI_PREFIX:
2477     case NAL_SEI_SUFFIX:
2478         ret = ff_hevc_decode_nal_sei(s);
2479         if (ret < 0)
2480             return ret;
2481         break;
2482     case NAL_TRAIL_R:
2483     case NAL_TRAIL_N:
2484     case NAL_TSA_N:
2485     case NAL_TSA_R:
2486     case NAL_STSA_N:
2487     case NAL_STSA_R:
2488     case NAL_BLA_W_LP:
2489     case NAL_BLA_W_RADL:
2490     case NAL_BLA_N_LP:
2491     case NAL_IDR_W_RADL:
2492     case NAL_IDR_N_LP:
2493     case NAL_CRA_NUT:
2494     case NAL_RADL_N:
2495     case NAL_RADL_R:
2496     case NAL_RASL_N:
2497     case NAL_RASL_R:
2498         ret = hls_slice_header(s);
2499         if (ret < 0)
2500             return ret;
2501
2502         if (s->max_ra == INT_MAX) {
2503             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2504                 s->max_ra = s->poc;
2505             } else {
2506                 if (IS_IDR(s))
2507                     s->max_ra = INT_MIN;
2508             }
2509         }
2510
2511         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2512             s->poc <= s->max_ra) {
2513             s->is_decoded = 0;
2514             break;
2515         } else {
2516             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2517                 s->max_ra = INT_MIN;
2518         }
2519
2520         if (s->sh.first_slice_in_pic_flag) {
2521             ret = hevc_frame_start(s);
2522             if (ret < 0)
2523                 return ret;
2524         } else if (!s->ref) {
2525             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2526             return AVERROR_INVALIDDATA;
2527         }
2528
2529         if (!s->sh.dependent_slice_segment_flag &&
2530             s->sh.slice_type != I_SLICE) {
2531             ret = ff_hevc_slice_rpl(s);
2532             if (ret < 0) {
2533                 av_log(s->avctx, AV_LOG_WARNING,
2534                        "Error constructing the reference lists for the current slice.\n");
2535                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2536                     return ret;
2537             }
2538         }
2539
2540         ctb_addr_ts = hls_slice_data(s);
2541         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2542             s->is_decoded = 1;
2543             if ((s->pps->transquant_bypass_enable_flag ||
2544                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2545                 s->sps->sao_enabled)
2546                 restore_tqb_pixels(s);
2547         }
2548
2549         if (ctb_addr_ts < 0)
2550             return ctb_addr_ts;
2551         break;
2552     case NAL_EOS_NUT:
2553     case NAL_EOB_NUT:
2554         s->seq_decode = (s->seq_decode + 1) & 0xff;
2555         s->max_ra     = INT_MAX;
2556         break;
2557     case NAL_AUD:
2558     case NAL_FD_NUT:
2559         break;
2560     default:
2561         av_log(s->avctx, AV_LOG_INFO,
2562                "Skipping NAL unit %d\n", s->nal_unit_type);
2563     }
2564
2565     return 0;
2566 }
2567
2568 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2569  * between these functions would be nice. */
2570 static int extract_rbsp(const uint8_t *src, int length,
2571                         HEVCNAL *nal)
2572 {
2573     int i, si, di;
2574     uint8_t *dst;
2575
2576 #define STARTCODE_TEST                                                  \
2577         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2578             if (src[i + 2] != 3) {                                      \
2579                 /* startcode, so we must be past the end */             \
2580                 length = i;                                             \
2581             }                                                           \
2582             break;                                                      \
2583         }
2584 #if HAVE_FAST_UNALIGNED
2585 #define FIND_FIRST_ZERO                                                 \
2586         if (i > 0 && !src[i])                                           \
2587             i--;                                                        \
2588         while (src[i])                                                  \
2589             i++
2590 #if HAVE_FAST_64BIT
2591     for (i = 0; i + 1 < length; i += 9) {
2592         if (!((~AV_RN64A(src + i) &
2593                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2594               0x8000800080008080ULL))
2595             continue;
2596         FIND_FIRST_ZERO;
2597         STARTCODE_TEST;
2598         i -= 7;
2599     }
2600 #else
2601     for (i = 0; i + 1 < length; i += 5) {
2602         if (!((~AV_RN32A(src + i) &
2603                (AV_RN32A(src + i) - 0x01000101U)) &
2604               0x80008080U))
2605             continue;
2606         FIND_FIRST_ZERO;
2607         STARTCODE_TEST;
2608         i -= 3;
2609     }
2610 #endif /* HAVE_FAST_64BIT */
2611 #else
2612     for (i = 0; i + 1 < length; i += 2) {
2613         if (src[i])
2614             continue;
2615         if (i > 0 && src[i - 1] == 0)
2616             i--;
2617         STARTCODE_TEST;
2618     }
2619 #endif /* HAVE_FAST_UNALIGNED */
2620
2621     if (i >= length - 1) { // no escaped 0
2622         nal->data = src;
2623         nal->size = length;
2624         return length;
2625     }
2626
2627     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2628                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2629     if (!nal->rbsp_buffer)
2630         return AVERROR(ENOMEM);
2631
2632     dst = nal->rbsp_buffer;
2633
2634     memcpy(dst, src, i);
2635     si = di = i;
2636     while (si + 2 < length) {
2637         // remove escapes (very rare 1:2^22)
2638         if (src[si + 2] > 3) {
2639             dst[di++] = src[si++];
2640             dst[di++] = src[si++];
2641         } else if (src[si] == 0 && src[si + 1] == 0) {
2642             if (src[si + 2] == 3) { // escape
2643                 dst[di++] = 0;
2644                 dst[di++] = 0;
2645                 si       += 3;
2646
2647                 continue;
2648             } else // next start code
2649                 goto nsc;
2650         }
2651
2652         dst[di++] = src[si++];
2653     }
2654     while (si < length)
2655         dst[di++] = src[si++];
2656
2657 nsc:
2658     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2659
2660     nal->data = dst;
2661     nal->size = di;
2662     return si;
2663 }
2664
2665 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2666 {
2667     int i, consumed, ret = 0;
2668
2669     s->ref = NULL;
2670     s->eos = 0;
2671
2672     /* split the input packet into NAL units, so we know the upper bound on the
2673      * number of slices in the frame */
2674     s->nb_nals = 0;
2675     while (length >= 4) {
2676         HEVCNAL *nal;
2677         int extract_length = 0;
2678
2679         if (s->is_nalff) {
2680             int i;
2681             for (i = 0; i < s->nal_length_size; i++)
2682                 extract_length = (extract_length << 8) | buf[i];
2683             buf    += s->nal_length_size;
2684             length -= s->nal_length_size;
2685
2686             if (extract_length > length) {
2687                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2688                 ret = AVERROR_INVALIDDATA;
2689                 goto fail;
2690             }
2691         } else {
2692             if (buf[2] == 0) {
2693                 length--;
2694                 buf++;
2695                 continue;
2696             }
2697             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2698                 ret = AVERROR_INVALIDDATA;
2699                 goto fail;
2700             }
2701
2702             buf           += 3;
2703             length        -= 3;
2704             extract_length = length;
2705         }
2706
2707         if (s->nals_allocated < s->nb_nals + 1) {
2708             int new_size = s->nals_allocated + 1;
2709             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2710             if (!tmp) {
2711                 ret = AVERROR(ENOMEM);
2712                 goto fail;
2713             }
2714             s->nals = tmp;
2715             memset(s->nals + s->nals_allocated, 0,
2716                    (new_size - s->nals_allocated) * sizeof(*tmp));
2717             s->nals_allocated = new_size;
2718         }
2719         nal = &s->nals[s->nb_nals++];
2720
2721         consumed = extract_rbsp(buf, extract_length, nal);
2722         if (consumed < 0) {
2723             ret = consumed;
2724             goto fail;
2725         }
2726
2727         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2728         if (ret < 0)
2729             goto fail;
2730         hls_nal_unit(s);
2731
2732         if (s->nal_unit_type == NAL_EOB_NUT ||
2733             s->nal_unit_type == NAL_EOS_NUT)
2734             s->eos = 1;
2735
2736         buf    += consumed;
2737         length -= consumed;
2738     }
2739
2740     /* parse the NAL units */
2741     for (i = 0; i < s->nb_nals; i++) {
2742         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2743         if (ret < 0) {
2744             av_log(s->avctx, AV_LOG_WARNING,
2745                    "Error parsing NAL unit #%d.\n", i);
2746             if (s->avctx->err_recognition & AV_EF_EXPLODE)
2747                 goto fail;
2748         }
2749     }
2750
2751 fail:
2752     if (s->ref)
2753         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2754
2755     return ret;
2756 }
2757
/**
 * Log the 16 bytes of an MD5 digest as lowercase hexadecimal, two digits
 * per byte, without separators or a trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2764
2765 static int verify_md5(HEVCContext *s, AVFrame *frame)
2766 {
2767     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2768     int pixel_shift;
2769     int i, j;
2770
2771     if (!desc)
2772         return AVERROR(EINVAL);
2773
2774     pixel_shift = desc->comp[0].depth_minus1 > 7;
2775
2776     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2777            s->poc);
2778
2779     /* the checksums are LE, so we have to byteswap for >8bpp formats
2780      * on BE arches */
2781 #if HAVE_BIGENDIAN
2782     if (pixel_shift && !s->checksum_buf) {
2783         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2784                        FFMAX3(frame->linesize[0], frame->linesize[1],
2785                               frame->linesize[2]));
2786         if (!s->checksum_buf)
2787             return AVERROR(ENOMEM);
2788     }
2789 #endif
2790
2791     for (i = 0; frame->data[i]; i++) {
2792         int width  = s->avctx->coded_width;
2793         int height = s->avctx->coded_height;
2794         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2795         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2796         uint8_t md5[16];
2797
2798         av_md5_init(s->md5_ctx);
2799         for (j = 0; j < h; j++) {
2800             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2801 #if HAVE_BIGENDIAN
2802             if (pixel_shift) {
2803                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2804                                    (const uint16_t*)src, w);
2805                 src = s->checksum_buf;
2806             }
2807 #endif
2808             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2809         }
2810         av_md5_final(s->md5_ctx, md5);
2811
2812         if (!memcmp(md5, s->md5[i], 16)) {
2813             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2814             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2815             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2816         } else {
2817             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2818             print_md5(s->avctx, AV_LOG_ERROR, md5);
2819             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2820             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2821             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2822             return AVERROR_INVALIDDATA;
2823         }
2824     }
2825
2826     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2827
2828     return 0;
2829 }
2830
2831 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2832                              AVPacket *avpkt)
2833 {
2834     int ret;
2835     HEVCContext *s = avctx->priv_data;
2836
2837     if (!avpkt->size) {
2838         ret = ff_hevc_output_frame(s, data, 1);
2839         if (ret < 0)
2840             return ret;
2841
2842         *got_output = ret;
2843         return 0;
2844     }
2845
2846     s->ref = NULL;
2847     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2848     if (ret < 0)
2849         return ret;
2850
2851     /* verify the SEI checksum */
2852     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2853         s->is_md5) {
2854         ret = verify_md5(s, s->ref->frame);
2855         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2856             ff_hevc_unref_frame(s, s->ref, ~0);
2857             return ret;
2858         }
2859     }
2860     s->is_md5 = 0;
2861
2862     if (s->is_decoded) {
2863         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2864         s->is_decoded = 0;
2865     }
2866
2867     if (s->output_frame->buf[0]) {
2868         av_frame_move_ref(data, s->output_frame);
2869         *got_output = 1;
2870     }
2871
2872     return avpkt->size;
2873 }
2874
2875 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2876 {
2877     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2878     if (ret < 0)
2879         return ret;
2880
2881     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2882     if (!dst->tab_mvf_buf)
2883         goto fail;
2884     dst->tab_mvf = src->tab_mvf;
2885
2886     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2887     if (!dst->rpl_tab_buf)
2888         goto fail;
2889     dst->rpl_tab = src->rpl_tab;
2890
2891     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2892     if (!dst->rpl_buf)
2893         goto fail;
2894
2895     dst->poc        = src->poc;
2896     dst->ctb_count  = src->ctb_count;
2897     dst->window     = src->window;
2898     dst->flags      = src->flags;
2899     dst->sequence   = src->sequence;
2900
2901     return 0;
2902 fail:
2903     ff_hevc_unref_frame(s, dst, ~0);
2904     return AVERROR(ENOMEM);
2905 }
2906
2907 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2908 {
2909     HEVCContext       *s = avctx->priv_data;
2910     HEVCLocalContext *lc = &s->HEVClc;
2911     int i;
2912
2913     pic_arrays_free(s);
2914
2915     av_freep(&lc->edge_emu_buffer);
2916     av_freep(&s->md5_ctx);
2917
2918     av_frame_free(&s->tmp_frame);
2919     av_frame_free(&s->output_frame);
2920
2921     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2922         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2923         av_frame_free(&s->DPB[i].frame);
2924     }
2925
2926     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2927         av_buffer_unref(&s->vps_list[i]);
2928     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2929         av_buffer_unref(&s->sps_list[i]);
2930     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2931         av_buffer_unref(&s->pps_list[i]);
2932
2933     for (i = 0; i < s->nals_allocated; i++)
2934         av_freep(&s->nals[i].rbsp_buffer);
2935     av_freep(&s->nals);
2936     s->nals_allocated = 0;
2937
2938     return 0;
2939 }
2940
2941 static av_cold int hevc_init_context(AVCodecContext *avctx)
2942 {
2943     HEVCContext *s = avctx->priv_data;
2944     int i;
2945
2946     s->avctx = avctx;
2947
2948     s->tmp_frame = av_frame_alloc();
2949     if (!s->tmp_frame)
2950         goto fail;
2951
2952     s->output_frame = av_frame_alloc();
2953     if (!s->output_frame)
2954         goto fail;
2955
2956     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2957         s->DPB[i].frame = av_frame_alloc();
2958         if (!s->DPB[i].frame)
2959             goto fail;
2960         s->DPB[i].tf.f = s->DPB[i].frame;
2961     }
2962
2963     s->max_ra = INT_MAX;
2964
2965     s->md5_ctx = av_md5_alloc();
2966     if (!s->md5_ctx)
2967         goto fail;
2968
2969     ff_dsputil_init(&s->dsp, avctx);
2970
2971     s->context_initialized = 1;
2972
2973     return 0;
2974
2975 fail:
2976     hevc_decode_free(avctx);
2977     return AVERROR(ENOMEM);
2978 }
2979
2980 static int hevc_update_thread_context(AVCodecContext *dst,
2981                                       const AVCodecContext *src)
2982 {
2983     HEVCContext *s  = dst->priv_data;
2984     HEVCContext *s0 = src->priv_data;
2985     int i, ret;
2986
2987     if (!s->context_initialized) {
2988         ret = hevc_init_context(dst);
2989         if (ret < 0)
2990             return ret;
2991     }
2992
2993     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2994         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2995         if (s0->DPB[i].frame->buf[0]) {
2996             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2997             if (ret < 0)
2998                 return ret;
2999         }
3000     }
3001
3002     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3003         av_buffer_unref(&s->vps_list[i]);
3004         if (s0->vps_list[i]) {
3005             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3006             if (!s->vps_list[i])
3007                 return AVERROR(ENOMEM);
3008         }
3009     }
3010
3011     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3012         av_buffer_unref(&s->sps_list[i]);
3013         if (s0->sps_list[i]) {
3014             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3015             if (!s->sps_list[i])
3016                 return AVERROR(ENOMEM);
3017         }
3018     }
3019
3020     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3021         av_buffer_unref(&s->pps_list[i]);
3022         if (s0->pps_list[i]) {
3023             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3024             if (!s->pps_list[i])
3025                 return AVERROR(ENOMEM);
3026         }
3027     }
3028
3029     if (s->sps != s0->sps)
3030         ret = set_sps(s, s0->sps);
3031
3032     s->seq_decode = s0->seq_decode;
3033     s->seq_output = s0->seq_output;
3034     s->pocTid0    = s0->pocTid0;
3035     s->max_ra     = s0->max_ra;
3036
3037     s->is_nalff        = s0->is_nalff;
3038     s->nal_length_size = s0->nal_length_size;
3039
3040     if (s0->eos) {
3041         s->seq_decode = (s->seq_decode + 1) & 0xff;
3042         s->max_ra = INT_MAX;
3043     }
3044
3045     return 0;
3046 }
3047
3048 static int hevc_decode_extradata(HEVCContext *s)
3049 {
3050     AVCodecContext *avctx = s->avctx;
3051     GetByteContext gb;
3052     int ret;
3053
3054     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3055
3056     if (avctx->extradata_size > 3 &&
3057         (avctx->extradata[0] || avctx->extradata[1] ||
3058          avctx->extradata[2] > 1)) {
3059         /* It seems the extradata is encoded as hvcC format.
3060          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3061          * is finalized. When finalized, configurationVersion will be 1 and we
3062          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3063         int i, j, num_arrays, nal_len_size;
3064
3065         s->is_nalff = 1;
3066
3067         bytestream2_skip(&gb, 21);
3068         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3069         num_arrays   = bytestream2_get_byte(&gb);
3070
3071         /* nal units in the hvcC always have length coded with 2 bytes,
3072          * so put a fake nal_length_size = 2 while parsing them */
3073         s->nal_length_size = 2;
3074
3075         /* Decode nal units from hvcC. */
3076         for (i = 0; i < num_arrays; i++) {
3077             int type = bytestream2_get_byte(&gb) & 0x3f;
3078             int cnt  = bytestream2_get_be16(&gb);
3079
3080             for (j = 0; j < cnt; j++) {
3081                 // +2 for the nal size field
3082                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3083                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3084                     av_log(s->avctx, AV_LOG_ERROR,
3085                            "Invalid NAL unit size in extradata.\n");
3086                     return AVERROR_INVALIDDATA;
3087                 }
3088
3089                 ret = decode_nal_units(s, gb.buffer, nalsize);
3090                 if (ret < 0) {
3091                     av_log(avctx, AV_LOG_ERROR,
3092                            "Decoding nal unit %d %d from hvcC failed\n",
3093                            type, i);
3094                     return ret;
3095                 }
3096                 bytestream2_skip(&gb, nalsize);
3097             }
3098         }
3099
3100         /* Now store right nal length size, that will be used to parse
3101          * all other nals */
3102         s->nal_length_size = nal_len_size;
3103     } else {
3104         s->is_nalff = 0;
3105         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3106         if (ret < 0)
3107             return ret;
3108     }
3109     return 0;
3110 }
3111
3112 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3113 {
3114     HEVCContext *s = avctx->priv_data;
3115     int ret;
3116
3117     ff_init_cabac_states();
3118
3119     avctx->internal->allocate_progress = 1;
3120
3121     ret = hevc_init_context(avctx);
3122     if (ret < 0)
3123         return ret;
3124
3125     if (avctx->extradata_size > 0 && avctx->extradata) {
3126         ret = hevc_decode_extradata(s);
3127         if (ret < 0) {
3128             hevc_decode_free(avctx);
3129             return ret;
3130         }
3131     }
3132
3133     return 0;
3134 }
3135
3136 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3137 {
3138     HEVCContext *s = avctx->priv_data;
3139     int ret;
3140
3141     memset(s, 0, sizeof(*s));
3142
3143     ret = hevc_init_context(avctx);
3144     if (ret < 0)
3145         return ret;
3146
3147     return 0;
3148 }
3149
3150 static void hevc_decode_flush(AVCodecContext *avctx)
3151 {
3152     HEVCContext *s = avctx->priv_data;
3153     ff_hevc_flush_dpb(s);
3154     s->max_ra = INT_MAX;
3155 }
3156
3157 #define OFFSET(x) offsetof(HEVCContext, x)
3158 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3159 static const AVOption options[] = {
3160     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3161         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3162     { NULL },
3163 };
3164
3165 static const AVClass hevc_decoder_class = {
3166     .class_name = "HEVC decoder",
3167     .item_name  = av_default_item_name,
3168     .option     = options,
3169     .version    = LIBAVUTIL_VERSION_INT,
3170 };
3171
3172 AVCodec ff_hevc_decoder = {
3173     .name                  = "hevc",
3174     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3175     .type                  = AVMEDIA_TYPE_VIDEO,
3176     .id                    = AV_CODEC_ID_HEVC,
3177     .priv_data_size        = sizeof(HEVCContext),
3178     .priv_class            = &hevc_decoder_class,
3179     .init                  = hevc_decode_init,
3180     .close                 = hevc_decode_free,
3181     .decode                = hevc_decode_frame,
3182     .flush                 = hevc_decode_flush,
3183     .update_thread_context = hevc_update_thread_context,
3184     .init_thread_copy      = hevc_init_thread_copy,
3185     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3186                              CODEC_CAP_FRAME_THREADS,
3187 };