4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/internal.h"
29 #include "libavutil/md5.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "libavutil/stereo3d.h"
34 #include "bytestream.h"
35 #include "cabac_functions.h"
/* Number of extra reference rows/columns required by the quarter-pel
 * interpolation filters, indexed by the fractional sample position
 * (0 = integer position, no extra samples needed). */
40 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
41 const uint8_t ff_hevc_qpel_extra_after[4] = { 0, 3, 4, 4 };
/* Total extra samples: before + after for each fractional position. */
42 const uint8_t ff_hevc_qpel_extra[4] = { 0, 6, 7, 6 };
/* Degenerate 1x1 scan (single coefficient group). */
44 static const uint8_t scan_1x1[1] = { 0 };
/* Horizontal (raster) scan order for a 2x2 group: x then y coordinates. */
46 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
48 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
/* Horizontal scan order for a 4x4 block: per-position x and y coordinates.
 * NOTE(review): the initializer bodies of these two tables are not visible
 * in this extract (lines missing from the chunk). */
50 static const uint8_t horiz_scan4x4_x[16] = {
57 static const uint8_t horiz_scan4x4_y[16] = {
/* Inverse horizontal scan for an 8x8 block: maps (y, x) to scan position.
 * Used to derive the number of coefficients from the last significant
 * coefficient coordinates. */
64 static const uint8_t horiz_scan8x8_inv[8][8] = {
65 { 0, 1, 2, 3, 16, 17, 18, 19, },
66 { 4, 5, 6, 7, 20, 21, 22, 23, },
67 { 8, 9, 10, 11, 24, 25, 26, 27, },
68 { 12, 13, 14, 15, 28, 29, 30, 31, },
69 { 32, 33, 34, 35, 48, 49, 50, 51, },
70 { 36, 37, 38, 39, 52, 53, 54, 55, },
71 { 40, 41, 42, 43, 56, 57, 58, 59, },
72 { 44, 45, 46, 47, 60, 61, 62, 63, },
/* Up-right diagonal scan order for a 2x2 coefficient-group grid. */
75 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
77 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
/* Inverse mapping: (y, x) -> diagonal scan position.
 * NOTE(review): initializer bodies of the 2x2-inv, 4x4 and 8x8 forward/inverse
 * tables below are not visible in this extract (lines missing from chunk). */
79 static const uint8_t diag_scan2x2_inv[2][2] = {
/* Diagonal scan coordinates for 4x4 (exported, shared with other files). */
84 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
91 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
98 static const uint8_t diag_scan4x4_inv[4][4] = {
/* Diagonal scan coordinates for 8x8 (exported). */
105 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
124 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
/* Inverse diagonal scan for 8x8: (y, x) -> scan position. */
143 static const uint8_t diag_scan8x8_inv[8][8] = {
144 { 0, 2, 5, 9, 14, 20, 27, 35, },
145 { 1, 4, 8, 13, 19, 26, 34, 42, },
146 { 3, 7, 12, 18, 25, 33, 41, 48, },
147 { 6, 11, 17, 24, 32, 40, 47, 53, },
148 { 10, 16, 23, 31, 39, 46, 52, 57, },
149 { 15, 22, 30, 38, 45, 51, 56, 60, },
150 { 21, 29, 37, 44, 50, 55, 59, 62, },
151 { 28, 36, 43, 49, 54, 58, 61, 63, },
155 * NOTE: Each function hls_foo corresponds to the function foo in the
156 * specification (HLS stands for High Level Syntax).
163 /* free everything allocated by pic_arrays_init() */
/* av_freep()/av_buffer_pool_uninit() NULL their argument, so this function
 * is safe to call on a partially-initialized context and to call twice. */
164 static void pic_arrays_free(HEVCContext *s)
167 av_freep(&s->deblock);
168 av_freep(&s->split_cu_flag);
170 av_freep(&s->skip_flag);
171 av_freep(&s->tab_ct_depth);
173 av_freep(&s->tab_ipm);
174 av_freep(&s->cbf_luma);
175 av_freep(&s->is_pcm);
177 av_freep(&s->qp_y_tab);
178 av_freep(&s->tab_slice_address);
179 av_freep(&s->filter_slice_edges);
/* Deblocking boundary-strength maps. */
181 av_freep(&s->horizontal_bs);
182 av_freep(&s->vertical_bs);
/* Per-frame pools for motion fields and reference picture lists. */
184 av_buffer_pool_uninit(&s->tab_mvf_pool);
185 av_buffer_pool_uninit(&s->rpl_tab_pool);
188 /* allocate arrays that depend on frame dimensions */
/* Returns 0 on success or AVERROR(ENOMEM); each allocation batch is checked
 * before moving on. NOTE(review): the error-path statements between the
 * checks (presumably "goto fail" and a "fail:" label calling
 * pic_arrays_free()) are not visible in this extract — confirm against the
 * full file. */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
191 int log2_min_cb_size = sps->log2_min_cb_size;
192 int width = sps->width;
193 int height = sps->height;
194 int pic_size = width * height;
/* +1 in each dimension: the maps are indexed up to and including the
 * picture boundary in minimum-CB units. */
195 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
196 ((height >> log2_min_cb_size) + 1);
197 int ctb_count = sps->ctb_width * sps->ctb_height;
198 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* Boundary-strength grid has 8x8 luma granularity. */
200 s->bs_width = width >> 3;
201 s->bs_height = height >> 3;
/* SAO/deblock params are per-CTB and must start zeroed. */
203 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
204 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
205 s->split_cu_flag = av_malloc(pic_size);
206 if (!s->sao || !s->deblock || !s->split_cu_flag)
209 s->skip_flag = av_malloc(pic_size_in_ctb);
210 s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
211 if (!s->skip_flag || !s->tab_ct_depth)
214 s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
215 s->tab_ipm = av_malloc(min_pu_size);
216 s->is_pcm = av_malloc(min_pu_size);
217 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
220 s->filter_slice_edges = av_malloc(ctb_count);
221 s->tab_slice_address = av_malloc(pic_size_in_ctb *
222 sizeof(*s->tab_slice_address));
223 s->qp_y_tab = av_malloc(pic_size_in_ctb *
224 sizeof(*s->qp_y_tab));
225 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
/* 2x: one entry per 4-sample edge segment; +1 row/col for the border. */
228 s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
229 s->vertical_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
230 if (!s->horizontal_bs || !s->vertical_bs)
/* Buffer pools: one MvField per min-PU, one RefPicListTab per CTB. */
233 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
235 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
237 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
244 return AVERROR(ENOMEM);
/* Parse the pred_weight_table() slice-header syntax (weighted prediction
 * weights and offsets for L0, and for L1 on B slices) into s->sh.
 * When a per-reference flag is 0, the weight defaults to
 * 1 << log2_weight_denom and the offset to 0 (identity weighting).
 * NOTE(review): several closing braces / else lines of this function are
 * not visible in this extract. */
247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
251 uint8_t luma_weight_l0_flag[16];
252 uint8_t chroma_weight_l0_flag[16];
253 uint8_t luma_weight_l1_flag[16];
254 uint8_t chroma_weight_l1_flag[16];
256 s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
/* chroma_format_idc == 0 means monochrome: no chroma syntax present. */
257 if (s->sps->chroma_format_idc != 0) {
258 int delta = get_se_golomb(gb);
259 s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
262 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
263 luma_weight_l0_flag[i] = get_bits1(gb);
264 if (!luma_weight_l0_flag[i]) {
265 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
266 s->sh.luma_offset_l0[i] = 0;
269 if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
270 for (i = 0; i < s->sh.nb_refs[L0]; i++)
271 chroma_weight_l0_flag[i] = get_bits1(gb);
273 for (i = 0; i < s->sh.nb_refs[L0]; i++)
274 chroma_weight_l0_flag[i] = 0;
276 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
277 if (luma_weight_l0_flag[i]) {
278 int delta_luma_weight_l0 = get_se_golomb(gb);
279 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
280 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
282 if (chroma_weight_l0_flag[i]) {
/* j = 0: Cb, j = 1: Cr. Offset formula follows the spec's
 * ChromaOffsetL0 derivation, clipped to [-128, 127]. */
283 for (j = 0; j < 2; j++) {
284 int delta_chroma_weight_l0 = get_se_golomb(gb);
285 int delta_chroma_offset_l0 = get_se_golomb(gb);
286 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
287 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
288 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
291 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
292 s->sh.chroma_offset_l0[i][0] = 0;
293 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
294 s->sh.chroma_offset_l0[i][1] = 0;
/* Same parsing repeated for list L1, present only on B slices. */
297 if (s->sh.slice_type == B_SLICE) {
298 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
299 luma_weight_l1_flag[i] = get_bits1(gb);
300 if (!luma_weight_l1_flag[i]) {
301 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
302 s->sh.luma_offset_l1[i] = 0;
305 if (s->sps->chroma_format_idc != 0) {
306 for (i = 0; i < s->sh.nb_refs[L1]; i++)
307 chroma_weight_l1_flag[i] = get_bits1(gb);
309 for (i = 0; i < s->sh.nb_refs[L1]; i++)
310 chroma_weight_l1_flag[i] = 0;
312 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
313 if (luma_weight_l1_flag[i]) {
314 int delta_luma_weight_l1 = get_se_golomb(gb);
315 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
316 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
318 if (chroma_weight_l1_flag[i]) {
319 for (j = 0; j < 2; j++) {
320 int delta_chroma_weight_l1 = get_se_golomb(gb);
321 int delta_chroma_offset_l1 = get_se_golomb(gb);
322 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
323 s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
324 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
327 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
328 s->sh.chroma_offset_l1[i][0] = 0;
329 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
330 s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set from the slice header into
 * *rps. Entries may come from the SPS candidate list (nb_sps of them,
 * selected by lt_idx_sps) or be coded explicitly in the slice (nb_sh).
 * Returns 0 on success, AVERROR_INVALIDDATA if the totals overflow
 * rps->poc. NOTE(review): some braces/else lines are missing from this
 * extract. */
336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
338 const HEVCSPS *sps = s->sps;
339 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
340 int prev_delta_msb = 0;
341 int nb_sps = 0, nb_sh;
345 if (!sps->long_term_ref_pics_present_flag)
348 if (sps->num_long_term_ref_pics_sps > 0)
349 nb_sps = get_ue_golomb_long(gb);
350 nb_sh = get_ue_golomb_long(gb);
/* Bound check before writing rps->poc[] / rps->used[] below. */
352 if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
353 return AVERROR_INVALIDDATA;
355 rps->nb_refs = nb_sh + nb_sps;
357 for (i = 0; i < rps->nb_refs; i++) {
358 uint8_t delta_poc_msb_present;
361 uint8_t lt_idx_sps = 0;
/* SPS-sourced entry: index into the SPS long-term candidate tables. */
363 if (sps->num_long_term_ref_pics_sps > 1)
364 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
366 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
367 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
/* Slice-coded entry: POC LSB and usage flag read directly. */
369 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
370 rps->used[i] = get_bits1(gb);
373 delta_poc_msb_present = get_bits1(gb);
374 if (delta_poc_msb_present) {
375 int delta = get_ue_golomb_long(gb);
/* Deltas are coded differentially within each of the two groups
 * (SPS-sourced vs slice-coded); i == nb_sps starts the second group. */
377 if (i && i != nb_sps)
378 delta += prev_delta_msb;
380 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
381 prev_delta_msb = delta;
/* Activate a new SPS: (re)allocate the frame-size-dependent arrays,
 * propagate geometry/colorimetry to the AVCodecContext, re-init the DSP
 * function tables for the new bit depth, and derive the time base from
 * VPS or VUI timing info. NOTE(review): error-path lines (goto fail /
 * cleanup) are not visible in this extract. */
388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
391 int num = 0, den = 0;
394 ret = pic_arrays_init(s, sps);
398 s->avctx->coded_width = sps->width;
399 s->avctx->coded_height = sps->height;
400 s->avctx->width = sps->output_width;
401 s->avctx->height = sps->output_height;
402 s->avctx->pix_fmt = sps->pix_fmt;
403 s->avctx->sample_aspect_ratio = sps->vui.sar;
404 s->avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
406 if (sps->vui.video_signal_type_present_flag)
407 s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
410 s->avctx->color_range = AVCOL_RANGE_MPEG;
412 if (sps->vui.colour_description_present_flag) {
413 s->avctx->color_primaries = sps->vui.colour_primaries;
414 s->avctx->color_trc = sps->vui.transfer_characteristic;
415 s->avctx->colorspace = sps->vui.matrix_coeffs;
417 s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
418 s->avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
419 s->avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
/* DSP tables depend on bit depth, so re-init on every SPS change. */
422 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
423 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
424 ff_videodsp_init (&s->vdsp, sps->bit_depth);
/* SAO filters into a scratch frame that then becomes the output. */
426 if (sps->sao_enabled) {
427 av_frame_unref(s->tmp_frame);
428 ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
431 s->frame = s->tmp_frame;
435 s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
/* VPS timing info takes precedence over VUI timing info. */
437 if (s->vps->vps_timing_info_present_flag) {
438 num = s->vps->vps_num_units_in_tick;
439 den = s->vps->vps_time_scale;
440 } else if (sps->vui.vui_timing_info_present_flag) {
441 num = sps->vui.vui_num_units_in_tick;
442 den = sps->vui.vui_time_scale;
445 if (num != 0 && den != 0)
446 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
/* Parse the slice_segment_header() syntax (spec section 7.3.6) into s->sh
 * and activate the referenced PPS/SPS. Returns 0 on success or a negative
 * AVERROR on invalid bitstream data. NOTE(review): this extract is missing
 * many lines of the original function (braces, else branches, some error
 * checks); comments below describe only what is visible. */
457 static int hls_slice_header(HEVCContext *s)
459 GetBitContext *gb = &s->HEVClc.gb;
460 SliceHeader *sh = &s->sh;
/* An IDR/BLA first slice starts a new coded video sequence: bump the
 * sequence counter and drop all references. */
464 sh->first_slice_in_pic_flag = get_bits1(gb);
465 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
466 s->seq_decode = (s->seq_decode + 1) & 0xff;
469 ff_hevc_clear_refs(s);
/* NAL types 16..23 are the IRAP range: they carry
 * no_output_of_prior_pics_flag. */
471 if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
472 sh->no_output_of_prior_pics_flag = get_bits1(gb);
474 sh->pps_id = get_ue_golomb_long(gb);
475 if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
476 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
477 return AVERROR_INVALIDDATA;
/* All slices of one picture must use the same PPS. */
479 if (!sh->first_slice_in_pic_flag &&
480 s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
481 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
482 return AVERROR_INVALIDDATA;
484 s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
/* SPS switch: flush references and reconfigure for new dimensions. */
486 if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
487 s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
489 ff_hevc_clear_refs(s);
490 ret = set_sps(s, s->sps);
494 s->seq_decode = (s->seq_decode + 1) & 0xff;
498 sh->dependent_slice_segment_flag = 0;
499 if (!sh->first_slice_in_pic_flag) {
500 int slice_address_length;
502 if (s->pps->dependent_slice_segments_enabled_flag)
503 sh->dependent_slice_segment_flag = get_bits1(gb);
/* Address is coded with ceil(log2(PicSizeInCtbsY)) bits. */
505 slice_address_length = av_ceil_log2(s->sps->ctb_width *
507 sh->slice_segment_addr = get_bits(gb, slice_address_length);
508 if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
509 av_log(s->avctx, AV_LOG_ERROR,
510 "Invalid slice segment address: %u.\n",
511 sh->slice_segment_addr);
512 return AVERROR_INVALIDDATA;
515 if (!sh->dependent_slice_segment_flag) {
516 sh->slice_addr = sh->slice_segment_addr;
520 sh->slice_segment_addr = sh->slice_addr = 0;
522 s->slice_initialized = 0;
/* Everything from here to the dependent-slice else-branch is only
 * present on independent slice segments. */
525 if (!sh->dependent_slice_segment_flag) {
526 s->slice_initialized = 0;
528 for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
529 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
531 sh->slice_type = get_ue_golomb_long(gb);
532 if (!(sh->slice_type == I_SLICE ||
533 sh->slice_type == P_SLICE ||
534 sh->slice_type == B_SLICE)) {
535 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
537 return AVERROR_INVALIDDATA;
539 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
540 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
541 return AVERROR_INVALIDDATA;
544 if (s->pps->output_flag_present_flag)
545 sh->pic_output_flag = get_bits1(gb);
547 if (s->sps->separate_colour_plane_flag)
548 sh->colour_plane_id = get_bits(gb, 2);
551 int short_term_ref_pic_set_sps_flag, poc;
553 sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
554 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
/* POC must be constant across slices of one picture; tolerate a
 * mismatch unless the caller asked for strict error detection. */
555 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
556 av_log(s->avctx, AV_LOG_WARNING,
557 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
558 if (s->avctx->err_recognition & AV_EF_EXPLODE)
559 return AVERROR_INVALIDDATA;
/* Short-term RPS: either coded inline in the slice or selected from
 * the SPS candidate sets by index. */
564 short_term_ref_pic_set_sps_flag = get_bits1(gb);
565 if (!short_term_ref_pic_set_sps_flag) {
566 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
570 sh->short_term_rps = &sh->slice_rps;
572 int numbits, rps_idx;
574 if (!s->sps->nb_st_rps) {
575 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
576 return AVERROR_INVALIDDATA;
579 numbits = av_ceil_log2(s->sps->nb_st_rps);
580 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
581 sh->short_term_rps = &s->sps->st_rps[rps_idx];
584 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
586 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
587 if (s->avctx->err_recognition & AV_EF_EXPLODE)
588 return AVERROR_INVALIDDATA;
591 if (s->sps->sps_temporal_mvp_enabled_flag)
592 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
594 sh->slice_temporal_mvp_enabled_flag = 0;
596 s->sh.short_term_rps = NULL;
/* Sub-layer non-reference NAL types are excluded here; all listed
 * types are the _N/_R leading/trailing pictures. */
601 if (s->temporal_id == 0 &&
602 s->nal_unit_type != NAL_TRAIL_N &&
603 s->nal_unit_type != NAL_TSA_N &&
604 s->nal_unit_type != NAL_STSA_N &&
605 s->nal_unit_type != NAL_RADL_N &&
606 s->nal_unit_type != NAL_RADL_R &&
607 s->nal_unit_type != NAL_RASL_N &&
608 s->nal_unit_type != NAL_RASL_R)
/* SAO flags: [0] = luma, [1]/[2] = chroma (coded with one flag). */
611 if (s->sps->sao_enabled) {
612 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
613 sh->slice_sample_adaptive_offset_flag[1] =
614 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
616 sh->slice_sample_adaptive_offset_flag[0] = 0;
617 sh->slice_sample_adaptive_offset_flag[1] = 0;
618 sh->slice_sample_adaptive_offset_flag[2] = 0;
621 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
622 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
625 sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
626 if (sh->slice_type == B_SLICE)
627 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
629 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
630 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
631 if (sh->slice_type == B_SLICE)
632 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
634 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
635 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
636 sh->nb_refs[L0], sh->nb_refs[L1]);
637 return AVERROR_INVALIDDATA;
640 sh->rpl_modification_flag[0] = 0;
641 sh->rpl_modification_flag[1] = 0;
642 nb_refs = ff_hevc_frame_nb_refs(s);
644 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
645 return AVERROR_INVALIDDATA;
/* Explicit reference-picture-list reordering. */
648 if (s->pps->lists_modification_present_flag && nb_refs > 1) {
649 sh->rpl_modification_flag[0] = get_bits1(gb);
650 if (sh->rpl_modification_flag[0]) {
651 for (i = 0; i < sh->nb_refs[L0]; i++)
652 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
655 if (sh->slice_type == B_SLICE) {
656 sh->rpl_modification_flag[1] = get_bits1(gb);
657 if (sh->rpl_modification_flag[1] == 1)
658 for (i = 0; i < sh->nb_refs[L1]; i++)
659 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
663 if (sh->slice_type == B_SLICE)
664 sh->mvd_l1_zero_flag = get_bits1(gb);
666 if (s->pps->cabac_init_present_flag)
667 sh->cabac_init_flag = get_bits1(gb);
669 sh->cabac_init_flag = 0;
/* Collocated picture for temporal MV prediction: coded flag is
 * collocated_from_l0, hence the negation to get a list index. */
671 sh->collocated_ref_idx = 0;
672 if (sh->slice_temporal_mvp_enabled_flag) {
673 sh->collocated_list = L0;
674 if (sh->slice_type == B_SLICE)
675 sh->collocated_list = !get_bits1(gb);
677 if (sh->nb_refs[sh->collocated_list] > 1) {
678 sh->collocated_ref_idx = get_ue_golomb_long(gb);
679 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
680 av_log(s->avctx, AV_LOG_ERROR,
681 "Invalid collocated_ref_idx: %d.\n",
682 sh->collocated_ref_idx);
683 return AVERROR_INVALIDDATA;
688 if ((s->pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
689 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
690 pred_weight_table(s, gb);
/* Coded value is five_minus_max_num_merge_cand. */
693 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
694 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
695 av_log(s->avctx, AV_LOG_ERROR,
696 "Invalid number of merging MVP candidates: %d.\n",
697 sh->max_num_merge_cand);
698 return AVERROR_INVALIDDATA;
702 sh->slice_qp_delta = get_se_golomb(gb);
703 if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
704 sh->slice_cb_qp_offset = get_se_golomb(gb);
705 sh->slice_cr_qp_offset = get_se_golomb(gb);
707 sh->slice_cb_qp_offset = 0;
708 sh->slice_cr_qp_offset = 0;
711 if (s->pps->deblocking_filter_control_present_flag) {
712 int deblocking_filter_override_flag = 0;
714 if (s->pps->deblocking_filter_override_enabled_flag)
715 deblocking_filter_override_flag = get_bits1(gb);
717 if (deblocking_filter_override_flag) {
718 sh->disable_deblocking_filter_flag = get_bits1(gb);
719 if (!sh->disable_deblocking_filter_flag) {
/* Offsets are coded divided by two. */
720 sh->beta_offset = get_se_golomb(gb) * 2;
721 sh->tc_offset = get_se_golomb(gb) * 2;
724 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
725 sh->beta_offset = s->pps->beta_offset;
726 sh->tc_offset = s->pps->tc_offset;
729 sh->disable_deblocking_filter_flag = 0;
734 if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
735 (sh->slice_sample_adaptive_offset_flag[0] ||
736 sh->slice_sample_adaptive_offset_flag[1] ||
737 !sh->disable_deblocking_filter_flag)) {
738 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
740 sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
/* A dependent slice segment needs a preceding independent one. */
742 } else if (!s->slice_initialized) {
743 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
744 return AVERROR_INVALIDDATA;
747 sh->num_entry_point_offsets = 0;
748 if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
749 sh->num_entry_point_offsets = get_ue_golomb_long(gb);
750 if (sh->num_entry_point_offsets > 0) {
751 int offset_len = get_ue_golomb_long(gb) + 1;
/* Entry point offsets are skipped: tiles/WPP parallel decoding of
 * entry points is not used here. */
753 for (i = 0; i < sh->num_entry_point_offsets; i++)
754 skip_bits(gb, offset_len);
758 if (s->pps->slice_header_extension_present_flag) {
759 int length = get_ue_golomb_long(gb);
760 for (i = 0; i < length; i++)
761 skip_bits(gb, 8); // slice_header_extension_data_byte
764 // Inferred parameters
765 sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
766 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
768 s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
/* With per-CU QP delta disabled, the whole slice uses one QP; the
 * modulo wraps slice_qp into the valid range relative to qp_bd_offset. */
770 if (!s->pps->cu_qp_delta_enabled_flag)
771 s->HEVClc.qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) %
772 (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset;
774 s->slice_initialized = 1;
/* Access the per-CTB entry of a CTB-indexed table at CTB coords (x, y).
 * Relies on a local variable `s` (HEVCContext *) being in scope. */
779 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
/* Set a SAO parameter field: decode it fresh, or inherit it from the left
 * or above CTB when the corresponding merge flag is set. Relies on locals
 * sao, rx, ry, sao_merge_left_flag, sao_merge_up_flag being in scope.
 * NOTE(review): the "decode fresh" assignment arm of this macro is not
 * visible in this extract. */
781 #define SET_SAO(elem, value) \
783 if (!sao_merge_up_flag && !sao_merge_left_flag) \
785 else if (sao_merge_left_flag) \
786 sao->elem = CTB(s->sao, rx-1, ry).elem; \
787 else if (sao_merge_up_flag) \
788 sao->elem = CTB(s->sao, rx, ry-1).elem; \
/* Decode the sao() syntax for the CTB at CTB coordinates (rx, ry) and
 * store the resulting parameters (with offsets scaled to the coding bit
 * depth) into s->sao. NOTE(review): some braces/else branches are missing
 * from this extract. */
793 static void hls_sao_param(HEVCContext *s, int rx, int ry)
795 HEVCLocalContext *lc = &s->HEVClc;
796 int sao_merge_left_flag = 0;
797 int sao_merge_up_flag = 0;
/* Offsets are coded for up to 10-bit depth; shift scales them up for
 * higher bit depths. */
798 int shift = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
799 SAOParams *sao = &CTB(s->sao, rx, ry);
/* Merge flags are only coded when SAO is on for luma or chroma and a
 * same-slice/same-tile neighbour exists. */
802 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
803 s->sh.slice_sample_adaptive_offset_flag[1]) {
805 if (lc->ctb_left_flag)
806 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
808 if (ry > 0 && !sao_merge_left_flag) {
810 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
/* c_idx: 0 = luma, 1 = Cb, 2 = Cr. */
814 for (c_idx = 0; c_idx < 3; c_idx++) {
815 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
816 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* Cr reuses the type/EO class decoded for Cb. */
821 sao->type_idx[2] = sao->type_idx[1];
822 sao->eo_class[2] = sao->eo_class[1];
824 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
827 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
830 for (i = 0; i < 4; i++)
831 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
/* Band offsets carry explicit signs; edge offsets derive sign from
 * the category (see the inferred-parameters loop below). */
833 if (sao->type_idx[c_idx] == SAO_BAND) {
834 for (i = 0; i < 4; i++) {
835 if (sao->offset_abs[c_idx][i]) {
836 SET_SAO(offset_sign[c_idx][i],
837 ff_hevc_sao_offset_sign_decode(s));
839 sao->offset_sign[c_idx][i] = 0;
842 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
843 } else if (c_idx != 2) {
844 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
847 // Inferred parameters
848 sao->offset_val[c_idx][0] = 0;
849 for (i = 0; i < 4; i++) {
850 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
851 if (sao->type_idx[c_idx] == SAO_EDGE) {
853 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
854 } else if (sao->offset_sign[c_idx][i]) {
855 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
/* Decode the residual_coding() syntax for one transform block at (x0, y0)
 * of size (1 << log2_trafo_size) in plane c_idx: CABAC-decode the last
 * significant coefficient position, significance maps, levels and signs,
 * dequantize (honouring scaling lists and sign data hiding), then apply
 * the inverse transform (or transform skip / transquant bypass) adding
 * the result into the frame plane. NOTE(review): this extract is missing
 * many original lines (closing braces, else branches, some declarations);
 * comments describe only the visible logic. */
864 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
865 int log2_trafo_size, enum ScanType scan_idx,
/* Map a scan position n inside coefficient group (offset >> 4) to block
 * coordinates (x_c, y_c) using the current scan tables. */
868 #define GET_COORD(offset, n) \
870 x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n]; \
871 y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n]; \
873 HEVCLocalContext *lc = &s->HEVClc;
874 int transform_skip_flag = 0;
876 int last_significant_coeff_x, last_significant_coeff_y;
880 int greater1_ctx = 1;
883 int x_cg_last_sig, y_cg_last_sig;
885 const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
887 ptrdiff_t stride = s->frame->linesize[c_idx];
888 int hshift = s->sps->hshift[c_idx];
889 int vshift = s->sps->vshift[c_idx];
890 uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride +
891 ((x0 >> hshift) << s->sps->pixel_shift)];
892 DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
893 DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
895 int trafo_size = 1 << log2_trafo_size;
896 int i, qp, shift, add, scale, scale_m;
/* Spec table: levelScale[qp % 6]. */
897 const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
898 const uint8_t *scale_matrix;
901 // Derive QP for dequant
902 if (!lc->cu.cu_transquant_bypass_flag) {
/* Chroma QP mapping for luma QP 30..43 (below 30 it is identity,
 * above 43 it is qp - 6). */
903 static const int qp_c[] = {
904 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
/* qp % 6 and qp / 6 lookup tables for 0..63. */
907 static const uint8_t rem6[51 + 2 * 6 + 1] = {
908 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
909 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
910 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
913 static const uint8_t div6[51 + 2 * 6 + 1] = {
914 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
915 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
916 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
921 qp = qp_y + s->sps->qp_bd_offset;
/* Chroma: apply the PPS + slice QP offsets, clip, then remap. */
926 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
928 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
930 qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
936 qp = qp_c[qp_i - 30];
938 qp += s->sps->qp_bd_offset;
941 shift = s->sps->bit_depth + log2_trafo_size - 5;
942 add = 1 << (shift - 1);
943 scale = level_scale[rem6[qp]] << (div6[qp]);
944 scale_m = 16; // default when no custom scaling lists.
947 if (s->sps->scaling_list_enable_flag) {
948 const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
949 &s->pps->scaling_list : &s->sps->scaling_list;
/* matrix_id selects intra (0..2) vs inter (3..5) per component;
 * 32x32 blocks (log2 == 5) only distinguish intra/inter. */
950 int matrix_id = lc->cu.pred_mode != MODE_INTRA;
952 if (log2_trafo_size != 5)
953 matrix_id = 3 * matrix_id + c_idx;
955 scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
956 if (log2_trafo_size >= 4)
957 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
/* transform_skip_flag is only coded for 4x4 blocks. */
961 if (s->pps->transform_skip_enabled_flag &&
962 !lc->cu.cu_transquant_bypass_flag &&
963 log2_trafo_size == 2) {
964 transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
/* Last significant coefficient position: prefix + optional suffix. */
967 last_significant_coeff_x =
968 ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
969 last_significant_coeff_y =
970 ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
972 if (last_significant_coeff_x > 3) {
973 int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
974 last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
975 (2 + (last_significant_coeff_x & 1)) +
979 if (last_significant_coeff_y > 3) {
980 int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
981 last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
982 (2 + (last_significant_coeff_y & 1)) +
/* For vertical scan the coded coordinates are transposed. */
986 if (scan_idx == SCAN_VERT)
987 FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
989 x_cg_last_sig = last_significant_coeff_x >> 2;
990 y_cg_last_sig = last_significant_coeff_y >> 2;
/* Select scan tables and compute num_coeff (position of the last
 * significant coefficient in scan order, +1 applied elsewhere). */
994 int last_x_c = last_significant_coeff_x & 3;
995 int last_y_c = last_significant_coeff_y & 3;
997 scan_x_off = ff_hevc_diag_scan4x4_x;
998 scan_y_off = ff_hevc_diag_scan4x4_y;
999 num_coeff = diag_scan4x4_inv[last_y_c][last_x_c];
1000 if (trafo_size == 4) {
1001 scan_x_cg = scan_1x1;
1002 scan_y_cg = scan_1x1;
1003 } else if (trafo_size == 8) {
1004 num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1005 scan_x_cg = diag_scan2x2_x;
1006 scan_y_cg = diag_scan2x2_y;
1007 } else if (trafo_size == 16) {
1008 num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1009 scan_x_cg = ff_hevc_diag_scan4x4_x;
1010 scan_y_cg = ff_hevc_diag_scan4x4_y;
1011 } else { // trafo_size == 32
1012 num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1013 scan_x_cg = ff_hevc_diag_scan8x8_x;
1014 scan_y_cg = ff_hevc_diag_scan8x8_y;
/* Horizontal scan: reuse horiz tables directly. */
1019 scan_x_cg = horiz_scan2x2_x;
1020 scan_y_cg = horiz_scan2x2_y;
1021 scan_x_off = horiz_scan4x4_x;
1022 scan_y_off = horiz_scan4x4_y;
1023 num_coeff = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1025 default: //SCAN_VERT
/* Vertical scan = horizontal scan with x/y tables swapped. */
1026 scan_x_cg = horiz_scan2x2_y;
1027 scan_y_cg = horiz_scan2x2_x;
1028 scan_x_off = horiz_scan4x4_y;
1029 scan_y_off = horiz_scan4x4_x;
1030 num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1034 num_last_subset = (num_coeff - 1) >> 4;
/* Process 4x4 coefficient groups from the last subset back to DC. */
1036 for (i = num_last_subset; i >= 0; i--) {
1038 int x_cg, y_cg, x_c, y_c;
1039 int implicit_non_zero_coeff = 0;
1040 int64_t trans_coeff_level;
1042 int offset = i << 4;
1044 uint8_t significant_coeff_flag_idx[16];
1045 uint8_t nb_significant_coeff_flag = 0;
1047 x_cg = scan_x_cg[i];
1048 y_cg = scan_y_cg[i];
/* Group significance is coded for intermediate groups; the last and
 * DC groups are implicitly significant. */
1050 if (i < num_last_subset && i > 0) {
1052 if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1053 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1054 if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1055 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1057 significant_coeff_group_flag[x_cg][y_cg] =
1058 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1059 implicit_non_zero_coeff = 1;
1061 significant_coeff_group_flag[x_cg][y_cg] =
1062 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1063 (x_cg == 0 && y_cg == 0));
1066 last_scan_pos = num_coeff - offset - 1;
1068 if (i == num_last_subset) {
1069 n_end = last_scan_pos - 1;
1070 significant_coeff_flag_idx[0] = last_scan_pos;
1071 nb_significant_coeff_flag = 1;
/* Context pattern from right and bottom neighbour groups. */
1076 if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1077 prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1078 if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1079 prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1081 for (n = n_end; n >= 0; n--) {
1082 GET_COORD(offset, n);
1084 if (significant_coeff_group_flag[x_cg][y_cg] &&
1085 (n > 0 || implicit_non_zero_coeff == 0)) {
1086 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1090 significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1091 nb_significant_coeff_flag++;
1092 implicit_non_zero_coeff = 0;
/* DC of a group flagged significant with no other coeff decoded
 * significant is inferred significant. */
1095 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1096 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1097 significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1098 nb_significant_coeff_flag++;
1103 n_end = nb_significant_coeff_flag;
1106 int first_nz_pos_in_cg = 16;
1107 int last_nz_pos_in_cg = -1;
1108 int c_rice_param = 0;
1109 int first_greater1_coeff_idx = -1;
1110 uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1111 uint16_t coeff_sign_flag;
1113 int sign_hidden = 0;
1115 // initialize first elem of coeff_bas_level_greater1_flag
1116 int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1118 if (!(i == num_last_subset) && greater1_ctx == 0)
1121 last_nz_pos_in_cg = significant_coeff_flag_idx[0];
/* greater1 flags are coded for at most the first 8 significant
 * coefficients of a group. */
1123 for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1124 int n_idx = significant_coeff_flag_idx[m];
1125 int inc = (ctx_set << 2) + greater1_ctx;
1126 coeff_abs_level_greater1_flag[n_idx] =
1127 ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1128 if (coeff_abs_level_greater1_flag[n_idx]) {
1130 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1134 if (coeff_abs_level_greater1_flag[n_idx] &&
1135 first_greater1_coeff_idx == -1)
1136 first_greater1_coeff_idx = n_idx;
1138 first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
/* Sign data hiding applies when the first and last nonzero coeffs
 * of the group are at least 4 scan positions apart. */
1139 sign_hidden = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1140 !lc->cu.cu_transquant_bypass_flag;
1142 if (first_greater1_coeff_idx != -1) {
1143 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
/* When a sign is hidden, one fewer sign bit is coded. */
1145 if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1146 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1148 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1151 for (m = 0; m < n_end; m++) {
1152 n = significant_coeff_flag_idx[m];
1153 GET_COORD(offset, n);
1154 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
/* A remaining-level suffix is coded when the base level hit the
 * maximum expressible with the flags decoded so far. */
1155 if (trans_coeff_level == ((m < 8) ?
1156 ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1157 int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1159 trans_coeff_level += last_coeff_abs_level_remaining;
/* Adapt the Rice parameter for subsequent levels (cap 4). */
1160 if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1161 c_rice_param = FFMIN(c_rice_param + 1, 4);
/* Hidden sign: derived from the parity of the level sum. */
1163 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1164 sum_abs += trans_coeff_level;
1165 if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1166 trans_coeff_level = -trans_coeff_level;
1168 if (coeff_sign_flag >> 15)
1169 trans_coeff_level = -trans_coeff_level;
1170 coeff_sign_flag <<= 1;
1171 if (!lc->cu.cu_transquant_bypass_flag) {
1172 if (s->sps->scaling_list_enable_flag) {
1173 if (y_c || x_c || log2_trafo_size < 4) {
/* Scaling matrices are stored at 8x8 granularity for
 * 16x16/32x32 blocks; subsample the coordinates. */
1175 switch (log2_trafo_size) {
1176 case 3: pos = (y_c << 3) + x_c; break;
1177 case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1178 case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1179 default: pos = (y_c << 2) + x_c;
1181 scale_m = scale_matrix[pos];
1186 trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
/* Clamp to int16_t range.
 * NOTE(review): the negative-branch mask has 15 hex digits
 * (0x0FFFFFFFFFF8000) while the positive branch uses 16
 * (0xffffffffffff8000) — looks like a missing 'f'; verify against
 * upstream before changing, as the code is byte-preserved here. */
1187 if(trans_coeff_level < 0) {
1188 if((~trans_coeff_level) & 0xFffffffffff8000)
1189 trans_coeff_level = -32768;
1191 if (trans_coeff_level & 0xffffffffffff8000)
1192 trans_coeff_level = 32767;
1195 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
/* Reconstruction: bypass copies residual, otherwise apply the
 * selected inverse transform and add into the prediction. */
1200 if (lc->cu.cu_transquant_bypass_flag) {
1201 s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1203 if (transform_skip_flag)
1204 s->hevcdsp.transform_skip(dst, coeffs, stride);
1205 else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1206 log2_trafo_size == 2)
1207 s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1209 s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
/* Decode one transform unit (TU).
 * For intra CUs: run intra prediction for the luma block and, depending on
 * the TU size, the chroma blocks; then decode the residual for each plane
 * whose coded-block flag (cbf) is set.
 * (x0, y0) addresses this TU; (xBase, yBase) addresses the parent TU and is
 * used for chroma when the luma TU is 4x4 (blk_idx == 3), since chroma is
 * then coded once at the parent level.
 * NOTE(review): this extract elides blank/brace lines from the original file;
 * the leading numbers on each line are original-file line numbers. */
1213 static void hls_transform_unit(HEVCContext *s, int x0, int y0,
1214 int xBase, int yBase, int cb_xBase, int cb_yBase,
1215 int log2_cb_size, int log2_trafo_size,
1216 int trafo_depth, int blk_idx)
1218 HEVCLocalContext *lc = &s->HEVClc;
1220 if (lc->cu.pred_mode == MODE_INTRA) {
1221 int trafo_size = 1 << log2_trafo_size;
1222 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
/* luma intra prediction (c_idx 0) */
1224 s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1225 if (log2_trafo_size > 2) {
/* chroma TU is half the luma size; hshift[1] - 1 presumably yields 0 for
 * 4:2:0 — TODO confirm intended chroma-format handling */
1226 trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1227 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1228 s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1229 s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1230 } else if (blk_idx == 3) {
/* 4x4 luma TUs: chroma is predicted once, on the last of the four
 * sub-blocks, at the parent (xBase, yBase) position */
1231 trafo_size = trafo_size << s->sps->hshift[1];
1232 ff_hevc_set_neighbour_available(s, xBase, yBase,
1233 trafo_size, trafo_size);
1234 s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1235 s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
/* residual decoding: only if at least one plane has coded coefficients */
1239 if (lc->tt.cbf_luma ||
1240 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1241 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1242 int scan_idx = SCAN_DIAG;
1243 int scan_idx_c = SCAN_DIAG;
/* decode the CU QP delta on the first coded TU of the CU */
1245 if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1246 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1247 if (lc->tu.cu_qp_delta != 0)
1248 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1249 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1250 lc->tu.is_cu_qp_delta_coded = 1;
1251 ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
/* mode-dependent coefficient scan for small intra TUs:
 * near-horizontal modes (6..14) scan vertically,
 * near-vertical modes (22..30) scan horizontally */
1254 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1255 if (lc->tu.cur_intra_pred_mode >= 6 &&
1256 lc->tu.cur_intra_pred_mode <= 14) {
1257 scan_idx = SCAN_VERT;
1258 } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1259 lc->tu.cur_intra_pred_mode <= 30) {
1260 scan_idx = SCAN_HORIZ;
/* same rule for the chroma scan, driven by the chroma intra mode */
1263 if (lc->pu.intra_pred_mode_c >= 6 &&
1264 lc->pu.intra_pred_mode_c <= 14) {
1265 scan_idx_c = SCAN_VERT;
1266 } else if (lc->pu.intra_pred_mode_c >= 22 &&
1267 lc->pu.intra_pred_mode_c <= 30) {
1268 scan_idx_c = SCAN_HORIZ;
1272 if (lc->tt.cbf_luma)
1273 hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1274 if (log2_trafo_size > 2) {
1275 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1276 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1277 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1278 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1279 } else if (blk_idx == 3) {
/* 4x4 luma: chroma residual decoded once at the parent position */
1280 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1281 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1282 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1283 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1288 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1290 int cb_size = 1 << log2_cb_size;
1291 int log2_min_pu_size = s->sps->log2_min_pu_size;
1293 int min_pu_width = s->sps->min_pu_width;
1294 int x_end = FFMIN(x0 + cb_size, s->sps->width);
1295 int y_end = FFMIN(y0 + cb_size, s->sps->height);
1298 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1299 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1300 s->is_pcm[i + j * min_pu_width] = 2;
/* Recursively parse the transform tree of a CU.
 * Decodes split_transform_flag (explicit or inferred), the chroma cbf flags
 * at each level, and either recurses into four quadrants or decodes the leaf
 * TU via hls_transform_unit(); finally records cbf_luma per min-TU cell and
 * updates deblocking boundary strengths.
 * NOTE(review): blank/brace lines are elided in this extract; leading numbers
 * are original-file line numbers. */
1303 static void hls_transform_tree(HEVCContext *s, int x0, int y0,
1304 int xBase, int yBase, int cb_xBase, int cb_yBase,
1305 int log2_cb_size, int log2_trafo_size,
1306 int trafo_depth, int blk_idx)
1308 HEVCLocalContext *lc = &s->HEVClc;
1309 uint8_t split_transform_flag;
/* 4x4 TUs inherit the chroma cbf from their parent (chroma is coded at the
 * parent level for the smallest luma TU size) */
1311 if (trafo_depth > 0 && log2_trafo_size == 2) {
1312 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1313 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1314 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1315 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1317 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1318 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
/* NxN intra CUs carry one luma intra mode per quadrant */
1321 if (lc->cu.intra_split_flag) {
1322 if (trafo_depth == 1)
1323 lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1325 lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1328 lc->tt.cbf_luma = 1;
/* inter split is forced when the inter TU hierarchy depth is 0 and the CU
 * is partitioned (condition continues on an elided line) */
1330 lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1331 lc->cu.pred_mode == MODE_INTER &&
1332 lc->cu.part_mode != PART_2Nx2N &&
/* split_transform_flag: explicitly coded when the size/depth allow a choice,
 * otherwise inferred */
1335 if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1336 log2_trafo_size > s->sps->log2_min_tb_size &&
1337 trafo_depth < lc->cu.max_trafo_depth &&
1338 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1339 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1341 split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1342 (lc->cu.intra_split_flag && trafo_depth == 0) ||
1343 lc->tt.inter_split_flag;
/* chroma cbf: only coded if the parent level had a non-zero cbf */
1346 if (log2_trafo_size > 2) {
1347 if (trafo_depth == 0 ||
1348 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1349 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1350 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1353 if (trafo_depth == 0 ||
1354 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1355 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1356 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* recurse into the four quadrants; this node becomes the new (xBase, yBase) */
1360 if (split_transform_flag) {
1361 int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1362 int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1364 hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1365 log2_trafo_size - 1, trafo_depth + 1, 0);
1366 hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1367 log2_trafo_size - 1, trafo_depth + 1, 1);
1368 hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1369 log2_trafo_size - 1, trafo_depth + 1, 2);
1370 hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1371 log2_trafo_size - 1, trafo_depth + 1, 3);
/* leaf TU */
1373 int min_tu_size = 1 << s->sps->log2_min_tb_size;
1374 int log2_min_tu_size = s->sps->log2_min_tb_size;
1375 int min_tu_width = s->sps->min_tb_width;
/* cbf_luma is explicit unless it can be inferred to 1 (inter root TU with
 * no coded chroma) */
1377 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1378 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1379 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1380 lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1383 hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1384 log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
1386 // TODO: store cbf_luma somewhere else
1387 if (lc->tt.cbf_luma) {
/* record cbf_luma for every min-TU cell covered by this TU */
1389 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1390 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1391 int x_tu = (x0 + j) >> log2_min_tu_size;
1392 int y_tu = (y0 + i) >> log2_min_tu_size;
1393 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1396 if (!s->sh.disable_deblocking_filter_flag) {
1397 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1398 lc->slice_or_tiles_up_boundary,
1399 lc->slice_or_tiles_left_boundary);
1400 if (s->pps->transquant_bypass_enable_flag &&
1401 lc->cu.cu_transquant_bypass_flag)
1402 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/* Decode a PCM (raw sample) coding block: skip the raw payload in the CABAC
 * bytestream, re-read it through a GetBitContext, and copy the samples
 * directly into the frame for all three planes.
 * NOTE(review): this extract elides some lines; the error check on the
 * init_get_bits() return value and the function's return statement are not
 * visible here — confirm against the full source. */
1407 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1409     //TODO: non-4:2:0 support
1410 HEVCLocalContext *lc = &s->HEVClc;
1412 int cb_size = 1 << log2_cb_size;
/* per-plane destination pointers, chroma scaled by h/vshift */
1413 int stride0 = s->frame->linesize[0];
1414 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1415 int stride1 = s->frame->linesize[1];
1416 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1417 int stride2 = s->frame->linesize[2];
1418 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
/* payload size in bits: cb_size^2 luma samples plus cb_size^2/2 chroma
 * samples (two planes at quarter size in 4:2:0), at pcm.bit_depth each */
1420 int length = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth;
1421 const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1424 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1425 lc->slice_or_tiles_up_boundary,
1426 lc->slice_or_tiles_left_boundary);
1428 ret = init_get_bits(&gb, pcm, length);
1432 s->hevcdsp.put_pcm(dst0, stride0, cb_size, &gb, s->sps->pcm.bit_depth);
1433 s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1434 s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth);
/* Decode one motion vector difference into lc->pu.mvd.
 * x and y accumulate a magnitude class per component:
 *   0 -> MVD component is 0,
 *   1 -> magnitude is exactly 1, only a sign flag follows,
 *   2 -> full abs-minus-2 remainder plus sign (ff_hevc_mvd_decode).
 * NOTE(review): this extract elides lines — the "if (x)"/"if (y)" guards
 * before the greater1 decodes and the "switch (x)"/"switch (y)" headers are
 * presumably on the elided lines; confirm against the full source. */
1438 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1440 HEVCLocalContext *lc = &s->HEVClc;
1441 int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1442 int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1445 x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1447 y += ff_hevc_abs_mvd_greater1_flag_decode(s);
/* horizontal component */
1450 case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s); break;
1451 case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1452 case 0: lc->pu.mvd.x = 0; break;
/* vertical component */
1456 case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s); break;
1457 case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1458 case 0: lc->pu.mvd.y = 0; break;
1463 * 8.5.3.2.2.1 Luma sample interpolation process
1465 * @param s HEVC decoding context
1466 * @param dst target buffer for block data at block position
1467 * @param dststride stride of the dst buffer
1468 * @param ref reference picture buffer at origin (0, 0)
1469 * @param mv motion vector (relative to block position) to get pixel data from
1470 * @param x_off horizontal position of block from origin (0, 0)
1471 * @param y_off vertical position of block from origin (0, 0)
1472 * @param block_w width of block
1473 * @param block_h height of block
/* Luma motion compensation: apply the quarter-pel interpolation filter
 * selected by the fractional MV part, reading from the reference frame and
 * writing 16-bit intermediate samples to dst. Falls back to
 * emulated_edge_mc() when the filter footprint would read outside the
 * picture.
 * NOTE(review): the declarations of mx/my are on elided lines — presumably
 * the fractional MV parts (mv->x & 3, mv->y & 3); confirm against the full
 * source. */
1475 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1476 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1477 int block_w, int block_h)
1479 HEVCLocalContext *lc = &s->HEVClc;
1480 uint8_t *src = ref->data[0];
1481 ptrdiff_t srcstride = ref->linesize[0];
1482 int pic_width = s->sps->width;
1483 int pic_height = s->sps->height;
/* extra rows/columns the qpel filter reads before the block, per phase */
1487 int extra_left = ff_hevc_qpel_extra_before[mx];
1488 int extra_top = ff_hevc_qpel_extra_before[my];
/* integer MV part, in luma samples (MV is in quarter-pel units) */
1490 x_off += mv->x >> 2;
1491 y_off += mv->y >> 2;
1492 src += y_off * srcstride + (x_off << s->sps->pixel_shift);
/* if the filter footprint crosses a picture edge, build a padded copy */
1494 if (x_off < extra_left || y_off < extra_top ||
1495 x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1496 y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1497 int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1499 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1500 srcstride, srcstride,
1501 block_w + ff_hevc_qpel_extra[mx],
1502 block_h + ff_hevc_qpel_extra[my],
1503 x_off - extra_left, y_off - extra_top,
1504 pic_width, pic_height);
1505 src = lc->edge_emu_buffer + offset;
1507 s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1508 block_h, lc->mc_buffer);
1512 * 8.5.3.2.2.2 Chroma sample interpolation process
1514 * @param s HEVC decoding context
1515 * @param dst1 target buffer for block data at block position (U plane)
1516 * @param dst2 target buffer for block data at block position (V plane)
1517 * @param dststride stride of the dst1 and dst2 buffers
1518 * @param ref reference picture buffer at origin (0, 0)
1519 * @param mv motion vector (relative to block position) to get pixel data from
1520 * @param x_off horizontal position of block from origin (0, 0)
1521 * @param y_off vertical position of block from origin (0, 0)
1522 * @param block_w width of block
1523 * @param block_h height of block
/* Chroma motion compensation for both chroma planes: eighth-pel
 * interpolation (MV is in 1/8-pel units for chroma in 4:2:0), with
 * emulated_edge_mc() fallback near picture edges. The >> 1 on width/height
 * hard-codes 4:2:0 subsampling.
 * NOTE(review): the y-offset bound checks use EPEL_EXTRA_AFTER while the
 * x checks use EPEL_EXTRA_BEFORE — this looks inconsistent (conservative at
 * worst if AFTER >= BEFORE); confirm against the full source/spec. */
1525 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1526 ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1527 int x_off, int y_off, int block_w, int block_h)
1529 HEVCLocalContext *lc = &s->HEVClc;
1530 uint8_t *src1 = ref->data[1];
1531 uint8_t *src2 = ref->data[2];
1532 ptrdiff_t src1stride = ref->linesize[1];
1533 ptrdiff_t src2stride = ref->linesize[2];
/* chroma plane dimensions (4:2:0) */
1534 int pic_width = s->sps->width >> 1;
1535 int pic_height = s->sps->height >> 1;
/* integer MV part in chroma samples (eighth-pel units) */
1540 x_off += mv->x >> 3;
1541 y_off += mv->y >> 3;
1542 src1 += y_off * src1stride + (x_off << s->sps->pixel_shift);
1543 src2 += y_off * src2stride + (x_off << s->sps->pixel_shift);
/* edge emulation path: pad both planes, then filter from the copies */
1545 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1546 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1547 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1548 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1549 int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1551 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1552 src1stride, src1stride,
1553 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1554 x_off - EPEL_EXTRA_BEFORE,
1555 y_off - EPEL_EXTRA_BEFORE,
1556 pic_width, pic_height);
1558 src1 = lc->edge_emu_buffer + offset1;
1559 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1560 block_w, block_h, mx, my, lc->mc_buffer);
1562 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1563 src2stride, src2stride,
1564 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1565 x_off - EPEL_EXTRA_BEFORE,
1566 y_off - EPEL_EXTRA_BEFORE,
1567 pic_width, pic_height);
1568 src2 = lc->edge_emu_buffer + offset2;
1569 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1570 block_w, block_h, mx, my,
/* fast path: filter directly from the reference planes */
1573 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1574 block_w, block_h, mx, my,
1576 s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1577 block_w, block_h, mx, my,
1582 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1583 const Mv *mv, int y0, int height)
1585 int y = (mv->y >> 2) + y0 + height + 9;
1586 ff_thread_await_progress(&ref->tf, y, 0);
/* Decode and perform inter prediction for one prediction unit (PU).
 * Parses skip/merge/AMVP motion data into current_mv, stores it into the
 * per-min-PU motion field (tab_mvf), waits for the reference rows to be
 * decoded (frame threading), then runs luma + chroma MC and the
 * (optionally weighted) uni- or bi-prediction write-back.
 * NOTE(review): this extract elides lines (e.g. the nPbW/nPbH parameters on
 * the elided line 1590) and "&current_mv" has been mangled to "¤t_mv"
 * by a broken HTML-entity conversion ("&curren;") — restore "&current_mv"
 * when reconciling with the full source. Leading numbers are original-file
 * line numbers. */
1589 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1591 int log2_cb_size, int partIdx)
/* POS(c_idx, x, y): pointer to sample (x, y) of plane c_idx in the current
 * frame, honouring chroma subsampling and bytes-per-sample */
1593 #define POS(c_idx, x, y) \
1594 &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1595 (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1596 HEVCLocalContext *lc = &s->HEVClc;
1598 struct MvField current_mv = {{{ 0 }}};
1600 int min_pu_width = s->sps->min_pu_width;
1602 MvField *tab_mvf = s->ref->tab_mvf;
1603 RefPicList *refPicList = s->ref->refPicList;
1604 HEVCFrame *ref0, *ref1;
1606 int tmpstride = MAX_PB_SIZE;
1608 uint8_t *dst0 = POS(0, x0, y0);
1609 uint8_t *dst1 = POS(1, x0, y0);
1610 uint8_t *dst2 = POS(2, x0, y0);
1611 int log2_min_cb_size = s->sps->log2_min_cb_size;
1612 int min_cb_width = s->sps->min_cb_width;
1613 int x_cb = x0 >> log2_min_cb_size;
1614 int y_cb = y0 >> log2_min_cb_size;
/* skip mode: motion comes entirely from the merge candidate list */
1620 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1621 if (s->sh.max_num_merge_cand > 1)
1622 merge_idx = ff_hevc_merge_idx_decode(s);
1626 ff_hevc_luma_mv_merge_mode(s, x0, y0,
1629 log2_cb_size, partIdx,
1630 merge_idx, ¤t_mv);
1631 x_pu = x0 >> s->sps->log2_min_pu_size;
1632 y_pu = y0 >> s->sps->log2_min_pu_size;
/* propagate the motion info to every min-PU cell covered by this PU */
1634 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1635 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1636 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1637 } else { /* MODE_INTER */
1638 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1639 if (lc->pu.merge_flag) {
/* merge mode */
1640 if (s->sh.max_num_merge_cand > 1)
1641 merge_idx = ff_hevc_merge_idx_decode(s);
1645 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1646 partIdx, merge_idx, ¤t_mv);
1647 x_pu = x0 >> s->sps->log2_min_pu_size;
1648 y_pu = y0 >> s->sps->log2_min_pu_size;
1650 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1651 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1652 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* AMVP mode: explicit ref idx + MVD + MVP flag per direction */
1654 enum InterPredIdc inter_pred_idc = PRED_L0;
1655 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1656 if (s->sh.slice_type == B_SLICE)
1657 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* list 0 motion */
1659 if (inter_pred_idc != PRED_L1) {
1660 if (s->sh.nb_refs[L0]) {
1661 ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1662 current_mv.ref_idx[0] = ref_idx[0];
1664 current_mv.pred_flag[0] = 1;
1665 hls_mvd_coding(s, x0, y0, 0);
1666 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1667 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1668 partIdx, merge_idx, ¤t_mv,
1670 current_mv.mv[0].x += lc->pu.mvd.x;
1671 current_mv.mv[0].y += lc->pu.mvd.y;
/* list 1 motion */
1674 if (inter_pred_idc != PRED_L0) {
1675 if (s->sh.nb_refs[L1]) {
1676 ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1677 current_mv.ref_idx[1] = ref_idx[1];
/* mvd_l1_zero_flag forces a zero L1 MVD for bi-prediction */
1680 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1684 hls_mvd_coding(s, x0, y0, 1);
1687 current_mv.pred_flag[1] = 1;
1688 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1689 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1690 partIdx, merge_idx, ¤t_mv,
1692 current_mv.mv[1].x += lc->pu.mvd.x;
1693 current_mv.mv[1].y += lc->pu.mvd.y;
1696 x_pu = x0 >> s->sps->log2_min_pu_size;
1697 y_pu = y0 >> s->sps->log2_min_pu_size;
1699 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1700 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1701 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* frame threading: wait until each used reference has decoded the rows
 * this MC will read */
1705 if (current_mv.pred_flag[0]) {
1706 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1709 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1711 if (current_mv.pred_flag[1]) {
1712 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1715 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
/* uni-prediction from list 0 */
1718 if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1719 DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1720 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1722 luma_mc(s, tmp, tmpstride, ref0->frame,
1723 ¤t_mv.mv[0], x0, y0, nPbW, nPbH);
/* explicit weighted prediction (P: weighted_pred_flag,
 * B: weighted_bipred_flag), else plain rounding write-back */
1725 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1726 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1727 s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1728 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1729 s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1730 dst0, s->frame->linesize[0], tmp,
1731 tmpstride, nPbW, nPbH);
1733 s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1735 chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1736 ¤t_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1738 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1739 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1740 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1741 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1742 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1743 dst1, s->frame->linesize[1], tmp, tmpstride,
1744 nPbW / 2, nPbH / 2);
1745 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1746 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1747 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1748 dst2, s->frame->linesize[2], tmp2, tmpstride,
1749 nPbW / 2, nPbH / 2);
1751 s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1752 s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
/* uni-prediction from list 1 — mirrors the list-0 path */
1754 } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1755 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1756 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1761 luma_mc(s, tmp, tmpstride, ref1->frame,
1762 ¤t_mv.mv[1], x0, y0, nPbW, nPbH);
1764 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1765 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1766 s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1767 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1768 s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1769 dst0, s->frame->linesize[0], tmp, tmpstride,
1772 s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1775 chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1776 ¤t_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1778 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1779 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1780 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1781 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1782 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1783 dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1784 s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1785 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1786 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1787 dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1789 s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1790 s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
/* bi-prediction: MC from both lists, then averaged (optionally weighted)
 * write-back */
1792 } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1793 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1794 DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1795 DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1796 DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1797 HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1798 HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1803 luma_mc(s, tmp, tmpstride, ref0->frame,
1804 ¤t_mv.mv[0], x0, y0, nPbW, nPbH);
1805 luma_mc(s, tmp2, tmpstride, ref1->frame,
1806 ¤t_mv.mv[1], x0, y0, nPbW, nPbH);
1808 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1809 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1810 s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1811 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1812 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1813 s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1814 s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1815 dst0, s->frame->linesize[0],
1816 tmp, tmp2, tmpstride, nPbW, nPbH);
1818 s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1819 tmp, tmp2, tmpstride, nPbW, nPbH);
1822 chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1823 ¤t_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1824 chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1825 ¤t_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1827 if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1828 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1829 s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1830 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1831 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1832 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1833 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1834 dst1, s->frame->linesize[1], tmp, tmp3,
1835 tmpstride, nPbW / 2, nPbH / 2);
1836 s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1837 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1838 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1839 s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1840 s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1841 dst2, s->frame->linesize[2], tmp2, tmp4,
1842 tmpstride, nPbW / 2, nPbH / 2);
1844 s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1845 s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
/* Derive the luma intra prediction mode for one PU.
 * Builds the three most-probable-mode (MPM) candidates from the left and
 * above neighbours, then either picks candidate[mpm_idx] (when
 * prev_intra_luma_pred_flag is set) or maps rem_intra_luma_pred_mode past
 * the sorted candidates. Finally writes the mode into tab_ipm and clears
 * the motion field for the covered min-PUs. Returns the derived mode.
 * NOTE(review): some declaration/brace lines are elided in this extract. */
1853 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1854 int prev_intra_luma_pred_flag)
1856 HEVCLocalContext *lc = &s->HEVClc;
1857 int x_pu = x0 >> s->sps->log2_min_pu_size;
1858 int y_pu = y0 >> s->sps->log2_min_pu_size;
1859 int min_pu_width = s->sps->min_pu_width;
1860 int size_in_pus = pu_size >> s->sps->log2_min_pu_size;
/* position inside the CTB, to detect CTB-boundary neighbours */
1861 int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1862 int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
/* neighbour modes default to DC when the neighbour is unavailable */
1864 int cand_up = (lc->ctb_up_flag || y0b) ?
1865 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1866 int cand_left = (lc->ctb_left_flag || x0b) ?
1867 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1869 int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1871 MvField *tab_mvf = s->ref->tab_mvf;
1872 int intra_pred_mode;
1876     // intra_pred_mode prediction does not cross vertical CTB boundaries
1877 if ((y0 - 1) < y_ctb)
/* MPM candidate construction (H.265 8.4.2) */
1880 if (cand_left == cand_up) {
1881 if (cand_left < 2) {
/* both non-angular: fixed candidate set */
1882 candidate[0] = INTRA_PLANAR;
1883 candidate[1] = INTRA_DC;
1884 candidate[2] = INTRA_ANGULAR_26;
/* angular: the mode itself plus its two angular neighbours (mod 32) */
1886 candidate[0] = cand_left;
1887 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1888 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
/* left != up: both are candidates, third is the first of
 * planar/DC/angular-26 not already present */
1891 candidate[0] = cand_left;
1892 candidate[1] = cand_up;
1893 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1894 candidate[2] = INTRA_PLANAR;
1895 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1896 candidate[2] = INTRA_DC;
1898 candidate[2] = INTRA_ANGULAR_26;
1902 if (prev_intra_luma_pred_flag) {
1903 intra_pred_mode = candidate[lc->pu.mpm_idx];
/* sort candidates ascending, then shift the remainder mode past every
 * candidate that is <= it */
1905 if (candidate[0] > candidate[1])
1906 FFSWAP(uint8_t, candidate[0], candidate[1]);
1907 if (candidate[0] > candidate[2])
1908 FFSWAP(uint8_t, candidate[0], candidate[2]);
1909 if (candidate[1] > candidate[2])
1910 FFSWAP(uint8_t, candidate[1], candidate[2]);
1912 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1913 for (i = 0; i < 3; i++)
1914 if (intra_pred_mode >= candidate[i])
1918     /* write the intra prediction units into the mv array */
1921 for (i = 0; i < size_in_pus; i++) {
1922 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1923 intra_pred_mode, size_in_pus);
/* intra blocks carry no motion: flag as intra and zero the MV field */
1925 for (j = 0; j < size_in_pus; j++) {
1926 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra = 1;
1927 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1928 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1929 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0] = 0;
1930 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1] = 0;
1931 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x = 0;
1932 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y = 0;
1933 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x = 0;
1934 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y = 0;
1938 return intra_pred_mode;
/* Record the coding-tree depth of a CB into s->tab_ct_depth for every
 * min-CB cell it covers (used later for context derivation).
 * NOTE(review): the remaining memset arguments (presumably ct_depth and
 * length) are on a line elided from this extract — confirm against the
 * full source. */
1941 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1942 int log2_cb_size, int ct_depth)
1944 int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1945 int x_cb = x0 >> s->sps->log2_min_cb_size;
1946 int y_cb = y0 >> s->sps->log2_min_cb_size;
1949 for (y = 0; y < length; y++)
1950 memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
/* Parse the intra prediction modes for a CU: one luma mode for 2Nx2N, four
 * for NxN (split == 1, 2x2 grid of half-size PBs), plus a single chroma
 * mode derived from chroma_mode and the first luma mode.
 * NOTE(review): some declaration/brace lines are elided in this extract. */
1954 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1957 HEVCLocalContext *lc = &s->HEVClc;
/* chroma_mode -> intra mode table; a collision with the luma mode remaps
 * to angular-34 below */
1958 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1959 uint8_t prev_intra_luma_pred_flag[4];
1960 int split = lc->cu.part_mode == PART_NxN;
1961 int pb_size = (1 << log2_cb_size) >> split;
1962 int side = split + 1;
/* first pass: all prev_intra_luma_pred_flag syntax elements */
1966 for (i = 0; i < side; i++)
1967 for (j = 0; j < side; j++)
1968 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* second pass: mpm_idx or rem mode, then derive each luma mode */
1970 for (i = 0; i < side; i++) {
1971 for (j = 0; j < side; j++) {
1972 if (prev_intra_luma_pred_flag[2 * i + j])
1973 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1975 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1977 lc->pu.intra_pred_mode[2 * i + j] =
1978 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1979 prev_intra_luma_pred_flag[2 * i + j]);
/* chroma mode: 4 means "same as luma"; otherwise table lookup with the
 * collision case remapped to angular-34 */
1983 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1984 if (chroma_mode != 4) {
1985 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1986 lc->pu.intra_pred_mode_c = 34;
1988 lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1990 lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
/* Initialize the per-min-PU metadata for a CB with default values: DC mode
 * in tab_ipm and the is_intra flag in the motion field (set iff the CU is
 * intra). Used for skip/inter/PCM CUs which carry no parsed intra modes.
 * NOTE(review): the remaining parameters (presumably x0, y0, log2_cb_size —
 * they are used below) are on lines elided from this extract. */
1994 static void intra_prediction_unit_default_value(HEVCContext *s,
1998 HEVCLocalContext *lc = &s->HEVClc;
1999 int pb_size = 1 << log2_cb_size;
2000 int size_in_pus = pb_size >> s->sps->log2_min_pu_size;
2001 int min_pu_width = s->sps->min_pu_width;
2002 MvField *tab_mvf = s->ref->tab_mvf;
2003 int x_pu = x0 >> s->sps->log2_min_pu_size;
2004 int y_pu = y0 >> s->sps->log2_min_pu_size;
/* CB smaller than one min-PU: nothing to mark */
2007 if (size_in_pus == 0)
2009 for (j = 0; j < size_in_pus; j++) {
2010 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2011 for (k = 0; k < size_in_pus; k++)
2012 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
/*
 * Decode one coding unit (CU) of size (1 << log2_cb_size) at (x0, y0):
 * parse skip / prediction / partition modes, dispatch the prediction
 * units for the chosen partition shape, decode the transform tree, and
 * record per-CU QP and coding-tree depth.
 * NOTE(review): this view of the file has gaps (missing lines), so some
 * braces/breaks/error checks of the original are not shown here.
 */
2016 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2018 int cb_size = 1 << log2_cb_size;
2019 HEVCLocalContext *lc = &s->HEVClc;
2020 int log2_min_cb_size = s->sps->log2_min_cb_size;
/* CU size measured in minimum-CB units; used to fill per-min-CB tables */
2021 int length = cb_size >> log2_min_cb_size;
2022 int min_cb_width = s->sps->min_cb_width;
2023 int x_cb = x0 >> log2_min_cb_size;
2024 int y_cb = y0 >> log2_min_cb_size;
/* reset per-CU state to defaults before parsing */
2029 lc->cu.rqt_root_cbf = 1;
2030 lc->cu.pred_mode = MODE_INTRA;
2031 lc->cu.part_mode = PART_2Nx2N;
2032 lc->cu.intra_split_flag = 0;
2033 lc->cu.pcm_flag = 0;
2035 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2036 for (x = 0; x < 4; x++)
2037 lc->pu.intra_pred_mode[x] = 1;
2038 if (s->pps->transquant_bypass_enable_flag) {
2039 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2040 if (lc->cu.cu_transquant_bypass_flag)
2041 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2043 lc->cu.cu_transquant_bypass_flag = 0;
/* cu_skip_flag is only coded in non-I slices */
2045 if (s->sh.slice_type != I_SLICE) {
2046 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2048 lc->cu.pred_mode = MODE_SKIP;
/* propagate the skip flag over the whole CU footprint in min-CB units */
2049 x = y_cb * min_cb_width + x_cb;
2050 for (y = 0; y < length; y++) {
2051 memset(&s->skip_flag[x], skip_flag, length);
2054 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
/* skipped CU: single 2Nx2N merge PU, no residual */
2057 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2058 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2059 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2061 if (!s->sh.disable_deblocking_filter_flag)
2062 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2063 lc->slice_or_tiles_up_boundary,
2064 lc->slice_or_tiles_left_boundary);
2066 if (s->sh.slice_type != I_SLICE)
2067 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is only coded for inter CUs or minimum-size intra CUs */
2068 if (lc->cu.pred_mode != MODE_INTRA ||
2069 log2_cb_size == s->sps->log2_min_cb_size) {
2070 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2071 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2072 lc->cu.pred_mode == MODE_INTRA;
2075 if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM is only possible for 2Nx2N intra CUs within the SPS PCM size range */
2076 if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2077 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2078 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2079 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2081 if (lc->cu.pcm_flag) {
2083 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2084 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2085 if (s->sps->pcm.loop_filter_disable_flag)
2086 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2091 intra_prediction_unit(s, x0, y0, log2_cb_size);
2094 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* inter CU: one PU call per partition of the chosen part_mode
 * (2Nx2N, 2NxN, Nx2N, 2NxnU, 2NxnD, nLx2N, nRx2N, NxN) */
2095 switch (lc->cu.part_mode) {
2097 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2100 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0);
2101 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2104 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0);
2105 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2108 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0);
2109 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2112 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2113 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1);
2116 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0);
2117 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2120 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2121 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1);
2124 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0);
2125 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1);
2126 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2127 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
/* residual coding: rqt_root_cbf is implicit (1) for intra and for
 * 2Nx2N merge CUs, otherwise it is parsed */
2132 if (!lc->cu.pcm_flag) {
2133 if (lc->cu.pred_mode != MODE_INTRA &&
2134 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2135 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2137 if (lc->cu.rqt_root_cbf) {
2138 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2139 s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2140 s->sps->max_transform_hierarchy_depth_inter;
2141 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
2142 log2_cb_size, 0, 0);
2144 if (!s->sh.disable_deblocking_filter_flag)
2145 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2146 lc->slice_or_tiles_up_boundary,
2147 lc->slice_or_tiles_left_boundary);
/* derive QP now if no cu_qp_delta was coded in this quantization group */
2152 if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2153 ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
/* store the luma QP over the CU footprint for deblocking/prediction */
2155 x = y_cb * min_cb_width + x_cb;
2156 for (y = 0; y < length; y++) {
2157 memset(&s->qp_y_tab[x], lc->qp_y, length);
2161 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
/*
 * Recursively parse the coding quadtree starting at (x0, y0): decide
 * (or infer, at the picture border) the split flag, recurse into the
 * four sub-blocks via SUBDIVIDE(), or decode a leaf coding unit.
 * NOTE(review): lines are missing from this view (else branches,
 * closing braces, macro tail), so the control flow shown is partial.
 */
2166 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2167 int log2_cb_size, int cb_depth)
2169 HEVCLocalContext *lc = &s->HEVClc;
2170 const int cb_size = 1 << log2_cb_size;
2172 lc->ct.depth = cb_depth;
/* split_cu_flag is coded only when the block fits inside the picture;
 * otherwise a split is inferred while the CB is above minimum size */
2173 if (x0 + cb_size <= s->sps->width &&
2174 y0 + cb_size <= s->sps->height &&
2175 log2_cb_size > s->sps->log2_min_cb_size) {
2176 SAMPLE(s->split_cu_flag, x0, y0) =
2177 ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2179 SAMPLE(s->split_cu_flag, x0, y0) =
2180 (log2_cb_size > s->sps->log2_min_cb_size);
/* entering a new quantization group: reset the cu_qp_delta state */
2182 if (s->pps->cu_qp_delta_enabled_flag &&
2183 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2184 lc->tu.is_cu_qp_delta_coded = 0;
2185 lc->tu.cu_qp_delta = 0;
2188 if (SAMPLE(s->split_cu_flag, x0, y0)) {
2189 const int cb_size_split = cb_size >> 1;
2190 const int x1 = x0 + cb_size_split;
2191 const int y1 = y0 + cb_size_split;
/* recurse into a quadrant only if it intersects the picture */
2196 #define SUBDIVIDE(x, y) \
2198 if (x < s->sps->width && y < s->sps->height) { \
2199 int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
/* leaf: decode the coding unit at this position/size */
2210 int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/*
 * Set up the per-CTB neighbour availability state before decoding the
 * CTB at (x_ctb, y_ctb): tile extents (end_of_tiles_x/y), slice/tile
 * boundary bitmasks, and the left/up/up-left/up-right availability
 * flags used by CABAC context derivation and intra prediction.
 */
2218 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2221 HEVCLocalContext *lc = &s->HEVClc;
2222 int ctb_size = 1 << s->sps->log2_ctb_size;
2223 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2224 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2226 int tile_left_boundary, tile_up_boundary;
2227 int slice_left_boundary, slice_up_boundary;
/* record which slice owns this CTB (consumed by boundary checks below) */
2229 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2231 if (s->pps->entropy_coding_sync_enabled_flag) {
/* WPP: each CTB row restarts the QP group at its first CTB */
2232 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2233 lc->first_qp_group = 1;
2234 lc->end_of_tiles_x = s->sps->width;
2235 } else if (s->pps->tiles_enabled_flag) {
/* entering a new tile: recompute its horizontal extent */
2236 if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2237 int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2238 lc->start_of_tiles_x = x_ctb;
2239 lc->end_of_tiles_x = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2240 lc->first_qp_group = 1;
2243 lc->end_of_tiles_x = s->sps->width;
2246 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
/* a "boundary" flag here is 1 when the neighbour exists AND belongs to
 * the same tile/slice, i.e. when the neighbour is usable */
2248 if (s->pps->tiles_enabled_flag) {
2249 tile_left_boundary = x_ctb > 0 &&
2250 s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2251 slice_left_boundary = x_ctb > 0 &&
2252 s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2253 tile_up_boundary = y_ctb > 0 &&
2254 s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2255 slice_up_boundary = y_ctb > 0 &&
2256 s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2258 tile_left_boundary =
2259 tile_up_boundary = 1;
2260 slice_left_boundary = ctb_addr_in_slice > 0;
2261 slice_up_boundary = ctb_addr_in_slice >= s->sps->ctb_width;
/* pack into 2-bit masks: bit 0 = slice boundary, bit 1 = tile boundary */
2263 lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2264 lc->slice_or_tiles_up_boundary = (!slice_up_boundary + (!tile_up_boundary << 1));
2265 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2266 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2267 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2268 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
/*
 * Decode the CTBs of the current slice in tile-scan order: for each CTB
 * set up neighbour state, (re)initialize CABAC, parse SAO parameters and
 * the coding quadtree, then run the in-loop filters.  Returns the next
 * CTB address in tile scan (or an error from the quadtree parse).
 * NOTE(review): error-handling lines are missing from this view.
 */
2271 static int hls_slice_data(HEVCContext *s)
2273 int ctb_size = 1 << s->sps->log2_ctb_size;
2277 int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* loop until end_of_slice_flag or all CTBs of the picture are done */
2280 while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2281 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* raster address -> luma sample position of the CTB */
2283 x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2284 y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2285 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2287 ff_hevc_cabac_init(s, ctb_addr_ts);
2289 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
/* per-CTB deblocking parameters come from the slice header */
2291 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2292 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2293 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2295 ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2298 more_data = !ff_hevc_end_of_slice_flag_decode(s);
/* save CABAC state for WPP/tiles, then filter the completed CTB */
2301 ff_hevc_save_states(s, ctb_addr_ts);
2302 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* last CTB of the picture: run the filters on the final CTB as well */
2305 if (x_ctb + ctb_size >= s->sps->width &&
2306 y_ctb + ctb_size >= s->sps->height)
2307 ff_hevc_hls_filter(s, x_ctb, y_ctb);
/*
 * Parse the two-byte HEVC NAL unit header: forbidden_zero_bit,
 * nal_unit_type (6 bits), nuh_layer_id (6 bits) and
 * nuh_temporal_id_plus1 (3 bits).
 *
2313 * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2314 * 0 if the unit should be skipped, 1 otherwise
2316 static int hls_nal_unit(HEVCContext *s)
2318 GetBitContext *gb = &s->HEVClc.gb;
/* forbidden_zero_bit must be 0 in a conforming stream */
2321 if (get_bits1(gb) != 0)
2322 return AVERROR_INVALIDDATA;
2324 s->nal_unit_type = get_bits(gb, 6);
2326 nuh_layer_id = get_bits(gb, 6);
/* nuh_temporal_id_plus1 == 0 is forbidden, hence the < 0 check */
2327 s->temporal_id = get_bits(gb, 3) - 1;
2328 if (s->temporal_id < 0)
2329 return AVERROR_INVALIDDATA;
/* FIX: the format string was missing the ", " separator before
 * temporal_id, producing e.g. "nuh_layer_id: 0temporal_id: 0" */
2331 av_log(s->avctx, AV_LOG_DEBUG,
2332 "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
2333 s->nal_unit_type, nuh_layer_id, s->temporal_id);
/* only the base layer (nuh_layer_id == 0) is decoded */
2335 return nuh_layer_id == 0;
/*
 * After SAO, copy the unfiltered pixels of PCM/transquant-bypass blocks
 * (marked in s->is_pcm, one flag per min-PU) from s->frame back into
 * s->sao_frame, for all three planes, so those blocks stay lossless.
 * NOTE(review): assumes frame and sao_frame share the same linesize —
 * only s->frame->linesize is used for both; confirm against allocation.
 */
2338 static void restore_tqb_pixels(HEVCContext *s)
2340 int min_pu_size = 1 << s->sps->log2_min_pu_size;
2343 for (c_idx = 0; c_idx < 3; c_idx++) {
2344 ptrdiff_t stride = s->frame->linesize[c_idx];
/* chroma subsampling shifts for this plane */
2345 int hshift = s->sps->hshift[c_idx];
2346 int vshift = s->sps->vshift[c_idx];
/* scan the picture in min-PU granularity */
2347 for (y = 0; y < s->sps->min_pu_height; y++) {
2348 for (x = 0; x < s->sps->min_pu_width; x++) {
2349 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
/* copy one min-PU block row by row, accounting for bit depth */
2351 int len = min_pu_size >> hshift;
2352 uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2353 uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2354 for (n = 0; n < (min_pu_size >> vshift); n++) {
2355 memcpy(dst, src, len);
/*
 * Attach stereoscopic (3D) side data to the output frame when a valid
 * SEI frame-packing arrangement (types 3..5) was signalled.
 * NOTE(review): case labels and breaks of the switch are missing from
 * this view; types 3/4/5 map to side-by-side / top-bottom / frame
 * sequence respectively, per the visible assignments.
 */
2365 static int set_side_data(HEVCContext *s)
2367 AVFrame *out = s->ref->frame;
/* only arrangement types 3..5 with interpretation 1..2 are exported */
2369 if (s->sei_frame_packing_present &&
2370 s->frame_packing_arrangement_type >= 3 &&
2371 s->frame_packing_arrangement_type <= 5 &&
2372 s->content_interpretation_type > 0 &&
2373 s->content_interpretation_type < 3) {
2374 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2376 return AVERROR(ENOMEM);
2378 switch (s->frame_packing_arrangement_type) {
2380 if (s->quincunx_subsampling)
2381 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2383 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2386 stereo->type = AV_STEREO3D_TOPBOTTOM;
2389 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* interpretation 2 means the left/right views are swapped */
2393 if (s->content_interpretation_type == 2)
2394 stereo->flags = AV_STEREO3D_FLAG_INVERT;
/*
 * Per-frame setup: clear the deblocking/CBF/PCM tables, reset tile
 * state, allocate the new reference frame (SAO double-buffered if
 * enabled), build the frame RPS, attach side data and push a frame to
 * the output queue before signalling the other threads.
 * NOTE(review): error-path lines ("fail:" label etc.) are missing here.
 */
2400 static int hevc_frame_start(HEVCContext *s)
2402 HEVCLocalContext *lc = &s->HEVClc;
/* reset per-picture boundary-strength and coding tables */
2405 memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2406 memset(s->vertical_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2407 memset(s->cbf_luma, 0, s->sps->min_tb_width * s->sps->min_tb_height);
2408 memset(s->is_pcm, 0, s->sps->min_pu_width * s->sps->min_pu_height);
2410 lc->start_of_tiles_x = 0;
2413 if (s->pps->tiles_enabled_flag)
2414 lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
/* with SAO the decoder writes into sao_frame and keeps frame pristine */
2416 ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
/* scratch buffer for motion compensation at picture edges */
2421 av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size,
2422 (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]);
2423 if (!lc->edge_emu_buffer) {
2424 ret = AVERROR(ENOMEM);
2428 ret = ff_hevc_frame_rps(s);
2430 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2434 ret = set_side_data(s);
/* bump a finished frame out of the DPB for output */
2438 av_frame_unref(s->output_frame);
2439 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* frame-threading: setup is complete, other threads may proceed */
2443 ff_thread_finish_setup(s->avctx);
2449 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/*
 * Decode a single NAL unit: parse its header, then dispatch on
 * nal_unit_type (VPS/SPS/PPS/SEI/slice).  For slice NALs this parses
 * the slice header, handles random-access (RASL/CRA/BLA) gating,
 * starts a new frame when needed, builds reference lists and decodes
 * the slice data.
 * NOTE(review): many case labels, gotos and error checks are missing
 * from this view of the file.
 */
2454 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2456 HEVCLocalContext *lc = &s->HEVClc;
2457 GetBitContext *gb = &lc->gb;
2458 int ctb_addr_ts, ret;
2460 ret = init_get_bits8(gb, nal, length);
/* hls_nal_unit() returns 0 to skip, 1 to decode, <0 on error */
2464 ret = hls_nal_unit(s);
2466 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2468 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2474 switch (s->nal_unit_type) {
2476 ret = ff_hevc_decode_nal_vps(s);
2481 ret = ff_hevc_decode_nal_sps(s);
2486 ret = ff_hevc_decode_nal_pps(s);
2490 case NAL_SEI_PREFIX:
2491 case NAL_SEI_SUFFIX:
2492 ret = ff_hevc_decode_nal_sei(s);
2503 case NAL_BLA_W_RADL:
2505 case NAL_IDR_W_RADL:
2512 ret = hls_slice_header(s);
/* first random-access point: remember the POC limit for RASL skipping */
2516 if (s->max_ra == INT_MAX) {
2517 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2521 s->max_ra = INT_MIN;
/* RASL pictures preceding the recovery point are not decodable */
2525 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2526 s->poc <= s->max_ra) {
2530 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2531 s->max_ra = INT_MIN;
2534 if (s->sh.first_slice_in_pic_flag) {
2535 ret = hevc_frame_start(s);
2538 } else if (!s->ref) {
2539 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2540 return AVERROR_INVALIDDATA;
/* build L0/L1 reference picture lists for P/B slices */
2543 if (!s->sh.dependent_slice_segment_flag &&
2544 s->sh.slice_type != I_SLICE) {
2545 ret = ff_hevc_slice_rpl(s);
2547 av_log(s->avctx, AV_LOG_WARNING,
2548 "Error constructing the reference lists for the current slice.\n");
2549 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* hls_slice_data() returns the next CTB address or an error */
2554 ctb_addr_ts = hls_slice_data(s);
2555 if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
/* picture complete: restore lossless blocks clobbered by SAO */
2557 if ((s->pps->transquant_bypass_enable_flag ||
2558 (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2559 s->sps->sao_enabled)
2560 restore_tqb_pixels(s);
2563 if (ctb_addr_ts < 0)
/* EOS/EOB: start a new coded video sequence */
2568 s->seq_decode = (s->seq_decode + 1) & 0xff;
2569 s->max_ra = INT_MAX;
2575 av_log(s->avctx, AV_LOG_INFO,
2576 "Skipping NAL unit %d\n", s->nal_unit_type);
2582 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2583 * between these functions would be nice. */
/*
 * Unescape a NAL unit: copy src into nal->rbsp_buffer while removing
 * 00 00 03 emulation-prevention bytes, stopping at the next start code.
 * Returns the number of consumed source bytes.
 * NOTE(review): several lines (fast-path exits, length fixups, return)
 * are missing from this view.
 */
2584 static int extract_rbsp(const uint8_t *src, int length,
/* detect 00 00 0x with x <= 3: either an escape (x == 3) or a start
 * code, which means we ran past the end of this NAL */
2590 #define STARTCODE_TEST \
2591 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
2592 if (src[i + 2] != 3) { \
2593 /* startcode, so we must be past the end */ \
2598 #if HAVE_FAST_UNALIGNED
2599 #define FIND_FIRST_ZERO \
2600 if (i > 0 && !src[i]) \
/* SWAR scan: test 8 bytes at a time for any zero byte */
2605 for (i = 0; i + 1 < length; i += 9) {
2606 if (!((~AV_RN64A(src + i) &
2607 (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2608 0x8000800080008080ULL))
/* 32-bit variant of the same zero-byte scan */
2615 for (i = 0; i + 1 < length; i += 5) {
2616 if (!((~AV_RN32A(src + i) &
2617 (AV_RN32A(src + i) - 0x01000101U)) &
2624 #endif /* HAVE_FAST_64BIT */
/* portable fallback: byte-pair scan */
2626 for (i = 0; i + 1 < length; i += 2) {
2629 if (i > 0 && src[i - 1] == 0)
2633 #endif /* HAVE_FAST_UNALIGNED */
2635 if (i >= length - 1) { // no escaped 0
2641 av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2642 length + FF_INPUT_BUFFER_PADDING_SIZE);
2643 if (!nal->rbsp_buffer)
2644 return AVERROR(ENOMEM);
2646 dst = nal->rbsp_buffer;
/* bulk-copy the escape-free prefix, then filter the remainder */
2648 memcpy(dst, src, i);
2650 while (si + 2 < length) {
2651 // remove escapes (very rare 1:2^22)
2652 if (src[si + 2] > 3) {
2653 dst[di++] = src[si++];
2654 dst[di++] = src[si++];
2655 } else if (src[si] == 0 && src[si + 1] == 0) {
2656 if (src[si + 2] == 3) { // escape
2662 } else // next start code
2666 dst[di++] = src[si++];
2669 dst[di++] = src[si++];
/* zero the padding so the bitstream reader never reads garbage */
2672 memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
/*
 * Split the input packet into NAL units (either length-prefixed, for
 * NALFF/hvcC streams, or Annex-B start-code delimited), unescape each
 * into s->nals[], then decode them in order.
 * NOTE(review): the start-code scanning path and several error/cleanup
 * lines are missing from this view.
 */
2679 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2681 int i, consumed, ret = 0;
2686 /* split the input packet into NAL units, so we know the upper bound on the
2687 * number of slices in the frame */
2689 while (length >= 4) {
2691 int extract_length = 0;
/* NALFF: big-endian length prefix of s->nal_length_size bytes */
2695 for (i = 0; i < s->nal_length_size; i++)
2696 extract_length = (extract_length << 8) | buf[i];
2697 buf += s->nal_length_size;
2698 length -= s->nal_length_size;
2700 if (extract_length > length) {
2701 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2702 ret = AVERROR_INVALIDDATA;
/* Annex B: require a 00 00 01 start code */
2711 if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2712 ret = AVERROR_INVALIDDATA;
2718 extract_length = length;
/* grow the NAL array one entry at a time, zeroing the new slots */
2721 if (s->nals_allocated < s->nb_nals + 1) {
2722 int new_size = s->nals_allocated + 1;
2723 HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2725 ret = AVERROR(ENOMEM);
2729 memset(s->nals + s->nals_allocated, 0,
2730 (new_size - s->nals_allocated) * sizeof(*tmp));
2731 s->nals_allocated = new_size;
2733 nal = &s->nals[s->nb_nals++];
2735 consumed = extract_rbsp(buf, extract_length, nal);
2741 ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
/* end-of-sequence/bitstream: stop queueing further NALs */
2746 if (s->nal_unit_type == NAL_EOB_NUT ||
2747 s->nal_unit_type == NAL_EOS_NUT)
2754 /* parse the NAL units */
2755 for (i = 0; i < s->nb_nals; i++) {
2756 int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2758 av_log(s->avctx, AV_LOG_WARNING,
2759 "Error parsing NAL unit #%d.\n", i);
2760 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* make sure waiting threads are released even on failure */
2767 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters, no newline. */
2772 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2775 for (i = 0; i < 16; i++)
2776 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/*
 * Verify the decoded frame against the per-plane MD5 checksums received
 * in the picture-hash SEI (s->md5).  For >8bpp formats the samples are
 * byteswapped to little-endian into s->checksum_buf before hashing.
 * Returns 0 on match, AVERROR_INVALIDDATA on mismatch.
 */
2779 static int verify_md5(HEVCContext *s, AVFrame *frame)
2781 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2786 return AVERROR(EINVAL);
/* 1 for >8-bit formats, i.e. 2 bytes per sample */
2788 pixel_shift = desc->comp[0].depth_minus1 > 7;
2790 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2793 /* the checksums are LE, so we have to byteswap for >8bpp formats
/* lazily allocate a one-line byteswap buffer sized to the widest plane */
2796 if (pixel_shift && !s->checksum_buf) {
2797 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2798 FFMAX3(frame->linesize[0], frame->linesize[1],
2799 frame->linesize[2]));
2800 if (!s->checksum_buf)
2801 return AVERROR(ENOMEM);
/* hash each plane row by row (chroma planes use subsampled dimensions) */
2805 for (i = 0; frame->data[i]; i++) {
2806 int width = s->avctx->coded_width;
2807 int height = s->avctx->coded_height;
2808 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2809 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2812 av_md5_init(s->md5_ctx);
2813 for (j = 0; j < h; j++) {
2814 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2817 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2818 (const uint16_t*)src, w);
2819 src = s->checksum_buf;
2822 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2824 av_md5_final(s->md5_ctx, md5);
2826 if (!memcmp(md5, s->md5[i], 16)) {
2827 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2828 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2829 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2831 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2832 print_md5(s->avctx, AV_LOG_ERROR, md5);
2833 av_log (s->avctx, AV_LOG_ERROR, " != ");
2834 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2835 av_log (s->avctx, AV_LOG_ERROR, "\n");
2836 return AVERROR_INVALIDDATA;
2840 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/*
 * AVCodec.decode entry point: on a flush packet (no data) drain the
 * DPB; otherwise decode the packet's NAL units, optionally verify the
 * SEI MD5 checksum, and return a buffered output frame if available.
 * NOTE(review): the flush branch and several returns are missing from
 * this view.
 */
2845 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2849 HEVCContext *s = avctx->priv_data;
/* flush: output any remaining frame from the DPB */
2852 ret = ff_hevc_output_frame(s, data, 1);
2861 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2865 /* verify the SEI checksum */
2866 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2868 ret = verify_md5(s, s->ref->frame);
/* with AV_EF_EXPLODE a checksum mismatch discards the frame */
2869 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2870 ff_hevc_unref_frame(s, s->ref, ~0);
2876 if (s->is_decoded) {
2877 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
/* hand a queued output frame to the caller, transferring ownership */
2881 if (s->output_frame->buf[0]) {
2882 av_frame_move_ref(data, s->output_frame);
/*
 * Make dst a reference to src (frame-threading frame copy): ref the
 * underlying frame and the shared tab_mvf/rpl_tab/rpl buffers, and
 * copy the plain-value metadata.  On any allocation failure everything
 * acquired so far is released via ff_hevc_unref_frame.
 */
2889 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2891 int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
/* buffers are refcounted; the table pointers can be shared directly */
2895 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2896 if (!dst->tab_mvf_buf)
2898 dst->tab_mvf = src->tab_mvf;
2900 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2901 if (!dst->rpl_tab_buf)
2903 dst->rpl_tab = src->rpl_tab;
2905 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
/* scalar metadata is copied by value */
2909 dst->poc = src->poc;
2910 dst->ctb_count = src->ctb_count;
2911 dst->window = src->window;
2912 dst->flags = src->flags;
2913 dst->sequence = src->sequence;
/* error path: undo all partial refs taken above */
2917 ff_hevc_unref_frame(s, dst, ~0);
2918 return AVERROR(ENOMEM);
/*
 * Free all decoder state: scratch buffers, MD5 context, temp/output
 * frames, the DPB, the VPS/SPS/PPS lists and the NAL array.  Safe to
 * call on a partially-initialized context (used as the init error path).
 */
2921 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2923 HEVCContext *s = avctx->priv_data;
2924 HEVCLocalContext *lc = &s->HEVClc;
2929 av_freep(&lc->edge_emu_buffer);
2930 av_freep(&s->md5_ctx);
2932 av_frame_free(&s->tmp_frame);
2933 av_frame_free(&s->output_frame);
/* release every DPB slot: drop references first, then the frame itself */
2935 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2936 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2937 av_frame_free(&s->DPB[i].frame);
2940 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2941 av_buffer_unref(&s->vps_list[i]);
2942 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2943 av_buffer_unref(&s->sps_list[i]);
2944 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2945 av_buffer_unref(&s->pps_list[i]);
/* free up to nals_allocated, not nb_nals: all slots own a buffer */
2947 for (i = 0; i < s->nals_allocated; i++)
2948 av_freep(&s->nals[i].rbsp_buffer);
2950 s->nals_allocated = 0;
/*
 * Allocate the per-context resources shared by init and thread-copy
 * init: temp/output frames, DPB frames, MD5 context, DSP tables.  On
 * any failure, everything is released through hevc_decode_free().
 */
2955 static av_cold int hevc_init_context(AVCodecContext *avctx)
2957 HEVCContext *s = avctx->priv_data;
2962 s->tmp_frame = av_frame_alloc();
2966 s->output_frame = av_frame_alloc();
2967 if (!s->output_frame)
2970 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2971 s->DPB[i].frame = av_frame_alloc();
2972 if (!s->DPB[i].frame)
/* ThreadFrame wraps the plain AVFrame for frame-threaded decoding */
2974 s->DPB[i].tf.f = s->DPB[i].frame;
/* INT_MAX means "no random-access point seen yet" */
2977 s->max_ra = INT_MAX;
2979 s->md5_ctx = av_md5_alloc();
2983 ff_dsputil_init(&s->dsp, avctx);
2985 s->context_initialized = 1;
/* error path: free partially allocated state */
2990 hevc_decode_free(avctx);
2991 return AVERROR(ENOMEM);
/*
 * Frame-threading context sync: copy the DPB (by reference), the
 * VPS/SPS/PPS lists and the sequence/random-access bookkeeping from
 * the source thread's context into this one.
 */
2994 static int hevc_update_thread_context(AVCodecContext *dst,
2995 const AVCodecContext *src)
2997 HEVCContext *s = dst->priv_data;
2998 HEVCContext *s0 = src->priv_data;
3001 if (!s->context_initialized) {
3002 ret = hevc_init_context(dst);
/* re-reference every occupied DPB slot from the source context */
3007 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3008 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3009 if (s0->DPB[i].frame->buf[0]) {
3010 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
/* parameter sets are refcounted buffers: drop ours, ref theirs */
3016 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3017 av_buffer_unref(&s->vps_list[i]);
3018 if (s0->vps_list[i]) {
3019 s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3020 if (!s->vps_list[i])
3021 return AVERROR(ENOMEM);
3025 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3026 av_buffer_unref(&s->sps_list[i]);
3027 if (s0->sps_list[i]) {
3028 s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3029 if (!s->sps_list[i])
3030 return AVERROR(ENOMEM);
3034 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3035 av_buffer_unref(&s->pps_list[i]);
3036 if (s0->pps_list[i]) {
3037 s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3038 if (!s->pps_list[i])
3039 return AVERROR(ENOMEM);
/* re-derive dimensions/tables if the active SPS changed */
3043 if (s->sps != s0->sps)
3044 ret = set_sps(s, s0->sps);
3046 s->seq_decode = s0->seq_decode;
3047 s->seq_output = s0->seq_output;
3048 s->pocTid0 = s0->pocTid0;
3049 s->max_ra = s0->max_ra;
3051 s->is_nalff = s0->is_nalff;
3052 s->nal_length_size = s0->nal_length_size;
/* NOTE(review): condition guarding this sequence bump is not visible
 * in this view of the file */
3055 s->seq_decode = (s->seq_decode + 1) & 0xff;
3056 s->max_ra = INT_MAX;
/*
 * Parse codec extradata: either an hvcC configuration record (arrays of
 * parameter-set NAL units with 2-byte length prefixes) or raw Annex-B
 * data, feeding every contained NAL unit through decode_nal_units().
 */
3062 static int hevc_decode_extradata(HEVCContext *s)
3064 AVCodecContext *avctx = s->avctx;
3068 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* heuristics: anything that does not start like Annex-B is hvcC */
3070 if (avctx->extradata_size > 3 &&
3071 (avctx->extradata[0] || avctx->extradata[1] ||
3072 avctx->extradata[2] > 1)) {
3073 /* It seems the extradata is encoded as hvcC format.
3074 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3075 * is finalized. When finalized, configurationVersion will be 1 and we
3076 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3077 int i, j, num_arrays, nal_len_size;
/* skip the fixed 21-byte head of HEVCDecoderConfigurationRecord */
3081 bytestream2_skip(&gb, 21);
3082 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3083 num_arrays = bytestream2_get_byte(&gb);
3085 /* nal units in the hvcC always have length coded with 2 bytes,
3086 * so put a fake nal_length_size = 2 while parsing them */
3087 s->nal_length_size = 2;
3089 /* Decode nal units from hvcC. */
3090 for (i = 0; i < num_arrays; i++) {
3091 int type = bytestream2_get_byte(&gb) & 0x3f;
3092 int cnt = bytestream2_get_be16(&gb);
3094 for (j = 0; j < cnt; j++) {
3095 // +2 for the nal size field
3096 int nalsize = bytestream2_peek_be16(&gb) + 2;
3097 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3098 av_log(s->avctx, AV_LOG_ERROR,
3099 "Invalid NAL unit size in extradata.\n");
3100 return AVERROR_INVALIDDATA;
3103 ret = decode_nal_units(s, gb.buffer, nalsize);
3105 av_log(avctx, AV_LOG_ERROR,
3106 "Decoding nal unit %d %d from hvcC failed\n",
3110 bytestream2_skip(&gb, nalsize);
3114 /* Now store right nal length size, that will be used to parse
3116 s->nal_length_size = nal_len_size;
/* Annex-B extradata: decode it directly */
3119 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
/*
 * AVCodec.init: set up CABAC tables and the decoder context, then
 * parse any extradata (parameter sets) supplied by the container.
 */
3126 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3128 HEVCContext *s = avctx->priv_data;
3131 ff_init_cabac_states();
/* frame threading needs per-frame progress reporting */
3133 avctx->internal->allocate_progress = 1;
3135 ret = hevc_init_context(avctx);
3139 if (avctx->extradata_size > 0 && avctx->extradata) {
3140 ret = hevc_decode_extradata(s);
/* extradata failure: tear down everything allocated above */
3142 hevc_decode_free(avctx);
/*
 * Frame-threading worker init: start from a zeroed context (the
 * priv_data was memcpy'd from the main thread) and allocate fresh
 * per-thread resources; state is synced later by update_thread_context.
 */
3150 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3152 HEVCContext *s = avctx->priv_data;
3155 memset(s, 0, sizeof(*s));
3157 ret = hevc_init_context(avctx);
/*
 * AVCodec.flush: drop all DPB frames and reset the random-access
 * gate so decoding can restart cleanly after a seek.
 */
3164 static void hevc_decode_flush(AVCodecContext *avctx)
3166 HEVCContext *s = avctx->priv_data;
3167 ff_hevc_flush_dpb(s);
3168 s->max_ra = INT_MAX;
/* AVOption table and AVClass for the decoder's private options. */
3171 #define OFFSET(x) offsetof(HEVCContext, x)
3172 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3173 static const AVOption options[] = {
/* crop the output to the default display window signalled in the VUI */
3174 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3175 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3179 static const AVClass hevc_decoder_class = {
3180 .class_name = "HEVC decoder",
3181 .item_name = av_default_item_name,
3183 .version = LIBAVUTIL_VERSION_INT,
3186 AVCodec ff_hevc_decoder = {
3188 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3189 .type = AVMEDIA_TYPE_VIDEO,
3190 .id = AV_CODEC_ID_HEVC,
3191 .priv_data_size = sizeof(HEVCContext),
3192 .priv_class = &hevc_decoder_class,
3193 .init = hevc_decode_init,
3194 .close = hevc_decode_free,
3195 .decode = hevc_decode_frame,
3196 .flush = hevc_decode_flush,
3197 .update_thread_context = hevc_update_thread_context,
3198 .init_thread_copy = hevc_init_thread_copy,
3199 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3200 CODEC_CAP_FRAME_THREADS,