git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/internal.h"
  29 #include "libavutil/md5.h"
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32
  33 #include "bytestream.h"
  34 #include "cabac_functions.h"
  35 #include "dsputil.h"
  36 #include "golomb.h"
  37 #include "hevc.h"
  38
  39 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  40 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  41 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  42
  43 static const uint8_t scan_1x1[1] = { 0 };
  44
  45 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  46
  47 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  48
  49 static const uint8_t horiz_scan4x4_x[16] = {
  50     0, 1, 2, 3,
  51     0, 1, 2, 3,
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54 };
  55
  56 static const uint8_t horiz_scan4x4_y[16] = {
  57     0, 0, 0, 0,
  58     1, 1, 1, 1,
  59     2, 2, 2, 2,
  60     3, 3, 3, 3,
  61 };
  62
  63 static const uint8_t horiz_scan8x8_inv[8][8] = {
  64     {  0,  1,  2,  3, 16, 17, 18, 19, },
  65     {  4,  5,  6,  7, 20, 21, 22, 23, },
  66     {  8,  9, 10, 11, 24, 25, 26, 27, },
  67     { 12, 13, 14, 15, 28, 29, 30, 31, },
  68     { 32, 33, 34, 35, 48, 49, 50, 51, },
  69     { 36, 37, 38, 39, 52, 53, 54, 55, },
  70     { 40, 41, 42, 43, 56, 57, 58, 59, },
  71     { 44, 45, 46, 47, 60, 61, 62, 63, },
  72 };
  73
  74 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  75
  76 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  77
  78 static const uint8_t diag_scan2x2_inv[2][2] = {
  79     { 0, 2, },
  80     { 1, 3, },
  81 };
  82
  83 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  84     0, 0, 1, 0,
  85     1, 2, 0, 1,
  86     2, 3, 1, 2,
  87     3, 2, 3, 3,
  88 };
  89
  90 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  91     0, 1, 0, 2,
  92     1, 0, 3, 2,
  93     1, 0, 3, 2,
  94     1, 3, 2, 3,
  95 };
  96
  97 static const uint8_t diag_scan4x4_inv[4][4] = {
  98     { 0,  2,  5,  9, },
  99     { 1,  4,  8, 12, },
 100     { 3,  7, 11, 14, },
 101     { 6, 10, 13, 15, },
 102 };
 103
 104 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 105     0, 0, 1, 0,
 106     1, 2, 0, 1,
 107     2, 3, 0, 1,
 108     2, 3, 4, 0,
 109     1, 2, 3, 4,
 110     5, 0, 1, 2,
 111     3, 4, 5, 6,
 112     0, 1, 2, 3,
 113     4, 5, 6, 7,
 114     1, 2, 3, 4,
 115     5, 6, 7, 2,
 116     3, 4, 5, 6,
 117     7, 3, 4, 5,
 118     6, 7, 4, 5,
 119     6, 7, 5, 6,
 120     7, 6, 7, 7,
 121 };
 122
 123 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 124     0, 1, 0, 2,
 125     1, 0, 3, 2,
 126     1, 0, 4, 3,
 127     2, 1, 0, 5,
 128     4, 3, 2, 1,
 129     0, 6, 5, 4,
 130     3, 2, 1, 0,
 131     7, 6, 5, 4,
 132     3, 2, 1, 0,
 133     7, 6, 5, 4,
 134     3, 2, 1, 7,
 135     6, 5, 4, 3,
 136     2, 7, 6, 5,
 137     4, 3, 7, 6,
 138     5, 4, 7, 6,
 139     5, 7, 6, 7,
 140 };
 141
 142 static const uint8_t diag_scan8x8_inv[8][8] = {
 143     {  0,  2,  5,  9, 14, 20, 27, 35, },
 144     {  1,  4,  8, 13, 19, 26, 34, 42, },
 145     {  3,  7, 12, 18, 25, 33, 41, 48, },
 146     {  6, 11, 17, 24, 32, 40, 47, 53, },
 147     { 10, 16, 23, 31, 39, 46, 52, 57, },
 148     { 15, 22, 30, 38, 45, 51, 56, 60, },
 149     { 21, 29, 37, 44, 50, 55, 59, 62, },
 150     { 28, 36, 43, 49, 54, 58, 61, 63, },
 151 };
 152
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
 157
 158 /**
 159  * Section 5.7
 160  */
 161
 162 /* free everything allocated  by pic_arrays_init() */
 163 static void pic_arrays_free(HEVCContext *s)
 164 {
 165     av_freep(&s->sao);
 166     av_freep(&s->deblock);
 167     av_freep(&s->split_cu_flag);
 168
 169     av_freep(&s->skip_flag);
 170     av_freep(&s->tab_ct_depth);
 171
 172     av_freep(&s->tab_ipm);
 173     av_freep(&s->cbf_luma);
 174     av_freep(&s->is_pcm);
 175
 176     av_freep(&s->qp_y_tab);
 177     av_freep(&s->tab_slice_address);
 178     av_freep(&s->filter_slice_edges);
 179
 180     av_freep(&s->horizontal_bs);
 181     av_freep(&s->vertical_bs);
 182
 183     av_buffer_pool_uninit(&s->tab_mvf_pool);
 184     av_buffer_pool_uninit(&s->rpl_tab_pool);
 185 }
 186
 187 /* allocate arrays that depend on frame dimensions */
 188 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 189 {
 190     int log2_min_cb_size = sps->log2_min_cb_size;
 191     int width            = sps->width;
 192     int height           = sps->height;
 193     int pic_size         = width * height;
 194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 195                            ((height >> log2_min_cb_size) + 1);
 196     int ctb_count        = sps->ctb_width * sps->ctb_height;
 197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 198
 199     s->bs_width  = width  >> 3;
 200     s->bs_height = height >> 3;
 201
 202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 204     s->split_cu_flag = av_malloc(pic_size);
 205     if (!s->sao || !s->deblock || !s->split_cu_flag)
 206         goto fail;
 207
 208     s->skip_flag    = av_malloc(pic_size_in_ctb);
 209     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 210     if (!s->skip_flag || !s->tab_ct_depth)
 211         goto fail;
 212
 213     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 214     s->tab_ipm  = av_malloc(min_pu_size);
 215     s->is_pcm   = av_malloc(min_pu_size);
 216     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 217         goto fail;
 218
 219     s->filter_slice_edges = av_malloc(ctb_count);
 220     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 221                                       sizeof(*s->tab_slice_address));
 222     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 223                                       sizeof(*s->qp_y_tab));
 224     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 225         goto fail;
 226
 227     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 228     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 229     if (!s->horizontal_bs || !s->vertical_bs)
 230         goto fail;
 231
 232     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 233                                           av_buffer_alloc);
 234     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 235                                           av_buffer_allocz);
 236     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 237         goto fail;
 238
 239     return 0;
 240
 241 fail:
 242     pic_arrays_free(s);
 243     return AVERROR(ENOMEM);
 244 }
 245
 246 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 247 {
 248     int i = 0;
 249     int j = 0;
 250     uint8_t luma_weight_l0_flag[16];
 251     uint8_t chroma_weight_l0_flag[16];
 252     uint8_t luma_weight_l1_flag[16];
 253     uint8_t chroma_weight_l1_flag[16];
 254
 255     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 256     if (s->sps->chroma_format_idc != 0) {
 257         int delta = get_se_golomb(gb);
 258         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 259     }
 260
 261     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 262         luma_weight_l0_flag[i] = get_bits1(gb);
 263         if (!luma_weight_l0_flag[i]) {
 264             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 265             s->sh.luma_offset_l0[i] = 0;
 266         }
 267     }
 268     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 269         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 270             chroma_weight_l0_flag[i] = get_bits1(gb);
 271     } else {
 272         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 273             chroma_weight_l0_flag[i] = 0;
 274     }
 275     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 276         if (luma_weight_l0_flag[i]) {
 277             int delta_luma_weight_l0 = get_se_golomb(gb);
 278             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 279             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 280         }
 281         if (chroma_weight_l0_flag[i]) {
 282             for (j = 0; j < 2; j++) {
 283                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 284                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 285                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 286                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 287                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 288             }
 289         } else {
 290             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 291             s->sh.chroma_offset_l0[i][0] = 0;
 292             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 293             s->sh.chroma_offset_l0[i][1] = 0;
 294         }
 295     }
 296     if (s->sh.slice_type == B_SLICE) {
 297         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 298             luma_weight_l1_flag[i] = get_bits1(gb);
 299             if (!luma_weight_l1_flag[i]) {
 300                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 301                 s->sh.luma_offset_l1[i] = 0;
 302             }
 303         }
 304         if (s->sps->chroma_format_idc != 0) {
 305             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 306                 chroma_weight_l1_flag[i] = get_bits1(gb);
 307         } else {
 308             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 309                 chroma_weight_l1_flag[i] = 0;
 310         }
 311         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 312             if (luma_weight_l1_flag[i]) {
 313                 int delta_luma_weight_l1 = get_se_golomb(gb);
 314                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 315                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 316             }
 317             if (chroma_weight_l1_flag[i]) {
 318                 for (j = 0; j < 2; j++) {
 319                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 320                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 321                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 322                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 323                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 324                 }
 325             } else {
 326                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 327                 s->sh.chroma_offset_l1[i][0] = 0;
 328                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 329                 s->sh.chroma_offset_l1[i][1] = 0;
 330             }
 331         }
 332     }
 333 }
 334
 335 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 336 {
 337     const HEVCSPS *sps = s->sps;
 338     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 339     int prev_delta_msb = 0;
 340     int nb_sps = 0, nb_sh;
 341     int i;
 342
 343     rps->nb_refs = 0;
 344     if (!sps->long_term_ref_pics_present_flag)
 345         return 0;
 346
 347     if (sps->num_long_term_ref_pics_sps > 0)
 348         nb_sps = get_ue_golomb_long(gb);
 349     nb_sh = get_ue_golomb_long(gb);
 350
 351     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 352         return AVERROR_INVALIDDATA;
 353
 354     rps->nb_refs = nb_sh + nb_sps;
 355
 356     for (i = 0; i < rps->nb_refs; i++) {
 357         uint8_t delta_poc_msb_present;
 358
 359         if (i < nb_sps) {
 360             uint8_t lt_idx_sps = 0;
 361
 362             if (sps->num_long_term_ref_pics_sps > 1)
 363                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 364
 365             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 366             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 367         } else {
 368             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 369             rps->used[i] = get_bits1(gb);
 370         }
 371
 372         delta_poc_msb_present = get_bits1(gb);
 373         if (delta_poc_msb_present) {
 374             int delta = get_ue_golomb_long(gb);
 375
 376             if (i && i != nb_sps)
 377                 delta += prev_delta_msb;
 378
 379             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 380             prev_delta_msb = delta;
 381         }
 382     }
 383
 384     return 0;
 385 }
 386
 387 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 388 {
 389     int ret;
 390
 391     pic_arrays_free(s);
 392     ret = pic_arrays_init(s, sps);
 393     if (ret < 0)
 394         goto fail;
 395
 396     s->avctx->coded_width         = sps->width;
 397     s->avctx->coded_height        = sps->height;
 398     s->avctx->width               = sps->output_width;
 399     s->avctx->height              = sps->output_height;
 400     s->avctx->pix_fmt             = sps->pix_fmt;
 401     s->avctx->sample_aspect_ratio = sps->vui.sar;
 402     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 403
 404     if (sps->vui.video_signal_type_present_flag)
 405         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 406                                                                : AVCOL_RANGE_MPEG;
 407     else
 408         s->avctx->color_range = AVCOL_RANGE_MPEG;
 409
 410     if (sps->vui.colour_description_present_flag) {
 411         s->avctx->color_primaries = sps->vui.colour_primaries;
 412         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 413         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 414     } else {
 415         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 416         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 417         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 418     }
 419
 420     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 421     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 422     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 423
 424     if (sps->sao_enabled) {
 425         av_frame_unref(s->tmp_frame);
 426         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 427         if (ret < 0)
 428             goto fail;
 429         s->frame = s->tmp_frame;
 430     }
 431
 432     s->sps = sps;
 433     s->vps = s->vps_list[s->sps->vps_id];
 434     return 0;
 435
 436 fail:
 437     pic_arrays_free(s);
 438     s->sps = NULL;
 439     return ret;
 440 }
 441
 442 static int hls_slice_header(HEVCContext *s)
 443 {
 444     GetBitContext *gb = &s->HEVClc.gb;
 445     SliceHeader *sh   = &s->sh;
 446     int i, ret;
 447
 448     // Coded parameters
 449     sh->first_slice_in_pic_flag = get_bits1(gb);
 450     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 451         s->seq_decode = (s->seq_decode + 1) & 0xff;
 452         s->max_ra     = INT_MAX;
 453         if (IS_IDR(s))
 454             ff_hevc_clear_refs(s);
 455     }
 456     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
 457         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 458
 459     sh->pps_id = get_ue_golomb_long(gb);
 460     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 461         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 462         return AVERROR_INVALIDDATA;
 463     }
 464     if (!sh->first_slice_in_pic_flag &&
 465         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 466         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 467         return AVERROR_INVALIDDATA;
 468     }
 469     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 470
 471     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 472         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 473
 474         ff_hevc_clear_refs(s);
 475         ret = set_sps(s, s->sps);
 476         if (ret < 0)
 477             return ret;
 478
 479         s->seq_decode = (s->seq_decode + 1) & 0xff;
 480         s->max_ra     = INT_MAX;
 481     }
 482
 483     sh->dependent_slice_segment_flag = 0;
 484     if (!sh->first_slice_in_pic_flag) {
 485         int slice_address_length;
 486
 487         if (s->pps->dependent_slice_segments_enabled_flag)
 488             sh->dependent_slice_segment_flag = get_bits1(gb);
 489
 490         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 491                                             s->sps->ctb_height);
 492         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 493         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 494             av_log(s->avctx, AV_LOG_ERROR,
 495                    "Invalid slice segment address: %u.\n",
 496                    sh->slice_segment_addr);
 497             return AVERROR_INVALIDDATA;
 498         }
 499
 500         if (!sh->dependent_slice_segment_flag) {
 501             sh->slice_addr = sh->slice_segment_addr;
 502             s->slice_idx++;
 503         }
 504     } else {
 505         sh->slice_segment_addr = sh->slice_addr = 0;
 506         s->slice_idx           = 0;
 507         s->slice_initialized   = 0;
 508     }
 509
 510     if (!sh->dependent_slice_segment_flag) {
 511         s->slice_initialized = 0;
 512
 513         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 514             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 515
 516         sh->slice_type = get_ue_golomb_long(gb);
 517         if (!(sh->slice_type == I_SLICE ||
 518               sh->slice_type == P_SLICE ||
 519               sh->slice_type == B_SLICE)) {
 520             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 521                    sh->slice_type);
 522             return AVERROR_INVALIDDATA;
 523         }
 524         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 525             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 526             return AVERROR_INVALIDDATA;
 527         }
 528
 529         if (s->pps->output_flag_present_flag)
 530             sh->pic_output_flag = get_bits1(gb);
 531
 532         if (s->sps->separate_colour_plane_flag)
 533             sh->colour_plane_id = get_bits(gb, 2);
 534
 535         if (!IS_IDR(s)) {
 536             int short_term_ref_pic_set_sps_flag, poc;
 537
 538             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 539             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 540             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 541                 av_log(s->avctx, AV_LOG_WARNING,
 542                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 543                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 544                     return AVERROR_INVALIDDATA;
 545                 poc = s->poc;
 546             }
 547             s->poc = poc;
 548
 549             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 550             if (!short_term_ref_pic_set_sps_flag) {
 551                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 552                 if (ret < 0)
 553                     return ret;
 554
 555                 sh->short_term_rps = &sh->slice_rps;
 556             } else {
 557                 int numbits, rps_idx;
 558
 559                 if (!s->sps->nb_st_rps) {
 560                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 561                     return AVERROR_INVALIDDATA;
 562                 }
 563
 564                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 565                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 566                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 567             }
 568
 569             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 570             if (ret < 0) {
 571                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 572                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 573                     return AVERROR_INVALIDDATA;
 574             }
 575
 576             if (s->sps->sps_temporal_mvp_enabled_flag)
 577                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 578             else
 579                 sh->slice_temporal_mvp_enabled_flag = 0;
 580         } else {
 581             s->sh.short_term_rps = NULL;
 582             s->poc               = 0;
 583         }
 584
 585         /* 8.3.1 */
 586         if (s->temporal_id == 0 &&
 587             s->nal_unit_type != NAL_TRAIL_N &&
 588             s->nal_unit_type != NAL_TSA_N   &&
 589             s->nal_unit_type != NAL_STSA_N  &&
 590             s->nal_unit_type != NAL_RADL_N  &&
 591             s->nal_unit_type != NAL_RADL_R  &&
 592             s->nal_unit_type != NAL_RASL_N  &&
 593             s->nal_unit_type != NAL_RASL_R)
 594             s->pocTid0 = s->poc;
 595
 596         if (s->sps->sao_enabled) {
 597             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 598             sh->slice_sample_adaptive_offset_flag[1] =
 599             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 600         } else {
 601             sh->slice_sample_adaptive_offset_flag[0] = 0;
 602             sh->slice_sample_adaptive_offset_flag[1] = 0;
 603             sh->slice_sample_adaptive_offset_flag[2] = 0;
 604         }
 605
 606         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 607         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 608             int nb_refs;
 609
 610             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 611             if (sh->slice_type == B_SLICE)
 612                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 613
 614             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 615                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 616                 if (sh->slice_type == B_SLICE)
 617                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 618             }
 619             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 620                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 621                        sh->nb_refs[L0], sh->nb_refs[L1]);
 622                 return AVERROR_INVALIDDATA;
 623             }
 624
 625             sh->rpl_modification_flag[0] = 0;
 626             sh->rpl_modification_flag[1] = 0;
 627             nb_refs = ff_hevc_frame_nb_refs(s);
 628             if (!nb_refs) {
 629                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 630                 return AVERROR_INVALIDDATA;
 631             }
 632
 633             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 634                 sh->rpl_modification_flag[0] = get_bits1(gb);
 635                 if (sh->rpl_modification_flag[0]) {
 636                     for (i = 0; i < sh->nb_refs[L0]; i++)
 637                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 638                 }
 639
 640                 if (sh->slice_type == B_SLICE) {
 641                     sh->rpl_modification_flag[1] = get_bits1(gb);
 642                     if (sh->rpl_modification_flag[1] == 1)
 643                         for (i = 0; i < sh->nb_refs[L1]; i++)
 644                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 645                 }
 646             }
 647
 648             if (sh->slice_type == B_SLICE)
 649                 sh->mvd_l1_zero_flag = get_bits1(gb);
 650
 651             if (s->pps->cabac_init_present_flag)
 652                 sh->cabac_init_flag = get_bits1(gb);
 653             else
 654                 sh->cabac_init_flag = 0;
 655
 656             sh->collocated_ref_idx = 0;
 657             if (sh->slice_temporal_mvp_enabled_flag) {
 658                 sh->collocated_list = L0;
 659                 if (sh->slice_type == B_SLICE)
 660                     sh->collocated_list = !get_bits1(gb);
 661
 662                 if (sh->nb_refs[sh->collocated_list] > 1) {
 663                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 664                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 665                         av_log(s->avctx, AV_LOG_ERROR,
 666                                "Invalid collocated_ref_idx: %d.\n",
 667                                sh->collocated_ref_idx);
 668                         return AVERROR_INVALIDDATA;
 669                     }
 670                 }
 671             }
 672
 673             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 674                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 675                 pred_weight_table(s, gb);
 676             }
 677
 678             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 679             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 680                 av_log(s->avctx, AV_LOG_ERROR,
 681                        "Invalid number of merging MVP candidates: %d.\n",
 682                        sh->max_num_merge_cand);
 683                 return AVERROR_INVALIDDATA;
 684             }
 685         }
 686
 687         sh->slice_qp_delta = get_se_golomb(gb);
 688         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 689             sh->slice_cb_qp_offset = get_se_golomb(gb);
 690             sh->slice_cr_qp_offset = get_se_golomb(gb);
 691         } else {
 692             sh->slice_cb_qp_offset = 0;
 693             sh->slice_cr_qp_offset = 0;
 694         }
 695
 696         if (s->pps->deblocking_filter_control_present_flag) {
 697             int deblocking_filter_override_flag = 0;
 698
 699             if (s->pps->deblocking_filter_override_enabled_flag)
 700                 deblocking_filter_override_flag = get_bits1(gb);
 701
 702             if (deblocking_filter_override_flag) {
 703                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 704                 if (!sh->disable_deblocking_filter_flag) {
 705                     sh->beta_offset = get_se_golomb(gb) * 2;
 706                     sh->tc_offset   = get_se_golomb(gb) * 2;
 707                 }
 708             } else {
 709                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 710                 sh->beta_offset                    = s->pps->beta_offset;
 711                 sh->tc_offset                      = s->pps->tc_offset;
 712             }
 713         } else {
 714             sh->disable_deblocking_filter_flag = 0;
 715             sh->beta_offset                    = 0;
 716             sh->tc_offset                      = 0;
 717         }
 718
 719         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 720             (sh->slice_sample_adaptive_offset_flag[0] ||
 721              sh->slice_sample_adaptive_offset_flag[1] ||
 722              !sh->disable_deblocking_filter_flag)) {
 723             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 724         } else {
 725             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 726         }
 727     } else if (!s->slice_initialized) {
 728         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 729         return AVERROR_INVALIDDATA;
 730     }
 731
 732     sh->num_entry_point_offsets = 0;
 733     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 734         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 735         if (sh->num_entry_point_offsets > 0) {
 736             int offset_len = get_ue_golomb_long(gb) + 1;
 737
 738             for (i = 0; i < sh->num_entry_point_offsets; i++)
 739                 skip_bits(gb, offset_len);
 740         }
 741     }
 742
 743     if (s->pps->slice_header_extension_present_flag) {
 744         int length = get_ue_golomb_long(gb);
 745         for (i = 0; i < length; i++)
 746             skip_bits(gb, 8);  // slice_header_extension_data_byte
 747     }
 748
 749     // Inferred parameters
 750     sh->slice_qp          = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 751     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 752
 753     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 754
 755     if (!s->pps->cu_qp_delta_enabled_flag)
 756         s->HEVClc.qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) %
 757                           (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset;
 758
 759     s->slice_initialized = 1;
 760
 761     return 0;
 762 }
 763
 764 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 765
 766 #define SET_SAO(elem, value)                            \
 767 do {                                                    \
 768     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 769         sao->elem = value;                              \
 770     else if (sao_merge_left_flag)                       \
 771         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 772     else if (sao_merge_up_flag)                         \
 773         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 774     else                                                \
 775         sao->elem = 0;                                  \
 776 } while (0)
 777
 778 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 779 {
 780     HEVCLocalContext *lc    = &s->HEVClc;
 781     int sao_merge_left_flag = 0;
 782     int sao_merge_up_flag   = 0;
 783     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 784     SAOParams *sao          = &CTB(s->sao, rx, ry);
 785     int c_idx, i;
 786
 787     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 788         s->sh.slice_sample_adaptive_offset_flag[1]) {
 789         if (rx > 0) {
 790             if (lc->ctb_left_flag)
 791                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 792         }
 793         if (ry > 0 && !sao_merge_left_flag) {
 794             if (lc->ctb_up_flag)
 795                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 796         }
 797     }
 798
 799     for (c_idx = 0; c_idx < 3; c_idx++) {
 800         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 801             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 802             continue;
 803         }
 804
 805         if (c_idx == 2) {
 806             sao->type_idx[2] = sao->type_idx[1];
 807             sao->eo_class[2] = sao->eo_class[1];
 808         } else {
 809             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 810         }
 811
 812         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 813             continue;
 814
 815         for (i = 0; i < 4; i++)
 816             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 817
 818         if (sao->type_idx[c_idx] == SAO_BAND) {
 819             for (i = 0; i < 4; i++) {
 820                 if (sao->offset_abs[c_idx][i]) {
 821                     SET_SAO(offset_sign[c_idx][i],
 822                             ff_hevc_sao_offset_sign_decode(s));
 823                 } else {
 824                     sao->offset_sign[c_idx][i] = 0;
 825                 }
 826             }
 827             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 828         } else if (c_idx != 2) {
 829             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 830         }
 831
 832         // Inferred parameters
 833         sao->offset_val[c_idx][0] = 0;
 834         for (i = 0; i < 4; i++) {
 835             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 836             if (sao->type_idx[c_idx] == SAO_EDGE) {
 837                 if (i > 1)
 838                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 839             } else if (sao->offset_sign[c_idx][i]) {
 840                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 841             }
 842         }
 843     }
 844 }
 845
 846 #undef SET_SAO
 847 #undef CTB
 848
 849 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 850                                 int log2_trafo_size, enum ScanType scan_idx,
 851                                 int c_idx)
 852 {
 853 #define GET_COORD(offset, n)                                    \
 854     do {                                                        \
 855         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 856         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 857     } while (0)
 858     HEVCLocalContext *lc    = &s->HEVClc;
 859     int transform_skip_flag = 0;
 860
 861     int last_significant_coeff_x, last_significant_coeff_y;
 862     int last_scan_pos;
 863     int n_end;
 864     int num_coeff    = 0;
 865     int greater1_ctx = 1;
 866
 867     int num_last_subset;
 868     int x_cg_last_sig, y_cg_last_sig;
 869
 870     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 871
 872     ptrdiff_t stride = s->frame->linesize[c_idx];
 873     int hshift       = s->sps->hshift[c_idx];
 874     int vshift       = s->sps->vshift[c_idx];
 875     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 876                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 877     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 878     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 879
 880     int trafo_size = 1 << log2_trafo_size;
 881     int i, qp, shift, add, scale, scale_m;
 882     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 883     const uint8_t *scale_matrix;
 884     uint8_t dc_scale;
 885
 886     // Derive QP for dequant
 887     if (!lc->cu.cu_transquant_bypass_flag) {
 888         static const int qp_c[] = {
 889             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 890         };
 891
 892         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 893             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 894             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 895             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 896         };
 897
 898         static const uint8_t div6[51 + 2 * 6 + 1] = {
 899             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 900             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 901             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 902         };
 903         int qp_y = lc->qp_y;
 904
 905         if (c_idx == 0) {
 906             qp = qp_y + s->sps->qp_bd_offset;
 907         } else {
 908             int qp_i, offset;
 909
 910             if (c_idx == 1)
 911                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 912             else
 913                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 914
 915             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 916             if (qp_i < 30)
 917                 qp = qp_i;
 918             else if (qp_i > 43)
 919                 qp = qp_i - 6;
 920             else
 921                 qp = qp_c[qp_i - 30];
 922
 923             qp += s->sps->qp_bd_offset;
 924         }
 925
 926         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 927         add      = 1 << (shift - 1);
 928         scale    = level_scale[rem6[qp]] << (div6[qp]);
 929         scale_m  = 16; // default when no custom scaling lists.
 930         dc_scale = 16;
 931
 932         if (s->sps->scaling_list_enable_flag) {
 933             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 934                                     &s->pps->scaling_list : &s->sps->scaling_list;
 935             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 936
 937             if (log2_trafo_size != 5)
 938                 matrix_id = 3 * matrix_id + c_idx;
 939
 940             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 941             if (log2_trafo_size >= 4)
 942                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 943         }
 944     }
 945
 946     if (s->pps->transform_skip_enabled_flag &&
 947         !lc->cu.cu_transquant_bypass_flag   &&
 948         log2_trafo_size == 2) {
 949         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 950     }
 951
 952     last_significant_coeff_x =
 953         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 954     last_significant_coeff_y =
 955         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 956
 957     if (last_significant_coeff_x > 3) {
 958         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 959         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 960                                    (2 + (last_significant_coeff_x & 1)) +
 961                                    suffix;
 962     }
 963
 964     if (last_significant_coeff_y > 3) {
 965         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
 966         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
 967                                    (2 + (last_significant_coeff_y & 1)) +
 968                                    suffix;
 969     }
 970
 971     if (scan_idx == SCAN_VERT)
 972         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
 973
 974     x_cg_last_sig = last_significant_coeff_x >> 2;
 975     y_cg_last_sig = last_significant_coeff_y >> 2;
 976
 977     switch (scan_idx) {
 978     case SCAN_DIAG: {
 979         int last_x_c = last_significant_coeff_x & 3;
 980         int last_y_c = last_significant_coeff_y & 3;
 981
 982         scan_x_off = ff_hevc_diag_scan4x4_x;
 983         scan_y_off = ff_hevc_diag_scan4x4_y;
 984         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
 985         if (trafo_size == 4) {
 986             scan_x_cg = scan_1x1;
 987             scan_y_cg = scan_1x1;
 988         } else if (trafo_size == 8) {
 989             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 990             scan_x_cg  = diag_scan2x2_x;
 991             scan_y_cg  = diag_scan2x2_y;
 992         } else if (trafo_size == 16) {
 993             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 994             scan_x_cg  = ff_hevc_diag_scan4x4_x;
 995             scan_y_cg  = ff_hevc_diag_scan4x4_y;
 996         } else { // trafo_size == 32
 997             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
 998             scan_x_cg  = ff_hevc_diag_scan8x8_x;
 999             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1000         }
1001         break;
1002     }
1003     case SCAN_HORIZ:
1004         scan_x_cg  = horiz_scan2x2_x;
1005         scan_y_cg  = horiz_scan2x2_y;
1006         scan_x_off = horiz_scan4x4_x;
1007         scan_y_off = horiz_scan4x4_y;
1008         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1009         break;
1010     default: //SCAN_VERT
1011         scan_x_cg  = horiz_scan2x2_y;
1012         scan_y_cg  = horiz_scan2x2_x;
1013         scan_x_off = horiz_scan4x4_y;
1014         scan_y_off = horiz_scan4x4_x;
1015         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1016         break;
1017     }
1018     num_coeff++;
1019     num_last_subset = (num_coeff - 1) >> 4;
1020
1021     for (i = num_last_subset; i >= 0; i--) {
1022         int n, m;
1023         int x_cg, y_cg, x_c, y_c;
1024         int implicit_non_zero_coeff = 0;
1025         int64_t trans_coeff_level;
1026         int prev_sig = 0;
1027         int offset   = i << 4;
1028
1029         uint8_t significant_coeff_flag_idx[16];
1030         uint8_t nb_significant_coeff_flag = 0;
1031
1032         x_cg = scan_x_cg[i];
1033         y_cg = scan_y_cg[i];
1034
1035         if (i < num_last_subset && i > 0) {
1036             int ctx_cg = 0;
1037             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1038                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1039             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1040                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1041
1042             significant_coeff_group_flag[x_cg][y_cg] =
1043                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1044             implicit_non_zero_coeff = 1;
1045         } else {
1046             significant_coeff_group_flag[x_cg][y_cg] =
1047                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1048                  (x_cg == 0 && y_cg == 0));
1049         }
1050
1051         last_scan_pos = num_coeff - offset - 1;
1052
1053         if (i == num_last_subset) {
1054             n_end                         = last_scan_pos - 1;
1055             significant_coeff_flag_idx[0] = last_scan_pos;
1056             nb_significant_coeff_flag     = 1;
1057         } else {
1058             n_end = 15;
1059         }
1060
1061         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1062             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1063         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1064             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1065
1066         for (n = n_end; n >= 0; n--) {
1067             GET_COORD(offset, n);
1068
1069             if (significant_coeff_group_flag[x_cg][y_cg] &&
1070                 (n > 0 || implicit_non_zero_coeff == 0)) {
1071                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1072                                                           log2_trafo_size,
1073                                                           scan_idx,
1074                                                           prev_sig) == 1) {
1075                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1076                     nb_significant_coeff_flag++;
1077                     implicit_non_zero_coeff = 0;
1078                 }
1079             } else {
1080                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1081                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1082                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1083                     nb_significant_coeff_flag++;
1084                 }
1085             }
1086         }
1087
1088         n_end = nb_significant_coeff_flag;
1089
1090         if (n_end) {
1091             int first_nz_pos_in_cg = 16;
1092             int last_nz_pos_in_cg = -1;
1093             int c_rice_param = 0;
1094             int first_greater1_coeff_idx = -1;
1095             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1096             uint16_t coeff_sign_flag;
1097             int sum_abs = 0;
1098             int sign_hidden = 0;
1099
1100             // initialize first elem of coeff_bas_level_greater1_flag
1101             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1102
1103             if (!(i == num_last_subset) && greater1_ctx == 0)
1104                 ctx_set++;
1105             greater1_ctx      = 1;
1106             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1107
1108             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1109                 int n_idx = significant_coeff_flag_idx[m];
1110                 int inc   = (ctx_set << 2) + greater1_ctx;
1111                 coeff_abs_level_greater1_flag[n_idx] =
1112                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1113                 if (coeff_abs_level_greater1_flag[n_idx]) {
1114                     greater1_ctx = 0;
1115                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1116                     greater1_ctx++;
1117                 }
1118
1119                 if (coeff_abs_level_greater1_flag[n_idx] &&
1120                     first_greater1_coeff_idx == -1)
1121                     first_greater1_coeff_idx = n_idx;
1122             }
1123             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1124             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1125                                  !lc->cu.cu_transquant_bypass_flag;
1126
1127             if (first_greater1_coeff_idx != -1) {
1128                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1129             }
1130             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1131                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1132             } else {
1133                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1134             }
1135
1136             for (m = 0; m < n_end; m++) {
1137                 n = significant_coeff_flag_idx[m];
1138                 GET_COORD(offset, n);
1139                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1140                 if (trans_coeff_level == ((m < 8) ?
1141                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1142                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1143
1144                     trans_coeff_level += last_coeff_abs_level_remaining;
1145                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1146                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1147                 }
1148                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1149                     sum_abs += trans_coeff_level;
1150                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1151                         trans_coeff_level = -trans_coeff_level;
1152                 }
1153                 if (coeff_sign_flag >> 15)
1154                     trans_coeff_level = -trans_coeff_level;
1155                 coeff_sign_flag <<= 1;
1156                 if (!lc->cu.cu_transquant_bypass_flag) {
1157                     if (s->sps->scaling_list_enable_flag) {
1158                         if (y_c || x_c || log2_trafo_size < 4) {
1159                             int pos;
1160                             switch (log2_trafo_size) {
1161                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1162                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1163                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1164                             default: pos = (y_c        << 2) +  x_c;
1165                             }
1166                             scale_m = scale_matrix[pos];
1167                         } else {
1168                             scale_m = dc_scale;
1169                         }
1170                     }
1171                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1172                     if(trans_coeff_level < 0) {
1173                         if((~trans_coeff_level) & 0xFffffffffff8000)
1174                             trans_coeff_level = -32768;
1175                     } else {
1176                         if (trans_coeff_level & 0xffffffffffff8000)
1177                             trans_coeff_level = 32767;
1178                     }
1179                 }
1180                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1181             }
1182         }
1183     }
1184
1185     if (lc->cu.cu_transquant_bypass_flag) {
1186         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1187     } else {
1188         if (transform_skip_flag)
1189             s->hevcdsp.transform_skip(dst, coeffs, stride);
1190         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1191                  log2_trafo_size == 2)
1192             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1193         else
1194             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1195     }
1196 }
1197
1198 static void hls_transform_unit(HEVCContext *s, int x0, int y0,
1199                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1200                                int log2_cb_size, int log2_trafo_size,
1201                                int trafo_depth, int blk_idx)
1202 {
1203     HEVCLocalContext *lc = &s->HEVClc;
1204
1205     if (lc->cu.pred_mode == MODE_INTRA) {
1206         int trafo_size = 1 << log2_trafo_size;
1207         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1208
1209         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1210         if (log2_trafo_size > 2) {
1211             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1212             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1213             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1214             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1215         } else if (blk_idx == 3) {
1216             trafo_size = trafo_size << s->sps->hshift[1];
1217             ff_hevc_set_neighbour_available(s, xBase, yBase,
1218                                             trafo_size, trafo_size);
1219             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1220             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1221         }
1222     }
1223
1224     if (lc->tt.cbf_luma ||
1225         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1226         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1227         int scan_idx   = SCAN_DIAG;
1228         int scan_idx_c = SCAN_DIAG;
1229
1230         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1231             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1232             if (lc->tu.cu_qp_delta != 0)
1233                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1234                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1235             lc->tu.is_cu_qp_delta_coded = 1;
1236             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1237         }
1238
1239         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1240             if (lc->tu.cur_intra_pred_mode >= 6 &&
1241                 lc->tu.cur_intra_pred_mode <= 14) {
1242                 scan_idx = SCAN_VERT;
1243             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1244                        lc->tu.cur_intra_pred_mode <= 30) {
1245                 scan_idx = SCAN_HORIZ;
1246             }
1247
1248             if (lc->pu.intra_pred_mode_c >=  6 &&
1249                 lc->pu.intra_pred_mode_c <= 14) {
1250                 scan_idx_c = SCAN_VERT;
1251             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1252                        lc->pu.intra_pred_mode_c <= 30) {
1253                 scan_idx_c = SCAN_HORIZ;
1254             }
1255         }
1256
1257         if (lc->tt.cbf_luma)
1258             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1259         if (log2_trafo_size > 2) {
1260             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1261                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1262             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1263                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1264         } else if (blk_idx == 3) {
1265             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1266                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1267             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1268                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1269         }
1270     }
1271 }
1272
1273 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1274 {
1275     int cb_size          = 1 << log2_cb_size;
1276     int log2_min_pu_size = s->sps->log2_min_pu_size;
1277
1278     int min_pu_width     = s->sps->min_pu_width;
1279     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1280     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1281     int i, j;
1282
1283     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1284         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1285             s->is_pcm[i + j * min_pu_width] = 2;
1286 }
1287
1288 static void hls_transform_tree(HEVCContext *s, int x0, int y0,
1289                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1290                                int log2_cb_size, int log2_trafo_size,
1291                                int trafo_depth, int blk_idx)
1292 {
1293     HEVCLocalContext *lc = &s->HEVClc;
1294     uint8_t split_transform_flag;
1295
1296     if (trafo_depth > 0 && log2_trafo_size == 2) {
1297         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1298             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1299         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1300             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1301     } else {
1302         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1303         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1304     }
1305
1306     if (lc->cu.intra_split_flag) {
1307         if (trafo_depth == 1)
1308             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1309     } else {
1310         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1311     }
1312
1313     lc->tt.cbf_luma = 1;
1314
1315     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1316                               lc->cu.pred_mode == MODE_INTER &&
1317                               lc->cu.part_mode != PART_2Nx2N &&
1318                               trafo_depth == 0;
1319
1320     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1321         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1322         trafo_depth     < lc->cu.max_trafo_depth       &&
1323         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1324         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1325     } else {
1326         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1327                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1328                                lc->tt.inter_split_flag;
1329     }
1330
1331     if (log2_trafo_size > 2) {
1332         if (trafo_depth == 0 ||
1333             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1334             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1335                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1336         }
1337
1338         if (trafo_depth == 0 ||
1339             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1340             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1341                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1342         }
1343     }
1344
1345     if (split_transform_flag) {
1346         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1347         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1348
1349         hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1350                            log2_trafo_size - 1, trafo_depth + 1, 0);
1351         hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1352                            log2_trafo_size - 1, trafo_depth + 1, 1);
1353         hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1354                            log2_trafo_size - 1, trafo_depth + 1, 2);
1355         hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1356                            log2_trafo_size - 1, trafo_depth + 1, 3);
1357     } else {
1358         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1359         int log2_min_tu_size = s->sps->log2_min_tb_size;
1360         int min_tu_width     = s->sps->min_tb_width;
1361
1362         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1363             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1364             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1365             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1366         }
1367
1368         hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1369                            log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
1370
1371         // TODO: store cbf_luma somewhere else
1372         if (lc->tt.cbf_luma) {
1373             int i, j;
1374             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1375                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1376                     int x_tu = (x0 + j) >> log2_min_tu_size;
1377                     int y_tu = (y0 + i) >> log2_min_tu_size;
1378                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1379                 }
1380         }
1381         if (!s->sh.disable_deblocking_filter_flag) {
1382             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1383                                                   lc->slice_or_tiles_up_boundary,
1384                                                   lc->slice_or_tiles_left_boundary);
1385             if (s->pps->transquant_bypass_enable_flag &&
1386                 lc->cu.cu_transquant_bypass_flag)
1387                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1388         }
1389     }
1390 }
1391
1392 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1393 {
1394     //TODO: non-4:2:0 support
1395     HEVCLocalContext *lc = &s->HEVClc;
1396     GetBitContext gb;
1397     int cb_size   = 1 << log2_cb_size;
1398     int stride0   = s->frame->linesize[0];
1399     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1400     int   stride1 = s->frame->linesize[1];
1401     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1402     int   stride2 = s->frame->linesize[2];
1403     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1404
1405     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth;
1406     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1407     int ret;
1408
1409     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1410                                           lc->slice_or_tiles_up_boundary,
1411                                           lc->slice_or_tiles_left_boundary);
1412
1413     ret = init_get_bits(&gb, pcm, length);
1414     if (ret < 0)
1415         return ret;
1416
1417     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1418     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1419     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth);
1420     return 0;
1421 }
1422
1423 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1424 {
1425     HEVCLocalContext *lc = &s->HEVClc;
1426     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1427     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1428
1429     if (x)
1430         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1431     if (y)
1432         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1433
1434     switch (x) {
1435     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1436     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1437     case 0: lc->pu.mvd.x = 0;                               break;
1438     }
1439
1440     switch (y) {
1441     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1442     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1443     case 0: lc->pu.mvd.y = 0;                               break;
1444     }
1445 }
1446
1447 /**
1448  * 8.5.3.2.2.1 Luma sample interpolation process
1449  *
1450  * @param s HEVC decoding context
1451  * @param dst target buffer for block data at block position
1452  * @param dststride stride of the dst buffer
1453  * @param ref reference picture buffer at origin (0, 0)
1454  * @param mv motion vector (relative to block position) to get pixel data from
1455  * @param x_off horizontal position of block from origin (0, 0)
1456  * @param y_off vertical position of block from origin (0, 0)
1457  * @param block_w width of block
1458  * @param block_h height of block
1459  */
1460 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1461                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1462                     int block_w, int block_h)
1463 {
1464     HEVCLocalContext *lc = &s->HEVClc;
1465     uint8_t *src         = ref->data[0];
1466     ptrdiff_t srcstride  = ref->linesize[0];
1467     int pic_width        = s->sps->width;
1468     int pic_height       = s->sps->height;
1469
1470     int mx         = mv->x & 3;
1471     int my         = mv->y & 3;
1472     int extra_left = ff_hevc_qpel_extra_before[mx];
1473     int extra_top  = ff_hevc_qpel_extra_before[my];
1474
1475     x_off += mv->x >> 2;
1476     y_off += mv->y >> 2;
1477     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1478
1479     if (x_off < extra_left || y_off < extra_top ||
1480         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1481         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1482         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1483
1484         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1485                                  srcstride, srcstride,
1486                                  block_w + ff_hevc_qpel_extra[mx],
1487                                  block_h + ff_hevc_qpel_extra[my],
1488                                  x_off - extra_left, y_off - extra_top,
1489                                  pic_width, pic_height);
1490         src = lc->edge_emu_buffer + offset;
1491     }
1492     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1493                                      block_h, lc->mc_buffer);
1494 }
1495
1496 /**
1497  * 8.5.3.2.2.2 Chroma sample interpolation process
1498  *
1499  * @param s HEVC decoding context
1500  * @param dst1 target buffer for block data at block position (U plane)
1501  * @param dst2 target buffer for block data at block position (V plane)
1502  * @param dststride stride of the dst1 and dst2 buffers
1503  * @param ref reference picture buffer at origin (0, 0)
1504  * @param mv motion vector (relative to block position) to get pixel data from
1505  * @param x_off horizontal position of block from origin (0, 0)
1506  * @param y_off vertical position of block from origin (0, 0)
1507  * @param block_w width of block
1508  * @param block_h height of block
1509  */
1510 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1511                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1512                       int x_off, int y_off, int block_w, int block_h)
1513 {
1514     HEVCLocalContext *lc = &s->HEVClc;
1515     uint8_t *src1        = ref->data[1];
1516     uint8_t *src2        = ref->data[2];
1517     ptrdiff_t src1stride = ref->linesize[1];
1518     ptrdiff_t src2stride = ref->linesize[2];
1519     int pic_width        = s->sps->width >> 1;
1520     int pic_height       = s->sps->height >> 1;
1521
1522     int mx = mv->x & 7;
1523     int my = mv->y & 7;
1524
1525     x_off += mv->x >> 3;
1526     y_off += mv->y >> 3;
1527     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1528     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1529
1530     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1531         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1532         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1533         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1534         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1535
1536         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1537                                  src1stride, src1stride,
1538                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1539                                  x_off - EPEL_EXTRA_BEFORE,
1540                                  y_off - EPEL_EXTRA_BEFORE,
1541                                  pic_width, pic_height);
1542
1543         src1 = lc->edge_emu_buffer + offset1;
1544         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1545                                              block_w, block_h, mx, my, lc->mc_buffer);
1546
1547         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1548                                  src2stride, src2stride,
1549                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1550                                  x_off - EPEL_EXTRA_BEFORE,
1551                                  y_off - EPEL_EXTRA_BEFORE,
1552                                  pic_width, pic_height);
1553         src2 = lc->edge_emu_buffer + offset2;
1554         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1555                                              block_w, block_h, mx, my,
1556                                              lc->mc_buffer);
1557     } else {
1558         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1559                                              block_w, block_h, mx, my,
1560                                              lc->mc_buffer);
1561         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1562                                              block_w, block_h, mx, my,
1563                                              lc->mc_buffer);
1564     }
1565 }
1566
1567 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1568                                 const Mv *mv, int y0, int height)
1569 {
1570     int y = (mv->y >> 2) + y0 + height + 9;
1571     ff_thread_await_progress(&ref->tf, y, 0);
1572 }
1573
1574 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1575                                 int nPbW, int nPbH,
1576                                 int log2_cb_size, int partIdx)
1577 {
1578 #define POS(c_idx, x, y)                                                              \
1579     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1580                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1581     HEVCLocalContext *lc = &s->HEVClc;
1582     int merge_idx = 0;
1583     struct MvField current_mv = {{{ 0 }}};
1584
1585     int min_pu_width = s->sps->min_pu_width;
1586
1587     MvField *tab_mvf = s->ref->tab_mvf;
1588     RefPicList  *refPicList = s->ref->refPicList;
1589     HEVCFrame *ref0, *ref1;
1590
1591     int tmpstride = MAX_PB_SIZE;
1592
1593     uint8_t *dst0 = POS(0, x0, y0);
1594     uint8_t *dst1 = POS(1, x0, y0);
1595     uint8_t *dst2 = POS(2, x0, y0);
1596     int log2_min_cb_size = s->sps->log2_min_cb_size;
1597     int min_cb_width     = s->sps->min_cb_width;
1598     int x_cb             = x0 >> log2_min_cb_size;
1599     int y_cb             = y0 >> log2_min_cb_size;
1600     int ref_idx[2];
1601     int mvp_flag[2];
1602     int x_pu, y_pu;
1603     int i, j;
1604
1605     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1606         if (s->sh.max_num_merge_cand > 1)
1607             merge_idx = ff_hevc_merge_idx_decode(s);
1608         else
1609             merge_idx = 0;
1610
1611         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1612                                    1 << log2_cb_size,
1613                                    1 << log2_cb_size,
1614                                    log2_cb_size, partIdx,
1615                                    merge_idx, &current_mv);
1616         x_pu = x0 >> s->sps->log2_min_pu_size;
1617         y_pu = y0 >> s->sps->log2_min_pu_size;
1618
1619         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1620             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1621                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1622     } else { /* MODE_INTER */
1623         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1624         if (lc->pu.merge_flag) {
1625             if (s->sh.max_num_merge_cand > 1)
1626                 merge_idx = ff_hevc_merge_idx_decode(s);
1627             else
1628                 merge_idx = 0;
1629
1630             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1631                                        partIdx, merge_idx, &current_mv);
1632             x_pu = x0 >> s->sps->log2_min_pu_size;
1633             y_pu = y0 >> s->sps->log2_min_pu_size;
1634
1635             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1636                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1637                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1638         } else {
1639             enum InterPredIdc inter_pred_idc = PRED_L0;
1640             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1641             if (s->sh.slice_type == B_SLICE)
1642                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1643
1644             if (inter_pred_idc != PRED_L1) {
1645                 if (s->sh.nb_refs[L0]) {
1646                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1647                     current_mv.ref_idx[0] = ref_idx[0];
1648                 }
1649                 current_mv.pred_flag[0] = 1;
1650                 hls_mvd_coding(s, x0, y0, 0);
1651                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1652                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1653                                          partIdx, merge_idx, &current_mv,
1654                                          mvp_flag[0], 0);
1655                 current_mv.mv[0].x += lc->pu.mvd.x;
1656                 current_mv.mv[0].y += lc->pu.mvd.y;
1657             }
1658
1659             if (inter_pred_idc != PRED_L0) {
1660                 if (s->sh.nb_refs[L1]) {
1661                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1662                     current_mv.ref_idx[1] = ref_idx[1];
1663                 }
1664
1665                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1666                     lc->pu.mvd.x = 0;
1667                     lc->pu.mvd.y = 0;
1668                 } else {
1669                     hls_mvd_coding(s, x0, y0, 1);
1670                 }
1671
1672                 current_mv.pred_flag[1] = 1;
1673                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1674                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1675                                          partIdx, merge_idx, &current_mv,
1676                                          mvp_flag[1], 1);
1677                 current_mv.mv[1].x += lc->pu.mvd.x;
1678                 current_mv.mv[1].y += lc->pu.mvd.y;
1679             }
1680
1681             x_pu = x0 >> s->sps->log2_min_pu_size;
1682             y_pu = y0 >> s->sps->log2_min_pu_size;
1683
1684             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1685                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1686                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1687         }
1688     }
1689
1690     if (current_mv.pred_flag[0]) {
1691         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1692         if (!ref0)
1693             return;
1694         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1695     }
1696     if (current_mv.pred_flag[1]) {
1697         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1698         if (!ref1)
1699             return;
1700         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1701     }
1702
1703     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1704         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1705         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1706
1707         luma_mc(s, tmp, tmpstride, ref0->frame,
1708                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1709
1710         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1711             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1712             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1713                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1714                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1715                                      dst0, s->frame->linesize[0], tmp,
1716                                      tmpstride, nPbW, nPbH);
1717         } else {
1718             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1719         }
1720         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1721                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1722
1723         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1724             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1725             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1726                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1727                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1728                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1729                                      nPbW / 2, nPbH / 2);
1730             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1731                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1732                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1733                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1734                                      nPbW / 2, nPbH / 2);
1735         } else {
1736             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1737             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1738         }
1739     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1740         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1741         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1742
1743         if (!ref1)
1744             return;
1745
1746         luma_mc(s, tmp, tmpstride, ref1->frame,
1747                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1748
1749         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1750             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1751             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1752                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1753                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1754                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1755                                       nPbW, nPbH);
1756         } else {
1757             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1758         }
1759
1760         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1761                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1762
1763         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1764             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1765             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1766                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1767                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1768                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1769             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1770                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1771                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1772                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1773         } else {
1774             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1775             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1776         }
1777     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1778         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1779         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1780         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1781         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1782         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1783         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1784
1785         if (!ref0 || !ref1)
1786             return;
1787
1788         luma_mc(s, tmp, tmpstride, ref0->frame,
1789                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1790         luma_mc(s, tmp2, tmpstride, ref1->frame,
1791                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1792
1793         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1794             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1795             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1796                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1797                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1798                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1799                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1800                                          dst0, s->frame->linesize[0],
1801                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1802         } else {
1803             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1804                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1805         }
1806
1807         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1808                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1809         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1810                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1811
1812         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1813             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1814             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1815                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1816                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1817                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1818                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1819                                          dst1, s->frame->linesize[1], tmp, tmp3,
1820                                          tmpstride, nPbW / 2, nPbH / 2);
1821             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1822                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1823                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1824                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1825                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1826                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1827                                          tmpstride, nPbW / 2, nPbH / 2);
1828         } else {
1829             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1830             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1831         }
1832     }
1833 }
1834
1835 /**
1836  * 8.4.1
1837  */
1838 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1839                                 int prev_intra_luma_pred_flag)
1840 {
1841     HEVCLocalContext *lc = &s->HEVClc;
1842     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1843     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1844     int min_pu_width     = s->sps->min_pu_width;
1845     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1846     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1847     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1848
1849     int cand_up   = (lc->ctb_up_flag || y0b) ?
1850                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1851     int cand_left = (lc->ctb_left_flag || x0b) ?
1852                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1853
1854     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1855
1856     MvField *tab_mvf = s->ref->tab_mvf;
1857     int intra_pred_mode;
1858     int candidate[3];
1859     int i, j;
1860
1861     // intra_pred_mode prediction does not cross vertical CTB boundaries
1862     if ((y0 - 1) < y_ctb)
1863         cand_up = INTRA_DC;
1864
1865     if (cand_left == cand_up) {
1866         if (cand_left < 2) {
1867             candidate[0] = INTRA_PLANAR;
1868             candidate[1] = INTRA_DC;
1869             candidate[2] = INTRA_ANGULAR_26;
1870         } else {
1871             candidate[0] = cand_left;
1872             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1873             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1874         }
1875     } else {
1876         candidate[0] = cand_left;
1877         candidate[1] = cand_up;
1878         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1879             candidate[2] = INTRA_PLANAR;
1880         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1881             candidate[2] = INTRA_DC;
1882         } else {
1883             candidate[2] = INTRA_ANGULAR_26;
1884         }
1885     }
1886
1887     if (prev_intra_luma_pred_flag) {
1888         intra_pred_mode = candidate[lc->pu.mpm_idx];
1889     } else {
1890         if (candidate[0] > candidate[1])
1891             FFSWAP(uint8_t, candidate[0], candidate[1]);
1892         if (candidate[0] > candidate[2])
1893             FFSWAP(uint8_t, candidate[0], candidate[2]);
1894         if (candidate[1] > candidate[2])
1895             FFSWAP(uint8_t, candidate[1], candidate[2]);
1896
1897         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1898         for (i = 0; i < 3; i++)
1899             if (intra_pred_mode >= candidate[i])
1900                 intra_pred_mode++;
1901     }
1902
1903     /* write the intra prediction units into the mv array */
1904     if (!size_in_pus)
1905         size_in_pus = 1;
1906     for (i = 0; i < size_in_pus; i++) {
1907         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1908                intra_pred_mode, size_in_pus);
1909
1910         for (j = 0; j < size_in_pus; j++) {
1911             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1912             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1913             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1914             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1915             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1916             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1917             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1918             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1919             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1920         }
1921     }
1922
1923     return intra_pred_mode;
1924 }
1925
1926 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1927                                           int log2_cb_size, int ct_depth)
1928 {
1929     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1930     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1931     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1932     int y;
1933
1934     for (y = 0; y < length; y++)
1935         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1936                ct_depth, length);
1937 }
1938
1939 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1940                                   int log2_cb_size)
1941 {
1942     HEVCLocalContext *lc = &s->HEVClc;
1943     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1944     uint8_t prev_intra_luma_pred_flag[4];
1945     int split   = lc->cu.part_mode == PART_NxN;
1946     int pb_size = (1 << log2_cb_size) >> split;
1947     int side    = split + 1;
1948     int chroma_mode;
1949     int i, j;
1950
1951     for (i = 0; i < side; i++)
1952         for (j = 0; j < side; j++)
1953             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1954
1955     for (i = 0; i < side; i++) {
1956         for (j = 0; j < side; j++) {
1957             if (prev_intra_luma_pred_flag[2 * i + j])
1958                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1959             else
1960                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1961
1962             lc->pu.intra_pred_mode[2 * i + j] =
1963                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1964                                      prev_intra_luma_pred_flag[2 * i + j]);
1965         }
1966     }
1967
1968     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1969     if (chroma_mode != 4) {
1970         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1971             lc->pu.intra_pred_mode_c = 34;
1972         else
1973             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1974     } else {
1975         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
1976     }
1977 }
1978
1979 static void intra_prediction_unit_default_value(HEVCContext *s,
1980                                                 int x0, int y0,
1981                                                 int log2_cb_size)
1982 {
1983     HEVCLocalContext *lc = &s->HEVClc;
1984     int pb_size          = 1 << log2_cb_size;
1985     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1986     int min_pu_width     = s->sps->min_pu_width;
1987     MvField *tab_mvf     = s->ref->tab_mvf;
1988     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1989     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1990     int j, k;
1991
1992     if (size_in_pus == 0)
1993         size_in_pus = 1;
1994     for (j = 0; j < size_in_pus; j++) {
1995         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1996         for (k = 0; k < size_in_pus; k++)
1997             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
1998     }
1999 }
2000
2001 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2002 {
2003     int cb_size          = 1 << log2_cb_size;
2004     HEVCLocalContext *lc = &s->HEVClc;
2005     int log2_min_cb_size = s->sps->log2_min_cb_size;
2006     int length           = cb_size >> log2_min_cb_size;
2007     int min_cb_width     = s->sps->min_cb_width;
2008     int x_cb             = x0 >> log2_min_cb_size;
2009     int y_cb             = y0 >> log2_min_cb_size;
2010     int x, y;
2011
2012     lc->cu.x                = x0;
2013     lc->cu.y                = y0;
2014     lc->cu.rqt_root_cbf     = 1;
2015     lc->cu.pred_mode        = MODE_INTRA;
2016     lc->cu.part_mode        = PART_2Nx2N;
2017     lc->cu.intra_split_flag = 0;
2018     lc->cu.pcm_flag         = 0;
2019
2020     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2021     for (x = 0; x < 4; x++)
2022         lc->pu.intra_pred_mode[x] = 1;
2023     if (s->pps->transquant_bypass_enable_flag) {
2024         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2025         if (lc->cu.cu_transquant_bypass_flag)
2026             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2027     } else
2028         lc->cu.cu_transquant_bypass_flag = 0;
2029
2030     if (s->sh.slice_type != I_SLICE) {
2031         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2032
2033         lc->cu.pred_mode = MODE_SKIP;
2034         x = y_cb * min_cb_width + x_cb;
2035         for (y = 0; y < length; y++) {
2036             memset(&s->skip_flag[x], skip_flag, length);
2037             x += min_cb_width;
2038         }
2039         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2040     }
2041
2042     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2043         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2044         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2045
2046         if (!s->sh.disable_deblocking_filter_flag)
2047             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2048                                                   lc->slice_or_tiles_up_boundary,
2049                                                   lc->slice_or_tiles_left_boundary);
2050     } else {
2051         if (s->sh.slice_type != I_SLICE)
2052             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2053         if (lc->cu.pred_mode != MODE_INTRA ||
2054             log2_cb_size == s->sps->log2_min_cb_size) {
2055             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2056             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2057                                       lc->cu.pred_mode == MODE_INTRA;
2058         }
2059
2060         if (lc->cu.pred_mode == MODE_INTRA) {
2061             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2062                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2063                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2064                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2065             }
2066             if (lc->cu.pcm_flag) {
2067                 int ret;
2068                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2069                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2070                 if (s->sps->pcm.loop_filter_disable_flag)
2071                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2072
2073                 if (ret < 0)
2074                     return ret;
2075             } else {
2076                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2077             }
2078         } else {
2079             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2080             switch (lc->cu.part_mode) {
2081             case PART_2Nx2N:
2082                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2083                 break;
2084             case PART_2NxN:
2085                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2086                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2087                 break;
2088             case PART_Nx2N:
2089                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2090                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2091                 break;
2092             case PART_2NxnU:
2093                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2094                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2095                 break;
2096             case PART_2NxnD:
2097                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2098                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2099                 break;
2100             case PART_nLx2N:
2101                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2102                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2103                 break;
2104             case PART_nRx2N:
2105                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2106                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2107                 break;
2108             case PART_NxN:
2109                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2110                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2111                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2112                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2113                 break;
2114             }
2115         }
2116
2117         if (!lc->cu.pcm_flag) {
2118             if (lc->cu.pred_mode != MODE_INTRA &&
2119                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2120                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2121             }
2122             if (lc->cu.rqt_root_cbf) {
2123                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2124                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2125                                          s->sps->max_transform_hierarchy_depth_inter;
2126                 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
2127                                    log2_cb_size, 0, 0);
2128             } else {
2129                 if (!s->sh.disable_deblocking_filter_flag)
2130                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2131                                                           lc->slice_or_tiles_up_boundary,
2132                                                           lc->slice_or_tiles_left_boundary);
2133             }
2134         }
2135     }
2136
2137     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2138         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2139
2140     x = y_cb * min_cb_width + x_cb;
2141     for (y = 0; y < length; y++) {
2142         memset(&s->qp_y_tab[x], lc->qp_y, length);
2143         x += min_cb_width;
2144     }
2145
2146     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2147
2148     return 0;
2149 }
2150
2151 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2152                                int log2_cb_size, int cb_depth)
2153 {
2154     HEVCLocalContext *lc = &s->HEVClc;
2155     const int cb_size    = 1 << log2_cb_size;
2156
2157     lc->ct.depth = cb_depth;
2158     if (x0 + cb_size <= s->sps->width  &&
2159         y0 + cb_size <= s->sps->height &&
2160         log2_cb_size > s->sps->log2_min_cb_size) {
2161         SAMPLE(s->split_cu_flag, x0, y0) =
2162             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2163     } else {
2164         SAMPLE(s->split_cu_flag, x0, y0) =
2165             (log2_cb_size > s->sps->log2_min_cb_size);
2166     }
2167     if (s->pps->cu_qp_delta_enabled_flag &&
2168         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2169         lc->tu.is_cu_qp_delta_coded = 0;
2170         lc->tu.cu_qp_delta          = 0;
2171     }
2172
2173     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2174         const int cb_size_split = cb_size >> 1;
2175         const int x1 = x0 + cb_size_split;
2176         const int y1 = y0 + cb_size_split;
2177
2178         log2_cb_size--;
2179         cb_depth++;
2180
2181 #define SUBDIVIDE(x, y)                                                \
2182 do {                                                                   \
2183     if (x < s->sps->width && y < s->sps->height) {                     \
2184         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2185         if (ret < 0)                                                   \
2186             return ret;                                                \
2187     }                                                                  \
2188 } while (0)
2189
2190         SUBDIVIDE(x0, y0);
2191         SUBDIVIDE(x1, y0);
2192         SUBDIVIDE(x0, y1);
2193         SUBDIVIDE(x1, y1);
2194     } else {
2195         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2196         if (ret < 0)
2197             return ret;
2198     }
2199
2200     return 0;
2201 }
2202
2203 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2204                                  int ctb_addr_ts)
2205 {
2206     HEVCLocalContext *lc  = &s->HEVClc;
2207     int ctb_size          = 1 << s->sps->log2_ctb_size;
2208     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2209     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2210
2211     int tile_left_boundary, tile_up_boundary;
2212     int slice_left_boundary, slice_up_boundary;
2213
2214     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2215
2216     if (s->pps->entropy_coding_sync_enabled_flag) {
2217         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2218             lc->first_qp_group = 1;
2219         lc->end_of_tiles_x = s->sps->width;
2220     } else if (s->pps->tiles_enabled_flag) {
2221         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2222             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2223             lc->start_of_tiles_x = x_ctb;
2224             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2225             lc->first_qp_group   = 1;
2226         }
2227     } else {
2228         lc->end_of_tiles_x = s->sps->width;
2229     }
2230
2231     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2232
2233     if (s->pps->tiles_enabled_flag) {
2234         tile_left_boundary  = x_ctb > 0 &&
2235                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2236         slice_left_boundary = x_ctb > 0 &&
2237                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2238         tile_up_boundary  = y_ctb > 0 &&
2239                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2240         slice_up_boundary = y_ctb > 0 &&
2241                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2242     } else {
2243         tile_left_boundary  =
2244         tile_up_boundary    = 1;
2245         slice_left_boundary = ctb_addr_in_slice > 0;
2246         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2247     }
2248     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2249     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2250     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2251     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2252     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2253     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2254 }
2255
2256 static int hls_slice_data(HEVCContext *s)
2257 {
2258     int ctb_size    = 1 << s->sps->log2_ctb_size;
2259     int more_data   = 1;
2260     int x_ctb       = 0;
2261     int y_ctb       = 0;
2262     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2263     int ret;
2264
2265     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2266         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2267
2268         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2269         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2270         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2271
2272         ff_hevc_cabac_init(s, ctb_addr_ts);
2273
2274         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2275
2276         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2277         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2278         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2279
2280         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2281         if (ret < 0)
2282             return ret;
2283         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2284
2285         ctb_addr_ts++;
2286         ff_hevc_save_states(s, ctb_addr_ts);
2287         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2288     }
2289
2290     if (x_ctb + ctb_size >= s->sps->width &&
2291         y_ctb + ctb_size >= s->sps->height)
2292         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2293
2294     return ctb_addr_ts;
2295 }
2296
2297 /**
2298  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2299  * 0 if the unit should be skipped, 1 otherwise
2300  */
2301 static int hls_nal_unit(HEVCContext *s)
2302 {
2303     GetBitContext *gb = &s->HEVClc.gb;
2304     int nuh_layer_id;
2305
2306     if (get_bits1(gb) != 0)
2307         return AVERROR_INVALIDDATA;
2308
2309     s->nal_unit_type = get_bits(gb, 6);
2310
2311     nuh_layer_id   = get_bits(gb, 6);
2312     s->temporal_id = get_bits(gb, 3) - 1;
2313     if (s->temporal_id < 0)
2314         return AVERROR_INVALIDDATA;
2315
2316     av_log(s->avctx, AV_LOG_DEBUG,
2317            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2318            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2319
2320     return nuh_layer_id == 0;
2321 }
2322
2323 static void restore_tqb_pixels(HEVCContext *s)
2324 {
2325     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2326     int x, y, c_idx;
2327
2328     for (c_idx = 0; c_idx < 3; c_idx++) {
2329         ptrdiff_t stride = s->frame->linesize[c_idx];
2330         int hshift       = s->sps->hshift[c_idx];
2331         int vshift       = s->sps->vshift[c_idx];
2332         for (y = 0; y < s->sps->min_pu_height; y++) {
2333             for (x = 0; x < s->sps->min_pu_width; x++) {
2334                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2335                     int n;
2336                     int len      = min_pu_size >> hshift;
2337                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2338                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2339                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2340                         memcpy(dst, src, len);
2341                         src += stride;
2342                         dst += stride;
2343                     }
2344                 }
2345             }
2346         }
2347     }
2348 }
2349
2350 static int hevc_frame_start(HEVCContext *s)
2351 {
2352     HEVCLocalContext *lc = &s->HEVClc;
2353     int ret;
2354
2355     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2356     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2357     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2358     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2359
2360     lc->start_of_tiles_x = 0;
2361     s->is_decoded        = 0;
2362
2363     if (s->pps->tiles_enabled_flag)
2364         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2365
2366     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2367                               s->poc);
2368     if (ret < 0)
2369         goto fail;
2370
2371     av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size,
2372                    (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]);
2373     if (!lc->edge_emu_buffer) {
2374         ret = AVERROR(ENOMEM);
2375         goto fail;
2376     }
2377
2378     ret = ff_hevc_frame_rps(s);
2379     if (ret < 0) {
2380         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2381         goto fail;
2382     }
2383
2384     av_frame_unref(s->output_frame);
2385     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2386     if (ret < 0)
2387         goto fail;
2388
2389     ff_thread_finish_setup(s->avctx);
2390
2391     return 0;
2392
2393 fail:
2394     if (s->ref)
2395         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2396     s->ref = NULL;
2397     return ret;
2398 }
2399
2400 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2401 {
2402     HEVCLocalContext *lc = &s->HEVClc;
2403     GetBitContext *gb    = &lc->gb;
2404     int ctb_addr_ts, ret;
2405
2406     ret = init_get_bits8(gb, nal, length);
2407     if (ret < 0)
2408         return ret;
2409
2410     ret = hls_nal_unit(s);
2411     if (ret < 0) {
2412         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2413                s->nal_unit_type);
2414         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2415             return ret;
2416         return 0;
2417     } else if (!ret)
2418         return 0;
2419
2420     switch (s->nal_unit_type) {
2421     case NAL_VPS:
2422         ret = ff_hevc_decode_nal_vps(s);
2423         if (ret < 0)
2424             return ret;
2425         break;
2426     case NAL_SPS:
2427         ret = ff_hevc_decode_nal_sps(s);
2428         if (ret < 0)
2429             return ret;
2430         break;
2431     case NAL_PPS:
2432         ret = ff_hevc_decode_nal_pps(s);
2433         if (ret < 0)
2434             return ret;
2435         break;
2436     case NAL_SEI_PREFIX:
2437     case NAL_SEI_SUFFIX:
2438         ret = ff_hevc_decode_nal_sei(s);
2439         if (ret < 0)
2440             return ret;
2441         break;
2442     case NAL_TRAIL_R:
2443     case NAL_TRAIL_N:
2444     case NAL_TSA_N:
2445     case NAL_TSA_R:
2446     case NAL_STSA_N:
2447     case NAL_STSA_R:
2448     case NAL_BLA_W_LP:
2449     case NAL_BLA_W_RADL:
2450     case NAL_BLA_N_LP:
2451     case NAL_IDR_W_RADL:
2452     case NAL_IDR_N_LP:
2453     case NAL_CRA_NUT:
2454     case NAL_RADL_N:
2455     case NAL_RADL_R:
2456     case NAL_RASL_N:
2457     case NAL_RASL_R:
2458         ret = hls_slice_header(s);
2459         if (ret < 0)
2460             return ret;
2461
2462         if (s->max_ra == INT_MAX) {
2463             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2464                 s->max_ra = s->poc;
2465             } else {
2466                 if (IS_IDR(s))
2467                     s->max_ra = INT_MIN;
2468             }
2469         }
2470
2471         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2472             s->poc <= s->max_ra) {
2473             s->is_decoded = 0;
2474             break;
2475         } else {
2476             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2477                 s->max_ra = INT_MIN;
2478         }
2479
2480         if (s->sh.first_slice_in_pic_flag) {
2481             ret = hevc_frame_start(s);
2482             if (ret < 0)
2483                 return ret;
2484         } else if (!s->ref) {
2485             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2486             return AVERROR_INVALIDDATA;
2487         }
2488
2489         if (!s->sh.dependent_slice_segment_flag &&
2490             s->sh.slice_type != I_SLICE) {
2491             ret = ff_hevc_slice_rpl(s);
2492             if (ret < 0) {
2493                 av_log(s->avctx, AV_LOG_WARNING,
2494                        "Error constructing the reference lists for the current slice.\n");
2495                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2496                     return ret;
2497             }
2498         }
2499
2500         ctb_addr_ts = hls_slice_data(s);
2501         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2502             s->is_decoded = 1;
2503             if ((s->pps->transquant_bypass_enable_flag ||
2504                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2505                 s->sps->sao_enabled)
2506                 restore_tqb_pixels(s);
2507         }
2508
2509         if (ctb_addr_ts < 0)
2510             return ctb_addr_ts;
2511         break;
2512     case NAL_EOS_NUT:
2513     case NAL_EOB_NUT:
2514         s->seq_decode = (s->seq_decode + 1) & 0xff;
2515         s->max_ra     = INT_MAX;
2516         break;
2517     case NAL_AUD:
2518     case NAL_FD_NUT:
2519         break;
2520     default:
2521         av_log(s->avctx, AV_LOG_INFO,
2522                "Skipping NAL unit %d\n", s->nal_unit_type);
2523     }
2524
2525     return 0;
2526 }
2527
2528 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2529  * between these functions would be nice. */
2530 static int extract_rbsp(const uint8_t *src, int length,
2531                         HEVCNAL *nal)
2532 {
2533     int i, si, di;
2534     uint8_t *dst;
2535
2536 #define STARTCODE_TEST                                                  \
2537         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2538             if (src[i + 2] != 3) {                                      \
2539                 /* startcode, so we must be past the end */             \
2540                 length = i;                                             \
2541             }                                                           \
2542             break;                                                      \
2543         }
2544 #if HAVE_FAST_UNALIGNED
2545 #define FIND_FIRST_ZERO                                                 \
2546         if (i > 0 && !src[i])                                           \
2547             i--;                                                        \
2548         while (src[i])                                                  \
2549             i++
2550 #if HAVE_FAST_64BIT
2551     for (i = 0; i + 1 < length; i += 9) {
2552         if (!((~AV_RN64A(src + i) &
2553                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2554               0x8000800080008080ULL))
2555             continue;
2556         FIND_FIRST_ZERO;
2557         STARTCODE_TEST;
2558         i -= 7;
2559     }
2560 #else
2561     for (i = 0; i + 1 < length; i += 5) {
2562         if (!((~AV_RN32A(src + i) &
2563                (AV_RN32A(src + i) - 0x01000101U)) &
2564               0x80008080U))
2565             continue;
2566         FIND_FIRST_ZERO;
2567         STARTCODE_TEST;
2568         i -= 3;
2569     }
2570 #endif /* HAVE_FAST_64BIT */
2571 #else
2572     for (i = 0; i + 1 < length; i += 2) {
2573         if (src[i])
2574             continue;
2575         if (i > 0 && src[i - 1] == 0)
2576             i--;
2577         STARTCODE_TEST;
2578     }
2579 #endif /* HAVE_FAST_UNALIGNED */
2580
2581     if (i >= length - 1) { // no escaped 0
2582         nal->data = src;
2583         nal->size = length;
2584         return length;
2585     }
2586
2587     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2588                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2589     if (!nal->rbsp_buffer)
2590         return AVERROR(ENOMEM);
2591
2592     dst = nal->rbsp_buffer;
2593
2594     memcpy(dst, src, i);
2595     si = di = i;
2596     while (si + 2 < length) {
2597         // remove escapes (very rare 1:2^22)
2598         if (src[si + 2] > 3) {
2599             dst[di++] = src[si++];
2600             dst[di++] = src[si++];
2601         } else if (src[si] == 0 && src[si + 1] == 0) {
2602             if (src[si + 2] == 3) { // escape
2603                 dst[di++] = 0;
2604                 dst[di++] = 0;
2605                 si       += 3;
2606
2607                 continue;
2608             } else // next start code
2609                 goto nsc;
2610         }
2611
2612         dst[di++] = src[si++];
2613     }
2614     while (si < length)
2615         dst[di++] = src[si++];
2616
2617 nsc:
2618     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2619
2620     nal->data = dst;
2621     nal->size = di;
2622     return si;
2623 }
2624
2625 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2626 {
2627     int i, consumed, ret = 0;
2628
2629     s->ref = NULL;
2630     s->eos = 0;
2631
2632     /* split the input packet into NAL units, so we know the upper bound on the
2633      * number of slices in the frame */
2634     s->nb_nals = 0;
2635     while (length >= 4) {
2636         HEVCNAL *nal;
2637         int extract_length = 0;
2638
2639         if (s->is_nalff) {
2640             int i;
2641             for (i = 0; i < s->nal_length_size; i++)
2642                 extract_length = (extract_length << 8) | buf[i];
2643             buf    += s->nal_length_size;
2644             length -= s->nal_length_size;
2645
2646             if (extract_length > length) {
2647                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2648                 ret = AVERROR_INVALIDDATA;
2649                 goto fail;
2650             }
2651         } else {
2652             if (buf[2] == 0) {
2653                 length--;
2654                 buf++;
2655                 continue;
2656             }
2657             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2658                 ret = AVERROR_INVALIDDATA;
2659                 goto fail;
2660             }
2661
2662             buf           += 3;
2663             length        -= 3;
2664             extract_length = length;
2665         }
2666
2667         if (s->nals_allocated < s->nb_nals + 1) {
2668             int new_size = s->nals_allocated + 1;
2669             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2670             if (!tmp) {
2671                 ret = AVERROR(ENOMEM);
2672                 goto fail;
2673             }
2674             s->nals = tmp;
2675             memset(s->nals + s->nals_allocated, 0,
2676                    (new_size - s->nals_allocated) * sizeof(*tmp));
2677             s->nals_allocated = new_size;
2678         }
2679         nal = &s->nals[s->nb_nals++];
2680
2681         consumed = extract_rbsp(buf, extract_length, nal);
2682         if (consumed < 0) {
2683             ret = consumed;
2684             goto fail;
2685         }
2686
2687         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2688         if (ret < 0)
2689             goto fail;
2690         hls_nal_unit(s);
2691
2692         if (s->nal_unit_type == NAL_EOB_NUT ||
2693             s->nal_unit_type == NAL_EOS_NUT)
2694             s->eos = 1;
2695
2696         buf    += consumed;
2697         length -= consumed;
2698     }
2699
2700     /* parse the NAL units */
2701     for (i = 0; i < s->nb_nals; i++) {
2702         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2703         if (ret < 0) {
2704             av_log(s->avctx, AV_LOG_WARNING,
2705                    "Error parsing NAL unit #%d.\n", i);
2706             if (s->avctx->err_recognition & AV_EF_EXPLODE)
2707                 goto fail;
2708         }
2709     }
2710
2711 fail:
2712     if (s->ref)
2713         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2714
2715     return ret;
2716 }
2717
/**
 * Log the 16 bytes of an MD5 digest as 32 hex digits at the given level.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2724
2725 static int verify_md5(HEVCContext *s, AVFrame *frame)
2726 {
2727     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2728     int pixel_shift = desc->comp[0].depth_minus1 > 7;
2729     int i, j;
2730
2731     if (!desc)
2732         return AVERROR(EINVAL);
2733
2734     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2735            s->poc);
2736
2737     /* the checksums are LE, so we have to byteswap for >8bpp formats
2738      * on BE arches */
2739 #if HAVE_BIGENDIAN
2740     if (pixel_shift && !s->checksum_buf) {
2741         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2742                        FFMAX3(frame->linesize[0], frame->linesize[1],
2743                               frame->linesize[2]));
2744         if (!s->checksum_buf)
2745             return AVERROR(ENOMEM);
2746     }
2747 #endif
2748
2749     for (i = 0; frame->data[i]; i++) {
2750         int width  = s->avctx->coded_width;
2751         int height = s->avctx->coded_height;
2752         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2753         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2754         uint8_t md5[16];
2755
2756         av_md5_init(s->md5_ctx);
2757         for (j = 0; j < h; j++) {
2758             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2759 #if HAVE_BIGENDIAN
2760             if (pixel_shift) {
2761                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2762                                    (const uint16_t*)src, w);
2763                 src = s->checksum_buf;
2764             }
2765 #endif
2766             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2767         }
2768         av_md5_final(s->md5_ctx, md5);
2769
2770         if (!memcmp(md5, s->md5[i], 16)) {
2771             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2772             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2773             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2774         } else {
2775             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2776             print_md5(s->avctx, AV_LOG_ERROR, md5);
2777             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2778             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2779             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2780             return AVERROR_INVALIDDATA;
2781         }
2782     }
2783
2784     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2785
2786     return 0;
2787 }
2788
2789 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2790                              AVPacket *avpkt)
2791 {
2792     int ret;
2793     HEVCContext *s = avctx->priv_data;
2794
2795     if (!avpkt->size) {
2796         ret = ff_hevc_output_frame(s, data, 1);
2797         if (ret < 0)
2798             return ret;
2799
2800         *got_output = ret;
2801         return 0;
2802     }
2803
2804     s->ref = NULL;
2805     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2806     if (ret < 0)
2807         return ret;
2808
2809     /* verify the SEI checksum */
2810     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2811         s->is_md5) {
2812         ret = verify_md5(s, s->ref->frame);
2813         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2814             ff_hevc_unref_frame(s, s->ref, ~0);
2815             return ret;
2816         }
2817     }
2818     s->is_md5 = 0;
2819
2820     if (s->is_decoded) {
2821         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2822         s->is_decoded = 0;
2823     }
2824
2825     if (s->output_frame->buf[0]) {
2826         av_frame_move_ref(data, s->output_frame);
2827         *got_output = 1;
2828     }
2829
2830     return avpkt->size;
2831 }
2832
2833 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2834 {
2835     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2836     if (ret < 0)
2837         return ret;
2838
2839     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2840     if (!dst->tab_mvf_buf)
2841         goto fail;
2842     dst->tab_mvf = src->tab_mvf;
2843
2844     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2845     if (!dst->rpl_tab_buf)
2846         goto fail;
2847     dst->rpl_tab = src->rpl_tab;
2848
2849     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2850     if (!dst->rpl_buf)
2851         goto fail;
2852
2853     dst->poc        = src->poc;
2854     dst->ctb_count  = src->ctb_count;
2855     dst->window     = src->window;
2856     dst->flags      = src->flags;
2857     dst->sequence   = src->sequence;
2858
2859     return 0;
2860 fail:
2861     ff_hevc_unref_frame(s, dst, ~0);
2862     return AVERROR(ENOMEM);
2863 }
2864
2865 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2866 {
2867     HEVCContext       *s = avctx->priv_data;
2868     HEVCLocalContext *lc = &s->HEVClc;
2869     int i;
2870
2871     pic_arrays_free(s);
2872
2873     av_freep(&lc->edge_emu_buffer);
2874     av_freep(&s->md5_ctx);
2875
2876     av_frame_free(&s->tmp_frame);
2877     av_frame_free(&s->output_frame);
2878
2879     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2880         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2881         av_frame_free(&s->DPB[i].frame);
2882     }
2883
2884     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2885         av_freep(&s->vps_list[i]);
2886     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2887         av_buffer_unref(&s->sps_list[i]);
2888     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2889         av_buffer_unref(&s->pps_list[i]);
2890
2891     for (i = 0; i < s->nals_allocated; i++)
2892         av_freep(&s->nals[i].rbsp_buffer);
2893     av_freep(&s->nals);
2894     s->nals_allocated = 0;
2895
2896     return 0;
2897 }
2898
2899 static av_cold int hevc_init_context(AVCodecContext *avctx)
2900 {
2901     HEVCContext *s = avctx->priv_data;
2902     int i;
2903
2904     s->avctx = avctx;
2905
2906     s->tmp_frame = av_frame_alloc();
2907     if (!s->tmp_frame)
2908         goto fail;
2909
2910     s->output_frame = av_frame_alloc();
2911     if (!s->output_frame)
2912         goto fail;
2913
2914     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2915         s->DPB[i].frame = av_frame_alloc();
2916         if (!s->DPB[i].frame)
2917             goto fail;
2918         s->DPB[i].tf.f = s->DPB[i].frame;
2919     }
2920
2921     s->max_ra = INT_MAX;
2922
2923     s->md5_ctx = av_md5_alloc();
2924     if (!s->md5_ctx)
2925         goto fail;
2926
2927     ff_dsputil_init(&s->dsp, avctx);
2928
2929     s->context_initialized = 1;
2930
2931     return 0;
2932
2933 fail:
2934     hevc_decode_free(avctx);
2935     return AVERROR(ENOMEM);
2936 }
2937
2938 static int hevc_update_thread_context(AVCodecContext *dst,
2939                                       const AVCodecContext *src)
2940 {
2941     HEVCContext *s  = dst->priv_data;
2942     HEVCContext *s0 = src->priv_data;
2943     int i, ret;
2944
2945     if (!s->context_initialized) {
2946         ret = hevc_init_context(dst);
2947         if (ret < 0)
2948             return ret;
2949     }
2950
2951     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2952         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2953         if (s0->DPB[i].frame->buf[0]) {
2954             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2955             if (ret < 0)
2956                 return ret;
2957         }
2958     }
2959
2960     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
2961         av_buffer_unref(&s->sps_list[i]);
2962         if (s0->sps_list[i]) {
2963             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
2964             if (!s->sps_list[i])
2965                 return AVERROR(ENOMEM);
2966         }
2967     }
2968
2969     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
2970         av_buffer_unref(&s->pps_list[i]);
2971         if (s0->pps_list[i]) {
2972             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
2973             if (!s->pps_list[i])
2974                 return AVERROR(ENOMEM);
2975         }
2976     }
2977
2978     if (s->sps != s0->sps)
2979         ret = set_sps(s, s0->sps);
2980
2981     s->seq_decode = s0->seq_decode;
2982     s->seq_output = s0->seq_output;
2983     s->pocTid0    = s0->pocTid0;
2984     s->max_ra     = s0->max_ra;
2985
2986     s->is_nalff        = s0->is_nalff;
2987     s->nal_length_size = s0->nal_length_size;
2988
2989     if (s0->eos) {
2990         s->seq_decode = (s->seq_decode + 1) & 0xff;
2991         s->max_ra = INT_MAX;
2992     }
2993
2994     return 0;
2995 }
2996
2997 static int hevc_decode_extradata(HEVCContext *s)
2998 {
2999     AVCodecContext *avctx = s->avctx;
3000     GetByteContext gb;
3001     int ret;
3002
3003     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3004
3005     if (avctx->extradata_size > 3 &&
3006         (avctx->extradata[0] || avctx->extradata[1] ||
3007          avctx->extradata[2] > 1)) {
3008         /* It seems the extradata is encoded as hvcC format.
3009          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3010          * is finalized. When finalized, configurationVersion will be 1 and we
3011          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3012         int i, j, num_arrays, nal_len_size;
3013
3014         s->is_nalff = 1;
3015
3016         bytestream2_skip(&gb, 21);
3017         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3018         num_arrays   = bytestream2_get_byte(&gb);
3019
3020         /* nal units in the hvcC always have length coded with 2 bytes,
3021          * so put a fake nal_length_size = 2 while parsing them */
3022         s->nal_length_size = 2;
3023
3024         /* Decode nal units from hvcC. */
3025         for (i = 0; i < num_arrays; i++) {
3026             int type = bytestream2_get_byte(&gb) & 0x3f;
3027             int cnt  = bytestream2_get_be16(&gb);
3028
3029             for (j = 0; j < cnt; j++) {
3030                 // +2 for the nal size field
3031                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3032                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3033                     av_log(s->avctx, AV_LOG_ERROR,
3034                            "Invalid NAL unit size in extradata.\n");
3035                     return AVERROR_INVALIDDATA;
3036                 }
3037
3038                 ret = decode_nal_units(s, gb.buffer, nalsize);
3039                 if (ret < 0) {
3040                     av_log(avctx, AV_LOG_ERROR,
3041                            "Decoding nal unit %d %d from hvcC failed\n",
3042                            type, i);
3043                     return ret;
3044                 }
3045                 bytestream2_skip(&gb, nalsize);
3046             }
3047         }
3048
3049         /* Now store right nal length size, that will be used to parse
3050          * all other nals */
3051         s->nal_length_size = nal_len_size;
3052     } else {
3053         s->is_nalff = 0;
3054         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3055         if (ret < 0)
3056             return ret;
3057     }
3058     return 0;
3059 }
3060
3061 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3062 {
3063     HEVCContext *s = avctx->priv_data;
3064     int ret;
3065
3066     ff_init_cabac_states();
3067
3068     avctx->internal->allocate_progress = 1;
3069
3070     ret = hevc_init_context(avctx);
3071     if (ret < 0)
3072         return ret;
3073
3074     if (avctx->extradata_size > 0 && avctx->extradata) {
3075         ret = hevc_decode_extradata(s);
3076         if (ret < 0) {
3077             hevc_decode_free(avctx);
3078             return ret;
3079         }
3080     }
3081
3082     return 0;
3083 }
3084
3085 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3086 {
3087     HEVCContext *s = avctx->priv_data;
3088     int ret;
3089
3090     memset(s, 0, sizeof(*s));
3091
3092     ret = hevc_init_context(avctx);
3093     if (ret < 0)
3094         return ret;
3095
3096     return 0;
3097 }
3098
3099 static void hevc_decode_flush(AVCodecContext *avctx)
3100 {
3101     HEVCContext *s = avctx->priv_data;
3102     ff_hevc_flush_dpb(s);
3103     s->max_ra = INT_MAX;
3104 }
3105
3106 #define OFFSET(x) offsetof(HEVCContext, x)
3107 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3108 static const AVOption options[] = {
3109     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3110         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3111     { NULL },
3112 };
3113
3114 static const AVClass hevc_decoder_class = {
3115     .class_name = "HEVC decoder",
3116     .item_name  = av_default_item_name,
3117     .option     = options,
3118     .version    = LIBAVUTIL_VERSION_INT,
3119 };
3120
3121 AVCodec ff_hevc_decoder = {
3122     .name                  = "hevc",
3123     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3124     .type                  = AVMEDIA_TYPE_VIDEO,
3125     .id                    = AV_CODEC_ID_HEVC,
3126     .priv_data_size        = sizeof(HEVCContext),
3127     .priv_class            = &hevc_decoder_class,
3128     .init                  = hevc_decode_init,
3129     .close                 = hevc_decode_free,
3130     .decode                = hevc_decode_frame,
3131     .flush                 = hevc_decode_flush,
3132     .update_thread_context = hevc_update_thread_context,
3133     .init_thread_copy      = hevc_init_thread_copy,
3134     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3135                              CODEC_CAP_FRAME_THREADS,
3136 };