git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40
  41 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
  42 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
  43 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  44
  45 static const uint8_t scan_1x1[1] = { 0 };
  46
  47 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  48
  49 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
  50
  51 static const uint8_t horiz_scan4x4_x[16] = {
  52     0, 1, 2, 3,
  53     0, 1, 2, 3,
  54     0, 1, 2, 3,
  55     0, 1, 2, 3,
  56 };
  57
  58 static const uint8_t horiz_scan4x4_y[16] = {
  59     0, 0, 0, 0,
  60     1, 1, 1, 1,
  61     2, 2, 2, 2,
  62     3, 3, 3, 3,
  63 };
  64
  65 static const uint8_t horiz_scan8x8_inv[8][8] = {
  66     {  0,  1,  2,  3, 16, 17, 18, 19, },
  67     {  4,  5,  6,  7, 20, 21, 22, 23, },
  68     {  8,  9, 10, 11, 24, 25, 26, 27, },
  69     { 12, 13, 14, 15, 28, 29, 30, 31, },
  70     { 32, 33, 34, 35, 48, 49, 50, 51, },
  71     { 36, 37, 38, 39, 52, 53, 54, 55, },
  72     { 40, 41, 42, 43, 56, 57, 58, 59, },
  73     { 44, 45, 46, 47, 60, 61, 62, 63, },
  74 };
  75
  76 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  77
  78 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
  79
  80 static const uint8_t diag_scan2x2_inv[2][2] = {
  81     { 0, 2, },
  82     { 1, 3, },
  83 };
  84
  85 const uint8_t ff_hevc_diag_scan4x4_x[16] = {
  86     0, 0, 1, 0,
  87     1, 2, 0, 1,
  88     2, 3, 1, 2,
  89     3, 2, 3, 3,
  90 };
  91
  92 const uint8_t ff_hevc_diag_scan4x4_y[16] = {
  93     0, 1, 0, 2,
  94     1, 0, 3, 2,
  95     1, 0, 3, 2,
  96     1, 3, 2, 3,
  97 };
  98
  99 static const uint8_t diag_scan4x4_inv[4][4] = {
 100     { 0,  2,  5,  9, },
 101     { 1,  4,  8, 12, },
 102     { 3,  7, 11, 14, },
 103     { 6, 10, 13, 15, },
 104 };
 105
 106 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
 107     0, 0, 1, 0,
 108     1, 2, 0, 1,
 109     2, 3, 0, 1,
 110     2, 3, 4, 0,
 111     1, 2, 3, 4,
 112     5, 0, 1, 2,
 113     3, 4, 5, 6,
 114     0, 1, 2, 3,
 115     4, 5, 6, 7,
 116     1, 2, 3, 4,
 117     5, 6, 7, 2,
 118     3, 4, 5, 6,
 119     7, 3, 4, 5,
 120     6, 7, 4, 5,
 121     6, 7, 5, 6,
 122     7, 6, 7, 7,
 123 };
 124
 125 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
 126     0, 1, 0, 2,
 127     1, 0, 3, 2,
 128     1, 0, 4, 3,
 129     2, 1, 0, 5,
 130     4, 3, 2, 1,
 131     0, 6, 5, 4,
 132     3, 2, 1, 0,
 133     7, 6, 5, 4,
 134     3, 2, 1, 0,
 135     7, 6, 5, 4,
 136     3, 2, 1, 7,
 137     6, 5, 4, 3,
 138     2, 7, 6, 5,
 139     4, 3, 7, 6,
 140     5, 4, 7, 6,
 141     5, 7, 6, 7,
 142 };
 143
 144 static const uint8_t diag_scan8x8_inv[8][8] = {
 145     {  0,  2,  5,  9, 14, 20, 27, 35, },
 146     {  1,  4,  8, 13, 19, 26, 34, 42, },
 147     {  3,  7, 12, 18, 25, 33, 41, 48, },
 148     {  6, 11, 17, 24, 32, 40, 47, 53, },
 149     { 10, 16, 23, 31, 39, 46, 52, 57, },
 150     { 15, 22, 30, 38, 45, 51, 56, 60, },
 151     { 21, 29, 37, 44, 50, 55, 59, 62, },
 152     { 28, 36, 43, 49, 54, 58, 61, 63, },
 153 };
 154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
 159
 160 /**
 161  * Section 5.7
 162  */
 163
 164 /* free everything allocated  by pic_arrays_init() */
 165 static void pic_arrays_free(HEVCContext *s)
 166 {
 167     av_freep(&s->sao);
 168     av_freep(&s->deblock);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 195                            ((height >> log2_min_cb_size) + 1);
 196     int ctb_count        = sps->ctb_width * sps->ctb_height;
 197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 198
 199     s->bs_width  = width  >> 3;
 200     s->bs_height = height >> 3;
 201
 202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 204     if (!s->sao || !s->deblock)
 205         goto fail;
 206
 207     s->skip_flag    = av_malloc(pic_size_in_ctb);
 208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 209     if (!s->skip_flag || !s->tab_ct_depth)
 210         goto fail;
 211
 212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 213     s->tab_ipm  = av_mallocz(min_pu_size);
 214     s->is_pcm   = av_malloc(min_pu_size);
 215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 216         goto fail;
 217
 218     s->filter_slice_edges = av_malloc(ctb_count);
 219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 220                                       sizeof(*s->tab_slice_address));
 221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->qp_y_tab));
 223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 224         goto fail;
 225
 226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 228     if (!s->horizontal_bs || !s->vertical_bs)
 229         goto fail;
 230
 231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 232                                           av_buffer_alloc);
 233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 234                                           av_buffer_allocz);
 235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 236         goto fail;
 237
 238     return 0;
 239
 240 fail:
 241     pic_arrays_free(s);
 242     return AVERROR(ENOMEM);
 243 }
 244
 245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 246 {
 247     int i = 0;
 248     int j = 0;
 249     uint8_t luma_weight_l0_flag[16];
 250     uint8_t chroma_weight_l0_flag[16];
 251     uint8_t luma_weight_l1_flag[16];
 252     uint8_t chroma_weight_l1_flag[16];
 253
 254     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 255     if (s->sps->chroma_format_idc != 0) {
 256         int delta = get_se_golomb(gb);
 257         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 258     }
 259
 260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 261         luma_weight_l0_flag[i] = get_bits1(gb);
 262         if (!luma_weight_l0_flag[i]) {
 263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 264             s->sh.luma_offset_l0[i] = 0;
 265         }
 266     }
 267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 269             chroma_weight_l0_flag[i] = get_bits1(gb);
 270     } else {
 271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 272             chroma_weight_l0_flag[i] = 0;
 273     }
 274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 275         if (luma_weight_l0_flag[i]) {
 276             int delta_luma_weight_l0 = get_se_golomb(gb);
 277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 279         }
 280         if (chroma_weight_l0_flag[i]) {
 281             for (j = 0; j < 2; j++) {
 282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 285                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 287             }
 288         } else {
 289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 290             s->sh.chroma_offset_l0[i][0] = 0;
 291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][1] = 0;
 293         }
 294     }
 295     if (s->sh.slice_type == B_SLICE) {
 296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 297             luma_weight_l1_flag[i] = get_bits1(gb);
 298             if (!luma_weight_l1_flag[i]) {
 299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 300                 s->sh.luma_offset_l1[i] = 0;
 301             }
 302         }
 303         if (s->sps->chroma_format_idc != 0) {
 304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 305                 chroma_weight_l1_flag[i] = get_bits1(gb);
 306         } else {
 307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 308                 chroma_weight_l1_flag[i] = 0;
 309         }
 310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 311             if (luma_weight_l1_flag[i]) {
 312                 int delta_luma_weight_l1 = get_se_golomb(gb);
 313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 315             }
 316             if (chroma_weight_l1_flag[i]) {
 317                 for (j = 0; j < 2; j++) {
 318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 321                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 323                 }
 324             } else {
 325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 326                 s->sh.chroma_offset_l1[i][0] = 0;
 327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][1] = 0;
 329             }
 330         }
 331     }
 332 }
 333
 334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 335 {
 336     const HEVCSPS *sps = s->sps;
 337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 338     int prev_delta_msb = 0;
 339     unsigned int nb_sps = 0, nb_sh;
 340     int i;
 341
 342     rps->nb_refs = 0;
 343     if (!sps->long_term_ref_pics_present_flag)
 344         return 0;
 345
 346     if (sps->num_long_term_ref_pics_sps > 0)
 347         nb_sps = get_ue_golomb_long(gb);
 348     nb_sh = get_ue_golomb_long(gb);
 349
 350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 351         return AVERROR_INVALIDDATA;
 352
 353     rps->nb_refs = nb_sh + nb_sps;
 354
 355     for (i = 0; i < rps->nb_refs; i++) {
 356         uint8_t delta_poc_msb_present;
 357
 358         if (i < nb_sps) {
 359             uint8_t lt_idx_sps = 0;
 360
 361             if (sps->num_long_term_ref_pics_sps > 1)
 362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 363
 364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 366         } else {
 367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 368             rps->used[i] = get_bits1(gb);
 369         }
 370
 371         delta_poc_msb_present = get_bits1(gb);
 372         if (delta_poc_msb_present) {
 373             int delta = get_ue_golomb_long(gb);
 374
 375             if (i && i != nb_sps)
 376                 delta += prev_delta_msb;
 377
 378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 379             prev_delta_msb = delta;
 380         }
 381     }
 382
 383     return 0;
 384 }
 385
 386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 387 {
 388     int ret;
 389     unsigned int num = 0, den = 0;
 390
 391     pic_arrays_free(s);
 392     ret = pic_arrays_init(s, sps);
 393     if (ret < 0)
 394         goto fail;
 395
 396     s->avctx->coded_width         = sps->width;
 397     s->avctx->coded_height        = sps->height;
 398     s->avctx->width               = sps->output_width;
 399     s->avctx->height              = sps->output_height;
 400     s->avctx->pix_fmt             = sps->pix_fmt;
 401     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 402
 403     ff_set_sar(s->avctx, sps->vui.sar);
 404
 405     if (sps->vui.video_signal_type_present_flag)
 406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 407                                                                : AVCOL_RANGE_MPEG;
 408     else
 409         s->avctx->color_range = AVCOL_RANGE_MPEG;
 410
 411     if (sps->vui.colour_description_present_flag) {
 412         s->avctx->color_primaries = sps->vui.colour_primaries;
 413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 415     } else {
 416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 419     }
 420
 421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 424
 425     if (sps->sao_enabled) {
 426         av_frame_unref(s->tmp_frame);
 427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 428         if (ret < 0)
 429             goto fail;
 430         s->frame = s->tmp_frame;
 431     }
 432
 433     s->sps = sps;
 434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 435
 436     if (s->vps->vps_timing_info_present_flag) {
 437         num = s->vps->vps_num_units_in_tick;
 438         den = s->vps->vps_time_scale;
 439     } else if (sps->vui.vui_timing_info_present_flag) {
 440         num = sps->vui.vui_num_units_in_tick;
 441         den = sps->vui.vui_time_scale;
 442     }
 443
 444     if (num != 0 && den != 0)
 445         av_reduce(&s->avctx->framerate.den, &s->avctx->framerate.num,
 446                   num, den, 1 << 30);
 447
 448     return 0;
 449
 450 fail:
 451     pic_arrays_free(s);
 452     s->sps = NULL;
 453     return ret;
 454 }
 455
 456 static int hls_slice_header(HEVCContext *s)
 457 {
 458     GetBitContext *gb = &s->HEVClc.gb;
 459     SliceHeader *sh   = &s->sh;
 460     int i, ret;
 461
 462     // Coded parameters
 463     sh->first_slice_in_pic_flag = get_bits1(gb);
 464     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 465         s->seq_decode = (s->seq_decode + 1) & 0xff;
 466         s->max_ra     = INT_MAX;
 467         if (IS_IDR(s))
 468             ff_hevc_clear_refs(s);
 469     }
 470     if (IS_IRAP(s))
 471         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 472
 473     sh->pps_id = get_ue_golomb_long(gb);
 474     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 475         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 476         return AVERROR_INVALIDDATA;
 477     }
 478     if (!sh->first_slice_in_pic_flag &&
 479         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 480         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 481         return AVERROR_INVALIDDATA;
 482     }
 483     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 484
 485     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 486         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 487
 488         ff_hevc_clear_refs(s);
 489         ret = set_sps(s, s->sps);
 490         if (ret < 0)
 491             return ret;
 492
 493         s->seq_decode = (s->seq_decode + 1) & 0xff;
 494         s->max_ra     = INT_MAX;
 495     }
 496
 497     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 498     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 499
 500     sh->dependent_slice_segment_flag = 0;
 501     if (!sh->first_slice_in_pic_flag) {
 502         int slice_address_length;
 503
 504         if (s->pps->dependent_slice_segments_enabled_flag)
 505             sh->dependent_slice_segment_flag = get_bits1(gb);
 506
 507         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 508                                             s->sps->ctb_height);
 509         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 510         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 511             av_log(s->avctx, AV_LOG_ERROR,
 512                    "Invalid slice segment address: %u.\n",
 513                    sh->slice_segment_addr);
 514             return AVERROR_INVALIDDATA;
 515         }
 516
 517         if (!sh->dependent_slice_segment_flag) {
 518             sh->slice_addr = sh->slice_segment_addr;
 519             s->slice_idx++;
 520         }
 521     } else {
 522         sh->slice_segment_addr = sh->slice_addr = 0;
 523         s->slice_idx           = 0;
 524         s->slice_initialized   = 0;
 525     }
 526
 527     if (!sh->dependent_slice_segment_flag) {
 528         s->slice_initialized = 0;
 529
 530         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 531             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 532
 533         sh->slice_type = get_ue_golomb_long(gb);
 534         if (!(sh->slice_type == I_SLICE ||
 535               sh->slice_type == P_SLICE ||
 536               sh->slice_type == B_SLICE)) {
 537             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 538                    sh->slice_type);
 539             return AVERROR_INVALIDDATA;
 540         }
 541         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 542             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 543             return AVERROR_INVALIDDATA;
 544         }
 545
 546         // when flag is not present, picture is inferred to be output
 547         sh->pic_output_flag = 1;
 548         if (s->pps->output_flag_present_flag)
 549             sh->pic_output_flag = get_bits1(gb);
 550
 551         if (s->sps->separate_colour_plane_flag)
 552             sh->colour_plane_id = get_bits(gb, 2);
 553
 554         if (!IS_IDR(s)) {
 555             int short_term_ref_pic_set_sps_flag, poc;
 556
 557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 560                 av_log(s->avctx, AV_LOG_WARNING,
 561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 563                     return AVERROR_INVALIDDATA;
 564                 poc = s->poc;
 565             }
 566             s->poc = poc;
 567
 568             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 569             if (!short_term_ref_pic_set_sps_flag) {
 570                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 571                 if (ret < 0)
 572                     return ret;
 573
 574                 sh->short_term_rps = &sh->slice_rps;
 575             } else {
 576                 int numbits, rps_idx;
 577
 578                 if (!s->sps->nb_st_rps) {
 579                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 580                     return AVERROR_INVALIDDATA;
 581                 }
 582
 583                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 584                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 585                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 586             }
 587
 588             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 589             if (ret < 0) {
 590                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 591                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 592                     return AVERROR_INVALIDDATA;
 593             }
 594
 595             if (s->sps->sps_temporal_mvp_enabled_flag)
 596                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 597             else
 598                 sh->slice_temporal_mvp_enabled_flag = 0;
 599         } else {
 600             s->sh.short_term_rps = NULL;
 601             s->poc               = 0;
 602         }
 603
 604         /* 8.3.1 */
 605         if (s->temporal_id == 0 &&
 606             s->nal_unit_type != NAL_TRAIL_N &&
 607             s->nal_unit_type != NAL_TSA_N   &&
 608             s->nal_unit_type != NAL_STSA_N  &&
 609             s->nal_unit_type != NAL_RADL_N  &&
 610             s->nal_unit_type != NAL_RADL_R  &&
 611             s->nal_unit_type != NAL_RASL_N  &&
 612             s->nal_unit_type != NAL_RASL_R)
 613             s->pocTid0 = s->poc;
 614
 615         if (s->sps->sao_enabled) {
 616             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 617             sh->slice_sample_adaptive_offset_flag[1] =
 618             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 619         } else {
 620             sh->slice_sample_adaptive_offset_flag[0] = 0;
 621             sh->slice_sample_adaptive_offset_flag[1] = 0;
 622             sh->slice_sample_adaptive_offset_flag[2] = 0;
 623         }
 624
 625         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 626         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 627             int nb_refs;
 628
 629             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 630             if (sh->slice_type == B_SLICE)
 631                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 632
 633             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 634                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 635                 if (sh->slice_type == B_SLICE)
 636                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 637             }
 638             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 639                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 640                        sh->nb_refs[L0], sh->nb_refs[L1]);
 641                 return AVERROR_INVALIDDATA;
 642             }
 643
 644             sh->rpl_modification_flag[0] = 0;
 645             sh->rpl_modification_flag[1] = 0;
 646             nb_refs = ff_hevc_frame_nb_refs(s);
 647             if (!nb_refs) {
 648                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 649                 return AVERROR_INVALIDDATA;
 650             }
 651
 652             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 653                 sh->rpl_modification_flag[0] = get_bits1(gb);
 654                 if (sh->rpl_modification_flag[0]) {
 655                     for (i = 0; i < sh->nb_refs[L0]; i++)
 656                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 657                 }
 658
 659                 if (sh->slice_type == B_SLICE) {
 660                     sh->rpl_modification_flag[1] = get_bits1(gb);
 661                     if (sh->rpl_modification_flag[1] == 1)
 662                         for (i = 0; i < sh->nb_refs[L1]; i++)
 663                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 664                 }
 665             }
 666
 667             if (sh->slice_type == B_SLICE)
 668                 sh->mvd_l1_zero_flag = get_bits1(gb);
 669
 670             if (s->pps->cabac_init_present_flag)
 671                 sh->cabac_init_flag = get_bits1(gb);
 672             else
 673                 sh->cabac_init_flag = 0;
 674
 675             sh->collocated_ref_idx = 0;
 676             if (sh->slice_temporal_mvp_enabled_flag) {
 677                 sh->collocated_list = L0;
 678                 if (sh->slice_type == B_SLICE)
 679                     sh->collocated_list = !get_bits1(gb);
 680
 681                 if (sh->nb_refs[sh->collocated_list] > 1) {
 682                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 683                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 684                         av_log(s->avctx, AV_LOG_ERROR,
 685                                "Invalid collocated_ref_idx: %d.\n",
 686                                sh->collocated_ref_idx);
 687                         return AVERROR_INVALIDDATA;
 688                     }
 689                 }
 690             }
 691
 692             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 693                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 694                 pred_weight_table(s, gb);
 695             }
 696
 697             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 698             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 699                 av_log(s->avctx, AV_LOG_ERROR,
 700                        "Invalid number of merging MVP candidates: %d.\n",
 701                        sh->max_num_merge_cand);
 702                 return AVERROR_INVALIDDATA;
 703             }
 704         }
 705
 706         sh->slice_qp_delta = get_se_golomb(gb);
 707
 708         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 709             sh->slice_cb_qp_offset = get_se_golomb(gb);
 710             sh->slice_cr_qp_offset = get_se_golomb(gb);
 711         } else {
 712             sh->slice_cb_qp_offset = 0;
 713             sh->slice_cr_qp_offset = 0;
 714         }
 715
 716         if (s->pps->deblocking_filter_control_present_flag) {
 717             int deblocking_filter_override_flag = 0;
 718
 719             if (s->pps->deblocking_filter_override_enabled_flag)
 720                 deblocking_filter_override_flag = get_bits1(gb);
 721
 722             if (deblocking_filter_override_flag) {
 723                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 724                 if (!sh->disable_deblocking_filter_flag) {
 725                     sh->beta_offset = get_se_golomb(gb) * 2;
 726                     sh->tc_offset   = get_se_golomb(gb) * 2;
 727                 }
 728             } else {
 729                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 730                 sh->beta_offset                    = s->pps->beta_offset;
 731                 sh->tc_offset                      = s->pps->tc_offset;
 732             }
 733         } else {
 734             sh->disable_deblocking_filter_flag = 0;
 735             sh->beta_offset                    = 0;
 736             sh->tc_offset                      = 0;
 737         }
 738
 739         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 740             (sh->slice_sample_adaptive_offset_flag[0] ||
 741              sh->slice_sample_adaptive_offset_flag[1] ||
 742              !sh->disable_deblocking_filter_flag)) {
 743             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 744         } else {
 745             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 746         }
 747     } else if (!s->slice_initialized) {
 748         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 749         return AVERROR_INVALIDDATA;
 750     }
 751
 752     sh->num_entry_point_offsets = 0;
 753     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 754         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 755         if (sh->num_entry_point_offsets > 0) {
 756             int offset_len = get_ue_golomb_long(gb) + 1;
 757
 758             for (i = 0; i < sh->num_entry_point_offsets; i++)
 759                 skip_bits(gb, offset_len);
 760         }
 761     }
 762
 763     if (s->pps->slice_header_extension_present_flag) {
 764         unsigned int length = get_ue_golomb_long(gb);
 765         for (i = 0; i < length; i++)
 766             skip_bits(gb, 8);  // slice_header_extension_data_byte
 767     }
 768
 769     // Inferred parameters
 770     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 771     if (sh->slice_qp > 51 ||
 772         sh->slice_qp < -s->sps->qp_bd_offset) {
 773         av_log(s->avctx, AV_LOG_ERROR,
 774                "The slice_qp %d is outside the valid range "
 775                "[%d, 51].\n",
 776                sh->slice_qp,
 777                -s->sps->qp_bd_offset);
 778         return AVERROR_INVALIDDATA;
 779     }
 780
 781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 782
 783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 785         return AVERROR_INVALIDDATA;
 786     }
 787
 788     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 789
 790     if (!s->pps->cu_qp_delta_enabled_flag)
 791         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
 792                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
 793
 794     s->slice_initialized = 1;
 795
 796     return 0;
 797 }
 798
 799 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 800
 801 #define SET_SAO(elem, value)                            \
 802 do {                                                    \
 803     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 804         sao->elem = value;                              \
 805     else if (sao_merge_left_flag)                       \
 806         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 807     else if (sao_merge_up_flag)                         \
 808         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 809     else                                                \
 810         sao->elem = 0;                                  \
 811 } while (0)
 812
 813 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 814 {
 815     HEVCLocalContext *lc    = &s->HEVClc;
 816     int sao_merge_left_flag = 0;
 817     int sao_merge_up_flag   = 0;
 818     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 819     SAOParams *sao          = &CTB(s->sao, rx, ry);
 820     int c_idx, i;
 821
 822     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 823         s->sh.slice_sample_adaptive_offset_flag[1]) {
 824         if (rx > 0) {
 825             if (lc->ctb_left_flag)
 826                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 827         }
 828         if (ry > 0 && !sao_merge_left_flag) {
 829             if (lc->ctb_up_flag)
 830                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 831         }
 832     }
 833
 834     for (c_idx = 0; c_idx < 3; c_idx++) {
 835         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 836             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 837             continue;
 838         }
 839
 840         if (c_idx == 2) {
 841             sao->type_idx[2] = sao->type_idx[1];
 842             sao->eo_class[2] = sao->eo_class[1];
 843         } else {
 844             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 845         }
 846
 847         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 848             continue;
 849
 850         for (i = 0; i < 4; i++)
 851             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 852
 853         if (sao->type_idx[c_idx] == SAO_BAND) {
 854             for (i = 0; i < 4; i++) {
 855                 if (sao->offset_abs[c_idx][i]) {
 856                     SET_SAO(offset_sign[c_idx][i],
 857                             ff_hevc_sao_offset_sign_decode(s));
 858                 } else {
 859                     sao->offset_sign[c_idx][i] = 0;
 860                 }
 861             }
 862             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 863         } else if (c_idx != 2) {
 864             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 865         }
 866
 867         // Inferred parameters
 868         sao->offset_val[c_idx][0] = 0;
 869         for (i = 0; i < 4; i++) {
 870             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 871             if (sao->type_idx[c_idx] == SAO_EDGE) {
 872                 if (i > 1)
 873                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 874             } else if (sao->offset_sign[c_idx][i]) {
 875                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 876             }
 877         }
 878     }
 879 }
 880
 881 #undef SET_SAO
 882 #undef CTB
 883
 884 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 885                                 int log2_trafo_size, enum ScanType scan_idx,
 886                                 int c_idx)
 887 {
 888 #define GET_COORD(offset, n)                                    \
 889     do {                                                        \
 890         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 891         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 892     } while (0)
 893     HEVCLocalContext *lc    = &s->HEVClc;
 894     int transform_skip_flag = 0;
 895
 896     int last_significant_coeff_x, last_significant_coeff_y;
 897     int last_scan_pos;
 898     int n_end;
 899     int num_coeff    = 0;
 900     int greater1_ctx = 1;
 901
 902     int num_last_subset;
 903     int x_cg_last_sig, y_cg_last_sig;
 904
 905     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 906
 907     ptrdiff_t stride = s->frame->linesize[c_idx];
 908     int hshift       = s->sps->hshift[c_idx];
 909     int vshift       = s->sps->vshift[c_idx];
 910     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 911                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 912     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 913     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 914
 915     int trafo_size = 1 << log2_trafo_size;
 916     int i, qp, shift, add, scale, scale_m;
 917     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 918     const uint8_t *scale_matrix;
 919     uint8_t dc_scale;
 920
 921     // Derive QP for dequant
 922     if (!lc->cu.cu_transquant_bypass_flag) {
 923         static const int qp_c[] = {
 924             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 925         };
 926
 927         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 928             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 929             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 931         };
 932
 933         static const uint8_t div6[51 + 2 * 6 + 1] = {
 934             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 935             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 936             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 937         };
 938         int qp_y = lc->qp_y;
 939
 940         if (c_idx == 0) {
 941             qp = qp_y + s->sps->qp_bd_offset;
 942         } else {
 943             int qp_i, offset;
 944
 945             if (c_idx == 1)
 946                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 947             else
 948                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 949
 950             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 951             if (qp_i < 30)
 952                 qp = qp_i;
 953             else if (qp_i > 43)
 954                 qp = qp_i - 6;
 955             else
 956                 qp = qp_c[qp_i - 30];
 957
 958             qp += s->sps->qp_bd_offset;
 959         }
 960
 961         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 962         add      = 1 << (shift - 1);
 963         scale    = level_scale[rem6[qp]] << (div6[qp]);
 964         scale_m  = 16; // default when no custom scaling lists.
 965         dc_scale = 16;
 966
 967         if (s->sps->scaling_list_enable_flag) {
 968             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 969                                     &s->pps->scaling_list : &s->sps->scaling_list;
 970             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 971
 972             if (log2_trafo_size != 5)
 973                 matrix_id = 3 * matrix_id + c_idx;
 974
 975             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 976             if (log2_trafo_size >= 4)
 977                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 978         }
 979     }
 980
 981     if (s->pps->transform_skip_enabled_flag &&
 982         !lc->cu.cu_transquant_bypass_flag   &&
 983         log2_trafo_size == 2) {
 984         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 985     }
 986
 987     last_significant_coeff_x =
 988         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 989     last_significant_coeff_y =
 990         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 991
 992     if (last_significant_coeff_x > 3) {
 993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 994         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 995                                    (2 + (last_significant_coeff_x & 1)) +
 996                                    suffix;
 997     }
 998
 999     if (last_significant_coeff_y > 3) {
1000         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1001         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1002                                    (2 + (last_significant_coeff_y & 1)) +
1003                                    suffix;
1004     }
1005
1006     if (scan_idx == SCAN_VERT)
1007         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1008
1009     x_cg_last_sig = last_significant_coeff_x >> 2;
1010     y_cg_last_sig = last_significant_coeff_y >> 2;
1011
1012     switch (scan_idx) {
1013     case SCAN_DIAG: {
1014         int last_x_c = last_significant_coeff_x & 3;
1015         int last_y_c = last_significant_coeff_y & 3;
1016
1017         scan_x_off = ff_hevc_diag_scan4x4_x;
1018         scan_y_off = ff_hevc_diag_scan4x4_y;
1019         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1020         if (trafo_size == 4) {
1021             scan_x_cg = scan_1x1;
1022             scan_y_cg = scan_1x1;
1023         } else if (trafo_size == 8) {
1024             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1025             scan_x_cg  = diag_scan2x2_x;
1026             scan_y_cg  = diag_scan2x2_y;
1027         } else if (trafo_size == 16) {
1028             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1029             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1030             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1031         } else { // trafo_size == 32
1032             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1033             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1034             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1035         }
1036         break;
1037     }
1038     case SCAN_HORIZ:
1039         scan_x_cg  = horiz_scan2x2_x;
1040         scan_y_cg  = horiz_scan2x2_y;
1041         scan_x_off = horiz_scan4x4_x;
1042         scan_y_off = horiz_scan4x4_y;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1044         break;
1045     default: //SCAN_VERT
1046         scan_x_cg  = horiz_scan2x2_y;
1047         scan_y_cg  = horiz_scan2x2_x;
1048         scan_x_off = horiz_scan4x4_y;
1049         scan_y_off = horiz_scan4x4_x;
1050         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1051         break;
1052     }
1053     num_coeff++;
1054     num_last_subset = (num_coeff - 1) >> 4;
1055
1056     for (i = num_last_subset; i >= 0; i--) {
1057         int n, m;
1058         int x_cg, y_cg, x_c, y_c;
1059         int implicit_non_zero_coeff = 0;
1060         int64_t trans_coeff_level;
1061         int prev_sig = 0;
1062         int offset   = i << 4;
1063
1064         uint8_t significant_coeff_flag_idx[16];
1065         uint8_t nb_significant_coeff_flag = 0;
1066
1067         x_cg = scan_x_cg[i];
1068         y_cg = scan_y_cg[i];
1069
1070         if (i < num_last_subset && i > 0) {
1071             int ctx_cg = 0;
1072             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1073                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1074             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1076
1077             significant_coeff_group_flag[x_cg][y_cg] =
1078                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1079             implicit_non_zero_coeff = 1;
1080         } else {
1081             significant_coeff_group_flag[x_cg][y_cg] =
1082                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1083                  (x_cg == 0 && y_cg == 0));
1084         }
1085
1086         last_scan_pos = num_coeff - offset - 1;
1087
1088         if (i == num_last_subset) {
1089             n_end                         = last_scan_pos - 1;
1090             significant_coeff_flag_idx[0] = last_scan_pos;
1091             nb_significant_coeff_flag     = 1;
1092         } else {
1093             n_end = 15;
1094         }
1095
1096         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1097             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1098         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1100
1101         for (n = n_end; n >= 0; n--) {
1102             GET_COORD(offset, n);
1103
1104             if (significant_coeff_group_flag[x_cg][y_cg] &&
1105                 (n > 0 || implicit_non_zero_coeff == 0)) {
1106                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1107                                                           log2_trafo_size,
1108                                                           scan_idx,
1109                                                           prev_sig) == 1) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                     implicit_non_zero_coeff = 0;
1113                 }
1114             } else {
1115                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1116                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1117                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1118                     nb_significant_coeff_flag++;
1119                 }
1120             }
1121         }
1122
1123         n_end = nb_significant_coeff_flag;
1124
1125         if (n_end) {
1126             int first_nz_pos_in_cg = 16;
1127             int last_nz_pos_in_cg = -1;
1128             int c_rice_param = 0;
1129             int first_greater1_coeff_idx = -1;
1130             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1131             uint16_t coeff_sign_flag;
1132             int sum_abs = 0;
1133             int sign_hidden = 0;
1134
1135             // initialize first elem of coeff_bas_level_greater1_flag
1136             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1137
1138             if (!(i == num_last_subset) && greater1_ctx == 0)
1139                 ctx_set++;
1140             greater1_ctx      = 1;
1141             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1142
1143             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1144                 int n_idx = significant_coeff_flag_idx[m];
1145                 int inc   = (ctx_set << 2) + greater1_ctx;
1146                 coeff_abs_level_greater1_flag[n_idx] =
1147                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1148                 if (coeff_abs_level_greater1_flag[n_idx]) {
1149                     greater1_ctx = 0;
1150                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1151                     greater1_ctx++;
1152                 }
1153
1154                 if (coeff_abs_level_greater1_flag[n_idx] &&
1155                     first_greater1_coeff_idx == -1)
1156                     first_greater1_coeff_idx = n_idx;
1157             }
1158             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1159             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1160                                  !lc->cu.cu_transquant_bypass_flag;
1161
1162             if (first_greater1_coeff_idx != -1) {
1163                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1164             }
1165             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1166                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1167             } else {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1169             }
1170
1171             for (m = 0; m < n_end; m++) {
1172                 n = significant_coeff_flag_idx[m];
1173                 GET_COORD(offset, n);
1174                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1175                 if (trans_coeff_level == ((m < 8) ?
1176                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1177                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1178
1179                     trans_coeff_level += last_coeff_abs_level_remaining;
1180                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1181                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1182                 }
1183                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1184                     sum_abs += trans_coeff_level;
1185                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1186                         trans_coeff_level = -trans_coeff_level;
1187                 }
1188                 if (coeff_sign_flag >> 15)
1189                     trans_coeff_level = -trans_coeff_level;
1190                 coeff_sign_flag <<= 1;
1191                 if (!lc->cu.cu_transquant_bypass_flag) {
1192                     if (s->sps->scaling_list_enable_flag) {
1193                         if (y_c || x_c || log2_trafo_size < 4) {
1194                             int pos;
1195                             switch (log2_trafo_size) {
1196                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1197                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1198                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1199                             default: pos = (y_c        << 2) +  x_c;
1200                             }
1201                             scale_m = scale_matrix[pos];
1202                         } else {
1203                             scale_m = dc_scale;
1204                         }
1205                     }
1206                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1207                     if(trans_coeff_level < 0) {
1208                         if((~trans_coeff_level) & 0xFffffffffff8000)
1209                             trans_coeff_level = -32768;
1210                     } else {
1211                         if (trans_coeff_level & 0xffffffffffff8000)
1212                             trans_coeff_level = 32767;
1213                     }
1214                 }
1215                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1216             }
1217         }
1218     }
1219
1220     if (lc->cu.cu_transquant_bypass_flag) {
1221         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1222     } else {
1223         if (transform_skip_flag)
1224             s->hevcdsp.transform_skip(dst, coeffs, stride);
1225         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1226                  log2_trafo_size == 2)
1227             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1228         else
1229             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1230     }
1231 }
1232
1233 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1234                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1235                               int log2_cb_size, int log2_trafo_size,
1236                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1237 {
1238     HEVCLocalContext *lc = &s->HEVClc;
1239
1240     if (lc->cu.pred_mode == MODE_INTRA) {
1241         int trafo_size = 1 << log2_trafo_size;
1242         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1243
1244         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1245         if (log2_trafo_size > 2) {
1246             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1247             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1248             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1250         } else if (blk_idx == 3) {
1251             trafo_size = trafo_size << s->sps->hshift[1];
1252             ff_hevc_set_neighbour_available(s, xBase, yBase,
1253                                             trafo_size, trafo_size);
1254             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1256         }
1257     }
1258
1259     if (cbf_luma || cbf_cb || cbf_cr) {
1260         int scan_idx   = SCAN_DIAG;
1261         int scan_idx_c = SCAN_DIAG;
1262
1263         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1264             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1265             if (lc->tu.cu_qp_delta != 0)
1266                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1267                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1268             lc->tu.is_cu_qp_delta_coded = 1;
1269
1270             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1271                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1272                 av_log(s->avctx, AV_LOG_ERROR,
1273                        "The cu_qp_delta %d is outside the valid range "
1274                        "[%d, %d].\n",
1275                        lc->tu.cu_qp_delta,
1276                        -(26 + s->sps->qp_bd_offset / 2),
1277                         (25 + s->sps->qp_bd_offset / 2));
1278                 return AVERROR_INVALIDDATA;
1279             }
1280
1281             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1282         }
1283
1284         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1285             if (lc->tu.cur_intra_pred_mode >= 6 &&
1286                 lc->tu.cur_intra_pred_mode <= 14) {
1287                 scan_idx = SCAN_VERT;
1288             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1289                        lc->tu.cur_intra_pred_mode <= 30) {
1290                 scan_idx = SCAN_HORIZ;
1291             }
1292
1293             if (lc->pu.intra_pred_mode_c >=  6 &&
1294                 lc->pu.intra_pred_mode_c <= 14) {
1295                 scan_idx_c = SCAN_VERT;
1296             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1297                        lc->pu.intra_pred_mode_c <= 30) {
1298                 scan_idx_c = SCAN_HORIZ;
1299             }
1300         }
1301
1302         if (cbf_luma)
1303             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1304         if (log2_trafo_size > 2) {
1305             if (cbf_cb)
1306                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1307             if (cbf_cr)
1308                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1309         } else if (blk_idx == 3) {
1310             if (cbf_cb)
1311                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1312             if (cbf_cr)
1313                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1314         }
1315     }
1316     return 0;
1317 }
1318
1319 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1320 {
1321     int cb_size          = 1 << log2_cb_size;
1322     int log2_min_pu_size = s->sps->log2_min_pu_size;
1323
1324     int min_pu_width     = s->sps->min_pu_width;
1325     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1326     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1327     int i, j;
1328
1329     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1330         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1331             s->is_pcm[i + j * min_pu_width] = 2;
1332 }
1333
1334 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1335                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1336                               int log2_cb_size, int log2_trafo_size,
1337                               int trafo_depth, int blk_idx,
1338                               int cbf_cb, int cbf_cr)
1339 {
1340     HEVCLocalContext *lc = &s->HEVClc;
1341     uint8_t split_transform_flag;
1342     int ret;
1343
1344     if (lc->cu.intra_split_flag) {
1345         if (trafo_depth == 1)
1346             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1347     } else {
1348         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1349     }
1350
1351     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1352         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1353         trafo_depth     < lc->cu.max_trafo_depth       &&
1354         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1355         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1356     } else {
1357         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1358                           lc->cu.pred_mode == MODE_INTER &&
1359                           lc->cu.part_mode != PART_2Nx2N &&
1360                           trafo_depth == 0;
1361
1362         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1363                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1364                                inter_split;
1365     }
1366
1367     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1368         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1369     else if (log2_trafo_size > 2 || trafo_depth == 0)
1370         cbf_cb = 0;
1371     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1372         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1373     else if (log2_trafo_size > 2 || trafo_depth == 0)
1374         cbf_cr = 0;
1375
1376     if (split_transform_flag) {
1377         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1378         const int x1 = x0 + trafo_size_split;
1379         const int y1 = y0 + trafo_size_split;
1380
1381 #define SUBDIVIDE(x, y, idx)                                                    \
1382 do {                                                                            \
1383     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1384                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1385                              cbf_cb, cbf_cr);                                   \
1386     if (ret < 0)                                                                \
1387         return ret;                                                             \
1388 } while (0)
1389
1390         SUBDIVIDE(x0, y0, 0);
1391         SUBDIVIDE(x1, y0, 1);
1392         SUBDIVIDE(x0, y1, 2);
1393         SUBDIVIDE(x1, y1, 3);
1394
1395 #undef SUBDIVIDE
1396     } else {
1397         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1398         int log2_min_tu_size = s->sps->log2_min_tb_size;
1399         int min_tu_width     = s->sps->min_tb_width;
1400         int cbf_luma         = 1;
1401
1402         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1403             cbf_cb || cbf_cr)
1404             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1405
1406         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1407                                  log2_cb_size, log2_trafo_size,
1408                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1409         if (ret < 0)
1410             return ret;
1411         // TODO: store cbf_luma somewhere else
1412         if (cbf_luma) {
1413             int i, j;
1414             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1415                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1416                     int x_tu = (x0 + j) >> log2_min_tu_size;
1417                     int y_tu = (y0 + i) >> log2_min_tu_size;
1418                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1419                 }
1420         }
1421         if (!s->sh.disable_deblocking_filter_flag) {
1422             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1423             if (s->pps->transquant_bypass_enable_flag &&
1424                 lc->cu.cu_transquant_bypass_flag)
1425                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1426         }
1427     }
1428     return 0;
1429 }
1430
1431 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1432 {
1433     //TODO: non-4:2:0 support
1434     HEVCLocalContext *lc = &s->HEVClc;
1435     GetBitContext gb;
1436     int cb_size   = 1 << log2_cb_size;
1437     int stride0   = s->frame->linesize[0];
1438     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1439     int   stride1 = s->frame->linesize[1];
1440     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1441     int   stride2 = s->frame->linesize[2];
1442     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1443
1444     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1445     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1446     int ret;
1447
1448     if (!s->sh.disable_deblocking_filter_flag)
1449         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1450
1451     ret = init_get_bits(&gb, pcm, length);
1452     if (ret < 0)
1453         return ret;
1454
1455     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1456     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1457     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1458     return 0;
1459 }
1460
1461 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1462 {
1463     HEVCLocalContext *lc = &s->HEVClc;
1464     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1465     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1466
1467     if (x)
1468         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1469     if (y)
1470         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1471
1472     switch (x) {
1473     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1474     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1475     case 0: lc->pu.mvd.x = 0;                               break;
1476     }
1477
1478     switch (y) {
1479     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1480     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1481     case 0: lc->pu.mvd.y = 0;                               break;
1482     }
1483 }
1484
1485 /**
1486  * 8.5.3.2.2.1 Luma sample interpolation process
1487  *
1488  * @param s HEVC decoding context
1489  * @param dst target buffer for block data at block position
1490  * @param dststride stride of the dst buffer
1491  * @param ref reference picture buffer at origin (0, 0)
1492  * @param mv motion vector (relative to block position) to get pixel data from
1493  * @param x_off horizontal position of block from origin (0, 0)
1494  * @param y_off vertical position of block from origin (0, 0)
1495  * @param block_w width of block
1496  * @param block_h height of block
1497  */
1498 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1499                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1500                     int block_w, int block_h)
1501 {
1502     HEVCLocalContext *lc = &s->HEVClc;
1503     uint8_t *src         = ref->data[0];
1504     ptrdiff_t srcstride  = ref->linesize[0];
1505     int pic_width        = s->sps->width;
1506     int pic_height       = s->sps->height;
1507
1508     int mx         = mv->x & 3;
1509     int my         = mv->y & 3;
1510     int extra_left = ff_hevc_qpel_extra_before[mx];
1511     int extra_top  = ff_hevc_qpel_extra_before[my];
1512
1513     x_off += mv->x >> 2;
1514     y_off += mv->y >> 2;
1515     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1516
1517     if (x_off < extra_left || y_off < extra_top ||
1518         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1519         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1520         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1521         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1522         int buf_offset = extra_top *
1523                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1524
1525         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1526                                  edge_emu_stride, srcstride,
1527                                  block_w + ff_hevc_qpel_extra[mx],
1528                                  block_h + ff_hevc_qpel_extra[my],
1529                                  x_off - extra_left, y_off - extra_top,
1530                                  pic_width, pic_height);
1531         src = lc->edge_emu_buffer + buf_offset;
1532         srcstride = edge_emu_stride;
1533     }
1534     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1535                                      block_h, lc->mc_buffer);
1536 }
1537
1538 /**
1539  * 8.5.3.2.2.2 Chroma sample interpolation process
1540  *
1541  * @param s HEVC decoding context
1542  * @param dst1 target buffer for block data at block position (U plane)
1543  * @param dst2 target buffer for block data at block position (V plane)
1544  * @param dststride stride of the dst1 and dst2 buffers
1545  * @param ref reference picture buffer at origin (0, 0)
1546  * @param mv motion vector (relative to block position) to get pixel data from
1547  * @param x_off horizontal position of block from origin (0, 0)
1548  * @param y_off vertical position of block from origin (0, 0)
1549  * @param block_w width of block
1550  * @param block_h height of block
1551  */
1552 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1553                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1554                       int x_off, int y_off, int block_w, int block_h)
1555 {
1556     HEVCLocalContext *lc = &s->HEVClc;
1557     uint8_t *src1        = ref->data[1];
1558     uint8_t *src2        = ref->data[2];
1559     ptrdiff_t src1stride = ref->linesize[1];
1560     ptrdiff_t src2stride = ref->linesize[2];
1561     int pic_width        = s->sps->width >> 1;
1562     int pic_height       = s->sps->height >> 1;
1563
1564     int mx = mv->x & 7;
1565     int my = mv->y & 7;
1566
1567     x_off += mv->x >> 3;
1568     y_off += mv->y >> 3;
1569     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1570     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1571
1572     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1573         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1574         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1575         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1576         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1577         int buf_offset1 = EPEL_EXTRA_BEFORE *
1578                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1579         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1580         int buf_offset2 = EPEL_EXTRA_BEFORE *
1581                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1582
1583         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1584                                  edge_emu_stride, src1stride,
1585                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1586                                  x_off - EPEL_EXTRA_BEFORE,
1587                                  y_off - EPEL_EXTRA_BEFORE,
1588                                  pic_width, pic_height);
1589
1590         src1 = lc->edge_emu_buffer + buf_offset1;
1591         src1stride = edge_emu_stride;
1592         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1593                                              block_w, block_h, mx, my, lc->mc_buffer);
1594
1595         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1596                                  edge_emu_stride, src2stride,
1597                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1598                                  x_off - EPEL_EXTRA_BEFORE,
1599                                  y_off - EPEL_EXTRA_BEFORE,
1600                                  pic_width, pic_height);
1601         src2 = lc->edge_emu_buffer + buf_offset2;
1602         src2stride = edge_emu_stride;
1603
1604         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1605                                              block_w, block_h, mx, my,
1606                                              lc->mc_buffer);
1607     } else {
1608         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1609                                              block_w, block_h, mx, my,
1610                                              lc->mc_buffer);
1611         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1612                                              block_w, block_h, mx, my,
1613                                              lc->mc_buffer);
1614     }
1615 }
1616
1617 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1618                                 const Mv *mv, int y0, int height)
1619 {
1620     int y = (mv->y >> 2) + y0 + height + 9;
1621     ff_thread_await_progress(&ref->tf, y, 0);
1622 }
1623
1624 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1625                                 int nPbW, int nPbH,
1626                                 int log2_cb_size, int partIdx)
1627 {
1628 #define POS(c_idx, x, y)                                                              \
1629     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1630                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1631     HEVCLocalContext *lc = &s->HEVClc;
1632     int merge_idx = 0;
1633     struct MvField current_mv = {{{ 0 }}};
1634
1635     int min_pu_width = s->sps->min_pu_width;
1636
1637     MvField *tab_mvf = s->ref->tab_mvf;
1638     RefPicList  *refPicList = s->ref->refPicList;
1639     HEVCFrame *ref0, *ref1;
1640
1641     int tmpstride = MAX_PB_SIZE;
1642
1643     uint8_t *dst0 = POS(0, x0, y0);
1644     uint8_t *dst1 = POS(1, x0, y0);
1645     uint8_t *dst2 = POS(2, x0, y0);
1646     int log2_min_cb_size = s->sps->log2_min_cb_size;
1647     int min_cb_width     = s->sps->min_cb_width;
1648     int x_cb             = x0 >> log2_min_cb_size;
1649     int y_cb             = y0 >> log2_min_cb_size;
1650     int ref_idx[2];
1651     int x_pu, y_pu;
1652     int i, j;
1653
1654     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1655
1656     if (!skip_flag)
1657         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1658
1659     if (skip_flag || lc->pu.merge_flag) {
1660         if (s->sh.max_num_merge_cand > 1)
1661             merge_idx = ff_hevc_merge_idx_decode(s);
1662         else
1663             merge_idx = 0;
1664
1665         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1666                                    partIdx, merge_idx, &current_mv);
1667     } else {
1668         enum InterPredIdc inter_pred_idc = PRED_L0;
1669         int mvp_flag;
1670
1671         ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1672         if (s->sh.slice_type == B_SLICE)
1673             inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1674
1675         if (inter_pred_idc != PRED_L1) {
1676             if (s->sh.nb_refs[L0]) {
1677                 ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1678                 current_mv.ref_idx[0] = ref_idx[0];
1679             }
1680             current_mv.pred_flag[0] = 1;
1681             hls_mvd_coding(s, x0, y0, 0);
1682             mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1683             ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1684                                      partIdx, merge_idx, &current_mv,
1685                                      mvp_flag, 0);
1686             current_mv.mv[0].x += lc->pu.mvd.x;
1687             current_mv.mv[0].y += lc->pu.mvd.y;
1688         }
1689
1690         if (inter_pred_idc != PRED_L0) {
1691             if (s->sh.nb_refs[L1]) {
1692                 ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1693                 current_mv.ref_idx[1] = ref_idx[1];
1694             }
1695
1696             if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1697                 AV_ZERO32(&lc->pu.mvd);
1698             } else {
1699                 hls_mvd_coding(s, x0, y0, 1);
1700             }
1701
1702             current_mv.pred_flag[1] = 1;
1703             mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1704             ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1705                                      partIdx, merge_idx, &current_mv,
1706                                      mvp_flag, 1);
1707             current_mv.mv[1].x += lc->pu.mvd.x;
1708             current_mv.mv[1].y += lc->pu.mvd.y;
1709         }
1710     }
1711
1712     x_pu = x0 >> s->sps->log2_min_pu_size;
1713     y_pu = y0 >> s->sps->log2_min_pu_size;
1714
1715     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1716         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1717             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1718
1719     if (current_mv.pred_flag[0]) {
1720         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1721         if (!ref0)
1722             return;
1723         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1724     }
1725     if (current_mv.pred_flag[1]) {
1726         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1727         if (!ref1)
1728             return;
1729         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1730     }
1731
1732     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1733         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1734         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1735
1736         luma_mc(s, tmp, tmpstride, ref0->frame,
1737                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1738
1739         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1740             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1741             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1742                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1743                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1744                                      dst0, s->frame->linesize[0], tmp,
1745                                      tmpstride, nPbW, nPbH);
1746         } else {
1747             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1748         }
1749         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1750                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1751
1752         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1753             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1754             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1755                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1756                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1757                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1758                                      nPbW / 2, nPbH / 2);
1759             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1760                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1761                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1762                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1763                                      nPbW / 2, nPbH / 2);
1764         } else {
1765             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1766             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1767         }
1768     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1769         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1770         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1771
1772         if (!ref1)
1773             return;
1774
1775         luma_mc(s, tmp, tmpstride, ref1->frame,
1776                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1777
1778         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1779             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1780             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1781                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1782                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1783                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1784                                       nPbW, nPbH);
1785         } else {
1786             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1787         }
1788
1789         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1790                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1791
1792         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1793             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1794             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1795                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1796                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1797                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1798             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1799                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1800                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1801                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1802         } else {
1803             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1804             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1805         }
1806     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1807         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1808         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1809         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1810         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1811         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1812         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1813
1814         if (!ref0 || !ref1)
1815             return;
1816
1817         luma_mc(s, tmp, tmpstride, ref0->frame,
1818                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1819         luma_mc(s, tmp2, tmpstride, ref1->frame,
1820                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1821
1822         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1823             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1824             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1825                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1826                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1827                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1828                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1829                                          dst0, s->frame->linesize[0],
1830                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1831         } else {
1832             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1833                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1834         }
1835
1836         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1837                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1838         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1839                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1840
1841         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1842             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1843             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1844                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1845                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1846                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1847                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1848                                          dst1, s->frame->linesize[1], tmp, tmp3,
1849                                          tmpstride, nPbW / 2, nPbH / 2);
1850             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1851                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1852                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1853                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1854                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1855                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1856                                          tmpstride, nPbW / 2, nPbH / 2);
1857         } else {
1858             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1859             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1860         }
1861     }
1862 }
1863
1864 /**
1865  * 8.4.1
1866  */
1867 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1868                                 int prev_intra_luma_pred_flag)
1869 {
1870     HEVCLocalContext *lc = &s->HEVClc;
1871     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1872     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1873     int min_pu_width     = s->sps->min_pu_width;
1874     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1875     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1876     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1877
1878     int cand_up   = (lc->ctb_up_flag || y0b) ?
1879                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1880     int cand_left = (lc->ctb_left_flag || x0b) ?
1881                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1882
1883     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1884
1885     MvField *tab_mvf = s->ref->tab_mvf;
1886     int intra_pred_mode;
1887     int candidate[3];
1888     int i, j;
1889
1890     // intra_pred_mode prediction does not cross vertical CTB boundaries
1891     if ((y0 - 1) < y_ctb)
1892         cand_up = INTRA_DC;
1893
1894     if (cand_left == cand_up) {
1895         if (cand_left < 2) {
1896             candidate[0] = INTRA_PLANAR;
1897             candidate[1] = INTRA_DC;
1898             candidate[2] = INTRA_ANGULAR_26;
1899         } else {
1900             candidate[0] = cand_left;
1901             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1902             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1903         }
1904     } else {
1905         candidate[0] = cand_left;
1906         candidate[1] = cand_up;
1907         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1908             candidate[2] = INTRA_PLANAR;
1909         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1910             candidate[2] = INTRA_DC;
1911         } else {
1912             candidate[2] = INTRA_ANGULAR_26;
1913         }
1914     }
1915
1916     if (prev_intra_luma_pred_flag) {
1917         intra_pred_mode = candidate[lc->pu.mpm_idx];
1918     } else {
1919         if (candidate[0] > candidate[1])
1920             FFSWAP(uint8_t, candidate[0], candidate[1]);
1921         if (candidate[0] > candidate[2])
1922             FFSWAP(uint8_t, candidate[0], candidate[2]);
1923         if (candidate[1] > candidate[2])
1924             FFSWAP(uint8_t, candidate[1], candidate[2]);
1925
1926         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1927         for (i = 0; i < 3; i++)
1928             if (intra_pred_mode >= candidate[i])
1929                 intra_pred_mode++;
1930     }
1931
1932     /* write the intra prediction units into the mv array */
1933     if (!size_in_pus)
1934         size_in_pus = 1;
1935     for (i = 0; i < size_in_pus; i++) {
1936         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1937                intra_pred_mode, size_in_pus);
1938
1939         for (j = 0; j < size_in_pus; j++) {
1940             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1941             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1942             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1943             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1944             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1945             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1946             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1947             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1948             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1949         }
1950     }
1951
1952     return intra_pred_mode;
1953 }
1954
1955 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1956                                           int log2_cb_size, int ct_depth)
1957 {
1958     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1959     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1960     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1961     int y;
1962
1963     for (y = 0; y < length; y++)
1964         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1965                ct_depth, length);
1966 }
1967
1968 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1969                                   int log2_cb_size)
1970 {
1971     HEVCLocalContext *lc = &s->HEVClc;
1972     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1973     uint8_t prev_intra_luma_pred_flag[4];
1974     int split   = lc->cu.part_mode == PART_NxN;
1975     int pb_size = (1 << log2_cb_size) >> split;
1976     int side    = split + 1;
1977     int chroma_mode;
1978     int i, j;
1979
1980     for (i = 0; i < side; i++)
1981         for (j = 0; j < side; j++)
1982             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1983
1984     for (i = 0; i < side; i++) {
1985         for (j = 0; j < side; j++) {
1986             if (prev_intra_luma_pred_flag[2 * i + j])
1987                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1988             else
1989                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1990
1991             lc->pu.intra_pred_mode[2 * i + j] =
1992                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1993                                      prev_intra_luma_pred_flag[2 * i + j]);
1994         }
1995     }
1996
1997     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1998     if (chroma_mode != 4) {
1999         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2000             lc->pu.intra_pred_mode_c = 34;
2001         else
2002             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2003     } else {
2004         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2005     }
2006 }
2007
2008 static void intra_prediction_unit_default_value(HEVCContext *s,
2009                                                 int x0, int y0,
2010                                                 int log2_cb_size)
2011 {
2012     HEVCLocalContext *lc = &s->HEVClc;
2013     int pb_size          = 1 << log2_cb_size;
2014     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2015     int min_pu_width     = s->sps->min_pu_width;
2016     MvField *tab_mvf     = s->ref->tab_mvf;
2017     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2018     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2019     int j, k;
2020
2021     if (size_in_pus == 0)
2022         size_in_pus = 1;
2023     for (j = 0; j < size_in_pus; j++) {
2024         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2025         for (k = 0; k < size_in_pus; k++)
2026             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2027     }
2028 }
2029
2030 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2031 {
2032     int cb_size          = 1 << log2_cb_size;
2033     HEVCLocalContext *lc = &s->HEVClc;
2034     int log2_min_cb_size = s->sps->log2_min_cb_size;
2035     int length           = cb_size >> log2_min_cb_size;
2036     int min_cb_width     = s->sps->min_cb_width;
2037     int x_cb             = x0 >> log2_min_cb_size;
2038     int y_cb             = y0 >> log2_min_cb_size;
2039     int x, y, ret;
2040
2041     lc->cu.x                = x0;
2042     lc->cu.y                = y0;
2043     lc->cu.pred_mode        = MODE_INTRA;
2044     lc->cu.part_mode        = PART_2Nx2N;
2045     lc->cu.intra_split_flag = 0;
2046
2047     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2048     for (x = 0; x < 4; x++)
2049         lc->pu.intra_pred_mode[x] = 1;
2050     if (s->pps->transquant_bypass_enable_flag) {
2051         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2052         if (lc->cu.cu_transquant_bypass_flag)
2053             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2054     } else
2055         lc->cu.cu_transquant_bypass_flag = 0;
2056
2057     if (s->sh.slice_type != I_SLICE) {
2058         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2059
2060         x = y_cb * min_cb_width + x_cb;
2061         for (y = 0; y < length; y++) {
2062             memset(&s->skip_flag[x], skip_flag, length);
2063             x += min_cb_width;
2064         }
2065         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2066     }
2067
2068     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2069         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2070         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2071
2072         if (!s->sh.disable_deblocking_filter_flag)
2073             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2074     } else {
2075         int pcm_flag = 0;
2076
2077         if (s->sh.slice_type != I_SLICE)
2078             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2079         if (lc->cu.pred_mode != MODE_INTRA ||
2080             log2_cb_size == s->sps->log2_min_cb_size) {
2081             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2082             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2083                                       lc->cu.pred_mode == MODE_INTRA;
2084         }
2085
2086         if (lc->cu.pred_mode == MODE_INTRA) {
2087             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2088                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2089                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2090                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2091             }
2092             if (pcm_flag) {
2093                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2094                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2095                 if (s->sps->pcm.loop_filter_disable_flag)
2096                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2097
2098                 if (ret < 0)
2099                     return ret;
2100             } else {
2101                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2102             }
2103         } else {
2104             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2105             switch (lc->cu.part_mode) {
2106             case PART_2Nx2N:
2107                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2108                 break;
2109             case PART_2NxN:
2110                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2111                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2112                 break;
2113             case PART_Nx2N:
2114                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2115                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2116                 break;
2117             case PART_2NxnU:
2118                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2119                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2120                 break;
2121             case PART_2NxnD:
2122                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2123                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2124                 break;
2125             case PART_nLx2N:
2126                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2127                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2128                 break;
2129             case PART_nRx2N:
2130                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2131                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2132                 break;
2133             case PART_NxN:
2134                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2135                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2136                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2137                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2138                 break;
2139             }
2140         }
2141
2142         if (!pcm_flag) {
2143             int rqt_root_cbf = 1;
2144
2145             if (lc->cu.pred_mode != MODE_INTRA &&
2146                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2147                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2148             }
2149             if (rqt_root_cbf) {
2150                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2151                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2152                                          s->sps->max_transform_hierarchy_depth_inter;
2153                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2154                                          log2_cb_size,
2155                                          log2_cb_size, 0, 0, 0, 0);
2156                 if (ret < 0)
2157                     return ret;
2158             } else {
2159                 if (!s->sh.disable_deblocking_filter_flag)
2160                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2161             }
2162         }
2163     }
2164
2165     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2166         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2167
2168     x = y_cb * min_cb_width + x_cb;
2169     for (y = 0; y < length; y++) {
2170         memset(&s->qp_y_tab[x], lc->qp_y, length);
2171         x += min_cb_width;
2172     }
2173
2174     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2175
2176     return 0;
2177 }
2178
2179 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2180                                int log2_cb_size, int cb_depth)
2181 {
2182     HEVCLocalContext *lc = &s->HEVClc;
2183     const int cb_size    = 1 << log2_cb_size;
2184     int split_cu;
2185
2186     lc->ct.depth = cb_depth;
2187     if (x0 + cb_size <= s->sps->width  &&
2188         y0 + cb_size <= s->sps->height &&
2189         log2_cb_size > s->sps->log2_min_cb_size) {
2190         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2191     } else {
2192         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2193     }
2194     if (s->pps->cu_qp_delta_enabled_flag &&
2195         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2196         lc->tu.is_cu_qp_delta_coded = 0;
2197         lc->tu.cu_qp_delta          = 0;
2198     }
2199
2200     if (split_cu) {
2201         const int cb_size_split = cb_size >> 1;
2202         const int x1 = x0 + cb_size_split;
2203         const int y1 = y0 + cb_size_split;
2204
2205         log2_cb_size--;
2206         cb_depth++;
2207
2208 #define SUBDIVIDE(x, y)                                                \
2209 do {                                                                   \
2210     if (x < s->sps->width && y < s->sps->height) {                     \
2211         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2212         if (ret < 0)                                                   \
2213             return ret;                                                \
2214     }                                                                  \
2215 } while (0)
2216
2217         SUBDIVIDE(x0, y0);
2218         SUBDIVIDE(x1, y0);
2219         SUBDIVIDE(x0, y1);
2220         SUBDIVIDE(x1, y1);
2221     } else {
2222         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2223         if (ret < 0)
2224             return ret;
2225     }
2226
2227     return 0;
2228 }
2229
2230 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2231                                  int ctb_addr_ts)
2232 {
2233     HEVCLocalContext *lc  = &s->HEVClc;
2234     int ctb_size          = 1 << s->sps->log2_ctb_size;
2235     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2236     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2237
2238     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2239
2240     if (s->pps->entropy_coding_sync_enabled_flag) {
2241         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2242             lc->first_qp_group = 1;
2243         lc->end_of_tiles_x = s->sps->width;
2244     } else if (s->pps->tiles_enabled_flag) {
2245         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2246             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2247             lc->start_of_tiles_x = x_ctb;
2248             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2249             lc->first_qp_group   = 1;
2250         }
2251     } else {
2252         lc->end_of_tiles_x = s->sps->width;
2253     }
2254
2255     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2256
2257     lc->boundary_flags = 0;
2258     if (s->pps->tiles_enabled_flag) {
2259         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2260             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2261         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2262             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2263         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2264             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2265         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2266             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2267     } else {
2268         if (!ctb_addr_in_slice > 0)
2269             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2270         if (ctb_addr_in_slice < s->sps->ctb_width)
2271             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2272     }
2273
2274     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2275     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2276     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2277     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2278 }
2279
2280 static int hls_slice_data(HEVCContext *s)
2281 {
2282     int ctb_size    = 1 << s->sps->log2_ctb_size;
2283     int more_data   = 1;
2284     int x_ctb       = 0;
2285     int y_ctb       = 0;
2286     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2287     int ret;
2288
2289     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2290         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2291
2292         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2293         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2294         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2295
2296         ff_hevc_cabac_init(s, ctb_addr_ts);
2297
2298         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2299
2300         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2301         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2302         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2303
2304         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2305         if (ret < 0)
2306             return ret;
2307         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2308
2309         ctb_addr_ts++;
2310         ff_hevc_save_states(s, ctb_addr_ts);
2311         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2312     }
2313
2314     if (x_ctb + ctb_size >= s->sps->width &&
2315         y_ctb + ctb_size >= s->sps->height)
2316         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2317
2318     return ctb_addr_ts;
2319 }
2320
2321 /**
2322  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2323  * 0 if the unit should be skipped, 1 otherwise
2324  */
2325 static int hls_nal_unit(HEVCContext *s)
2326 {
2327     GetBitContext *gb = &s->HEVClc.gb;
2328     int nuh_layer_id;
2329
2330     if (get_bits1(gb) != 0)
2331         return AVERROR_INVALIDDATA;
2332
2333     s->nal_unit_type = get_bits(gb, 6);
2334
2335     nuh_layer_id   = get_bits(gb, 6);
2336     s->temporal_id = get_bits(gb, 3) - 1;
2337     if (s->temporal_id < 0)
2338         return AVERROR_INVALIDDATA;
2339
2340     av_log(s->avctx, AV_LOG_DEBUG,
2341            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2342            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2343
2344     return nuh_layer_id == 0;
2345 }
2346
2347 static void restore_tqb_pixels(HEVCContext *s)
2348 {
2349     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2350     int x, y, c_idx;
2351
2352     for (c_idx = 0; c_idx < 3; c_idx++) {
2353         ptrdiff_t stride = s->frame->linesize[c_idx];
2354         int hshift       = s->sps->hshift[c_idx];
2355         int vshift       = s->sps->vshift[c_idx];
2356         for (y = 0; y < s->sps->min_pu_height; y++) {
2357             for (x = 0; x < s->sps->min_pu_width; x++) {
2358                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2359                     int n;
2360                     int len      = min_pu_size >> hshift;
2361                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2362                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2363                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2364                         memcpy(dst, src, len);
2365                         src += stride;
2366                         dst += stride;
2367                     }
2368                 }
2369             }
2370         }
2371     }
2372 }
2373
2374 static int set_side_data(HEVCContext *s)
2375 {
2376     AVFrame *out = s->ref->frame;
2377
2378     if (s->sei_frame_packing_present &&
2379         s->frame_packing_arrangement_type >= 3 &&
2380         s->frame_packing_arrangement_type <= 5 &&
2381         s->content_interpretation_type > 0 &&
2382         s->content_interpretation_type < 3) {
2383         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2384         if (!stereo)
2385             return AVERROR(ENOMEM);
2386
2387         switch (s->frame_packing_arrangement_type) {
2388         case 3:
2389             if (s->quincunx_subsampling)
2390                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2391             else
2392                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2393             break;
2394         case 4:
2395             stereo->type = AV_STEREO3D_TOPBOTTOM;
2396             break;
2397         case 5:
2398             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2399             break;
2400         }
2401
2402         if (s->content_interpretation_type == 2)
2403             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2404     }
2405
2406     if (s->sei_display_orientation_present &&
2407         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2408         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2409         AVFrameSideData *rotation = av_frame_new_side_data(out,
2410                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2411                                                            sizeof(int32_t) * 9);
2412         if (!rotation)
2413             return AVERROR(ENOMEM);
2414
2415         av_display_rotation_set((int32_t *)rotation->data, angle);
2416         av_display_matrix_flip((int32_t *)rotation->data,
2417                                s->sei_hflip, s->sei_vflip);
2418     }
2419
2420     return 0;
2421 }
2422
2423 static int hevc_frame_start(HEVCContext *s)
2424 {
2425     HEVCLocalContext *lc = &s->HEVClc;
2426     int ret;
2427
2428     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2429     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2430     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2431     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2432
2433     lc->start_of_tiles_x = 0;
2434     s->is_decoded        = 0;
2435     s->first_nal_type    = s->nal_unit_type;
2436
2437     if (s->pps->tiles_enabled_flag)
2438         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2439
2440     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2441                               s->poc);
2442     if (ret < 0)
2443         goto fail;
2444
2445     ret = ff_hevc_frame_rps(s);
2446     if (ret < 0) {
2447         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2448         goto fail;
2449     }
2450
2451     s->ref->frame->key_frame = IS_IRAP(s);
2452
2453     ret = set_side_data(s);
2454     if (ret < 0)
2455         goto fail;
2456
2457     av_frame_unref(s->output_frame);
2458     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2459     if (ret < 0)
2460         goto fail;
2461
2462     ff_thread_finish_setup(s->avctx);
2463
2464     return 0;
2465
2466 fail:
2467     if (s->ref)
2468         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2469     s->ref = NULL;
2470     return ret;
2471 }
2472
2473 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2474 {
2475     HEVCLocalContext *lc = &s->HEVClc;
2476     GetBitContext *gb    = &lc->gb;
2477     int ctb_addr_ts, ret;
2478
2479     ret = init_get_bits8(gb, nal, length);
2480     if (ret < 0)
2481         return ret;
2482
2483     ret = hls_nal_unit(s);
2484     if (ret < 0) {
2485         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2486                s->nal_unit_type);
2487         goto fail;
2488     } else if (!ret)
2489         return 0;
2490
2491     switch (s->nal_unit_type) {
2492     case NAL_VPS:
2493         ret = ff_hevc_decode_nal_vps(s);
2494         if (ret < 0)
2495             goto fail;
2496         break;
2497     case NAL_SPS:
2498         ret = ff_hevc_decode_nal_sps(s);
2499         if (ret < 0)
2500             goto fail;
2501         break;
2502     case NAL_PPS:
2503         ret = ff_hevc_decode_nal_pps(s);
2504         if (ret < 0)
2505             goto fail;
2506         break;
2507     case NAL_SEI_PREFIX:
2508     case NAL_SEI_SUFFIX:
2509         ret = ff_hevc_decode_nal_sei(s);
2510         if (ret < 0)
2511             goto fail;
2512         break;
2513     case NAL_TRAIL_R:
2514     case NAL_TRAIL_N:
2515     case NAL_TSA_N:
2516     case NAL_TSA_R:
2517     case NAL_STSA_N:
2518     case NAL_STSA_R:
2519     case NAL_BLA_W_LP:
2520     case NAL_BLA_W_RADL:
2521     case NAL_BLA_N_LP:
2522     case NAL_IDR_W_RADL:
2523     case NAL_IDR_N_LP:
2524     case NAL_CRA_NUT:
2525     case NAL_RADL_N:
2526     case NAL_RADL_R:
2527     case NAL_RASL_N:
2528     case NAL_RASL_R:
2529         ret = hls_slice_header(s);
2530         if (ret < 0)
2531             return ret;
2532
2533         if (s->max_ra == INT_MAX) {
2534             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2535                 s->max_ra = s->poc;
2536             } else {
2537                 if (IS_IDR(s))
2538                     s->max_ra = INT_MIN;
2539             }
2540         }
2541
2542         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2543             s->poc <= s->max_ra) {
2544             s->is_decoded = 0;
2545             break;
2546         } else {
2547             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2548                 s->max_ra = INT_MIN;
2549         }
2550
2551         if (s->sh.first_slice_in_pic_flag) {
2552             ret = hevc_frame_start(s);
2553             if (ret < 0)
2554                 return ret;
2555         } else if (!s->ref) {
2556             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2557             goto fail;
2558         }
2559
2560         if (s->nal_unit_type != s->first_nal_type) {
2561             av_log(s->avctx, AV_LOG_ERROR,
2562                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2563                    s->first_nal_type, s->nal_unit_type);
2564             return AVERROR_INVALIDDATA;
2565         }
2566
2567         if (!s->sh.dependent_slice_segment_flag &&
2568             s->sh.slice_type != I_SLICE) {
2569             ret = ff_hevc_slice_rpl(s);
2570             if (ret < 0) {
2571                 av_log(s->avctx, AV_LOG_WARNING,
2572                        "Error constructing the reference lists for the current slice.\n");
2573                 goto fail;
2574             }
2575         }
2576
2577         ctb_addr_ts = hls_slice_data(s);
2578         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2579             s->is_decoded = 1;
2580             if ((s->pps->transquant_bypass_enable_flag ||
2581                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2582                 s->sps->sao_enabled)
2583                 restore_tqb_pixels(s);
2584         }
2585
2586         if (ctb_addr_ts < 0) {
2587             ret = ctb_addr_ts;
2588             goto fail;
2589         }
2590         break;
2591     case NAL_EOS_NUT:
2592     case NAL_EOB_NUT:
2593         s->seq_decode = (s->seq_decode + 1) & 0xff;
2594         s->max_ra     = INT_MAX;
2595         break;
2596     case NAL_AUD:
2597     case NAL_FD_NUT:
2598         break;
2599     default:
2600         av_log(s->avctx, AV_LOG_INFO,
2601                "Skipping NAL unit %d\n", s->nal_unit_type);
2602     }
2603
2604     return 0;
2605 fail:
2606     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2607         return ret;
2608     return 0;
2609 }
2610
2611 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2612  * between these functions would be nice. */
2613 static int extract_rbsp(const uint8_t *src, int length,
2614                         HEVCNAL *nal)
2615 {
2616     int i, si, di;
2617     uint8_t *dst;
2618
2619 #define STARTCODE_TEST                                                  \
2620         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2621             if (src[i + 2] != 3) {                                      \
2622                 /* startcode, so we must be past the end */             \
2623                 length = i;                                             \
2624             }                                                           \
2625             break;                                                      \
2626         }
2627 #if HAVE_FAST_UNALIGNED
2628 #define FIND_FIRST_ZERO                                                 \
2629         if (i > 0 && !src[i])                                           \
2630             i--;                                                        \
2631         while (src[i])                                                  \
2632             i++
2633 #if HAVE_FAST_64BIT
2634     for (i = 0; i + 1 < length; i += 9) {
2635         if (!((~AV_RN64A(src + i) &
2636                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2637               0x8000800080008080ULL))
2638             continue;
2639         FIND_FIRST_ZERO;
2640         STARTCODE_TEST;
2641         i -= 7;
2642     }
2643 #else
2644     for (i = 0; i + 1 < length; i += 5) {
2645         if (!((~AV_RN32A(src + i) &
2646                (AV_RN32A(src + i) - 0x01000101U)) &
2647               0x80008080U))
2648             continue;
2649         FIND_FIRST_ZERO;
2650         STARTCODE_TEST;
2651         i -= 3;
2652     }
2653 #endif /* HAVE_FAST_64BIT */
2654 #else
2655     for (i = 0; i + 1 < length; i += 2) {
2656         if (src[i])
2657             continue;
2658         if (i > 0 && src[i - 1] == 0)
2659             i--;
2660         STARTCODE_TEST;
2661     }
2662 #endif /* HAVE_FAST_UNALIGNED */
2663
2664     if (i >= length - 1) { // no escaped 0
2665         nal->data = src;
2666         nal->size = length;
2667         return length;
2668     }
2669
2670     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2671                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2672     if (!nal->rbsp_buffer)
2673         return AVERROR(ENOMEM);
2674
2675     dst = nal->rbsp_buffer;
2676
2677     memcpy(dst, src, i);
2678     si = di = i;
2679     while (si + 2 < length) {
2680         // remove escapes (very rare 1:2^22)
2681         if (src[si + 2] > 3) {
2682             dst[di++] = src[si++];
2683             dst[di++] = src[si++];
2684         } else if (src[si] == 0 && src[si + 1] == 0) {
2685             if (src[si + 2] == 3) { // escape
2686                 dst[di++] = 0;
2687                 dst[di++] = 0;
2688                 si       += 3;
2689
2690                 continue;
2691             } else // next start code
2692                 goto nsc;
2693         }
2694
2695         dst[di++] = src[si++];
2696     }
2697     while (si < length)
2698         dst[di++] = src[si++];
2699
2700 nsc:
2701     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2702
2703     nal->data = dst;
2704     nal->size = di;
2705     return si;
2706 }
2707
2708 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2709 {
2710     int i, consumed, ret = 0;
2711
2712     s->ref = NULL;
2713     s->eos = 0;
2714
2715     /* split the input packet into NAL units, so we know the upper bound on the
2716      * number of slices in the frame */
2717     s->nb_nals = 0;
2718     while (length >= 4) {
2719         HEVCNAL *nal;
2720         int extract_length = 0;
2721
2722         if (s->is_nalff) {
2723             int i;
2724             for (i = 0; i < s->nal_length_size; i++)
2725                 extract_length = (extract_length << 8) | buf[i];
2726             buf    += s->nal_length_size;
2727             length -= s->nal_length_size;
2728
2729             if (extract_length > length) {
2730                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2731                 ret = AVERROR_INVALIDDATA;
2732                 goto fail;
2733             }
2734         } else {
2735             if (buf[2] == 0) {
2736                 length--;
2737                 buf++;
2738                 continue;
2739             }
2740             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2741                 ret = AVERROR_INVALIDDATA;
2742                 goto fail;
2743             }
2744
2745             buf           += 3;
2746             length        -= 3;
2747             extract_length = length;
2748         }
2749
2750         if (s->nals_allocated < s->nb_nals + 1) {
2751             int new_size = s->nals_allocated + 1;
2752             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2753             if (!tmp) {
2754                 ret = AVERROR(ENOMEM);
2755                 goto fail;
2756             }
2757             s->nals = tmp;
2758             memset(s->nals + s->nals_allocated, 0,
2759                    (new_size - s->nals_allocated) * sizeof(*tmp));
2760             s->nals_allocated = new_size;
2761         }
2762         nal = &s->nals[s->nb_nals++];
2763
2764         consumed = extract_rbsp(buf, extract_length, nal);
2765         if (consumed < 0) {
2766             ret = consumed;
2767             goto fail;
2768         }
2769
2770         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2771         if (ret < 0)
2772             goto fail;
2773         hls_nal_unit(s);
2774
2775         if (s->nal_unit_type == NAL_EOB_NUT ||
2776             s->nal_unit_type == NAL_EOS_NUT)
2777             s->eos = 1;
2778
2779         buf    += consumed;
2780         length -= consumed;
2781     }
2782
2783     /* parse the NAL units */
2784     for (i = 0; i < s->nb_nals; i++) {
2785         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2786         if (ret < 0) {
2787             av_log(s->avctx, AV_LOG_WARNING,
2788                    "Error parsing NAL unit #%d.\n", i);
2789             goto fail;
2790         }
2791     }
2792
2793 fail:
2794     if (s->ref)
2795         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2796
2797     return ret;
2798 }
2799
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits, without any
 * separator or trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2806
2807 static int verify_md5(HEVCContext *s, AVFrame *frame)
2808 {
2809     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2810     int pixel_shift;
2811     int i, j;
2812
2813     if (!desc)
2814         return AVERROR(EINVAL);
2815
2816     pixel_shift = desc->comp[0].depth_minus1 > 7;
2817
2818     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2819            s->poc);
2820
2821     /* the checksums are LE, so we have to byteswap for >8bpp formats
2822      * on BE arches */
2823 #if HAVE_BIGENDIAN
2824     if (pixel_shift && !s->checksum_buf) {
2825         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2826                        FFMAX3(frame->linesize[0], frame->linesize[1],
2827                               frame->linesize[2]));
2828         if (!s->checksum_buf)
2829             return AVERROR(ENOMEM);
2830     }
2831 #endif
2832
2833     for (i = 0; frame->data[i]; i++) {
2834         int width  = s->avctx->coded_width;
2835         int height = s->avctx->coded_height;
2836         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2837         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2838         uint8_t md5[16];
2839
2840         av_md5_init(s->md5_ctx);
2841         for (j = 0; j < h; j++) {
2842             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2843 #if HAVE_BIGENDIAN
2844             if (pixel_shift) {
2845                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2846                                     (const uint16_t *) src, w);
2847                 src = s->checksum_buf;
2848             }
2849 #endif
2850             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2851         }
2852         av_md5_final(s->md5_ctx, md5);
2853
2854         if (!memcmp(md5, s->md5[i], 16)) {
2855             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2856             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2857             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2858         } else {
2859             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2860             print_md5(s->avctx, AV_LOG_ERROR, md5);
2861             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2862             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2863             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2864             return AVERROR_INVALIDDATA;
2865         }
2866     }
2867
2868     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2869
2870     return 0;
2871 }
2872
2873 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2874                              AVPacket *avpkt)
2875 {
2876     int ret;
2877     HEVCContext *s = avctx->priv_data;
2878
2879     if (!avpkt->size) {
2880         ret = ff_hevc_output_frame(s, data, 1);
2881         if (ret < 0)
2882             return ret;
2883
2884         *got_output = ret;
2885         return 0;
2886     }
2887
2888     s->ref = NULL;
2889     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2890     if (ret < 0)
2891         return ret;
2892
2893     /* verify the SEI checksum */
2894     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2895         s->is_md5) {
2896         ret = verify_md5(s, s->ref->frame);
2897         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2898             ff_hevc_unref_frame(s, s->ref, ~0);
2899             return ret;
2900         }
2901     }
2902     s->is_md5 = 0;
2903
2904     if (s->is_decoded) {
2905         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2906         s->is_decoded = 0;
2907     }
2908
2909     if (s->output_frame->buf[0]) {
2910         av_frame_move_ref(data, s->output_frame);
2911         *got_output = 1;
2912     }
2913
2914     return avpkt->size;
2915 }
2916
2917 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2918 {
2919     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2920     if (ret < 0)
2921         return ret;
2922
2923     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2924     if (!dst->tab_mvf_buf)
2925         goto fail;
2926     dst->tab_mvf = src->tab_mvf;
2927
2928     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2929     if (!dst->rpl_tab_buf)
2930         goto fail;
2931     dst->rpl_tab = src->rpl_tab;
2932
2933     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2934     if (!dst->rpl_buf)
2935         goto fail;
2936
2937     dst->poc        = src->poc;
2938     dst->ctb_count  = src->ctb_count;
2939     dst->window     = src->window;
2940     dst->flags      = src->flags;
2941     dst->sequence   = src->sequence;
2942
2943     return 0;
2944 fail:
2945     ff_hevc_unref_frame(s, dst, ~0);
2946     return AVERROR(ENOMEM);
2947 }
2948
2949 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2950 {
2951     HEVCContext       *s = avctx->priv_data;
2952     int i;
2953
2954     pic_arrays_free(s);
2955
2956     av_freep(&s->md5_ctx);
2957
2958     av_frame_free(&s->tmp_frame);
2959     av_frame_free(&s->output_frame);
2960
2961     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2962         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2963         av_frame_free(&s->DPB[i].frame);
2964     }
2965
2966     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2967         av_buffer_unref(&s->vps_list[i]);
2968     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2969         av_buffer_unref(&s->sps_list[i]);
2970     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2971         av_buffer_unref(&s->pps_list[i]);
2972
2973     for (i = 0; i < s->nals_allocated; i++)
2974         av_freep(&s->nals[i].rbsp_buffer);
2975     av_freep(&s->nals);
2976     s->nals_allocated = 0;
2977
2978     return 0;
2979 }
2980
2981 static av_cold int hevc_init_context(AVCodecContext *avctx)
2982 {
2983     HEVCContext *s = avctx->priv_data;
2984     int i;
2985
2986     s->avctx = avctx;
2987
2988     s->tmp_frame = av_frame_alloc();
2989     if (!s->tmp_frame)
2990         goto fail;
2991
2992     s->output_frame = av_frame_alloc();
2993     if (!s->output_frame)
2994         goto fail;
2995
2996     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2997         s->DPB[i].frame = av_frame_alloc();
2998         if (!s->DPB[i].frame)
2999             goto fail;
3000         s->DPB[i].tf.f = s->DPB[i].frame;
3001     }
3002
3003     s->max_ra = INT_MAX;
3004
3005     s->md5_ctx = av_md5_alloc();
3006     if (!s->md5_ctx)
3007         goto fail;
3008
3009     ff_bswapdsp_init(&s->bdsp);
3010
3011     s->context_initialized = 1;
3012
3013     return 0;
3014
3015 fail:
3016     hevc_decode_free(avctx);
3017     return AVERROR(ENOMEM);
3018 }
3019
3020 static int hevc_update_thread_context(AVCodecContext *dst,
3021                                       const AVCodecContext *src)
3022 {
3023     HEVCContext *s  = dst->priv_data;
3024     HEVCContext *s0 = src->priv_data;
3025     int i, ret;
3026
3027     if (!s->context_initialized) {
3028         ret = hevc_init_context(dst);
3029         if (ret < 0)
3030             return ret;
3031     }
3032
3033     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3034         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3035         if (s0->DPB[i].frame->buf[0]) {
3036             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3037             if (ret < 0)
3038                 return ret;
3039         }
3040     }
3041
3042     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3043         av_buffer_unref(&s->vps_list[i]);
3044         if (s0->vps_list[i]) {
3045             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3046             if (!s->vps_list[i])
3047                 return AVERROR(ENOMEM);
3048         }
3049     }
3050
3051     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3052         av_buffer_unref(&s->sps_list[i]);
3053         if (s0->sps_list[i]) {
3054             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3055             if (!s->sps_list[i])
3056                 return AVERROR(ENOMEM);
3057         }
3058     }
3059
3060     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3061         av_buffer_unref(&s->pps_list[i]);
3062         if (s0->pps_list[i]) {
3063             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3064             if (!s->pps_list[i])
3065                 return AVERROR(ENOMEM);
3066         }
3067     }
3068
3069     if (s->sps != s0->sps)
3070         ret = set_sps(s, s0->sps);
3071
3072     s->seq_decode = s0->seq_decode;
3073     s->seq_output = s0->seq_output;
3074     s->pocTid0    = s0->pocTid0;
3075     s->max_ra     = s0->max_ra;
3076
3077     s->is_nalff        = s0->is_nalff;
3078     s->nal_length_size = s0->nal_length_size;
3079
3080     if (s0->eos) {
3081         s->seq_decode = (s->seq_decode + 1) & 0xff;
3082         s->max_ra = INT_MAX;
3083     }
3084
3085     return 0;
3086 }
3087
3088 static int hevc_decode_extradata(HEVCContext *s)
3089 {
3090     AVCodecContext *avctx = s->avctx;
3091     GetByteContext gb;
3092     int ret;
3093
3094     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3095
3096     if (avctx->extradata_size > 3 &&
3097         (avctx->extradata[0] || avctx->extradata[1] ||
3098          avctx->extradata[2] > 1)) {
3099         /* It seems the extradata is encoded as hvcC format.
3100          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3101          * is finalized. When finalized, configurationVersion will be 1 and we
3102          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3103         int i, j, num_arrays, nal_len_size;
3104
3105         s->is_nalff = 1;
3106
3107         bytestream2_skip(&gb, 21);
3108         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3109         num_arrays   = bytestream2_get_byte(&gb);
3110
3111         /* nal units in the hvcC always have length coded with 2 bytes,
3112          * so put a fake nal_length_size = 2 while parsing them */
3113         s->nal_length_size = 2;
3114
3115         /* Decode nal units from hvcC. */
3116         for (i = 0; i < num_arrays; i++) {
3117             int type = bytestream2_get_byte(&gb) & 0x3f;
3118             int cnt  = bytestream2_get_be16(&gb);
3119
3120             for (j = 0; j < cnt; j++) {
3121                 // +2 for the nal size field
3122                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3123                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3124                     av_log(s->avctx, AV_LOG_ERROR,
3125                            "Invalid NAL unit size in extradata.\n");
3126                     return AVERROR_INVALIDDATA;
3127                 }
3128
3129                 ret = decode_nal_units(s, gb.buffer, nalsize);
3130                 if (ret < 0) {
3131                     av_log(avctx, AV_LOG_ERROR,
3132                            "Decoding nal unit %d %d from hvcC failed\n",
3133                            type, i);
3134                     return ret;
3135                 }
3136                 bytestream2_skip(&gb, nalsize);
3137             }
3138         }
3139
3140         /* Now store right nal length size, that will be used to parse
3141          * all other nals */
3142         s->nal_length_size = nal_len_size;
3143     } else {
3144         s->is_nalff = 0;
3145         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3146         if (ret < 0)
3147             return ret;
3148     }
3149     return 0;
3150 }
3151
3152 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3153 {
3154     HEVCContext *s = avctx->priv_data;
3155     int ret;
3156
3157     ff_init_cabac_states();
3158
3159     avctx->internal->allocate_progress = 1;
3160
3161     ret = hevc_init_context(avctx);
3162     if (ret < 0)
3163         return ret;
3164
3165     if (avctx->extradata_size > 0 && avctx->extradata) {
3166         ret = hevc_decode_extradata(s);
3167         if (ret < 0) {
3168             hevc_decode_free(avctx);
3169             return ret;
3170         }
3171     }
3172
3173     return 0;
3174 }
3175
3176 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3177 {
3178     HEVCContext *s = avctx->priv_data;
3179     int ret;
3180
3181     memset(s, 0, sizeof(*s));
3182
3183     ret = hevc_init_context(avctx);
3184     if (ret < 0)
3185         return ret;
3186
3187     return 0;
3188 }
3189
3190 static void hevc_decode_flush(AVCodecContext *avctx)
3191 {
3192     HEVCContext *s = avctx->priv_data;
3193     ff_hevc_flush_dpb(s);
3194     s->max_ra = INT_MAX;
3195 }
3196
/* Byte offset of field x inside HEVCContext, for the AVOption table. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Option flags shared by the entries below: decoding + video parameter. */
#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3199
/* Profile id / display name pairs exposed through ff_hevc_decoder.profiles.
 * The list ends with an FF_PROFILE_UNKNOWN entry. */
static const AVProfile profiles[] = {
    { FF_PROFILE_HEVC_MAIN,                 "Main"                },
    { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
    { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
    { FF_PROFILE_UNKNOWN },
};
3206
/* Private decoder options, terminated by a NULL entry.
 * "apply_defdispwin" is an integer in [0, 1] (default 0) stored in
 * HEVCContext.apply_defdispwin. */
static const AVOption options[] = {
    { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
        AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
    { NULL },
};
3212
/* AVClass describing the decoder's private context; it ties the option
 * table above to HEVCContext and is referenced by ff_hevc_decoder. */
static const AVClass hevc_decoder_class = {
    .class_name = "HEVC decoder",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
3219
/* Codec descriptor for the HEVC decoder: identifies the codec and wires up
 * the init/close/decode/flush callbacks together with the two
 * frame-threading hooks (update_thread_context, init_thread_copy). */
AVCodec ff_hevc_decoder = {
    .name                  = "hevc",
    .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_HEVC,
    .priv_data_size        = sizeof(HEVCContext),
    .priv_class            = &hevc_decoder_class,
    .init                  = hevc_decode_init,
    .close                 = hevc_decode_free,
    .decode                = hevc_decode_frame,
    .flush                 = hevc_decode_flush,
    .update_thread_context = hevc_update_thread_context,
    .init_thread_copy      = hevc_init_thread_copy,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
                             CODEC_CAP_FRAME_THREADS,
    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
};