git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40
  /*
   * Quarter-pel padding tables, four entries each.
   * For every index, ff_hevc_qpel_extra[i] equals
   * ff_hevc_qpel_extra_before[i] + ff_hevc_qpel_extra_after[i].
   * NOTE(review): presumably indexed by the fractional sample position
   * (0..3) -- confirm against the users of these tables.
   */
  const uint8_t ff_hevc_qpel_extra_before[4] = {
      0, 3, 3, 2,
  };
  const uint8_t ff_hevc_qpel_extra_after[4] = {
      0, 3, 4, 4,
  };
  const uint8_t ff_hevc_qpel_extra[4] = {
      0, 6, 7, 6,
  };
  44
  /* Scan table for a single position. */
  static const uint8_t scan_1x1[1] = { 0 };

  /* Row-by-row scan of a 2x2 grid: x and y coordinate of each scan index. */
  static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
  static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };

  /* Row-by-row scan of a 4x4 grid: x coordinate of each scan index (i % 4). */
  static const uint8_t horiz_scan4x4_x[16] = {
      0, 1, 2, 3, 0, 1, 2, 3,
      0, 1, 2, 3, 0, 1, 2, 3,
  };

  /* Row-by-row scan of a 4x4 grid: y coordinate of each scan index (i / 4). */
  static const uint8_t horiz_scan4x4_y[16] = {
      0, 0, 0, 0, 1, 1, 1, 1,
      2, 2, 2, 2, 3, 3, 3, 3,
  };

  /*
   * Inverse horizontal scan of an 8x8 grid, indexed [y][x].
   * The grid is split into four 4x4 quadrants; the scan index is
   * 32 * (y / 4) + 16 * (x / 4) + 4 * (y % 4) + (x % 4).
   */
  static const uint8_t horiz_scan8x8_inv[8][8] = {
      {  0,  1,  2,  3, 16, 17, 18, 19 },
      {  4,  5,  6,  7, 20, 21, 22, 23 },
      {  8,  9, 10, 11, 24, 25, 26, 27 },
      { 12, 13, 14, 15, 28, 29, 30, 31 },
      { 32, 33, 34, 35, 48, 49, 50, 51 },
      { 36, 37, 38, 39, 52, 53, 54, 55 },
      { 40, 41, 42, 43, 56, 57, 58, 59 },
      { 44, 45, 46, 47, 60, 61, 62, 63 },
  };
  75
  /* Diagonal scan of a 2x2 grid: x and y coordinate of each scan index. */
  static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
  static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };

  /* Inverse of the 2x2 diagonal scan: scan index for position [y][x]. */
  static const uint8_t diag_scan2x2_inv[2][2] = {
      { 0, 2 },
      { 1, 3 },
  };

  /* Diagonal scan of a 4x4 grid: x coordinate of each scan index. */
  const uint8_t ff_hevc_diag_scan4x4_x[16] = {
      0, 0, 1, 0, 1, 2, 0, 1,
      2, 3, 1, 2, 3, 2, 3, 3,
  };

  /* Diagonal scan of a 4x4 grid: y coordinate of each scan index. */
  const uint8_t ff_hevc_diag_scan4x4_y[16] = {
      0, 1, 0, 2, 1, 0, 3, 2,
      1, 0, 3, 2, 1, 3, 2, 3,
  };

  /* Inverse of the 4x4 diagonal scan: scan index for position [y][x]. */
  static const uint8_t diag_scan4x4_inv[4][4] = {
      { 0,  2,  5,  9 },
      { 1,  4,  8, 12 },
      { 3,  7, 11, 14 },
      { 6, 10, 13, 15 },
  };
 105
 /* Diagonal scan of an 8x8 grid: x coordinate of each scan index. */
 const uint8_t ff_hevc_diag_scan8x8_x[64] = {
     0, 0, 1, 0, 1, 2, 0, 1,
     2, 3, 0, 1, 2, 3, 4, 0,
     1, 2, 3, 4, 5, 0, 1, 2,
     3, 4, 5, 6, 0, 1, 2, 3,
     4, 5, 6, 7, 1, 2, 3, 4,
     5, 6, 7, 2, 3, 4, 5, 6,
     7, 3, 4, 5, 6, 7, 4, 5,
     6, 7, 5, 6, 7, 6, 7, 7,
 };

 /* Diagonal scan of an 8x8 grid: y coordinate of each scan index. */
 const uint8_t ff_hevc_diag_scan8x8_y[64] = {
     0, 1, 0, 2, 1, 0, 3, 2,
     1, 0, 4, 3, 2, 1, 0, 5,
     4, 3, 2, 1, 0, 6, 5, 4,
     3, 2, 1, 0, 7, 6, 5, 4,
     3, 2, 1, 0, 7, 6, 5, 4,
     3, 2, 1, 7, 6, 5, 4, 3,
     2, 7, 6, 5, 4, 3, 7, 6,
     5, 4, 7, 6, 5, 7, 6, 7,
 };

 /* Inverse of the 8x8 diagonal scan: scan index for position [y][x]. */
 static const uint8_t diag_scan8x8_inv[8][8] = {
     {  0,  2,  5,  9, 14, 20, 27, 35 },
     {  1,  4,  8, 13, 19, 26, 34, 42 },
     {  3,  7, 12, 18, 25, 33, 41, 48 },
     {  6, 11, 17, 24, 32, 40, 47, 53 },
     { 10, 16, 23, 31, 39, 46, 52, 57 },
     { 15, 22, 30, 38, 45, 51, 56, 60 },
     { 21, 29, 37, 44, 50, 55, 59, 62 },
     { 28, 36, 43, 49, 54, 58, 61, 63 },
 };
 154
 155 /**
 156  * NOTE: Each function hls_foo corresponds to the function foo in the
 157  * specification (HLS stands for High Level Syntax).
 158  */
 159
 160 /**
 161  * Section 5.7
 162  */
 163
 164 /* free everything allocated  by pic_arrays_init() */
 165 static void pic_arrays_free(HEVCContext *s)
 166 {
 167     av_freep(&s->sao);
 168     av_freep(&s->deblock);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 195                            ((height >> log2_min_cb_size) + 1);
 196     int ctb_count        = sps->ctb_width * sps->ctb_height;
 197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 198
 199     s->bs_width  = width  >> 3;
 200     s->bs_height = height >> 3;
 201
 202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 204     if (!s->sao || !s->deblock)
 205         goto fail;
 206
 207     s->skip_flag    = av_malloc(pic_size_in_ctb);
 208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 209     if (!s->skip_flag || !s->tab_ct_depth)
 210         goto fail;
 211
 212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 213     s->tab_ipm  = av_mallocz(min_pu_size);
 214     s->is_pcm   = av_malloc(min_pu_size);
 215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 216         goto fail;
 217
 218     s->filter_slice_edges = av_malloc(ctb_count);
 219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 220                                       sizeof(*s->tab_slice_address));
 221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->qp_y_tab));
 223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 224         goto fail;
 225
 226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 228     if (!s->horizontal_bs || !s->vertical_bs)
 229         goto fail;
 230
 231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 232                                           av_buffer_alloc);
 233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 234                                           av_buffer_allocz);
 235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 236         goto fail;
 237
 238     return 0;
 239
 240 fail:
 241     pic_arrays_free(s);
 242     return AVERROR(ENOMEM);
 243 }
 244
 245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 246 {
 247     int i = 0;
 248     int j = 0;
 249     uint8_t luma_weight_l0_flag[16];
 250     uint8_t chroma_weight_l0_flag[16];
 251     uint8_t luma_weight_l1_flag[16];
 252     uint8_t chroma_weight_l1_flag[16];
 253
 254     s->sh.luma_log2_weight_denom = av_clip_c(get_ue_golomb_long(gb), 0, 7);
 255     if (s->sps->chroma_format_idc != 0) {
 256         int delta = get_se_golomb(gb);
 257         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
 258     }
 259
 260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 261         luma_weight_l0_flag[i] = get_bits1(gb);
 262         if (!luma_weight_l0_flag[i]) {
 263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 264             s->sh.luma_offset_l0[i] = 0;
 265         }
 266     }
 267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 269             chroma_weight_l0_flag[i] = get_bits1(gb);
 270     } else {
 271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 272             chroma_weight_l0_flag[i] = 0;
 273     }
 274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 275         if (luma_weight_l0_flag[i]) {
 276             int delta_luma_weight_l0 = get_se_golomb(gb);
 277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 279         }
 280         if (chroma_weight_l0_flag[i]) {
 281             for (j = 0; j < 2; j++) {
 282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 285                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 287             }
 288         } else {
 289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 290             s->sh.chroma_offset_l0[i][0] = 0;
 291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][1] = 0;
 293         }
 294     }
 295     if (s->sh.slice_type == B_SLICE) {
 296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 297             luma_weight_l1_flag[i] = get_bits1(gb);
 298             if (!luma_weight_l1_flag[i]) {
 299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 300                 s->sh.luma_offset_l1[i] = 0;
 301             }
 302         }
 303         if (s->sps->chroma_format_idc != 0) {
 304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 305                 chroma_weight_l1_flag[i] = get_bits1(gb);
 306         } else {
 307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 308                 chroma_weight_l1_flag[i] = 0;
 309         }
 310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 311             if (luma_weight_l1_flag[i]) {
 312                 int delta_luma_weight_l1 = get_se_golomb(gb);
 313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 315             }
 316             if (chroma_weight_l1_flag[i]) {
 317                 for (j = 0; j < 2; j++) {
 318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 321                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 323                 }
 324             } else {
 325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 326                 s->sh.chroma_offset_l1[i][0] = 0;
 327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][1] = 0;
 329             }
 330         }
 331     }
 332 }
 333
 334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 335 {
 336     const HEVCSPS *sps = s->sps;
 337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 338     int prev_delta_msb = 0;
 339     unsigned int nb_sps = 0, nb_sh;
 340     int i;
 341
 342     rps->nb_refs = 0;
 343     if (!sps->long_term_ref_pics_present_flag)
 344         return 0;
 345
 346     if (sps->num_long_term_ref_pics_sps > 0)
 347         nb_sps = get_ue_golomb_long(gb);
 348     nb_sh = get_ue_golomb_long(gb);
 349
 350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 351         return AVERROR_INVALIDDATA;
 352
 353     rps->nb_refs = nb_sh + nb_sps;
 354
 355     for (i = 0; i < rps->nb_refs; i++) {
 356         uint8_t delta_poc_msb_present;
 357
 358         if (i < nb_sps) {
 359             uint8_t lt_idx_sps = 0;
 360
 361             if (sps->num_long_term_ref_pics_sps > 1)
 362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 363
 364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 366         } else {
 367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 368             rps->used[i] = get_bits1(gb);
 369         }
 370
 371         delta_poc_msb_present = get_bits1(gb);
 372         if (delta_poc_msb_present) {
 373             int delta = get_ue_golomb_long(gb);
 374
 375             if (i && i != nb_sps)
 376                 delta += prev_delta_msb;
 377
 378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 379             prev_delta_msb = delta;
 380         }
 381     }
 382
 383     return 0;
 384 }
 385
 386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 387 {
 388     int ret;
 389     unsigned int num = 0, den = 0;
 390
 391     pic_arrays_free(s);
 392     ret = pic_arrays_init(s, sps);
 393     if (ret < 0)
 394         goto fail;
 395
 396     s->avctx->coded_width         = sps->width;
 397     s->avctx->coded_height        = sps->height;
 398     s->avctx->width               = sps->output_width;
 399     s->avctx->height              = sps->output_height;
 400     s->avctx->pix_fmt             = sps->pix_fmt;
 401     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 402
 403     ff_set_sar(s->avctx, sps->vui.sar);
 404
 405     if (sps->vui.video_signal_type_present_flag)
 406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 407                                                                : AVCOL_RANGE_MPEG;
 408     else
 409         s->avctx->color_range = AVCOL_RANGE_MPEG;
 410
 411     if (sps->vui.colour_description_present_flag) {
 412         s->avctx->color_primaries = sps->vui.colour_primaries;
 413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 415     } else {
 416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 419     }
 420
 421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 424
 425     if (sps->sao_enabled) {
 426         av_frame_unref(s->tmp_frame);
 427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 428         if (ret < 0)
 429             goto fail;
 430         s->frame = s->tmp_frame;
 431     }
 432
 433     s->sps = sps;
 434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 435
 436     if (s->vps->vps_timing_info_present_flag) {
 437         num = s->vps->vps_num_units_in_tick;
 438         den = s->vps->vps_time_scale;
 439     } else if (sps->vui.vui_timing_info_present_flag) {
 440         num = sps->vui.vui_num_units_in_tick;
 441         den = sps->vui.vui_time_scale;
 442     }
 443
 444     if (num != 0 && den != 0)
 445         av_reduce(&s->avctx->framerate.den, &s->avctx->framerate.num,
 446                   num, den, 1 << 30);
 447
 448     return 0;
 449
 450 fail:
 451     pic_arrays_free(s);
 452     s->sps = NULL;
 453     return ret;
 454 }
 455
 456 static int hls_slice_header(HEVCContext *s)
 457 {
 458     GetBitContext *gb = &s->HEVClc.gb;
 459     SliceHeader *sh   = &s->sh;
 460     int i, ret;
 461
 462     // Coded parameters
 463     sh->first_slice_in_pic_flag = get_bits1(gb);
 464     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 465         s->seq_decode = (s->seq_decode + 1) & 0xff;
 466         s->max_ra     = INT_MAX;
 467         if (IS_IDR(s))
 468             ff_hevc_clear_refs(s);
 469     }
 470     if (IS_IRAP(s))
 471         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 472
 473     sh->pps_id = get_ue_golomb_long(gb);
 474     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 475         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 476         return AVERROR_INVALIDDATA;
 477     }
 478     if (!sh->first_slice_in_pic_flag &&
 479         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 480         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 481         return AVERROR_INVALIDDATA;
 482     }
 483     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 484
 485     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 486         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 487
 488         ff_hevc_clear_refs(s);
 489         ret = set_sps(s, s->sps);
 490         if (ret < 0)
 491             return ret;
 492
 493         s->seq_decode = (s->seq_decode + 1) & 0xff;
 494         s->max_ra     = INT_MAX;
 495     }
 496
 497     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 498     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 499
 500     sh->dependent_slice_segment_flag = 0;
 501     if (!sh->first_slice_in_pic_flag) {
 502         int slice_address_length;
 503
 504         if (s->pps->dependent_slice_segments_enabled_flag)
 505             sh->dependent_slice_segment_flag = get_bits1(gb);
 506
 507         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 508                                             s->sps->ctb_height);
 509         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 510         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 511             av_log(s->avctx, AV_LOG_ERROR,
 512                    "Invalid slice segment address: %u.\n",
 513                    sh->slice_segment_addr);
 514             return AVERROR_INVALIDDATA;
 515         }
 516
 517         if (!sh->dependent_slice_segment_flag) {
 518             sh->slice_addr = sh->slice_segment_addr;
 519             s->slice_idx++;
 520         }
 521     } else {
 522         sh->slice_segment_addr = sh->slice_addr = 0;
 523         s->slice_idx           = 0;
 524         s->slice_initialized   = 0;
 525     }
 526
 527     if (!sh->dependent_slice_segment_flag) {
 528         s->slice_initialized = 0;
 529
 530         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 531             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 532
 533         sh->slice_type = get_ue_golomb_long(gb);
 534         if (!(sh->slice_type == I_SLICE ||
 535               sh->slice_type == P_SLICE ||
 536               sh->slice_type == B_SLICE)) {
 537             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 538                    sh->slice_type);
 539             return AVERROR_INVALIDDATA;
 540         }
 541         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 542             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 543             return AVERROR_INVALIDDATA;
 544         }
 545
 546         // when flag is not present, picture is inferred to be output
 547         sh->pic_output_flag = 1;
 548         if (s->pps->output_flag_present_flag)
 549             sh->pic_output_flag = get_bits1(gb);
 550
 551         if (s->sps->separate_colour_plane_flag)
 552             sh->colour_plane_id = get_bits(gb, 2);
 553
 554         if (!IS_IDR(s)) {
 555             int poc;
 556
 557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 560                 av_log(s->avctx, AV_LOG_WARNING,
 561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 563                     return AVERROR_INVALIDDATA;
 564                 poc = s->poc;
 565             }
 566             s->poc = poc;
 567
 568             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 569             if (!sh->short_term_ref_pic_set_sps_flag) {
 570                 int pos = get_bits_left(gb);
 571                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 572                 if (ret < 0)
 573                     return ret;
 574
 575                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 576                 sh->short_term_rps = &sh->slice_rps;
 577             } else {
 578                 int numbits, rps_idx;
 579
 580                 if (!s->sps->nb_st_rps) {
 581                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 582                     return AVERROR_INVALIDDATA;
 583                 }
 584
 585                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 586                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 587                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 588             }
 589
 590             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 591             if (ret < 0) {
 592                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 593                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 594                     return AVERROR_INVALIDDATA;
 595             }
 596
 597             if (s->sps->sps_temporal_mvp_enabled_flag)
 598                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 599             else
 600                 sh->slice_temporal_mvp_enabled_flag = 0;
 601         } else {
 602             s->sh.short_term_rps = NULL;
 603             s->poc               = 0;
 604         }
 605
 606         /* 8.3.1 */
 607         if (s->temporal_id == 0 &&
 608             s->nal_unit_type != NAL_TRAIL_N &&
 609             s->nal_unit_type != NAL_TSA_N   &&
 610             s->nal_unit_type != NAL_STSA_N  &&
 611             s->nal_unit_type != NAL_RADL_N  &&
 612             s->nal_unit_type != NAL_RADL_R  &&
 613             s->nal_unit_type != NAL_RASL_N  &&
 614             s->nal_unit_type != NAL_RASL_R)
 615             s->pocTid0 = s->poc;
 616
 617         if (s->sps->sao_enabled) {
 618             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 619             sh->slice_sample_adaptive_offset_flag[1] =
 620             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 621         } else {
 622             sh->slice_sample_adaptive_offset_flag[0] = 0;
 623             sh->slice_sample_adaptive_offset_flag[1] = 0;
 624             sh->slice_sample_adaptive_offset_flag[2] = 0;
 625         }
 626
 627         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 628         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 629             int nb_refs;
 630
 631             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 632             if (sh->slice_type == B_SLICE)
 633                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 634
 635             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 636                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 637                 if (sh->slice_type == B_SLICE)
 638                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 639             }
 640             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 641                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 642                        sh->nb_refs[L0], sh->nb_refs[L1]);
 643                 return AVERROR_INVALIDDATA;
 644             }
 645
 646             sh->rpl_modification_flag[0] = 0;
 647             sh->rpl_modification_flag[1] = 0;
 648             nb_refs = ff_hevc_frame_nb_refs(s);
 649             if (!nb_refs) {
 650                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 651                 return AVERROR_INVALIDDATA;
 652             }
 653
 654             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 655                 sh->rpl_modification_flag[0] = get_bits1(gb);
 656                 if (sh->rpl_modification_flag[0]) {
 657                     for (i = 0; i < sh->nb_refs[L0]; i++)
 658                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 659                 }
 660
 661                 if (sh->slice_type == B_SLICE) {
 662                     sh->rpl_modification_flag[1] = get_bits1(gb);
 663                     if (sh->rpl_modification_flag[1] == 1)
 664                         for (i = 0; i < sh->nb_refs[L1]; i++)
 665                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 666                 }
 667             }
 668
 669             if (sh->slice_type == B_SLICE)
 670                 sh->mvd_l1_zero_flag = get_bits1(gb);
 671
 672             if (s->pps->cabac_init_present_flag)
 673                 sh->cabac_init_flag = get_bits1(gb);
 674             else
 675                 sh->cabac_init_flag = 0;
 676
 677             sh->collocated_ref_idx = 0;
 678             if (sh->slice_temporal_mvp_enabled_flag) {
 679                 sh->collocated_list = L0;
 680                 if (sh->slice_type == B_SLICE)
 681                     sh->collocated_list = !get_bits1(gb);
 682
 683                 if (sh->nb_refs[sh->collocated_list] > 1) {
 684                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 685                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 686                         av_log(s->avctx, AV_LOG_ERROR,
 687                                "Invalid collocated_ref_idx: %d.\n",
 688                                sh->collocated_ref_idx);
 689                         return AVERROR_INVALIDDATA;
 690                     }
 691                 }
 692             }
 693
 694             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 695                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 696                 pred_weight_table(s, gb);
 697             }
 698
 699             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 700             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 701                 av_log(s->avctx, AV_LOG_ERROR,
 702                        "Invalid number of merging MVP candidates: %d.\n",
 703                        sh->max_num_merge_cand);
 704                 return AVERROR_INVALIDDATA;
 705             }
 706         }
 707
 708         sh->slice_qp_delta = get_se_golomb(gb);
 709
 710         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 711             sh->slice_cb_qp_offset = get_se_golomb(gb);
 712             sh->slice_cr_qp_offset = get_se_golomb(gb);
 713         } else {
 714             sh->slice_cb_qp_offset = 0;
 715             sh->slice_cr_qp_offset = 0;
 716         }
 717
 718         if (s->pps->deblocking_filter_control_present_flag) {
 719             int deblocking_filter_override_flag = 0;
 720
 721             if (s->pps->deblocking_filter_override_enabled_flag)
 722                 deblocking_filter_override_flag = get_bits1(gb);
 723
 724             if (deblocking_filter_override_flag) {
 725                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 726                 if (!sh->disable_deblocking_filter_flag) {
 727                     sh->beta_offset = get_se_golomb(gb) * 2;
 728                     sh->tc_offset   = get_se_golomb(gb) * 2;
 729                 }
 730             } else {
 731                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 732                 sh->beta_offset                    = s->pps->beta_offset;
 733                 sh->tc_offset                      = s->pps->tc_offset;
 734             }
 735         } else {
 736             sh->disable_deblocking_filter_flag = 0;
 737             sh->beta_offset                    = 0;
 738             sh->tc_offset                      = 0;
 739         }
 740
 741         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 742             (sh->slice_sample_adaptive_offset_flag[0] ||
 743              sh->slice_sample_adaptive_offset_flag[1] ||
 744              !sh->disable_deblocking_filter_flag)) {
 745             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 746         } else {
 747             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 748         }
 749     } else if (!s->slice_initialized) {
 750         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 751         return AVERROR_INVALIDDATA;
 752     }
 753
 754     sh->num_entry_point_offsets = 0;
 755     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 756         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 757         if (sh->num_entry_point_offsets > 0) {
 758             int offset_len = get_ue_golomb_long(gb) + 1;
 759
 760             for (i = 0; i < sh->num_entry_point_offsets; i++)
 761                 skip_bits(gb, offset_len);
 762         }
 763     }
 764
 765     if (s->pps->slice_header_extension_present_flag) {
 766         unsigned int length = get_ue_golomb_long(gb);
 767         for (i = 0; i < length; i++)
 768             skip_bits(gb, 8);  // slice_header_extension_data_byte
 769     }
 770
 771     // Inferred parameters
 772     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 773     if (sh->slice_qp > 51 ||
 774         sh->slice_qp < -s->sps->qp_bd_offset) {
 775         av_log(s->avctx, AV_LOG_ERROR,
 776                "The slice_qp %d is outside the valid range "
 777                "[%d, 51].\n",
 778                sh->slice_qp,
 779                -s->sps->qp_bd_offset);
 780         return AVERROR_INVALIDDATA;
 781     }
 782
 783     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 784
 785     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 786         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 787         return AVERROR_INVALIDDATA;
 788     }
 789
 790     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 791
 792     if (!s->pps->cu_qp_delta_enabled_flag)
 793         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
 794                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
 795
 796     s->slice_initialized = 1;
 797
 798     return 0;
 799 }
 800
 801 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 802
 803 #define SET_SAO(elem, value)                            \
 804 do {                                                    \
 805     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 806         sao->elem = value;                              \
 807     else if (sao_merge_left_flag)                       \
 808         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 809     else if (sao_merge_up_flag)                         \
 810         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 811     else                                                \
 812         sao->elem = 0;                                  \
 813 } while (0)
 814
 815 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 816 {
 817     HEVCLocalContext *lc    = &s->HEVClc;
 818     int sao_merge_left_flag = 0;
 819     int sao_merge_up_flag   = 0;
 820     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 821     SAOParams *sao          = &CTB(s->sao, rx, ry);
 822     int c_idx, i;
 823
 824     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 825         s->sh.slice_sample_adaptive_offset_flag[1]) {
 826         if (rx > 0) {
 827             if (lc->ctb_left_flag)
 828                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 829         }
 830         if (ry > 0 && !sao_merge_left_flag) {
 831             if (lc->ctb_up_flag)
 832                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 833         }
 834     }
 835
 836     for (c_idx = 0; c_idx < 3; c_idx++) {
 837         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 838             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 839             continue;
 840         }
 841
 842         if (c_idx == 2) {
 843             sao->type_idx[2] = sao->type_idx[1];
 844             sao->eo_class[2] = sao->eo_class[1];
 845         } else {
 846             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 847         }
 848
 849         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 850             continue;
 851
 852         for (i = 0; i < 4; i++)
 853             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 854
 855         if (sao->type_idx[c_idx] == SAO_BAND) {
 856             for (i = 0; i < 4; i++) {
 857                 if (sao->offset_abs[c_idx][i]) {
 858                     SET_SAO(offset_sign[c_idx][i],
 859                             ff_hevc_sao_offset_sign_decode(s));
 860                 } else {
 861                     sao->offset_sign[c_idx][i] = 0;
 862                 }
 863             }
 864             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 865         } else if (c_idx != 2) {
 866             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 867         }
 868
 869         // Inferred parameters
 870         sao->offset_val[c_idx][0] = 0;
 871         for (i = 0; i < 4; i++) {
 872             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 873             if (sao->type_idx[c_idx] == SAO_EDGE) {
 874                 if (i > 1)
 875                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 876             } else if (sao->offset_sign[c_idx][i]) {
 877                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 878             }
 879         }
 880     }
 881 }
 882
 883 #undef SET_SAO
 884 #undef CTB
 885
 886 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 887                                 int log2_trafo_size, enum ScanType scan_idx,
 888                                 int c_idx)
 889 {
 890 #define GET_COORD(offset, n)                                    \
 891     do {                                                        \
 892         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 893         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 894     } while (0)
 895     HEVCLocalContext *lc    = &s->HEVClc;
 896     int transform_skip_flag = 0;
 897
 898     int last_significant_coeff_x, last_significant_coeff_y;
 899     int last_scan_pos;
 900     int n_end;
 901     int num_coeff    = 0;
 902     int greater1_ctx = 1;
 903
 904     int num_last_subset;
 905     int x_cg_last_sig, y_cg_last_sig;
 906
 907     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 908
 909     ptrdiff_t stride = s->frame->linesize[c_idx];
 910     int hshift       = s->sps->hshift[c_idx];
 911     int vshift       = s->sps->vshift[c_idx];
 912     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 913                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 914     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 915     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 916
 917     int trafo_size = 1 << log2_trafo_size;
 918     int i, qp, shift, add, scale, scale_m;
 919     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 920     const uint8_t *scale_matrix;
 921     uint8_t dc_scale;
 922
 923     // Derive QP for dequant
 924     if (!lc->cu.cu_transquant_bypass_flag) {
 925         static const int qp_c[] = {
 926             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 927         };
 928
 929         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 931             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 932             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 933         };
 934
 935         static const uint8_t div6[51 + 2 * 6 + 1] = {
 936             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 937             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 938             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 939         };
 940         int qp_y = lc->qp_y;
 941
 942         if (c_idx == 0) {
 943             qp = qp_y + s->sps->qp_bd_offset;
 944         } else {
 945             int qp_i, offset;
 946
 947             if (c_idx == 1)
 948                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 949             else
 950                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 951
 952             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
 953             if (qp_i < 30)
 954                 qp = qp_i;
 955             else if (qp_i > 43)
 956                 qp = qp_i - 6;
 957             else
 958                 qp = qp_c[qp_i - 30];
 959
 960             qp += s->sps->qp_bd_offset;
 961         }
 962
 963         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 964         add      = 1 << (shift - 1);
 965         scale    = level_scale[rem6[qp]] << (div6[qp]);
 966         scale_m  = 16; // default when no custom scaling lists.
 967         dc_scale = 16;
 968
 969         if (s->sps->scaling_list_enable_flag) {
 970             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 971                                     &s->pps->scaling_list : &s->sps->scaling_list;
 972             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 973
 974             if (log2_trafo_size != 5)
 975                 matrix_id = 3 * matrix_id + c_idx;
 976
 977             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 978             if (log2_trafo_size >= 4)
 979                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
 980         }
 981     }
 982
 983     if (s->pps->transform_skip_enabled_flag &&
 984         !lc->cu.cu_transquant_bypass_flag   &&
 985         log2_trafo_size == 2) {
 986         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
 987     }
 988
 989     last_significant_coeff_x =
 990         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
 991     last_significant_coeff_y =
 992         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
 993
 994     if (last_significant_coeff_x > 3) {
 995         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
 996         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
 997                                    (2 + (last_significant_coeff_x & 1)) +
 998                                    suffix;
 999     }
1000
1001     if (last_significant_coeff_y > 3) {
1002         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1003         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1004                                    (2 + (last_significant_coeff_y & 1)) +
1005                                    suffix;
1006     }
1007
1008     if (scan_idx == SCAN_VERT)
1009         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1010
1011     x_cg_last_sig = last_significant_coeff_x >> 2;
1012     y_cg_last_sig = last_significant_coeff_y >> 2;
1013
1014     switch (scan_idx) {
1015     case SCAN_DIAG: {
1016         int last_x_c = last_significant_coeff_x & 3;
1017         int last_y_c = last_significant_coeff_y & 3;
1018
1019         scan_x_off = ff_hevc_diag_scan4x4_x;
1020         scan_y_off = ff_hevc_diag_scan4x4_y;
1021         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1022         if (trafo_size == 4) {
1023             scan_x_cg = scan_1x1;
1024             scan_y_cg = scan_1x1;
1025         } else if (trafo_size == 8) {
1026             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1027             scan_x_cg  = diag_scan2x2_x;
1028             scan_y_cg  = diag_scan2x2_y;
1029         } else if (trafo_size == 16) {
1030             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1031             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1032             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1033         } else { // trafo_size == 32
1034             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1035             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1036             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1037         }
1038         break;
1039     }
1040     case SCAN_HORIZ:
1041         scan_x_cg  = horiz_scan2x2_x;
1042         scan_y_cg  = horiz_scan2x2_y;
1043         scan_x_off = horiz_scan4x4_x;
1044         scan_y_off = horiz_scan4x4_y;
1045         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1046         break;
1047     default: //SCAN_VERT
1048         scan_x_cg  = horiz_scan2x2_y;
1049         scan_y_cg  = horiz_scan2x2_x;
1050         scan_x_off = horiz_scan4x4_y;
1051         scan_y_off = horiz_scan4x4_x;
1052         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1053         break;
1054     }
1055     num_coeff++;
1056     num_last_subset = (num_coeff - 1) >> 4;
1057
1058     for (i = num_last_subset; i >= 0; i--) {
1059         int n, m;
1060         int x_cg, y_cg, x_c, y_c;
1061         int implicit_non_zero_coeff = 0;
1062         int64_t trans_coeff_level;
1063         int prev_sig = 0;
1064         int offset   = i << 4;
1065
1066         uint8_t significant_coeff_flag_idx[16];
1067         uint8_t nb_significant_coeff_flag = 0;
1068
1069         x_cg = scan_x_cg[i];
1070         y_cg = scan_y_cg[i];
1071
1072         if (i < num_last_subset && i > 0) {
1073             int ctx_cg = 0;
1074             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1076             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1077                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1078
1079             significant_coeff_group_flag[x_cg][y_cg] =
1080                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1081             implicit_non_zero_coeff = 1;
1082         } else {
1083             significant_coeff_group_flag[x_cg][y_cg] =
1084                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1085                  (x_cg == 0 && y_cg == 0));
1086         }
1087
1088         last_scan_pos = num_coeff - offset - 1;
1089
1090         if (i == num_last_subset) {
1091             n_end                         = last_scan_pos - 1;
1092             significant_coeff_flag_idx[0] = last_scan_pos;
1093             nb_significant_coeff_flag     = 1;
1094         } else {
1095             n_end = 15;
1096         }
1097
1098         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1100         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1101             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1102
1103         for (n = n_end; n >= 0; n--) {
1104             GET_COORD(offset, n);
1105
1106             if (significant_coeff_group_flag[x_cg][y_cg] &&
1107                 (n > 0 || implicit_non_zero_coeff == 0)) {
1108                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1109                                                           log2_trafo_size,
1110                                                           scan_idx,
1111                                                           prev_sig) == 1) {
1112                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1113                     nb_significant_coeff_flag++;
1114                     implicit_non_zero_coeff = 0;
1115                 }
1116             } else {
1117                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1118                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1119                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1120                     nb_significant_coeff_flag++;
1121                 }
1122             }
1123         }
1124
1125         n_end = nb_significant_coeff_flag;
1126
1127         if (n_end) {
1128             int first_nz_pos_in_cg = 16;
1129             int last_nz_pos_in_cg = -1;
1130             int c_rice_param = 0;
1131             int first_greater1_coeff_idx = -1;
1132             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1133             uint16_t coeff_sign_flag;
1134             int sum_abs = 0;
1135             int sign_hidden = 0;
1136
1137             // initialize first elem of coeff_bas_level_greater1_flag
1138             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1139
1140             if (!(i == num_last_subset) && greater1_ctx == 0)
1141                 ctx_set++;
1142             greater1_ctx      = 1;
1143             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1144
1145             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1146                 int n_idx = significant_coeff_flag_idx[m];
1147                 int inc   = (ctx_set << 2) + greater1_ctx;
1148                 coeff_abs_level_greater1_flag[n_idx] =
1149                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1150                 if (coeff_abs_level_greater1_flag[n_idx]) {
1151                     greater1_ctx = 0;
1152                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1153                     greater1_ctx++;
1154                 }
1155
1156                 if (coeff_abs_level_greater1_flag[n_idx] &&
1157                     first_greater1_coeff_idx == -1)
1158                     first_greater1_coeff_idx = n_idx;
1159             }
1160             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1161             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1162                                  !lc->cu.cu_transquant_bypass_flag;
1163
1164             if (first_greater1_coeff_idx != -1) {
1165                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1166             }
1167             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1169             } else {
1170                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1171             }
1172
1173             for (m = 0; m < n_end; m++) {
1174                 n = significant_coeff_flag_idx[m];
1175                 GET_COORD(offset, n);
1176                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1177                 if (trans_coeff_level == ((m < 8) ?
1178                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1179                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1180
1181                     trans_coeff_level += last_coeff_abs_level_remaining;
1182                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1183                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1184                 }
1185                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1186                     sum_abs += trans_coeff_level;
1187                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1188                         trans_coeff_level = -trans_coeff_level;
1189                 }
1190                 if (coeff_sign_flag >> 15)
1191                     trans_coeff_level = -trans_coeff_level;
1192                 coeff_sign_flag <<= 1;
1193                 if (!lc->cu.cu_transquant_bypass_flag) {
1194                     if (s->sps->scaling_list_enable_flag) {
1195                         if (y_c || x_c || log2_trafo_size < 4) {
1196                             int pos;
1197                             switch (log2_trafo_size) {
1198                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1199                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1200                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1201                             default: pos = (y_c        << 2) +  x_c;
1202                             }
1203                             scale_m = scale_matrix[pos];
1204                         } else {
1205                             scale_m = dc_scale;
1206                         }
1207                     }
1208                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1209                     if(trans_coeff_level < 0) {
1210                         if((~trans_coeff_level) & 0xFffffffffff8000)
1211                             trans_coeff_level = -32768;
1212                     } else {
1213                         if (trans_coeff_level & 0xffffffffffff8000)
1214                             trans_coeff_level = 32767;
1215                     }
1216                 }
1217                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1218             }
1219         }
1220     }
1221
1222     if (lc->cu.cu_transquant_bypass_flag) {
1223         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1224     } else {
1225         if (transform_skip_flag)
1226             s->hevcdsp.transform_skip(dst, coeffs, stride);
1227         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1228                  log2_trafo_size == 2)
1229             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1230         else
1231             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1232     }
1233 }
1234
1235 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1236                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1237                               int log2_cb_size, int log2_trafo_size,
1238                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1239 {
1240     HEVCLocalContext *lc = &s->HEVClc;
1241
1242     if (lc->cu.pred_mode == MODE_INTRA) {
1243         int trafo_size = 1 << log2_trafo_size;
1244         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1245
1246         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1247         if (log2_trafo_size > 2) {
1248             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1249             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1250             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1251             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1252         } else if (blk_idx == 3) {
1253             trafo_size = trafo_size << s->sps->hshift[1];
1254             ff_hevc_set_neighbour_available(s, xBase, yBase,
1255                                             trafo_size, trafo_size);
1256             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1257             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1258         }
1259     }
1260
1261     if (cbf_luma || cbf_cb || cbf_cr) {
1262         int scan_idx   = SCAN_DIAG;
1263         int scan_idx_c = SCAN_DIAG;
1264
1265         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1266             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1267             if (lc->tu.cu_qp_delta != 0)
1268                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1269                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1270             lc->tu.is_cu_qp_delta_coded = 1;
1271
1272             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1273                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1274                 av_log(s->avctx, AV_LOG_ERROR,
1275                        "The cu_qp_delta %d is outside the valid range "
1276                        "[%d, %d].\n",
1277                        lc->tu.cu_qp_delta,
1278                        -(26 + s->sps->qp_bd_offset / 2),
1279                         (25 + s->sps->qp_bd_offset / 2));
1280                 return AVERROR_INVALIDDATA;
1281             }
1282
1283             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1284         }
1285
1286         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1287             if (lc->tu.cur_intra_pred_mode >= 6 &&
1288                 lc->tu.cur_intra_pred_mode <= 14) {
1289                 scan_idx = SCAN_VERT;
1290             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1291                        lc->tu.cur_intra_pred_mode <= 30) {
1292                 scan_idx = SCAN_HORIZ;
1293             }
1294
1295             if (lc->pu.intra_pred_mode_c >=  6 &&
1296                 lc->pu.intra_pred_mode_c <= 14) {
1297                 scan_idx_c = SCAN_VERT;
1298             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1299                        lc->pu.intra_pred_mode_c <= 30) {
1300                 scan_idx_c = SCAN_HORIZ;
1301             }
1302         }
1303
1304         if (cbf_luma)
1305             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1306         if (log2_trafo_size > 2) {
1307             if (cbf_cb)
1308                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1309             if (cbf_cr)
1310                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1311         } else if (blk_idx == 3) {
1312             if (cbf_cb)
1313                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1314             if (cbf_cr)
1315                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1316         }
1317     }
1318     return 0;
1319 }
1320
1321 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1322 {
1323     int cb_size          = 1 << log2_cb_size;
1324     int log2_min_pu_size = s->sps->log2_min_pu_size;
1325
1326     int min_pu_width     = s->sps->min_pu_width;
1327     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1328     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1329     int i, j;
1330
1331     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1332         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1333             s->is_pcm[i + j * min_pu_width] = 2;
1334 }
1335
1336 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1337                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1338                               int log2_cb_size, int log2_trafo_size,
1339                               int trafo_depth, int blk_idx,
1340                               int cbf_cb, int cbf_cr)
1341 {
1342     HEVCLocalContext *lc = &s->HEVClc;
1343     uint8_t split_transform_flag;
1344     int ret;
1345
1346     if (lc->cu.intra_split_flag) {
1347         if (trafo_depth == 1)
1348             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1349     } else {
1350         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1351     }
1352
1353     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1354         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1355         trafo_depth     < lc->cu.max_trafo_depth       &&
1356         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1357         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1358     } else {
1359         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1360                           lc->cu.pred_mode == MODE_INTER &&
1361                           lc->cu.part_mode != PART_2Nx2N &&
1362                           trafo_depth == 0;
1363
1364         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1365                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1366                                inter_split;
1367     }
1368
1369     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1370         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1371     else if (log2_trafo_size > 2 || trafo_depth == 0)
1372         cbf_cb = 0;
1373     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1374         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1375     else if (log2_trafo_size > 2 || trafo_depth == 0)
1376         cbf_cr = 0;
1377
1378     if (split_transform_flag) {
1379         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1380         const int x1 = x0 + trafo_size_split;
1381         const int y1 = y0 + trafo_size_split;
1382
1383 #define SUBDIVIDE(x, y, idx)                                                    \
1384 do {                                                                            \
1385     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1386                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1387                              cbf_cb, cbf_cr);                                   \
1388     if (ret < 0)                                                                \
1389         return ret;                                                             \
1390 } while (0)
1391
1392         SUBDIVIDE(x0, y0, 0);
1393         SUBDIVIDE(x1, y0, 1);
1394         SUBDIVIDE(x0, y1, 2);
1395         SUBDIVIDE(x1, y1, 3);
1396
1397 #undef SUBDIVIDE
1398     } else {
1399         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1400         int log2_min_tu_size = s->sps->log2_min_tb_size;
1401         int min_tu_width     = s->sps->min_tb_width;
1402         int cbf_luma         = 1;
1403
1404         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1405             cbf_cb || cbf_cr)
1406             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1407
1408         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1409                                  log2_cb_size, log2_trafo_size,
1410                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1411         if (ret < 0)
1412             return ret;
1413         // TODO: store cbf_luma somewhere else
1414         if (cbf_luma) {
1415             int i, j;
1416             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1417                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1418                     int x_tu = (x0 + j) >> log2_min_tu_size;
1419                     int y_tu = (y0 + i) >> log2_min_tu_size;
1420                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1421                 }
1422         }
1423         if (!s->sh.disable_deblocking_filter_flag) {
1424             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1425             if (s->pps->transquant_bypass_enable_flag &&
1426                 lc->cu.cu_transquant_bypass_flag)
1427                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1428         }
1429     }
1430     return 0;
1431 }
1432
1433 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1434 {
1435     //TODO: non-4:2:0 support
1436     HEVCLocalContext *lc = &s->HEVClc;
1437     GetBitContext gb;
1438     int cb_size   = 1 << log2_cb_size;
1439     int stride0   = s->frame->linesize[0];
1440     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1441     int   stride1 = s->frame->linesize[1];
1442     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1443     int   stride2 = s->frame->linesize[2];
1444     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1445
1446     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1447     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1448     int ret;
1449
1450     if (!s->sh.disable_deblocking_filter_flag)
1451         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1452
1453     ret = init_get_bits(&gb, pcm, length);
1454     if (ret < 0)
1455         return ret;
1456
1457     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1458     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1459     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1460     return 0;
1461 }
1462
1463 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1464 {
1465     HEVCLocalContext *lc = &s->HEVClc;
1466     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1467     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1468
1469     if (x)
1470         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1471     if (y)
1472         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1473
1474     switch (x) {
1475     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1476     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1477     case 0: lc->pu.mvd.x = 0;                               break;
1478     }
1479
1480     switch (y) {
1481     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1482     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1483     case 0: lc->pu.mvd.y = 0;                               break;
1484     }
1485 }
1486
1487 /**
1488  * 8.5.3.2.2.1 Luma sample interpolation process
1489  *
1490  * @param s HEVC decoding context
1491  * @param dst target buffer for block data at block position
1492  * @param dststride stride of the dst buffer
1493  * @param ref reference picture buffer at origin (0, 0)
1494  * @param mv motion vector (relative to block position) to get pixel data from
1495  * @param x_off horizontal position of block from origin (0, 0)
1496  * @param y_off vertical position of block from origin (0, 0)
1497  * @param block_w width of block
1498  * @param block_h height of block
1499  */
1500 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1501                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1502                     int block_w, int block_h)
1503 {
1504     HEVCLocalContext *lc = &s->HEVClc;
1505     uint8_t *src         = ref->data[0];
1506     ptrdiff_t srcstride  = ref->linesize[0];
1507     int pic_width        = s->sps->width;
1508     int pic_height       = s->sps->height;
1509
1510     int mx         = mv->x & 3;
1511     int my         = mv->y & 3;
1512     int extra_left = ff_hevc_qpel_extra_before[mx];
1513     int extra_top  = ff_hevc_qpel_extra_before[my];
1514
1515     x_off += mv->x >> 2;
1516     y_off += mv->y >> 2;
1517     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1518
1519     if (x_off < extra_left || y_off < extra_top ||
1520         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1521         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1522         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1523         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1524         int buf_offset = extra_top *
1525                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1526
1527         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1528                                  edge_emu_stride, srcstride,
1529                                  block_w + ff_hevc_qpel_extra[mx],
1530                                  block_h + ff_hevc_qpel_extra[my],
1531                                  x_off - extra_left, y_off - extra_top,
1532                                  pic_width, pic_height);
1533         src = lc->edge_emu_buffer + buf_offset;
1534         srcstride = edge_emu_stride;
1535     }
1536     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1537                                      block_h, lc->mc_buffer);
1538 }
1539
1540 /**
1541  * 8.5.3.2.2.2 Chroma sample interpolation process
1542  *
1543  * @param s HEVC decoding context
1544  * @param dst1 target buffer for block data at block position (U plane)
1545  * @param dst2 target buffer for block data at block position (V plane)
1546  * @param dststride stride of the dst1 and dst2 buffers
1547  * @param ref reference picture buffer at origin (0, 0)
1548  * @param mv motion vector (relative to block position) to get pixel data from
1549  * @param x_off horizontal position of block from origin (0, 0)
1550  * @param y_off vertical position of block from origin (0, 0)
1551  * @param block_w width of block
1552  * @param block_h height of block
1553  */
1554 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1555                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1556                       int x_off, int y_off, int block_w, int block_h)
1557 {
1558     HEVCLocalContext *lc = &s->HEVClc;
1559     uint8_t *src1        = ref->data[1];
1560     uint8_t *src2        = ref->data[2];
1561     ptrdiff_t src1stride = ref->linesize[1];
1562     ptrdiff_t src2stride = ref->linesize[2];
1563     int pic_width        = s->sps->width >> 1;
1564     int pic_height       = s->sps->height >> 1;
1565
1566     int mx = mv->x & 7;
1567     int my = mv->y & 7;
1568
1569     x_off += mv->x >> 3;
1570     y_off += mv->y >> 3;
1571     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1572     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1573
1574     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1575         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1576         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1577         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1578         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1579         int buf_offset1 = EPEL_EXTRA_BEFORE *
1580                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1581         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1582         int buf_offset2 = EPEL_EXTRA_BEFORE *
1583                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1584
1585         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1586                                  edge_emu_stride, src1stride,
1587                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1588                                  x_off - EPEL_EXTRA_BEFORE,
1589                                  y_off - EPEL_EXTRA_BEFORE,
1590                                  pic_width, pic_height);
1591
1592         src1 = lc->edge_emu_buffer + buf_offset1;
1593         src1stride = edge_emu_stride;
1594         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1595                                              block_w, block_h, mx, my, lc->mc_buffer);
1596
1597         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1598                                  edge_emu_stride, src2stride,
1599                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1600                                  x_off - EPEL_EXTRA_BEFORE,
1601                                  y_off - EPEL_EXTRA_BEFORE,
1602                                  pic_width, pic_height);
1603         src2 = lc->edge_emu_buffer + buf_offset2;
1604         src2stride = edge_emu_stride;
1605
1606         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1607                                              block_w, block_h, mx, my,
1608                                              lc->mc_buffer);
1609     } else {
1610         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1611                                              block_w, block_h, mx, my,
1612                                              lc->mc_buffer);
1613         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1614                                              block_w, block_h, mx, my,
1615                                              lc->mc_buffer);
1616     }
1617 }
1618
1619 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1620                                 const Mv *mv, int y0, int height)
1621 {
1622     int y = (mv->y >> 2) + y0 + height + 9;
1623     ff_thread_await_progress(&ref->tf, y, 0);
1624 }
1625
1626 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1627                                   int nPbH, int log2_cb_size, int part_idx,
1628                                   int merge_idx, MvField *mv)
1629 {
1630     HEVCLocalContext *lc             = &s->HEVClc;
1631     enum InterPredIdc inter_pred_idc = PRED_L0;
1632     int mvp_flag;
1633
1634     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1635     if (s->sh.slice_type == B_SLICE)
1636         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1637
1638     if (inter_pred_idc != PRED_L1) {
1639         if (s->sh.nb_refs[L0])
1640             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1641
1642         mv->pred_flag[0] = 1;
1643         hls_mvd_coding(s, x0, y0, 0);
1644         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1645         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1646                                  part_idx, merge_idx, mv, mvp_flag, 0);
1647         mv->mv[0].x += lc->pu.mvd.x;
1648         mv->mv[0].y += lc->pu.mvd.y;
1649     }
1650
1651     if (inter_pred_idc != PRED_L0) {
1652         if (s->sh.nb_refs[L1])
1653             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1654
1655         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1656             AV_ZERO32(&lc->pu.mvd);
1657         } else {
1658             hls_mvd_coding(s, x0, y0, 1);
1659         }
1660
1661         mv->pred_flag[1] = 1;
1662         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1663         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1664                                  part_idx, merge_idx, mv, mvp_flag, 1);
1665         mv->mv[1].x += lc->pu.mvd.x;
1666         mv->mv[1].y += lc->pu.mvd.y;
1667     }
1668 }
1669
1670 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1671                                 int nPbW, int nPbH,
1672                                 int log2_cb_size, int partIdx)
1673 {
1674 #define POS(c_idx, x, y)                                                              \
1675     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1676                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1677     HEVCLocalContext *lc = &s->HEVClc;
1678     int merge_idx = 0;
1679     struct MvField current_mv = {{{ 0 }}};
1680
1681     int min_pu_width = s->sps->min_pu_width;
1682
1683     MvField *tab_mvf = s->ref->tab_mvf;
1684     RefPicList  *refPicList = s->ref->refPicList;
1685     HEVCFrame *ref0, *ref1;
1686
1687     int tmpstride = MAX_PB_SIZE;
1688
1689     uint8_t *dst0 = POS(0, x0, y0);
1690     uint8_t *dst1 = POS(1, x0, y0);
1691     uint8_t *dst2 = POS(2, x0, y0);
1692     int log2_min_cb_size = s->sps->log2_min_cb_size;
1693     int min_cb_width     = s->sps->min_cb_width;
1694     int x_cb             = x0 >> log2_min_cb_size;
1695     int y_cb             = y0 >> log2_min_cb_size;
1696     int x_pu, y_pu;
1697     int i, j;
1698
1699     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1700
1701     if (!skip_flag)
1702         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1703
1704     if (skip_flag || lc->pu.merge_flag) {
1705         if (s->sh.max_num_merge_cand > 1)
1706             merge_idx = ff_hevc_merge_idx_decode(s);
1707         else
1708             merge_idx = 0;
1709
1710         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1711                                    partIdx, merge_idx, &current_mv);
1712     } else {
1713         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1714                               partIdx, merge_idx, &current_mv);
1715     }
1716
1717     x_pu = x0 >> s->sps->log2_min_pu_size;
1718     y_pu = y0 >> s->sps->log2_min_pu_size;
1719
1720     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1721         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1722             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1723
1724     if (current_mv.pred_flag[0]) {
1725         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1726         if (!ref0)
1727             return;
1728         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1729     }
1730     if (current_mv.pred_flag[1]) {
1731         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1732         if (!ref1)
1733             return;
1734         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1735     }
1736
1737     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1738         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1739         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1740
1741         luma_mc(s, tmp, tmpstride, ref0->frame,
1742                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1743
1744         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1745             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1746             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1747                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1748                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1749                                      dst0, s->frame->linesize[0], tmp,
1750                                      tmpstride, nPbW, nPbH);
1751         } else {
1752             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1753         }
1754         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1755                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1756
1757         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1758             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1759             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1760                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1761                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1762                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1763                                      nPbW / 2, nPbH / 2);
1764             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1765                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1766                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1767                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1768                                      nPbW / 2, nPbH / 2);
1769         } else {
1770             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1771             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1772         }
1773     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1774         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1775         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1776
1777         luma_mc(s, tmp, tmpstride, ref1->frame,
1778                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1779
1780         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1781             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1782             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1783                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1784                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1785                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1786                                       nPbW, nPbH);
1787         } else {
1788             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1789         }
1790
1791         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1792                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1793
1794         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1795             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1796             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1797                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1798                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1799                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1800             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1801                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1802                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1803                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1804         } else {
1805             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1806             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1807         }
1808     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1809         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1810         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1811         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1812         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1813
1814         luma_mc(s, tmp, tmpstride, ref0->frame,
1815                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1816         luma_mc(s, tmp2, tmpstride, ref1->frame,
1817                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1818
1819         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1820             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1821             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1822                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1823                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1824                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1825                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1826                                          dst0, s->frame->linesize[0],
1827                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1828         } else {
1829             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1830                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1831         }
1832
1833         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1834                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1835         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1836                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1837
1838         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1839             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1840             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1841                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1842                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1843                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1844                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1845                                          dst1, s->frame->linesize[1], tmp, tmp3,
1846                                          tmpstride, nPbW / 2, nPbH / 2);
1847             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1848                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1849                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1850                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1851                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1852                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1853                                          tmpstride, nPbW / 2, nPbH / 2);
1854         } else {
1855             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1856             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1857         }
1858     }
1859 }
1860
1861 /**
1862  * 8.4.1
1863  */
1864 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1865                                 int prev_intra_luma_pred_flag)
1866 {
1867     HEVCLocalContext *lc = &s->HEVClc;
1868     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1869     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1870     int min_pu_width     = s->sps->min_pu_width;
1871     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1872     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1873     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1874
1875     int cand_up   = (lc->ctb_up_flag || y0b) ?
1876                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1877     int cand_left = (lc->ctb_left_flag || x0b) ?
1878                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1879
1880     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1881
1882     MvField *tab_mvf = s->ref->tab_mvf;
1883     int intra_pred_mode;
1884     int candidate[3];
1885     int i, j;
1886
1887     // intra_pred_mode prediction does not cross vertical CTB boundaries
1888     if ((y0 - 1) < y_ctb)
1889         cand_up = INTRA_DC;
1890
1891     if (cand_left == cand_up) {
1892         if (cand_left < 2) {
1893             candidate[0] = INTRA_PLANAR;
1894             candidate[1] = INTRA_DC;
1895             candidate[2] = INTRA_ANGULAR_26;
1896         } else {
1897             candidate[0] = cand_left;
1898             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1899             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1900         }
1901     } else {
1902         candidate[0] = cand_left;
1903         candidate[1] = cand_up;
1904         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1905             candidate[2] = INTRA_PLANAR;
1906         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1907             candidate[2] = INTRA_DC;
1908         } else {
1909             candidate[2] = INTRA_ANGULAR_26;
1910         }
1911     }
1912
1913     if (prev_intra_luma_pred_flag) {
1914         intra_pred_mode = candidate[lc->pu.mpm_idx];
1915     } else {
1916         if (candidate[0] > candidate[1])
1917             FFSWAP(uint8_t, candidate[0], candidate[1]);
1918         if (candidate[0] > candidate[2])
1919             FFSWAP(uint8_t, candidate[0], candidate[2]);
1920         if (candidate[1] > candidate[2])
1921             FFSWAP(uint8_t, candidate[1], candidate[2]);
1922
1923         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1924         for (i = 0; i < 3; i++)
1925             if (intra_pred_mode >= candidate[i])
1926                 intra_pred_mode++;
1927     }
1928
1929     /* write the intra prediction units into the mv array */
1930     if (!size_in_pus)
1931         size_in_pus = 1;
1932     for (i = 0; i < size_in_pus; i++) {
1933         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1934                intra_pred_mode, size_in_pus);
1935
1936         for (j = 0; j < size_in_pus; j++) {
1937             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1938             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1939             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1940             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1941             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1942             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1943             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1944             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1945             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1946         }
1947     }
1948
1949     return intra_pred_mode;
1950 }
1951
1952 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1953                                           int log2_cb_size, int ct_depth)
1954 {
1955     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1956     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1957     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1958     int y;
1959
1960     for (y = 0; y < length; y++)
1961         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1962                ct_depth, length);
1963 }
1964
1965 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1966                                   int log2_cb_size)
1967 {
1968     HEVCLocalContext *lc = &s->HEVClc;
1969     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1970     uint8_t prev_intra_luma_pred_flag[4];
1971     int split   = lc->cu.part_mode == PART_NxN;
1972     int pb_size = (1 << log2_cb_size) >> split;
1973     int side    = split + 1;
1974     int chroma_mode;
1975     int i, j;
1976
1977     for (i = 0; i < side; i++)
1978         for (j = 0; j < side; j++)
1979             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1980
1981     for (i = 0; i < side; i++) {
1982         for (j = 0; j < side; j++) {
1983             if (prev_intra_luma_pred_flag[2 * i + j])
1984                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1985             else
1986                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1987
1988             lc->pu.intra_pred_mode[2 * i + j] =
1989                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1990                                      prev_intra_luma_pred_flag[2 * i + j]);
1991         }
1992     }
1993
1994     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1995     if (chroma_mode != 4) {
1996         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1997             lc->pu.intra_pred_mode_c = 34;
1998         else
1999             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2000     } else {
2001         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2002     }
2003 }
2004
2005 static void intra_prediction_unit_default_value(HEVCContext *s,
2006                                                 int x0, int y0,
2007                                                 int log2_cb_size)
2008 {
2009     HEVCLocalContext *lc = &s->HEVClc;
2010     int pb_size          = 1 << log2_cb_size;
2011     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2012     int min_pu_width     = s->sps->min_pu_width;
2013     MvField *tab_mvf     = s->ref->tab_mvf;
2014     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2015     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2016     int j, k;
2017
2018     if (size_in_pus == 0)
2019         size_in_pus = 1;
2020     for (j = 0; j < size_in_pus; j++) {
2021         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2022         for (k = 0; k < size_in_pus; k++)
2023             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2024     }
2025 }
2026
2027 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2028 {
2029     int cb_size          = 1 << log2_cb_size;
2030     HEVCLocalContext *lc = &s->HEVClc;
2031     int log2_min_cb_size = s->sps->log2_min_cb_size;
2032     int length           = cb_size >> log2_min_cb_size;
2033     int min_cb_width     = s->sps->min_cb_width;
2034     int x_cb             = x0 >> log2_min_cb_size;
2035     int y_cb             = y0 >> log2_min_cb_size;
2036     int x, y, ret;
2037
2038     lc->cu.x                = x0;
2039     lc->cu.y                = y0;
2040     lc->cu.pred_mode        = MODE_INTRA;
2041     lc->cu.part_mode        = PART_2Nx2N;
2042     lc->cu.intra_split_flag = 0;
2043
2044     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2045     for (x = 0; x < 4; x++)
2046         lc->pu.intra_pred_mode[x] = 1;
2047     if (s->pps->transquant_bypass_enable_flag) {
2048         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2049         if (lc->cu.cu_transquant_bypass_flag)
2050             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2051     } else
2052         lc->cu.cu_transquant_bypass_flag = 0;
2053
2054     if (s->sh.slice_type != I_SLICE) {
2055         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2056
2057         x = y_cb * min_cb_width + x_cb;
2058         for (y = 0; y < length; y++) {
2059             memset(&s->skip_flag[x], skip_flag, length);
2060             x += min_cb_width;
2061         }
2062         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2063     }
2064
2065     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2066         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2067         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2068
2069         if (!s->sh.disable_deblocking_filter_flag)
2070             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2071     } else {
2072         int pcm_flag = 0;
2073
2074         if (s->sh.slice_type != I_SLICE)
2075             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2076         if (lc->cu.pred_mode != MODE_INTRA ||
2077             log2_cb_size == s->sps->log2_min_cb_size) {
2078             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2079             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2080                                       lc->cu.pred_mode == MODE_INTRA;
2081         }
2082
2083         if (lc->cu.pred_mode == MODE_INTRA) {
2084             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2085                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2086                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2087                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2088             }
2089             if (pcm_flag) {
2090                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2091                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2092                 if (s->sps->pcm.loop_filter_disable_flag)
2093                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2094
2095                 if (ret < 0)
2096                     return ret;
2097             } else {
2098                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2099             }
2100         } else {
2101             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2102             switch (lc->cu.part_mode) {
2103             case PART_2Nx2N:
2104                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2105                 break;
2106             case PART_2NxN:
2107                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2108                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2109                 break;
2110             case PART_Nx2N:
2111                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2112                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2113                 break;
2114             case PART_2NxnU:
2115                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2116                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2117                 break;
2118             case PART_2NxnD:
2119                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2120                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2121                 break;
2122             case PART_nLx2N:
2123                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2124                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2125                 break;
2126             case PART_nRx2N:
2127                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2128                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2129                 break;
2130             case PART_NxN:
2131                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2132                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2133                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2134                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2135                 break;
2136             }
2137         }
2138
2139         if (!pcm_flag) {
2140             int rqt_root_cbf = 1;
2141
2142             if (lc->cu.pred_mode != MODE_INTRA &&
2143                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2144                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2145             }
2146             if (rqt_root_cbf) {
2147                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2148                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2149                                          s->sps->max_transform_hierarchy_depth_inter;
2150                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2151                                          log2_cb_size,
2152                                          log2_cb_size, 0, 0, 0, 0);
2153                 if (ret < 0)
2154                     return ret;
2155             } else {
2156                 if (!s->sh.disable_deblocking_filter_flag)
2157                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2158             }
2159         }
2160     }
2161
2162     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2163         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2164
2165     x = y_cb * min_cb_width + x_cb;
2166     for (y = 0; y < length; y++) {
2167         memset(&s->qp_y_tab[x], lc->qp_y, length);
2168         x += min_cb_width;
2169     }
2170
2171     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2172
2173     return 0;
2174 }
2175
2176 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2177                                int log2_cb_size, int cb_depth)
2178 {
2179     HEVCLocalContext *lc = &s->HEVClc;
2180     const int cb_size    = 1 << log2_cb_size;
2181     int split_cu;
2182
2183     lc->ct.depth = cb_depth;
2184     if (x0 + cb_size <= s->sps->width  &&
2185         y0 + cb_size <= s->sps->height &&
2186         log2_cb_size > s->sps->log2_min_cb_size) {
2187         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2188     } else {
2189         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2190     }
2191     if (s->pps->cu_qp_delta_enabled_flag &&
2192         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2193         lc->tu.is_cu_qp_delta_coded = 0;
2194         lc->tu.cu_qp_delta          = 0;
2195     }
2196
2197     if (split_cu) {
2198         const int cb_size_split = cb_size >> 1;
2199         const int x1 = x0 + cb_size_split;
2200         const int y1 = y0 + cb_size_split;
2201
2202         log2_cb_size--;
2203         cb_depth++;
2204
2205 #define SUBDIVIDE(x, y)                                                \
2206 do {                                                                   \
2207     if (x < s->sps->width && y < s->sps->height) {                     \
2208         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2209         if (ret < 0)                                                   \
2210             return ret;                                                \
2211     }                                                                  \
2212 } while (0)
2213
2214         SUBDIVIDE(x0, y0);
2215         SUBDIVIDE(x1, y0);
2216         SUBDIVIDE(x0, y1);
2217         SUBDIVIDE(x1, y1);
2218     } else {
2219         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2220         if (ret < 0)
2221             return ret;
2222     }
2223
2224     return 0;
2225 }
2226
2227 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2228                                  int ctb_addr_ts)
2229 {
2230     HEVCLocalContext *lc  = &s->HEVClc;
2231     int ctb_size          = 1 << s->sps->log2_ctb_size;
2232     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2233     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2234
2235     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2236
2237     if (s->pps->entropy_coding_sync_enabled_flag) {
2238         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2239             lc->first_qp_group = 1;
2240         lc->end_of_tiles_x = s->sps->width;
2241     } else if (s->pps->tiles_enabled_flag) {
2242         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2243             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2244             lc->start_of_tiles_x = x_ctb;
2245             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2246             lc->first_qp_group   = 1;
2247         }
2248     } else {
2249         lc->end_of_tiles_x = s->sps->width;
2250     }
2251
2252     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2253
2254     lc->boundary_flags = 0;
2255     if (s->pps->tiles_enabled_flag) {
2256         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2257             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2258         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2259             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2260         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2261             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2262         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2263             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2264     } else {
2265         if (!ctb_addr_in_slice > 0)
2266             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2267         if (ctb_addr_in_slice < s->sps->ctb_width)
2268             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2269     }
2270
2271     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2272     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2273     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2274     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2275 }
2276
2277 static int hls_slice_data(HEVCContext *s)
2278 {
2279     int ctb_size    = 1 << s->sps->log2_ctb_size;
2280     int more_data   = 1;
2281     int x_ctb       = 0;
2282     int y_ctb       = 0;
2283     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2284     int ret;
2285
2286     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2287         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2288
2289         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2290         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2291         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2292
2293         ff_hevc_cabac_init(s, ctb_addr_ts);
2294
2295         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2296
2297         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2298         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2299         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2300
2301         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2302         if (ret < 0)
2303             return ret;
2304         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2305
2306         ctb_addr_ts++;
2307         ff_hevc_save_states(s, ctb_addr_ts);
2308         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2309     }
2310
2311     if (x_ctb + ctb_size >= s->sps->width &&
2312         y_ctb + ctb_size >= s->sps->height)
2313         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2314
2315     return ctb_addr_ts;
2316 }
2317
2318 /**
2319  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2320  * 0 if the unit should be skipped, 1 otherwise
2321  */
2322 static int hls_nal_unit(HEVCContext *s)
2323 {
2324     GetBitContext *gb = &s->HEVClc.gb;
2325     int nuh_layer_id;
2326
2327     if (get_bits1(gb) != 0)
2328         return AVERROR_INVALIDDATA;
2329
2330     s->nal_unit_type = get_bits(gb, 6);
2331
2332     nuh_layer_id   = get_bits(gb, 6);
2333     s->temporal_id = get_bits(gb, 3) - 1;
2334     if (s->temporal_id < 0)
2335         return AVERROR_INVALIDDATA;
2336
2337     av_log(s->avctx, AV_LOG_DEBUG,
2338            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2339            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2340
2341     return nuh_layer_id == 0;
2342 }
2343
2344 static void restore_tqb_pixels(HEVCContext *s)
2345 {
2346     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2347     int x, y, c_idx;
2348
2349     for (c_idx = 0; c_idx < 3; c_idx++) {
2350         ptrdiff_t stride = s->frame->linesize[c_idx];
2351         int hshift       = s->sps->hshift[c_idx];
2352         int vshift       = s->sps->vshift[c_idx];
2353         for (y = 0; y < s->sps->min_pu_height; y++) {
2354             for (x = 0; x < s->sps->min_pu_width; x++) {
2355                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2356                     int n;
2357                     int len      = min_pu_size >> hshift;
2358                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2359                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2360                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2361                         memcpy(dst, src, len);
2362                         src += stride;
2363                         dst += stride;
2364                     }
2365                 }
2366             }
2367         }
2368     }
2369 }
2370
2371 static int set_side_data(HEVCContext *s)
2372 {
2373     AVFrame *out = s->ref->frame;
2374
2375     if (s->sei_frame_packing_present &&
2376         s->frame_packing_arrangement_type >= 3 &&
2377         s->frame_packing_arrangement_type <= 5 &&
2378         s->content_interpretation_type > 0 &&
2379         s->content_interpretation_type < 3) {
2380         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2381         if (!stereo)
2382             return AVERROR(ENOMEM);
2383
2384         switch (s->frame_packing_arrangement_type) {
2385         case 3:
2386             if (s->quincunx_subsampling)
2387                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2388             else
2389                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2390             break;
2391         case 4:
2392             stereo->type = AV_STEREO3D_TOPBOTTOM;
2393             break;
2394         case 5:
2395             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2396             break;
2397         }
2398
2399         if (s->content_interpretation_type == 2)
2400             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2401     }
2402
2403     if (s->sei_display_orientation_present &&
2404         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2405         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2406         AVFrameSideData *rotation = av_frame_new_side_data(out,
2407                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2408                                                            sizeof(int32_t) * 9);
2409         if (!rotation)
2410             return AVERROR(ENOMEM);
2411
2412         av_display_rotation_set((int32_t *)rotation->data, angle);
2413         av_display_matrix_flip((int32_t *)rotation->data,
2414                                s->sei_hflip, s->sei_vflip);
2415     }
2416
2417     return 0;
2418 }
2419
2420 static int hevc_frame_start(HEVCContext *s)
2421 {
2422     HEVCLocalContext *lc = &s->HEVClc;
2423     int ret;
2424
2425     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2426     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2427     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2428     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2429
2430     lc->start_of_tiles_x = 0;
2431     s->is_decoded        = 0;
2432     s->first_nal_type    = s->nal_unit_type;
2433
2434     if (s->pps->tiles_enabled_flag)
2435         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2436
2437     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2438                               s->poc);
2439     if (ret < 0)
2440         goto fail;
2441
2442     ret = ff_hevc_frame_rps(s);
2443     if (ret < 0) {
2444         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2445         goto fail;
2446     }
2447
2448     s->ref->frame->key_frame = IS_IRAP(s);
2449
2450     ret = set_side_data(s);
2451     if (ret < 0)
2452         goto fail;
2453
2454     av_frame_unref(s->output_frame);
2455     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2456     if (ret < 0)
2457         goto fail;
2458
2459     ff_thread_finish_setup(s->avctx);
2460
2461     return 0;
2462
2463 fail:
2464     if (s->ref)
2465         ff_hevc_unref_frame(s, s->ref, ~0);
2466     s->ref = NULL;
2467     return ret;
2468 }
2469
2470 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2471 {
2472     HEVCLocalContext *lc = &s->HEVClc;
2473     GetBitContext *gb    = &lc->gb;
2474     int ctb_addr_ts, ret;
2475
2476     ret = init_get_bits8(gb, nal->data, nal->size);
2477     if (ret < 0)
2478         return ret;
2479
2480     ret = hls_nal_unit(s);
2481     if (ret < 0) {
2482         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2483                s->nal_unit_type);
2484         goto fail;
2485     } else if (!ret)
2486         return 0;
2487
2488     switch (s->nal_unit_type) {
2489     case NAL_VPS:
2490         ret = ff_hevc_decode_nal_vps(s);
2491         if (ret < 0)
2492             goto fail;
2493         break;
2494     case NAL_SPS:
2495         ret = ff_hevc_decode_nal_sps(s);
2496         if (ret < 0)
2497             goto fail;
2498         break;
2499     case NAL_PPS:
2500         ret = ff_hevc_decode_nal_pps(s);
2501         if (ret < 0)
2502             goto fail;
2503         break;
2504     case NAL_SEI_PREFIX:
2505     case NAL_SEI_SUFFIX:
2506         ret = ff_hevc_decode_nal_sei(s);
2507         if (ret < 0)
2508             goto fail;
2509         break;
2510     case NAL_TRAIL_R:
2511     case NAL_TRAIL_N:
2512     case NAL_TSA_N:
2513     case NAL_TSA_R:
2514     case NAL_STSA_N:
2515     case NAL_STSA_R:
2516     case NAL_BLA_W_LP:
2517     case NAL_BLA_W_RADL:
2518     case NAL_BLA_N_LP:
2519     case NAL_IDR_W_RADL:
2520     case NAL_IDR_N_LP:
2521     case NAL_CRA_NUT:
2522     case NAL_RADL_N:
2523     case NAL_RADL_R:
2524     case NAL_RASL_N:
2525     case NAL_RASL_R:
2526         ret = hls_slice_header(s);
2527         if (ret < 0)
2528             return ret;
2529
2530         if (s->max_ra == INT_MAX) {
2531             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2532                 s->max_ra = s->poc;
2533             } else {
2534                 if (IS_IDR(s))
2535                     s->max_ra = INT_MIN;
2536             }
2537         }
2538
2539         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2540             s->poc <= s->max_ra) {
2541             s->is_decoded = 0;
2542             break;
2543         } else {
2544             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2545                 s->max_ra = INT_MIN;
2546         }
2547
2548         if (s->sh.first_slice_in_pic_flag) {
2549             ret = hevc_frame_start(s);
2550             if (ret < 0)
2551                 return ret;
2552         } else if (!s->ref) {
2553             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2554             goto fail;
2555         }
2556
2557         if (s->nal_unit_type != s->first_nal_type) {
2558             av_log(s->avctx, AV_LOG_ERROR,
2559                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2560                    s->first_nal_type, s->nal_unit_type);
2561             return AVERROR_INVALIDDATA;
2562         }
2563
2564         if (!s->sh.dependent_slice_segment_flag &&
2565             s->sh.slice_type != I_SLICE) {
2566             ret = ff_hevc_slice_rpl(s);
2567             if (ret < 0) {
2568                 av_log(s->avctx, AV_LOG_WARNING,
2569                        "Error constructing the reference lists for the current slice.\n");
2570                 goto fail;
2571             }
2572         }
2573
2574         ctb_addr_ts = hls_slice_data(s);
2575         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2576             s->is_decoded = 1;
2577             if ((s->pps->transquant_bypass_enable_flag ||
2578                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2579                 s->sps->sao_enabled)
2580                 restore_tqb_pixels(s);
2581         }
2582
2583         if (ctb_addr_ts < 0) {
2584             ret = ctb_addr_ts;
2585             goto fail;
2586         }
2587         break;
2588     case NAL_EOS_NUT:
2589     case NAL_EOB_NUT:
2590         s->seq_decode = (s->seq_decode + 1) & 0xff;
2591         s->max_ra     = INT_MAX;
2592         break;
2593     case NAL_AUD:
2594     case NAL_FD_NUT:
2595         break;
2596     default:
2597         av_log(s->avctx, AV_LOG_INFO,
2598                "Skipping NAL unit %d\n", s->nal_unit_type);
2599     }
2600
2601     return 0;
2602 fail:
2603     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2604         return ret;
2605     return 0;
2606 }
2607
2608 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2609  * between these functions would be nice. */
2610 static int extract_rbsp(const uint8_t *src, int length,
2611                         HEVCNAL *nal)
2612 {
2613     int i, si, di;
2614     uint8_t *dst;
2615
2616 #define STARTCODE_TEST                                                  \
2617         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2618             if (src[i + 2] != 3) {                                      \
2619                 /* startcode, so we must be past the end */             \
2620                 length = i;                                             \
2621             }                                                           \
2622             break;                                                      \
2623         }
2624 #if HAVE_FAST_UNALIGNED
2625 #define FIND_FIRST_ZERO                                                 \
2626         if (i > 0 && !src[i])                                           \
2627             i--;                                                        \
2628         while (src[i])                                                  \
2629             i++
2630 #if HAVE_FAST_64BIT
2631     for (i = 0; i + 1 < length; i += 9) {
2632         if (!((~AV_RN64A(src + i) &
2633                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2634               0x8000800080008080ULL))
2635             continue;
2636         FIND_FIRST_ZERO;
2637         STARTCODE_TEST;
2638         i -= 7;
2639     }
2640 #else
2641     for (i = 0; i + 1 < length; i += 5) {
2642         if (!((~AV_RN32A(src + i) &
2643                (AV_RN32A(src + i) - 0x01000101U)) &
2644               0x80008080U))
2645             continue;
2646         FIND_FIRST_ZERO;
2647         STARTCODE_TEST;
2648         i -= 3;
2649     }
2650 #endif /* HAVE_FAST_64BIT */
2651 #else
2652     for (i = 0; i + 1 < length; i += 2) {
2653         if (src[i])
2654             continue;
2655         if (i > 0 && src[i - 1] == 0)
2656             i--;
2657         STARTCODE_TEST;
2658     }
2659 #endif /* HAVE_FAST_UNALIGNED */
2660
2661     if (i >= length - 1) { // no escaped 0
2662         nal->data     =
2663         nal->raw_data = src;
2664         nal->size     =
2665         nal->raw_size = length;
2666         return length;
2667     }
2668
2669     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2670                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2671     if (!nal->rbsp_buffer)
2672         return AVERROR(ENOMEM);
2673
2674     dst = nal->rbsp_buffer;
2675
2676     memcpy(dst, src, i);
2677     si = di = i;
2678     while (si + 2 < length) {
2679         // remove escapes (very rare 1:2^22)
2680         if (src[si + 2] > 3) {
2681             dst[di++] = src[si++];
2682             dst[di++] = src[si++];
2683         } else if (src[si] == 0 && src[si + 1] == 0) {
2684             if (src[si + 2] == 3) { // escape
2685                 dst[di++] = 0;
2686                 dst[di++] = 0;
2687                 si       += 3;
2688
2689                 continue;
2690             } else // next start code
2691                 goto nsc;
2692         }
2693
2694         dst[di++] = src[si++];
2695     }
2696     while (si < length)
2697         dst[di++] = src[si++];
2698
2699 nsc:
2700     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2701
2702     nal->data = dst;
2703     nal->size = di;
2704     nal->raw_data = src;
2705     nal->raw_size = si;
2706     return si;
2707 }
2708
2709 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2710 {
2711     int i, consumed, ret = 0;
2712
2713     s->ref = NULL;
2714     s->eos = 0;
2715
2716     /* split the input packet into NAL units, so we know the upper bound on the
2717      * number of slices in the frame */
2718     s->nb_nals = 0;
2719     while (length >= 4) {
2720         HEVCNAL *nal;
2721         int extract_length = 0;
2722
2723         if (s->is_nalff) {
2724             int i;
2725             for (i = 0; i < s->nal_length_size; i++)
2726                 extract_length = (extract_length << 8) | buf[i];
2727             buf    += s->nal_length_size;
2728             length -= s->nal_length_size;
2729
2730             if (extract_length > length) {
2731                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2732                 ret = AVERROR_INVALIDDATA;
2733                 goto fail;
2734             }
2735         } else {
2736             if (buf[2] == 0) {
2737                 length--;
2738                 buf++;
2739                 continue;
2740             }
2741             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2742                 ret = AVERROR_INVALIDDATA;
2743                 goto fail;
2744             }
2745
2746             buf           += 3;
2747             length        -= 3;
2748             extract_length = length;
2749         }
2750
2751         if (s->nals_allocated < s->nb_nals + 1) {
2752             int new_size = s->nals_allocated + 1;
2753             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2754             if (!tmp) {
2755                 ret = AVERROR(ENOMEM);
2756                 goto fail;
2757             }
2758             s->nals = tmp;
2759             memset(s->nals + s->nals_allocated, 0,
2760                    (new_size - s->nals_allocated) * sizeof(*tmp));
2761             s->nals_allocated = new_size;
2762         }
2763         nal = &s->nals[s->nb_nals++];
2764
2765         consumed = extract_rbsp(buf, extract_length, nal);
2766         if (consumed < 0) {
2767             ret = consumed;
2768             goto fail;
2769         }
2770
2771         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2772         if (ret < 0)
2773             goto fail;
2774         hls_nal_unit(s);
2775
2776         if (s->nal_unit_type == NAL_EOB_NUT ||
2777             s->nal_unit_type == NAL_EOS_NUT)
2778             s->eos = 1;
2779
2780         buf    += consumed;
2781         length -= consumed;
2782     }
2783
2784     /* parse the NAL units */
2785     for (i = 0; i < s->nb_nals; i++) {
2786         int ret = decode_nal_unit(s, &s->nals[i]);
2787         if (ret < 0) {
2788             av_log(s->avctx, AV_LOG_WARNING,
2789                    "Error parsing NAL unit #%d.\n", i);
2790             goto fail;
2791         }
2792     }
2793
2794 fail:
2795     if (s->ref)
2796         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2797
2798     return ret;
2799 }
2800
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal characters.
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level passed through to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte      = md5;
    const uint8_t *const end = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2807
2808 static int verify_md5(HEVCContext *s, AVFrame *frame)
2809 {
2810     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2811     int pixel_shift;
2812     int i, j;
2813
2814     if (!desc)
2815         return AVERROR(EINVAL);
2816
2817     pixel_shift = desc->comp[0].depth_minus1 > 7;
2818
2819     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2820            s->poc);
2821
2822     /* the checksums are LE, so we have to byteswap for >8bpp formats
2823      * on BE arches */
2824 #if HAVE_BIGENDIAN
2825     if (pixel_shift && !s->checksum_buf) {
2826         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2827                        FFMAX3(frame->linesize[0], frame->linesize[1],
2828                               frame->linesize[2]));
2829         if (!s->checksum_buf)
2830             return AVERROR(ENOMEM);
2831     }
2832 #endif
2833
2834     for (i = 0; frame->data[i]; i++) {
2835         int width  = s->avctx->coded_width;
2836         int height = s->avctx->coded_height;
2837         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2838         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2839         uint8_t md5[16];
2840
2841         av_md5_init(s->md5_ctx);
2842         for (j = 0; j < h; j++) {
2843             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2844 #if HAVE_BIGENDIAN
2845             if (pixel_shift) {
2846                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2847                                     (const uint16_t *) src, w);
2848                 src = s->checksum_buf;
2849             }
2850 #endif
2851             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2852         }
2853         av_md5_final(s->md5_ctx, md5);
2854
2855         if (!memcmp(md5, s->md5[i], 16)) {
2856             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2857             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2858             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2859         } else {
2860             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2861             print_md5(s->avctx, AV_LOG_ERROR, md5);
2862             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2863             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2864             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2865             return AVERROR_INVALIDDATA;
2866         }
2867     }
2868
2869     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2870
2871     return 0;
2872 }
2873
2874 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2875                              AVPacket *avpkt)
2876 {
2877     int ret;
2878     HEVCContext *s = avctx->priv_data;
2879
2880     if (!avpkt->size) {
2881         ret = ff_hevc_output_frame(s, data, 1);
2882         if (ret < 0)
2883             return ret;
2884
2885         *got_output = ret;
2886         return 0;
2887     }
2888
2889     s->ref = NULL;
2890     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2891     if (ret < 0)
2892         return ret;
2893
2894     /* verify the SEI checksum */
2895     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2896         s->is_md5) {
2897         ret = verify_md5(s, s->ref->frame);
2898         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2899             ff_hevc_unref_frame(s, s->ref, ~0);
2900             return ret;
2901         }
2902     }
2903     s->is_md5 = 0;
2904
2905     if (s->is_decoded) {
2906         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2907         s->is_decoded = 0;
2908     }
2909
2910     if (s->output_frame->buf[0]) {
2911         av_frame_move_ref(data, s->output_frame);
2912         *got_output = 1;
2913     }
2914
2915     return avpkt->size;
2916 }
2917
2918 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2919 {
2920     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2921     if (ret < 0)
2922         return ret;
2923
2924     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2925     if (!dst->tab_mvf_buf)
2926         goto fail;
2927     dst->tab_mvf = src->tab_mvf;
2928
2929     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2930     if (!dst->rpl_tab_buf)
2931         goto fail;
2932     dst->rpl_tab = src->rpl_tab;
2933
2934     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2935     if (!dst->rpl_buf)
2936         goto fail;
2937
2938     dst->poc        = src->poc;
2939     dst->ctb_count  = src->ctb_count;
2940     dst->window     = src->window;
2941     dst->flags      = src->flags;
2942     dst->sequence   = src->sequence;
2943
2944     return 0;
2945 fail:
2946     ff_hevc_unref_frame(s, dst, ~0);
2947     return AVERROR(ENOMEM);
2948 }
2949
2950 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2951 {
2952     HEVCContext       *s = avctx->priv_data;
2953     int i;
2954
2955     pic_arrays_free(s);
2956
2957     av_freep(&s->md5_ctx);
2958
2959     av_frame_free(&s->tmp_frame);
2960     av_frame_free(&s->output_frame);
2961
2962     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2963         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2964         av_frame_free(&s->DPB[i].frame);
2965     }
2966
2967     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2968         av_buffer_unref(&s->vps_list[i]);
2969     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2970         av_buffer_unref(&s->sps_list[i]);
2971     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2972         av_buffer_unref(&s->pps_list[i]);
2973
2974     for (i = 0; i < s->nals_allocated; i++)
2975         av_freep(&s->nals[i].rbsp_buffer);
2976     av_freep(&s->nals);
2977     s->nals_allocated = 0;
2978
2979     return 0;
2980 }
2981
2982 static av_cold int hevc_init_context(AVCodecContext *avctx)
2983 {
2984     HEVCContext *s = avctx->priv_data;
2985     int i;
2986
2987     s->avctx = avctx;
2988
2989     s->tmp_frame = av_frame_alloc();
2990     if (!s->tmp_frame)
2991         goto fail;
2992
2993     s->output_frame = av_frame_alloc();
2994     if (!s->output_frame)
2995         goto fail;
2996
2997     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2998         s->DPB[i].frame = av_frame_alloc();
2999         if (!s->DPB[i].frame)
3000             goto fail;
3001         s->DPB[i].tf.f = s->DPB[i].frame;
3002     }
3003
3004     s->max_ra = INT_MAX;
3005
3006     s->md5_ctx = av_md5_alloc();
3007     if (!s->md5_ctx)
3008         goto fail;
3009
3010     ff_bswapdsp_init(&s->bdsp);
3011
3012     s->context_initialized = 1;
3013
3014     return 0;
3015
3016 fail:
3017     hevc_decode_free(avctx);
3018     return AVERROR(ENOMEM);
3019 }
3020
3021 static int hevc_update_thread_context(AVCodecContext *dst,
3022                                       const AVCodecContext *src)
3023 {
3024     HEVCContext *s  = dst->priv_data;
3025     HEVCContext *s0 = src->priv_data;
3026     int i, ret;
3027
3028     if (!s->context_initialized) {
3029         ret = hevc_init_context(dst);
3030         if (ret < 0)
3031             return ret;
3032     }
3033
3034     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3035         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3036         if (s0->DPB[i].frame->buf[0]) {
3037             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3038             if (ret < 0)
3039                 return ret;
3040         }
3041     }
3042
3043     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3044         av_buffer_unref(&s->vps_list[i]);
3045         if (s0->vps_list[i]) {
3046             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3047             if (!s->vps_list[i])
3048                 return AVERROR(ENOMEM);
3049         }
3050     }
3051
3052     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3053         av_buffer_unref(&s->sps_list[i]);
3054         if (s0->sps_list[i]) {
3055             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3056             if (!s->sps_list[i])
3057                 return AVERROR(ENOMEM);
3058         }
3059     }
3060
3061     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3062         av_buffer_unref(&s->pps_list[i]);
3063         if (s0->pps_list[i]) {
3064             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3065             if (!s->pps_list[i])
3066                 return AVERROR(ENOMEM);
3067         }
3068     }
3069
3070     if (s->sps != s0->sps)
3071         ret = set_sps(s, s0->sps);
3072
3073     s->seq_decode = s0->seq_decode;
3074     s->seq_output = s0->seq_output;
3075     s->pocTid0    = s0->pocTid0;
3076     s->max_ra     = s0->max_ra;
3077
3078     s->is_nalff        = s0->is_nalff;
3079     s->nal_length_size = s0->nal_length_size;
3080
3081     if (s0->eos) {
3082         s->seq_decode = (s->seq_decode + 1) & 0xff;
3083         s->max_ra = INT_MAX;
3084     }
3085
3086     return 0;
3087 }
3088
3089 static int hevc_decode_extradata(HEVCContext *s)
3090 {
3091     AVCodecContext *avctx = s->avctx;
3092     GetByteContext gb;
3093     int ret;
3094
3095     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3096
3097     if (avctx->extradata_size > 3 &&
3098         (avctx->extradata[0] || avctx->extradata[1] ||
3099          avctx->extradata[2] > 1)) {
3100         /* It seems the extradata is encoded as hvcC format.
3101          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3102          * is finalized. When finalized, configurationVersion will be 1 and we
3103          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3104         int i, j, num_arrays, nal_len_size;
3105
3106         s->is_nalff = 1;
3107
3108         bytestream2_skip(&gb, 21);
3109         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3110         num_arrays   = bytestream2_get_byte(&gb);
3111
3112         /* nal units in the hvcC always have length coded with 2 bytes,
3113          * so put a fake nal_length_size = 2 while parsing them */
3114         s->nal_length_size = 2;
3115
3116         /* Decode nal units from hvcC. */
3117         for (i = 0; i < num_arrays; i++) {
3118             int type = bytestream2_get_byte(&gb) & 0x3f;
3119             int cnt  = bytestream2_get_be16(&gb);
3120
3121             for (j = 0; j < cnt; j++) {
3122                 // +2 for the nal size field
3123                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3124                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3125                     av_log(s->avctx, AV_LOG_ERROR,
3126                            "Invalid NAL unit size in extradata.\n");
3127                     return AVERROR_INVALIDDATA;
3128                 }
3129
3130                 ret = decode_nal_units(s, gb.buffer, nalsize);
3131                 if (ret < 0) {
3132                     av_log(avctx, AV_LOG_ERROR,
3133                            "Decoding nal unit %d %d from hvcC failed\n",
3134                            type, i);
3135                     return ret;
3136                 }
3137                 bytestream2_skip(&gb, nalsize);
3138             }
3139         }
3140
3141         /* Now store right nal length size, that will be used to parse
3142          * all other nals */
3143         s->nal_length_size = nal_len_size;
3144     } else {
3145         s->is_nalff = 0;
3146         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3147         if (ret < 0)
3148             return ret;
3149     }
3150     return 0;
3151 }
3152
3153 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3154 {
3155     HEVCContext *s = avctx->priv_data;
3156     int ret;
3157
3158     ff_init_cabac_states();
3159
3160     avctx->internal->allocate_progress = 1;
3161
3162     ret = hevc_init_context(avctx);
3163     if (ret < 0)
3164         return ret;
3165
3166     if (avctx->extradata_size > 0 && avctx->extradata) {
3167         ret = hevc_decode_extradata(s);
3168         if (ret < 0) {
3169             hevc_decode_free(avctx);
3170             return ret;
3171         }
3172     }
3173
3174     return 0;
3175 }
3176
3177 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3178 {
3179     HEVCContext *s = avctx->priv_data;
3180     int ret;
3181
3182     memset(s, 0, sizeof(*s));
3183
3184     ret = hevc_init_context(avctx);
3185     if (ret < 0)
3186         return ret;
3187
3188     return 0;
3189 }
3190
3191 static void hevc_decode_flush(AVCodecContext *avctx)
3192 {
3193     HEVCContext *s = avctx->priv_data;
3194     ff_hevc_flush_dpb(s);
3195     s->max_ra = INT_MAX;
3196 }
3197
/* Byte offset of a member inside HEVCContext, for the option table. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flag set shared by the entries of the option table. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3200
3201 static const AVProfile profiles[] = {
3202     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3203     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3204     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3205     { FF_PROFILE_UNKNOWN },
3206 };
3207
3208 static const AVOption options[] = {
3209     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3210         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3211     { NULL },
3212 };
3213
3214 static const AVClass hevc_decoder_class = {
3215     .class_name = "HEVC decoder",
3216     .item_name  = av_default_item_name,
3217     .option     = options,
3218     .version    = LIBAVUTIL_VERSION_INT,
3219 };
3220
3221 AVCodec ff_hevc_decoder = {
3222     .name                  = "hevc",
3223     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3224     .type                  = AVMEDIA_TYPE_VIDEO,
3225     .id                    = AV_CODEC_ID_HEVC,
3226     .priv_data_size        = sizeof(HEVCContext),
3227     .priv_class            = &hevc_decoder_class,
3228     .init                  = hevc_decode_init,
3229     .close                 = hevc_decode_free,
3230     .decode                = hevc_decode_frame,
3231     .flush                 = hevc_decode_flush,
3232     .update_thread_context = hevc_update_thread_context,
3233     .init_thread_copy      = hevc_init_thread_copy,
3234     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3235                              CODEC_CAP_FRAME_THREADS,
3236     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3237 };