git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/md5.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "libavutil/stereo3d.h"
  34
  35 #include "bswapdsp.h"
  36 #include "bytestream.h"
  37 #include "cabac_functions.h"
  38 #include "golomb.h"
  39 #include "hevc.h"
  40
/*
 * Three parallel 4-entry tables. For every index i,
 * ff_hevc_qpel_extra[i] == ff_hevc_qpel_extra_before[i] + ff_hevc_qpel_extra_after[i].
 * NOTE(review): judging by the names these are the numbers of additional
 * reference samples needed before/after a block for quarter-sample
 * interpolation, indexed by the fractional sample position -- confirm against
 * the users of these tables.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
  44
/* Scan table for a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/*
 * Horizontal scan of a 2x2 grid: scan position i is located at
 * (horiz_scan2x2_x[i], horiz_scan2x2_y[i]); x changes fastest.
 */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };

static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };

/*
 * Horizontal scan of a 4x4 grid: scan position i is located at
 * (horiz_scan4x4_x[i], horiz_scan4x4_y[i]); x changes fastest.
 */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,
    0, 1, 2, 3,
    0, 1, 2, 3,
    0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
};

/*
 * Scan position for every cell of an 8x8 grid. Each 4x4 quadrant holds 16
 * consecutive positions laid out row by row; the quadrants themselves are
 * numbered top-left (0-15), top-right (16-31), bottom-left (32-47),
 * bottom-right (48-63).
 * NOTE(review): presumably indexed as [y][x] like the diagonal inverse
 * tables below -- confirm against the users of this table.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3, 16, 17, 18, 19, },
    {  4,  5,  6,  7, 20, 21, 22, 23, },
    {  8,  9, 10, 11, 24, 25, 26, 27, },
    { 12, 13, 14, 15, 28, 29, 30, 31, },
    { 32, 33, 34, 35, 48, 49, 50, 51, },
    { 36, 37, 38, 39, 52, 53, 54, 55, },
    { 40, 41, 42, 43, 56, 57, 58, 59, },
    { 44, 45, 46, 47, 60, 61, 62, 63, },
};
  75
/*
 * Diagonal scan of a 2x2 grid: scan position i is located at
 * (diag_scan2x2_x[i], diag_scan2x2_y[i]).
 */
static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };

static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };

/*
 * Inverse of the 2x2 diagonal scan: diag_scan2x2_inv[y][x] is the scan
 * position i for which diag_scan2x2_x[i] == x and diag_scan2x2_y[i] == y.
 */
static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2, },
    { 1, 3, },
};

/*
 * Diagonal scan of a 4x4 grid: scan position i is located at
 * (ff_hevc_diag_scan4x4_x[i], ff_hevc_diag_scan4x4_y[i]). Every anti-diagonal
 * is walked with x increasing and y decreasing.
 */
const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0, 0, 1, 0,
    1, 2, 0, 1,
    2, 3, 1, 2,
    3, 2, 3, 3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0, 1, 0, 2,
    1, 0, 3, 2,
    1, 0, 3, 2,
    1, 3, 2, 3,
};

/*
 * Inverse of the 4x4 diagonal scan: diag_scan4x4_inv[y][x] is the scan
 * position i for which ff_hevc_diag_scan4x4_x[i] == x and
 * ff_hevc_diag_scan4x4_y[i] == y.
 */
static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9, },
    { 1,  4,  8, 12, },
    { 3,  7, 11, 14, },
    { 6, 10, 13, 15, },
};
 105
/*
 * Diagonal scan of an 8x8 grid: scan position i is located at
 * (ff_hevc_diag_scan8x8_x[i], ff_hevc_diag_scan8x8_y[i]). Every anti-diagonal
 * is walked with x increasing and y decreasing.
 */
const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0, 0, 1, 0,
    1, 2, 0, 1,
    2, 3, 0, 1,
    2, 3, 4, 0,
    1, 2, 3, 4,
    5, 0, 1, 2,
    3, 4, 5, 6,
    0, 1, 2, 3,
    4, 5, 6, 7,
    1, 2, 3, 4,
    5, 6, 7, 2,
    3, 4, 5, 6,
    7, 3, 4, 5,
    6, 7, 4, 5,
    6, 7, 5, 6,
    7, 6, 7, 7,
};

const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0, 1, 0, 2,
    1, 0, 3, 2,
    1, 0, 4, 3,
    2, 1, 0, 5,
    4, 3, 2, 1,
    0, 6, 5, 4,
    3, 2, 1, 0,
    7, 6, 5, 4,
    3, 2, 1, 0,
    7, 6, 5, 4,
    3, 2, 1, 7,
    6, 5, 4, 3,
    2, 7, 6, 5,
    4, 3, 7, 6,
    5, 4, 7, 6,
    5, 7, 6, 7,
};

/*
 * Inverse of the 8x8 diagonal scan: diag_scan8x8_inv[y][x] is the scan
 * position i for which ff_hevc_diag_scan8x8_x[i] == x and
 * ff_hevc_diag_scan8x8_y[i] == y.
 */
static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35, },
    {  1,  4,  8, 13, 19, 26, 34, 42, },
    {  3,  7, 12, 18, 25, 33, 41, 48, },
    {  6, 11, 17, 24, 32, 40, 47, 53, },
    { 10, 16, 23, 31, 39, 46, 52, 57, },
    { 15, 22, 30, 38, 45, 51, 56, 60, },
    { 21, 29, 37, 44, 50, 55, 59, 62, },
    { 28, 36, 43, 49, 54, 58, 61, 63, },
};
 154
 155 /**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 157  * specification (HLS stands for High Level Syntax).
 158  */
 159
 160 /**
 161  * Section 5.7
 162  */
 163
 164 /* free everything allocated  by pic_arrays_init() */
 165 static void pic_arrays_free(HEVCContext *s)
 166 {
 167     av_freep(&s->sao);
 168     av_freep(&s->deblock);
 169
 170     av_freep(&s->skip_flag);
 171     av_freep(&s->tab_ct_depth);
 172
 173     av_freep(&s->tab_ipm);
 174     av_freep(&s->cbf_luma);
 175     av_freep(&s->is_pcm);
 176
 177     av_freep(&s->qp_y_tab);
 178     av_freep(&s->tab_slice_address);
 179     av_freep(&s->filter_slice_edges);
 180
 181     av_freep(&s->horizontal_bs);
 182     av_freep(&s->vertical_bs);
 183
 184     av_buffer_pool_uninit(&s->tab_mvf_pool);
 185     av_buffer_pool_uninit(&s->rpl_tab_pool);
 186 }
 187
 188 /* allocate arrays that depend on frame dimensions */
 189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 190 {
 191     int log2_min_cb_size = sps->log2_min_cb_size;
 192     int width            = sps->width;
 193     int height           = sps->height;
 194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
 195                            ((height >> log2_min_cb_size) + 1);
 196     int ctb_count        = sps->ctb_width * sps->ctb_height;
 197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 198
 199     s->bs_width  = width  >> 3;
 200     s->bs_height = height >> 3;
 201
 202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 204     if (!s->sao || !s->deblock)
 205         goto fail;
 206
 207     s->skip_flag    = av_malloc(pic_size_in_ctb);
 208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
 209     if (!s->skip_flag || !s->tab_ct_depth)
 210         goto fail;
 211
 212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
 213     s->tab_ipm  = av_mallocz(min_pu_size);
 214     s->is_pcm   = av_malloc(min_pu_size);
 215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 216         goto fail;
 217
 218     s->filter_slice_edges = av_malloc(ctb_count);
 219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
 220                                       sizeof(*s->tab_slice_address));
 221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
 222                                       sizeof(*s->qp_y_tab));
 223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 224         goto fail;
 225
 226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
 228     if (!s->horizontal_bs || !s->vertical_bs)
 229         goto fail;
 230
 231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 232                                           av_buffer_alloc);
 233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 234                                           av_buffer_allocz);
 235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 236         goto fail;
 237
 238     return 0;
 239
 240 fail:
 241     pic_arrays_free(s);
 242     return AVERROR(ENOMEM);
 243 }
 244
 245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 246 {
 247     int i = 0;
 248     int j = 0;
 249     uint8_t luma_weight_l0_flag[16];
 250     uint8_t chroma_weight_l0_flag[16];
 251     uint8_t luma_weight_l1_flag[16];
 252     uint8_t chroma_weight_l1_flag[16];
 253
 254     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
 255     if (s->sps->chroma_format_idc != 0) {
 256         int delta = get_se_golomb(gb);
 257         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 258     }
 259
 260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 261         luma_weight_l0_flag[i] = get_bits1(gb);
 262         if (!luma_weight_l0_flag[i]) {
 263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 264             s->sh.luma_offset_l0[i] = 0;
 265         }
 266     }
 267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
 268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 269             chroma_weight_l0_flag[i] = get_bits1(gb);
 270     } else {
 271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 272             chroma_weight_l0_flag[i] = 0;
 273     }
 274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 275         if (luma_weight_l0_flag[i]) {
 276             int delta_luma_weight_l0 = get_se_golomb(gb);
 277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 279         }
 280         if (chroma_weight_l0_flag[i]) {
 281             for (j = 0; j < 2; j++) {
 282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 285                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 287             }
 288         } else {
 289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 290             s->sh.chroma_offset_l0[i][0] = 0;
 291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 292             s->sh.chroma_offset_l0[i][1] = 0;
 293         }
 294     }
 295     if (s->sh.slice_type == B_SLICE) {
 296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 297             luma_weight_l1_flag[i] = get_bits1(gb);
 298             if (!luma_weight_l1_flag[i]) {
 299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 300                 s->sh.luma_offset_l1[i] = 0;
 301             }
 302         }
 303         if (s->sps->chroma_format_idc != 0) {
 304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 305                 chroma_weight_l1_flag[i] = get_bits1(gb);
 306         } else {
 307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 308                 chroma_weight_l1_flag[i] = 0;
 309         }
 310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 311             if (luma_weight_l1_flag[i]) {
 312                 int delta_luma_weight_l1 = get_se_golomb(gb);
 313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 315             }
 316             if (chroma_weight_l1_flag[i]) {
 317                 for (j = 0; j < 2; j++) {
 318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 321                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 323                 }
 324             } else {
 325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 326                 s->sh.chroma_offset_l1[i][0] = 0;
 327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 328                 s->sh.chroma_offset_l1[i][1] = 0;
 329             }
 330         }
 331     }
 332 }
 333
 334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 335 {
 336     const HEVCSPS *sps = s->sps;
 337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 338     int prev_delta_msb = 0;
 339     unsigned int nb_sps = 0, nb_sh;
 340     int i;
 341
 342     rps->nb_refs = 0;
 343     if (!sps->long_term_ref_pics_present_flag)
 344         return 0;
 345
 346     if (sps->num_long_term_ref_pics_sps > 0)
 347         nb_sps = get_ue_golomb_long(gb);
 348     nb_sh = get_ue_golomb_long(gb);
 349
 350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
 351         return AVERROR_INVALIDDATA;
 352
 353     rps->nb_refs = nb_sh + nb_sps;
 354
 355     for (i = 0; i < rps->nb_refs; i++) {
 356         uint8_t delta_poc_msb_present;
 357
 358         if (i < nb_sps) {
 359             uint8_t lt_idx_sps = 0;
 360
 361             if (sps->num_long_term_ref_pics_sps > 1)
 362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 363
 364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 366         } else {
 367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 368             rps->used[i] = get_bits1(gb);
 369         }
 370
 371         delta_poc_msb_present = get_bits1(gb);
 372         if (delta_poc_msb_present) {
 373             int delta = get_ue_golomb_long(gb);
 374
 375             if (i && i != nb_sps)
 376                 delta += prev_delta_msb;
 377
 378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 379             prev_delta_msb = delta;
 380         }
 381     }
 382
 383     return 0;
 384 }
 385
 386 static void export_stream_params(AVCodecContext *avctx,
 387                                  const HEVCContext *s, const HEVCSPS *sps)
 388 {
 389     const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data;
 390     unsigned int num = 0, den = 0;
 391
 392     avctx->pix_fmt             = sps->pix_fmt;
 393     avctx->coded_width         = sps->width;
 394     avctx->coded_height        = sps->height;
 395     avctx->width               = sps->output_width;
 396     avctx->height              = sps->output_height;
 397     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 398     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 399     avctx->level               = sps->ptl.general_ptl.level_idc;
 400
 401     ff_set_sar(avctx, sps->vui.sar);
 402
 403     if (sps->vui.video_signal_type_present_flag)
 404         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 405                                                             : AVCOL_RANGE_MPEG;
 406     else
 407         avctx->color_range = AVCOL_RANGE_MPEG;
 408
 409     if (sps->vui.colour_description_present_flag) {
 410         avctx->color_primaries = sps->vui.colour_primaries;
 411         avctx->color_trc       = sps->vui.transfer_characteristic;
 412         avctx->colorspace      = sps->vui.matrix_coeffs;
 413     } else {
 414         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 415         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 416         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 417     }
 418
 419     if (vps->vps_timing_info_present_flag) {
 420         num = vps->vps_num_units_in_tick;
 421         den = vps->vps_time_scale;
 422     } else if (sps->vui.vui_timing_info_present_flag) {
 423         num = sps->vui.vui_num_units_in_tick;
 424         den = sps->vui.vui_time_scale;
 425     }
 426
 427     if (num != 0 && den != 0)
 428         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 429                   num, den, 1 << 30);
 430 }
 431
 432 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 433 {
 434     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL)
 435     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 436     int ret;
 437
 438     export_stream_params(s->avctx, s, sps);
 439
 440     pic_arrays_free(s);
 441     ret = pic_arrays_init(s, sps);
 442     if (ret < 0)
 443         goto fail;
 444
 445     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 446 #if CONFIG_HEVC_DXVA2_HWACCEL
 447         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 448 #endif
 449     }
 450
 451     *fmt++ = sps->pix_fmt;
 452     *fmt = AV_PIX_FMT_NONE;
 453
 454     ret = ff_get_format(s->avctx, pix_fmts);
 455     if (ret < 0)
 456         goto fail;
 457     s->avctx->pix_fmt = ret;
 458
 459     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 460     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 461     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 462
 463     if (sps->sao_enabled && !s->avctx->hwaccel) {
 464         av_frame_unref(s->tmp_frame);
 465         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 466         if (ret < 0)
 467             goto fail;
 468         s->frame = s->tmp_frame;
 469     }
 470
 471     s->sps = sps;
 472     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 473
 474     return 0;
 475
 476 fail:
 477     pic_arrays_free(s);
 478     s->sps = NULL;
 479     return ret;
 480 }
 481
 482 static int hls_slice_header(HEVCContext *s)
 483 {
 484     GetBitContext *gb = &s->HEVClc.gb;
 485     SliceHeader *sh   = &s->sh;
 486     int i, ret;
 487
 488     // Coded parameters
 489     sh->first_slice_in_pic_flag = get_bits1(gb);
 490     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 491         s->seq_decode = (s->seq_decode + 1) & 0xff;
 492         s->max_ra     = INT_MAX;
 493         if (IS_IDR(s))
 494             ff_hevc_clear_refs(s);
 495     }
 496     if (IS_IRAP(s))
 497         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 498
 499     sh->pps_id = get_ue_golomb_long(gb);
 500     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 501         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 502         return AVERROR_INVALIDDATA;
 503     }
 504     if (!sh->first_slice_in_pic_flag &&
 505         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 506         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 507         return AVERROR_INVALIDDATA;
 508     }
 509     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 510
 511     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 512         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 513
 514         ff_hevc_clear_refs(s);
 515         ret = set_sps(s, s->sps);
 516         if (ret < 0)
 517             return ret;
 518
 519         s->seq_decode = (s->seq_decode + 1) & 0xff;
 520         s->max_ra     = INT_MAX;
 521     }
 522
 523     sh->dependent_slice_segment_flag = 0;
 524     if (!sh->first_slice_in_pic_flag) {
 525         int slice_address_length;
 526
 527         if (s->pps->dependent_slice_segments_enabled_flag)
 528             sh->dependent_slice_segment_flag = get_bits1(gb);
 529
 530         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 531                                             s->sps->ctb_height);
 532         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 533         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 534             av_log(s->avctx, AV_LOG_ERROR,
 535                    "Invalid slice segment address: %u.\n",
 536                    sh->slice_segment_addr);
 537             return AVERROR_INVALIDDATA;
 538         }
 539
 540         if (!sh->dependent_slice_segment_flag) {
 541             sh->slice_addr = sh->slice_segment_addr;
 542             s->slice_idx++;
 543         }
 544     } else {
 545         sh->slice_segment_addr = sh->slice_addr = 0;
 546         s->slice_idx           = 0;
 547         s->slice_initialized   = 0;
 548     }
 549
 550     if (!sh->dependent_slice_segment_flag) {
 551         s->slice_initialized = 0;
 552
 553         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 554             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 555
 556         sh->slice_type = get_ue_golomb_long(gb);
 557         if (!(sh->slice_type == I_SLICE ||
 558               sh->slice_type == P_SLICE ||
 559               sh->slice_type == B_SLICE)) {
 560             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 561                    sh->slice_type);
 562             return AVERROR_INVALIDDATA;
 563         }
 564         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 565             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 566             return AVERROR_INVALIDDATA;
 567         }
 568
 569         // when flag is not present, picture is inferred to be output
 570         sh->pic_output_flag = 1;
 571         if (s->pps->output_flag_present_flag)
 572             sh->pic_output_flag = get_bits1(gb);
 573
 574         if (s->sps->separate_colour_plane_flag)
 575             sh->colour_plane_id = get_bits(gb, 2);
 576
 577         if (!IS_IDR(s)) {
 578             int poc;
 579
 580             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 581             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 582             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 583                 av_log(s->avctx, AV_LOG_WARNING,
 584                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 585                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 586                     return AVERROR_INVALIDDATA;
 587                 poc = s->poc;
 588             }
 589             s->poc = poc;
 590
 591             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 592             if (!sh->short_term_ref_pic_set_sps_flag) {
 593                 int pos = get_bits_left(gb);
 594                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 595                 if (ret < 0)
 596                     return ret;
 597
 598                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 599                 sh->short_term_rps = &sh->slice_rps;
 600             } else {
 601                 int numbits, rps_idx;
 602
 603                 if (!s->sps->nb_st_rps) {
 604                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 605                     return AVERROR_INVALIDDATA;
 606                 }
 607
 608                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 609                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 610                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 611             }
 612
 613             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 614             if (ret < 0) {
 615                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 616                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 617                     return AVERROR_INVALIDDATA;
 618             }
 619
 620             if (s->sps->sps_temporal_mvp_enabled_flag)
 621                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 622             else
 623                 sh->slice_temporal_mvp_enabled_flag = 0;
 624         } else {
 625             s->sh.short_term_rps = NULL;
 626             s->poc               = 0;
 627         }
 628
 629         /* 8.3.1 */
 630         if (s->temporal_id == 0 &&
 631             s->nal_unit_type != NAL_TRAIL_N &&
 632             s->nal_unit_type != NAL_TSA_N   &&
 633             s->nal_unit_type != NAL_STSA_N  &&
 634             s->nal_unit_type != NAL_RADL_N  &&
 635             s->nal_unit_type != NAL_RADL_R  &&
 636             s->nal_unit_type != NAL_RASL_N  &&
 637             s->nal_unit_type != NAL_RASL_R)
 638             s->pocTid0 = s->poc;
 639
 640         if (s->sps->sao_enabled) {
 641             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 642             sh->slice_sample_adaptive_offset_flag[1] =
 643             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 644         } else {
 645             sh->slice_sample_adaptive_offset_flag[0] = 0;
 646             sh->slice_sample_adaptive_offset_flag[1] = 0;
 647             sh->slice_sample_adaptive_offset_flag[2] = 0;
 648         }
 649
 650         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 651         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 652             int nb_refs;
 653
 654             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 655             if (sh->slice_type == B_SLICE)
 656                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 657
 658             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 659                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 660                 if (sh->slice_type == B_SLICE)
 661                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 662             }
 663             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 664                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 665                        sh->nb_refs[L0], sh->nb_refs[L1]);
 666                 return AVERROR_INVALIDDATA;
 667             }
 668
 669             sh->rpl_modification_flag[0] = 0;
 670             sh->rpl_modification_flag[1] = 0;
 671             nb_refs = ff_hevc_frame_nb_refs(s);
 672             if (!nb_refs) {
 673                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 674                 return AVERROR_INVALIDDATA;
 675             }
 676
 677             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 678                 sh->rpl_modification_flag[0] = get_bits1(gb);
 679                 if (sh->rpl_modification_flag[0]) {
 680                     for (i = 0; i < sh->nb_refs[L0]; i++)
 681                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 682                 }
 683
 684                 if (sh->slice_type == B_SLICE) {
 685                     sh->rpl_modification_flag[1] = get_bits1(gb);
 686                     if (sh->rpl_modification_flag[1] == 1)
 687                         for (i = 0; i < sh->nb_refs[L1]; i++)
 688                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 689                 }
 690             }
 691
 692             if (sh->slice_type == B_SLICE)
 693                 sh->mvd_l1_zero_flag = get_bits1(gb);
 694
 695             if (s->pps->cabac_init_present_flag)
 696                 sh->cabac_init_flag = get_bits1(gb);
 697             else
 698                 sh->cabac_init_flag = 0;
 699
 700             sh->collocated_ref_idx = 0;
 701             if (sh->slice_temporal_mvp_enabled_flag) {
 702                 sh->collocated_list = L0;
 703                 if (sh->slice_type == B_SLICE)
 704                     sh->collocated_list = !get_bits1(gb);
 705
 706                 if (sh->nb_refs[sh->collocated_list] > 1) {
 707                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 708                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 709                         av_log(s->avctx, AV_LOG_ERROR,
 710                                "Invalid collocated_ref_idx: %d.\n",
 711                                sh->collocated_ref_idx);
 712                         return AVERROR_INVALIDDATA;
 713                     }
 714                 }
 715             }
 716
 717             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 718                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 719                 pred_weight_table(s, gb);
 720             }
 721
 722             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 723             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 724                 av_log(s->avctx, AV_LOG_ERROR,
 725                        "Invalid number of merging MVP candidates: %d.\n",
 726                        sh->max_num_merge_cand);
 727                 return AVERROR_INVALIDDATA;
 728             }
 729         }
 730
 731         sh->slice_qp_delta = get_se_golomb(gb);
 732
 733         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 734             sh->slice_cb_qp_offset = get_se_golomb(gb);
 735             sh->slice_cr_qp_offset = get_se_golomb(gb);
 736         } else {
 737             sh->slice_cb_qp_offset = 0;
 738             sh->slice_cr_qp_offset = 0;
 739         }
 740
 741         if (s->pps->deblocking_filter_control_present_flag) {
 742             int deblocking_filter_override_flag = 0;
 743
 744             if (s->pps->deblocking_filter_override_enabled_flag)
 745                 deblocking_filter_override_flag = get_bits1(gb);
 746
 747             if (deblocking_filter_override_flag) {
 748                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 749                 if (!sh->disable_deblocking_filter_flag) {
 750                     sh->beta_offset = get_se_golomb(gb) * 2;
 751                     sh->tc_offset   = get_se_golomb(gb) * 2;
 752                 }
 753             } else {
 754                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 755                 sh->beta_offset                    = s->pps->beta_offset;
 756                 sh->tc_offset                      = s->pps->tc_offset;
 757             }
 758         } else {
 759             sh->disable_deblocking_filter_flag = 0;
 760             sh->beta_offset                    = 0;
 761             sh->tc_offset                      = 0;
 762         }
 763
 764         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 765             (sh->slice_sample_adaptive_offset_flag[0] ||
 766              sh->slice_sample_adaptive_offset_flag[1] ||
 767              !sh->disable_deblocking_filter_flag)) {
 768             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 769         } else {
 770             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 771         }
 772     } else if (!s->slice_initialized) {
 773         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 774         return AVERROR_INVALIDDATA;
 775     }
 776
 777     sh->num_entry_point_offsets = 0;
 778     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 779         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 780         if (sh->num_entry_point_offsets > 0) {
 781             int offset_len = get_ue_golomb_long(gb) + 1;
 782
 783             for (i = 0; i < sh->num_entry_point_offsets; i++)
 784                 skip_bits(gb, offset_len);
 785         }
 786     }
 787
 788     if (s->pps->slice_header_extension_present_flag) {
 789         unsigned int length = get_ue_golomb_long(gb);
 790         for (i = 0; i < length; i++)
 791             skip_bits(gb, 8);  // slice_header_extension_data_byte
 792     }
 793
 794     // Inferred parameters
 795     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 796     if (sh->slice_qp > 51 ||
 797         sh->slice_qp < -s->sps->qp_bd_offset) {
 798         av_log(s->avctx, AV_LOG_ERROR,
 799                "The slice_qp %d is outside the valid range "
 800                "[%d, 51].\n",
 801                sh->slice_qp,
 802                -s->sps->qp_bd_offset);
 803         return AVERROR_INVALIDDATA;
 804     }
 805
 806     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 807
 808     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 809         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 810         return AVERROR_INVALIDDATA;
 811     }
 812
 813     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 814
 815     if (!s->pps->cu_qp_delta_enabled_flag)
 816         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
 817                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
 818
 819     s->slice_initialized = 1;
 820
 821     return 0;
 822 }
 823
 824 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 825
 826 #define SET_SAO(elem, value)                            \
 827 do {                                                    \
 828     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 829         sao->elem = value;                              \
 830     else if (sao_merge_left_flag)                       \
 831         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 832     else if (sao_merge_up_flag)                         \
 833         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 834     else                                                \
 835         sao->elem = 0;                                  \
 836 } while (0)
 837
 838 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 839 {
 840     HEVCLocalContext *lc    = &s->HEVClc;
 841     int sao_merge_left_flag = 0;
 842     int sao_merge_up_flag   = 0;
 843     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
 844     SAOParams *sao          = &CTB(s->sao, rx, ry);
 845     int c_idx, i;
 846
 847     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 848         s->sh.slice_sample_adaptive_offset_flag[1]) {
 849         if (rx > 0) {
 850             if (lc->ctb_left_flag)
 851                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 852         }
 853         if (ry > 0 && !sao_merge_left_flag) {
 854             if (lc->ctb_up_flag)
 855                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 856         }
 857     }
 858
 859     for (c_idx = 0; c_idx < 3; c_idx++) {
 860         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 861             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 862             continue;
 863         }
 864
 865         if (c_idx == 2) {
 866             sao->type_idx[2] = sao->type_idx[1];
 867             sao->eo_class[2] = sao->eo_class[1];
 868         } else {
 869             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 870         }
 871
 872         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 873             continue;
 874
 875         for (i = 0; i < 4; i++)
 876             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 877
 878         if (sao->type_idx[c_idx] == SAO_BAND) {
 879             for (i = 0; i < 4; i++) {
 880                 if (sao->offset_abs[c_idx][i]) {
 881                     SET_SAO(offset_sign[c_idx][i],
 882                             ff_hevc_sao_offset_sign_decode(s));
 883                 } else {
 884                     sao->offset_sign[c_idx][i] = 0;
 885                 }
 886             }
 887             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 888         } else if (c_idx != 2) {
 889             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 890         }
 891
 892         // Inferred parameters
 893         sao->offset_val[c_idx][0] = 0;
 894         for (i = 0; i < 4; i++) {
 895             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
 896             if (sao->type_idx[c_idx] == SAO_EDGE) {
 897                 if (i > 1)
 898                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 899             } else if (sao->offset_sign[c_idx][i]) {
 900                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 901             }
 902         }
 903     }
 904 }
 905
 906 #undef SET_SAO
 907 #undef CTB
 908
 909 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
 910                                 int log2_trafo_size, enum ScanType scan_idx,
 911                                 int c_idx)
 912 {
 913 #define GET_COORD(offset, n)                                    \
 914     do {                                                        \
 915         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
 916         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
 917     } while (0)
 918     HEVCLocalContext *lc    = &s->HEVClc;
 919     int transform_skip_flag = 0;
 920
 921     int last_significant_coeff_x, last_significant_coeff_y;
 922     int last_scan_pos;
 923     int n_end;
 924     int num_coeff    = 0;
 925     int greater1_ctx = 1;
 926
 927     int num_last_subset;
 928     int x_cg_last_sig, y_cg_last_sig;
 929
 930     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 931
 932     ptrdiff_t stride = s->frame->linesize[c_idx];
 933     int hshift       = s->sps->hshift[c_idx];
 934     int vshift       = s->sps->vshift[c_idx];
 935     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
 936                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 937     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
 938     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 939
 940     int trafo_size = 1 << log2_trafo_size;
 941     int i, qp, shift, add, scale, scale_m;
 942     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
 943     const uint8_t *scale_matrix;
 944     uint8_t dc_scale;
 945
 946     // Derive QP for dequant
 947     if (!lc->cu.cu_transquant_bypass_flag) {
 948         static const int qp_c[] = {
 949             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
 950         };
 951
 952         static const uint8_t rem6[51 + 2 * 6 + 1] = {
 953             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
 954             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
 955             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 956         };
 957
 958         static const uint8_t div6[51 + 2 * 6 + 1] = {
 959             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
 960             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
 961             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
 962         };
 963         int qp_y = lc->qp_y;
 964
 965         if (c_idx == 0) {
 966             qp = qp_y + s->sps->qp_bd_offset;
 967         } else {
 968             int qp_i, offset;
 969
 970             if (c_idx == 1)
 971                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
 972             else
 973                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 974
 975             qp_i = av_clip(qp_y + offset, -s->sps->qp_bd_offset, 57);
 976             if (qp_i < 30)
 977                 qp = qp_i;
 978             else if (qp_i > 43)
 979                 qp = qp_i - 6;
 980             else
 981                 qp = qp_c[qp_i - 30];
 982
 983             qp += s->sps->qp_bd_offset;
 984         }
 985
 986         shift    = s->sps->bit_depth + log2_trafo_size - 5;
 987         add      = 1 << (shift - 1);
 988         scale    = level_scale[rem6[qp]] << (div6[qp]);
 989         scale_m  = 16; // default when no custom scaling lists.
 990         dc_scale = 16;
 991
 992         if (s->sps->scaling_list_enable_flag) {
 993             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
 994                                     &s->pps->scaling_list : &s->sps->scaling_list;
 995             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 996
 997             if (log2_trafo_size != 5)
 998                 matrix_id = 3 * matrix_id + c_idx;
 999
1000             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
1001             if (log2_trafo_size >= 4)
1002                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
1003         }
1004     }
1005
1006     if (s->pps->transform_skip_enabled_flag &&
1007         !lc->cu.cu_transquant_bypass_flag   &&
1008         log2_trafo_size == 2) {
1009         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
1010     }
1011
1012     last_significant_coeff_x =
1013         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
1014     last_significant_coeff_y =
1015         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
1016
1017     if (last_significant_coeff_x > 3) {
1018         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
1019         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
1020                                    (2 + (last_significant_coeff_x & 1)) +
1021                                    suffix;
1022     }
1023
1024     if (last_significant_coeff_y > 3) {
1025         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1026         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1027                                    (2 + (last_significant_coeff_y & 1)) +
1028                                    suffix;
1029     }
1030
1031     if (scan_idx == SCAN_VERT)
1032         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1033
1034     x_cg_last_sig = last_significant_coeff_x >> 2;
1035     y_cg_last_sig = last_significant_coeff_y >> 2;
1036
1037     switch (scan_idx) {
1038     case SCAN_DIAG: {
1039         int last_x_c = last_significant_coeff_x & 3;
1040         int last_y_c = last_significant_coeff_y & 3;
1041
1042         scan_x_off = ff_hevc_diag_scan4x4_x;
1043         scan_y_off = ff_hevc_diag_scan4x4_y;
1044         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1045         if (trafo_size == 4) {
1046             scan_x_cg = scan_1x1;
1047             scan_y_cg = scan_1x1;
1048         } else if (trafo_size == 8) {
1049             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1050             scan_x_cg  = diag_scan2x2_x;
1051             scan_y_cg  = diag_scan2x2_y;
1052         } else if (trafo_size == 16) {
1053             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1054             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1055             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1056         } else { // trafo_size == 32
1057             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1058             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1059             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1060         }
1061         break;
1062     }
1063     case SCAN_HORIZ:
1064         scan_x_cg  = horiz_scan2x2_x;
1065         scan_y_cg  = horiz_scan2x2_y;
1066         scan_x_off = horiz_scan4x4_x;
1067         scan_y_off = horiz_scan4x4_y;
1068         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1069         break;
1070     default: //SCAN_VERT
1071         scan_x_cg  = horiz_scan2x2_y;
1072         scan_y_cg  = horiz_scan2x2_x;
1073         scan_x_off = horiz_scan4x4_y;
1074         scan_y_off = horiz_scan4x4_x;
1075         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1076         break;
1077     }
1078     num_coeff++;
1079     num_last_subset = (num_coeff - 1) >> 4;
1080
1081     for (i = num_last_subset; i >= 0; i--) {
1082         int n, m;
1083         int x_cg, y_cg, x_c, y_c;
1084         int implicit_non_zero_coeff = 0;
1085         int64_t trans_coeff_level;
1086         int prev_sig = 0;
1087         int offset   = i << 4;
1088
1089         uint8_t significant_coeff_flag_idx[16];
1090         uint8_t nb_significant_coeff_flag = 0;
1091
1092         x_cg = scan_x_cg[i];
1093         y_cg = scan_y_cg[i];
1094
1095         if (i < num_last_subset && i > 0) {
1096             int ctx_cg = 0;
1097             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1098                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1099             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1100                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1101
1102             significant_coeff_group_flag[x_cg][y_cg] =
1103                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1104             implicit_non_zero_coeff = 1;
1105         } else {
1106             significant_coeff_group_flag[x_cg][y_cg] =
1107                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1108                  (x_cg == 0 && y_cg == 0));
1109         }
1110
1111         last_scan_pos = num_coeff - offset - 1;
1112
1113         if (i == num_last_subset) {
1114             n_end                         = last_scan_pos - 1;
1115             significant_coeff_flag_idx[0] = last_scan_pos;
1116             nb_significant_coeff_flag     = 1;
1117         } else {
1118             n_end = 15;
1119         }
1120
1121         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1122             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1123         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1124             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1125
1126         for (n = n_end; n >= 0; n--) {
1127             GET_COORD(offset, n);
1128
1129             if (significant_coeff_group_flag[x_cg][y_cg] &&
1130                 (n > 0 || implicit_non_zero_coeff == 0)) {
1131                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1132                                                           log2_trafo_size,
1133                                                           scan_idx,
1134                                                           prev_sig) == 1) {
1135                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1136                     nb_significant_coeff_flag++;
1137                     implicit_non_zero_coeff = 0;
1138                 }
1139             } else {
1140                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1141                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1142                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1143                     nb_significant_coeff_flag++;
1144                 }
1145             }
1146         }
1147
1148         n_end = nb_significant_coeff_flag;
1149
1150         if (n_end) {
1151             int first_nz_pos_in_cg = 16;
1152             int last_nz_pos_in_cg = -1;
1153             int c_rice_param = 0;
1154             int first_greater1_coeff_idx = -1;
1155             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1156             uint16_t coeff_sign_flag;
1157             int sum_abs = 0;
1158             int sign_hidden = 0;
1159
1160             // initialize first elem of coeff_bas_level_greater1_flag
1161             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1162
1163             if (!(i == num_last_subset) && greater1_ctx == 0)
1164                 ctx_set++;
1165             greater1_ctx      = 1;
1166             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1167
1168             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1169                 int n_idx = significant_coeff_flag_idx[m];
1170                 int inc   = (ctx_set << 2) + greater1_ctx;
1171                 coeff_abs_level_greater1_flag[n_idx] =
1172                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1173                 if (coeff_abs_level_greater1_flag[n_idx]) {
1174                     greater1_ctx = 0;
1175                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1176                     greater1_ctx++;
1177                 }
1178
1179                 if (coeff_abs_level_greater1_flag[n_idx] &&
1180                     first_greater1_coeff_idx == -1)
1181                     first_greater1_coeff_idx = n_idx;
1182             }
1183             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1184             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1185                                  !lc->cu.cu_transquant_bypass_flag;
1186
1187             if (first_greater1_coeff_idx != -1) {
1188                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1189             }
1190             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1191                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1192             } else {
1193                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1194             }
1195
1196             for (m = 0; m < n_end; m++) {
1197                 n = significant_coeff_flag_idx[m];
1198                 GET_COORD(offset, n);
1199                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1200                 if (trans_coeff_level == ((m < 8) ?
1201                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1202                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1203
1204                     trans_coeff_level += last_coeff_abs_level_remaining;
1205                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1206                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1207                 }
1208                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1209                     sum_abs += trans_coeff_level;
1210                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1211                         trans_coeff_level = -trans_coeff_level;
1212                 }
1213                 if (coeff_sign_flag >> 15)
1214                     trans_coeff_level = -trans_coeff_level;
1215                 coeff_sign_flag <<= 1;
1216                 if (!lc->cu.cu_transquant_bypass_flag) {
1217                     if (s->sps->scaling_list_enable_flag) {
1218                         if (y_c || x_c || log2_trafo_size < 4) {
1219                             int pos;
1220                             switch (log2_trafo_size) {
1221                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1222                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1223                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1224                             default: pos = (y_c        << 2) +  x_c;
1225                             }
1226                             scale_m = scale_matrix[pos];
1227                         } else {
1228                             scale_m = dc_scale;
1229                         }
1230                     }
1231                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1232                     if(trans_coeff_level < 0) {
1233                         if((~trans_coeff_level) & 0xFffffffffff8000)
1234                             trans_coeff_level = -32768;
1235                     } else {
1236                         if (trans_coeff_level & 0xffffffffffff8000)
1237                             trans_coeff_level = 32767;
1238                     }
1239                 }
1240                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1241             }
1242         }
1243     }
1244
1245     if (lc->cu.cu_transquant_bypass_flag) {
1246         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1247     } else {
1248         if (transform_skip_flag)
1249             s->hevcdsp.transform_skip(dst, coeffs, stride);
1250         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1251                  log2_trafo_size == 2)
1252             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1253         else
1254             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1255     }
1256 }
1257
1258 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1259                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1260                               int log2_cb_size, int log2_trafo_size,
1261                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1262 {
1263     HEVCLocalContext *lc = &s->HEVClc;
1264
1265     if (lc->cu.pred_mode == MODE_INTRA) {
1266         int trafo_size = 1 << log2_trafo_size;
1267         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1268
1269         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1270         if (log2_trafo_size > 2) {
1271             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1272             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1273             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1274             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1275         } else if (blk_idx == 3) {
1276             trafo_size = trafo_size << s->sps->hshift[1];
1277             ff_hevc_set_neighbour_available(s, xBase, yBase,
1278                                             trafo_size, trafo_size);
1279             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1280             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1281         }
1282     }
1283
1284     if (cbf_luma || cbf_cb || cbf_cr) {
1285         int scan_idx   = SCAN_DIAG;
1286         int scan_idx_c = SCAN_DIAG;
1287
1288         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1289             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1290             if (lc->tu.cu_qp_delta != 0)
1291                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1292                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1293             lc->tu.is_cu_qp_delta_coded = 1;
1294
1295             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1296                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1297                 av_log(s->avctx, AV_LOG_ERROR,
1298                        "The cu_qp_delta %d is outside the valid range "
1299                        "[%d, %d].\n",
1300                        lc->tu.cu_qp_delta,
1301                        -(26 + s->sps->qp_bd_offset / 2),
1302                         (25 + s->sps->qp_bd_offset / 2));
1303                 return AVERROR_INVALIDDATA;
1304             }
1305
1306             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1307         }
1308
1309         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1310             if (lc->tu.cur_intra_pred_mode >= 6 &&
1311                 lc->tu.cur_intra_pred_mode <= 14) {
1312                 scan_idx = SCAN_VERT;
1313             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1314                        lc->tu.cur_intra_pred_mode <= 30) {
1315                 scan_idx = SCAN_HORIZ;
1316             }
1317
1318             if (lc->pu.intra_pred_mode_c >=  6 &&
1319                 lc->pu.intra_pred_mode_c <= 14) {
1320                 scan_idx_c = SCAN_VERT;
1321             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1322                        lc->pu.intra_pred_mode_c <= 30) {
1323                 scan_idx_c = SCAN_HORIZ;
1324             }
1325         }
1326
1327         if (cbf_luma)
1328             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1329         if (log2_trafo_size > 2) {
1330             if (cbf_cb)
1331                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1332             if (cbf_cr)
1333                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1334         } else if (blk_idx == 3) {
1335             if (cbf_cb)
1336                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1337             if (cbf_cr)
1338                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1339         }
1340     }
1341     return 0;
1342 }
1343
1344 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1345 {
1346     int cb_size          = 1 << log2_cb_size;
1347     int log2_min_pu_size = s->sps->log2_min_pu_size;
1348
1349     int min_pu_width     = s->sps->min_pu_width;
1350     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1351     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1352     int i, j;
1353
1354     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1355         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1356             s->is_pcm[i + j * min_pu_width] = 2;
1357 }
1358
1359 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1360                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1361                               int log2_cb_size, int log2_trafo_size,
1362                               int trafo_depth, int blk_idx,
1363                               int cbf_cb, int cbf_cr)
1364 {
1365     HEVCLocalContext *lc = &s->HEVClc;
1366     uint8_t split_transform_flag;
1367     int ret;
1368
1369     if (lc->cu.intra_split_flag) {
1370         if (trafo_depth == 1)
1371             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1372     } else {
1373         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1374     }
1375
1376     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1377         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1378         trafo_depth     < lc->cu.max_trafo_depth       &&
1379         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1380         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1381     } else {
1382         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1383                           lc->cu.pred_mode == MODE_INTER &&
1384                           lc->cu.part_mode != PART_2Nx2N &&
1385                           trafo_depth == 0;
1386
1387         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1388                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1389                                inter_split;
1390     }
1391
1392     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1393         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1394     else if (log2_trafo_size > 2 || trafo_depth == 0)
1395         cbf_cb = 0;
1396     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1397         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1398     else if (log2_trafo_size > 2 || trafo_depth == 0)
1399         cbf_cr = 0;
1400
1401     if (split_transform_flag) {
1402         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1403         const int x1 = x0 + trafo_size_split;
1404         const int y1 = y0 + trafo_size_split;
1405
1406 #define SUBDIVIDE(x, y, idx)                                                    \
1407 do {                                                                            \
1408     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1409                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1410                              cbf_cb, cbf_cr);                                   \
1411     if (ret < 0)                                                                \
1412         return ret;                                                             \
1413 } while (0)
1414
1415         SUBDIVIDE(x0, y0, 0);
1416         SUBDIVIDE(x1, y0, 1);
1417         SUBDIVIDE(x0, y1, 2);
1418         SUBDIVIDE(x1, y1, 3);
1419
1420 #undef SUBDIVIDE
1421     } else {
1422         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1423         int log2_min_tu_size = s->sps->log2_min_tb_size;
1424         int min_tu_width     = s->sps->min_tb_width;
1425         int cbf_luma         = 1;
1426
1427         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1428             cbf_cb || cbf_cr)
1429             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1430
1431         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1432                                  log2_cb_size, log2_trafo_size,
1433                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1434         if (ret < 0)
1435             return ret;
1436         // TODO: store cbf_luma somewhere else
1437         if (cbf_luma) {
1438             int i, j;
1439             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1440                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1441                     int x_tu = (x0 + j) >> log2_min_tu_size;
1442                     int y_tu = (y0 + i) >> log2_min_tu_size;
1443                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1444                 }
1445         }
1446         if (!s->sh.disable_deblocking_filter_flag) {
1447             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1448             if (s->pps->transquant_bypass_enable_flag &&
1449                 lc->cu.cu_transquant_bypass_flag)
1450                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1451         }
1452     }
1453     return 0;
1454 }
1455
1456 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1457 {
1458     //TODO: non-4:2:0 support
1459     HEVCLocalContext *lc = &s->HEVClc;
1460     GetBitContext gb;
1461     int cb_size   = 1 << log2_cb_size;
1462     int stride0   = s->frame->linesize[0];
1463     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1464     int   stride1 = s->frame->linesize[1];
1465     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1466     int   stride2 = s->frame->linesize[2];
1467     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1468
1469     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1470     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1471     int ret;
1472
1473     if (!s->sh.disable_deblocking_filter_flag)
1474         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1475
1476     ret = init_get_bits(&gb, pcm, length);
1477     if (ret < 0)
1478         return ret;
1479
1480     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1481     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1482     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1483     return 0;
1484 }
1485
1486 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1487 {
1488     HEVCLocalContext *lc = &s->HEVClc;
1489     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1490     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1491
1492     if (x)
1493         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1494     if (y)
1495         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1496
1497     switch (x) {
1498     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1499     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1500     case 0: lc->pu.mvd.x = 0;                               break;
1501     }
1502
1503     switch (y) {
1504     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1505     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1506     case 0: lc->pu.mvd.y = 0;                               break;
1507     }
1508 }
1509
1510 /**
1511  * 8.5.3.2.2.1 Luma sample interpolation process
1512  *
1513  * @param s HEVC decoding context
1514  * @param dst target buffer for block data at block position
1515  * @param dststride stride of the dst buffer
1516  * @param ref reference picture buffer at origin (0, 0)
1517  * @param mv motion vector (relative to block position) to get pixel data from
1518  * @param x_off horizontal position of block from origin (0, 0)
1519  * @param y_off vertical position of block from origin (0, 0)
1520  * @param block_w width of block
1521  * @param block_h height of block
1522  */
1523 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1524                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1525                     int block_w, int block_h)
1526 {
1527     HEVCLocalContext *lc = &s->HEVClc;
1528     uint8_t *src         = ref->data[0];
1529     ptrdiff_t srcstride  = ref->linesize[0];
1530     int pic_width        = s->sps->width;
1531     int pic_height       = s->sps->height;
1532
1533     int mx         = mv->x & 3;
1534     int my         = mv->y & 3;
1535     int extra_left = ff_hevc_qpel_extra_before[mx];
1536     int extra_top  = ff_hevc_qpel_extra_before[my];
1537
1538     x_off += mv->x >> 2;
1539     y_off += mv->y >> 2;
1540     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1541
1542     if (x_off < extra_left || y_off < extra_top ||
1543         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1544         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1545         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1546         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1547         int buf_offset = extra_top *
1548                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1549
1550         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1551                                  edge_emu_stride, srcstride,
1552                                  block_w + ff_hevc_qpel_extra[mx],
1553                                  block_h + ff_hevc_qpel_extra[my],
1554                                  x_off - extra_left, y_off - extra_top,
1555                                  pic_width, pic_height);
1556         src = lc->edge_emu_buffer + buf_offset;
1557         srcstride = edge_emu_stride;
1558     }
1559     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1560                                      block_h, lc->mc_buffer);
1561 }
1562
1563 /**
1564  * 8.5.3.2.2.2 Chroma sample interpolation process
1565  *
1566  * @param s HEVC decoding context
1567  * @param dst1 target buffer for block data at block position (U plane)
1568  * @param dst2 target buffer for block data at block position (V plane)
1569  * @param dststride stride of the dst1 and dst2 buffers
1570  * @param ref reference picture buffer at origin (0, 0)
1571  * @param mv motion vector (relative to block position) to get pixel data from
1572  * @param x_off horizontal position of block from origin (0, 0)
1573  * @param y_off vertical position of block from origin (0, 0)
1574  * @param block_w width of block
1575  * @param block_h height of block
1576  */
1577 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1578                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1579                       int x_off, int y_off, int block_w, int block_h)
1580 {
1581     HEVCLocalContext *lc = &s->HEVClc;
1582     uint8_t *src1        = ref->data[1];
1583     uint8_t *src2        = ref->data[2];
1584     ptrdiff_t src1stride = ref->linesize[1];
1585     ptrdiff_t src2stride = ref->linesize[2];
1586     int pic_width        = s->sps->width >> 1;
1587     int pic_height       = s->sps->height >> 1;
1588
1589     int mx = mv->x & 7;
1590     int my = mv->y & 7;
1591
1592     x_off += mv->x >> 3;
1593     y_off += mv->y >> 3;
1594     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1595     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1596
1597     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1598         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1599         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1600         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1601         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1602         int buf_offset1 = EPEL_EXTRA_BEFORE *
1603                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1604         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1605         int buf_offset2 = EPEL_EXTRA_BEFORE *
1606                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1607
1608         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1609                                  edge_emu_stride, src1stride,
1610                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1611                                  x_off - EPEL_EXTRA_BEFORE,
1612                                  y_off - EPEL_EXTRA_BEFORE,
1613                                  pic_width, pic_height);
1614
1615         src1 = lc->edge_emu_buffer + buf_offset1;
1616         src1stride = edge_emu_stride;
1617         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1618                                              block_w, block_h, mx, my, lc->mc_buffer);
1619
1620         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1621                                  edge_emu_stride, src2stride,
1622                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1623                                  x_off - EPEL_EXTRA_BEFORE,
1624                                  y_off - EPEL_EXTRA_BEFORE,
1625                                  pic_width, pic_height);
1626         src2 = lc->edge_emu_buffer + buf_offset2;
1627         src2stride = edge_emu_stride;
1628
1629         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1630                                              block_w, block_h, mx, my,
1631                                              lc->mc_buffer);
1632     } else {
1633         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1634                                              block_w, block_h, mx, my,
1635                                              lc->mc_buffer);
1636         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1637                                              block_w, block_h, mx, my,
1638                                              lc->mc_buffer);
1639     }
1640 }
1641
1642 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1643                                 const Mv *mv, int y0, int height)
1644 {
1645     int y = (mv->y >> 2) + y0 + height + 9;
1646     ff_thread_await_progress(&ref->tf, y, 0);
1647 }
1648
1649 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1650                                   int nPbH, int log2_cb_size, int part_idx,
1651                                   int merge_idx, MvField *mv)
1652 {
1653     HEVCLocalContext *lc             = &s->HEVClc;
1654     enum InterPredIdc inter_pred_idc = PRED_L0;
1655     int mvp_flag;
1656
1657     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1658     if (s->sh.slice_type == B_SLICE)
1659         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1660
1661     if (inter_pred_idc != PRED_L1) {
1662         if (s->sh.nb_refs[L0])
1663             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1664
1665         mv->pred_flag[0] = 1;
1666         hls_mvd_coding(s, x0, y0, 0);
1667         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1668         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1669                                  part_idx, merge_idx, mv, mvp_flag, 0);
1670         mv->mv[0].x += lc->pu.mvd.x;
1671         mv->mv[0].y += lc->pu.mvd.y;
1672     }
1673
1674     if (inter_pred_idc != PRED_L0) {
1675         if (s->sh.nb_refs[L1])
1676             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1677
1678         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1679             AV_ZERO32(&lc->pu.mvd);
1680         } else {
1681             hls_mvd_coding(s, x0, y0, 1);
1682         }
1683
1684         mv->pred_flag[1] = 1;
1685         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1686         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1687                                  part_idx, merge_idx, mv, mvp_flag, 1);
1688         mv->mv[1].x += lc->pu.mvd.x;
1689         mv->mv[1].y += lc->pu.mvd.y;
1690     }
1691 }
1692
1693 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1694                                 int nPbW, int nPbH,
1695                                 int log2_cb_size, int partIdx)
1696 {
1697 #define POS(c_idx, x, y)                                                              \
1698     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1699                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1700     HEVCLocalContext *lc = &s->HEVClc;
1701     int merge_idx = 0;
1702     struct MvField current_mv = {{{ 0 }}};
1703
1704     int min_pu_width = s->sps->min_pu_width;
1705
1706     MvField *tab_mvf = s->ref->tab_mvf;
1707     RefPicList  *refPicList = s->ref->refPicList;
1708     HEVCFrame *ref0, *ref1;
1709
1710     int tmpstride = MAX_PB_SIZE;
1711
1712     uint8_t *dst0 = POS(0, x0, y0);
1713     uint8_t *dst1 = POS(1, x0, y0);
1714     uint8_t *dst2 = POS(2, x0, y0);
1715     int log2_min_cb_size = s->sps->log2_min_cb_size;
1716     int min_cb_width     = s->sps->min_cb_width;
1717     int x_cb             = x0 >> log2_min_cb_size;
1718     int y_cb             = y0 >> log2_min_cb_size;
1719     int x_pu, y_pu;
1720     int i, j;
1721
1722     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1723
1724     if (!skip_flag)
1725         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1726
1727     if (skip_flag || lc->pu.merge_flag) {
1728         if (s->sh.max_num_merge_cand > 1)
1729             merge_idx = ff_hevc_merge_idx_decode(s);
1730         else
1731             merge_idx = 0;
1732
1733         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1734                                    partIdx, merge_idx, &current_mv);
1735     } else {
1736         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1737                               partIdx, merge_idx, &current_mv);
1738     }
1739
1740     x_pu = x0 >> s->sps->log2_min_pu_size;
1741     y_pu = y0 >> s->sps->log2_min_pu_size;
1742
1743     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1744         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1745             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1746
1747     if (current_mv.pred_flag[0]) {
1748         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1749         if (!ref0)
1750             return;
1751         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1752     }
1753     if (current_mv.pred_flag[1]) {
1754         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1755         if (!ref1)
1756             return;
1757         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1758     }
1759
1760     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1761         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1762         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1763
1764         luma_mc(s, tmp, tmpstride, ref0->frame,
1765                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1766
1767         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1768             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1769             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1770                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1771                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1772                                      dst0, s->frame->linesize[0], tmp,
1773                                      tmpstride, nPbW, nPbH);
1774         } else {
1775             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1776         }
1777         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1778                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1779
1780         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1781             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1782             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1783                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1784                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1785                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1786                                      nPbW / 2, nPbH / 2);
1787             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1788                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1789                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1790                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1791                                      nPbW / 2, nPbH / 2);
1792         } else {
1793             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1794             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1795         }
1796     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1797         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1798         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1799
1800         luma_mc(s, tmp, tmpstride, ref1->frame,
1801                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1802
1803         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1804             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1805             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1806                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1807                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1808                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1809                                       nPbW, nPbH);
1810         } else {
1811             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1812         }
1813
1814         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1815                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1816
1817         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1818             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1819             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1820                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1821                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1822                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1823             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1824                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1825                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1826                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1827         } else {
1828             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1829             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1830         }
1831     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1832         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1833         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1834         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1835         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1836
1837         luma_mc(s, tmp, tmpstride, ref0->frame,
1838                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1839         luma_mc(s, tmp2, tmpstride, ref1->frame,
1840                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1841
1842         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1843             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1844             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1845                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1846                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1847                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1848                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1849                                          dst0, s->frame->linesize[0],
1850                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1851         } else {
1852             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1853                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1854         }
1855
1856         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1857                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1858         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1859                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1860
1861         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1862             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1863             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1864                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1865                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1866                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1867                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1868                                          dst1, s->frame->linesize[1], tmp, tmp3,
1869                                          tmpstride, nPbW / 2, nPbH / 2);
1870             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1871                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1872                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1873                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1874                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1875                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1876                                          tmpstride, nPbW / 2, nPbH / 2);
1877         } else {
1878             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1879             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1880         }
1881     }
1882 }
1883
1884 /**
1885  * 8.4.1
1886  */
1887 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1888                                 int prev_intra_luma_pred_flag)
1889 {
1890     HEVCLocalContext *lc = &s->HEVClc;
1891     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1892     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1893     int min_pu_width     = s->sps->min_pu_width;
1894     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1895     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1896     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1897
1898     int cand_up   = (lc->ctb_up_flag || y0b) ?
1899                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1900     int cand_left = (lc->ctb_left_flag || x0b) ?
1901                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1902
1903     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1904
1905     MvField *tab_mvf = s->ref->tab_mvf;
1906     int intra_pred_mode;
1907     int candidate[3];
1908     int i, j;
1909
1910     // intra_pred_mode prediction does not cross vertical CTB boundaries
1911     if ((y0 - 1) < y_ctb)
1912         cand_up = INTRA_DC;
1913
1914     if (cand_left == cand_up) {
1915         if (cand_left < 2) {
1916             candidate[0] = INTRA_PLANAR;
1917             candidate[1] = INTRA_DC;
1918             candidate[2] = INTRA_ANGULAR_26;
1919         } else {
1920             candidate[0] = cand_left;
1921             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1922             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1923         }
1924     } else {
1925         candidate[0] = cand_left;
1926         candidate[1] = cand_up;
1927         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1928             candidate[2] = INTRA_PLANAR;
1929         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1930             candidate[2] = INTRA_DC;
1931         } else {
1932             candidate[2] = INTRA_ANGULAR_26;
1933         }
1934     }
1935
1936     if (prev_intra_luma_pred_flag) {
1937         intra_pred_mode = candidate[lc->pu.mpm_idx];
1938     } else {
1939         if (candidate[0] > candidate[1])
1940             FFSWAP(uint8_t, candidate[0], candidate[1]);
1941         if (candidate[0] > candidate[2])
1942             FFSWAP(uint8_t, candidate[0], candidate[2]);
1943         if (candidate[1] > candidate[2])
1944             FFSWAP(uint8_t, candidate[1], candidate[2]);
1945
1946         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1947         for (i = 0; i < 3; i++)
1948             if (intra_pred_mode >= candidate[i])
1949                 intra_pred_mode++;
1950     }
1951
1952     /* write the intra prediction units into the mv array */
1953     if (!size_in_pus)
1954         size_in_pus = 1;
1955     for (i = 0; i < size_in_pus; i++) {
1956         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1957                intra_pred_mode, size_in_pus);
1958
1959         for (j = 0; j < size_in_pus; j++) {
1960             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1961             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1962             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1963             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1964             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1965             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1966             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1967             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1968             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1969         }
1970     }
1971
1972     return intra_pred_mode;
1973 }
1974
1975 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1976                                           int log2_cb_size, int ct_depth)
1977 {
1978     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1979     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1980     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1981     int y;
1982
1983     for (y = 0; y < length; y++)
1984         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1985                ct_depth, length);
1986 }
1987
1988 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1989                                   int log2_cb_size)
1990 {
1991     HEVCLocalContext *lc = &s->HEVClc;
1992     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1993     uint8_t prev_intra_luma_pred_flag[4];
1994     int split   = lc->cu.part_mode == PART_NxN;
1995     int pb_size = (1 << log2_cb_size) >> split;
1996     int side    = split + 1;
1997     int chroma_mode;
1998     int i, j;
1999
2000     for (i = 0; i < side; i++)
2001         for (j = 0; j < side; j++)
2002             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2003
2004     for (i = 0; i < side; i++) {
2005         for (j = 0; j < side; j++) {
2006             if (prev_intra_luma_pred_flag[2 * i + j])
2007                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2008             else
2009                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2010
2011             lc->pu.intra_pred_mode[2 * i + j] =
2012                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2013                                      prev_intra_luma_pred_flag[2 * i + j]);
2014         }
2015     }
2016
2017     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2018     if (chroma_mode != 4) {
2019         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2020             lc->pu.intra_pred_mode_c = 34;
2021         else
2022             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2023     } else {
2024         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2025     }
2026 }
2027
2028 static void intra_prediction_unit_default_value(HEVCContext *s,
2029                                                 int x0, int y0,
2030                                                 int log2_cb_size)
2031 {
2032     HEVCLocalContext *lc = &s->HEVClc;
2033     int pb_size          = 1 << log2_cb_size;
2034     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2035     int min_pu_width     = s->sps->min_pu_width;
2036     MvField *tab_mvf     = s->ref->tab_mvf;
2037     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2038     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2039     int j, k;
2040
2041     if (size_in_pus == 0)
2042         size_in_pus = 1;
2043     for (j = 0; j < size_in_pus; j++) {
2044         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2045         for (k = 0; k < size_in_pus; k++)
2046             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2047     }
2048 }
2049
2050 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2051 {
2052     int cb_size          = 1 << log2_cb_size;
2053     HEVCLocalContext *lc = &s->HEVClc;
2054     int log2_min_cb_size = s->sps->log2_min_cb_size;
2055     int length           = cb_size >> log2_min_cb_size;
2056     int min_cb_width     = s->sps->min_cb_width;
2057     int x_cb             = x0 >> log2_min_cb_size;
2058     int y_cb             = y0 >> log2_min_cb_size;
2059     int x, y, ret;
2060
2061     lc->cu.x                = x0;
2062     lc->cu.y                = y0;
2063     lc->cu.pred_mode        = MODE_INTRA;
2064     lc->cu.part_mode        = PART_2Nx2N;
2065     lc->cu.intra_split_flag = 0;
2066
2067     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2068     for (x = 0; x < 4; x++)
2069         lc->pu.intra_pred_mode[x] = 1;
2070     if (s->pps->transquant_bypass_enable_flag) {
2071         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2072         if (lc->cu.cu_transquant_bypass_flag)
2073             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2074     } else
2075         lc->cu.cu_transquant_bypass_flag = 0;
2076
2077     if (s->sh.slice_type != I_SLICE) {
2078         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2079
2080         x = y_cb * min_cb_width + x_cb;
2081         for (y = 0; y < length; y++) {
2082             memset(&s->skip_flag[x], skip_flag, length);
2083             x += min_cb_width;
2084         }
2085         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2086     }
2087
2088     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2089         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2090         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2091
2092         if (!s->sh.disable_deblocking_filter_flag)
2093             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2094     } else {
2095         int pcm_flag = 0;
2096
2097         if (s->sh.slice_type != I_SLICE)
2098             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2099         if (lc->cu.pred_mode != MODE_INTRA ||
2100             log2_cb_size == s->sps->log2_min_cb_size) {
2101             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2102             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2103                                       lc->cu.pred_mode == MODE_INTRA;
2104         }
2105
2106         if (lc->cu.pred_mode == MODE_INTRA) {
2107             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2108                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2109                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2110                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2111             }
2112             if (pcm_flag) {
2113                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2114                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2115                 if (s->sps->pcm.loop_filter_disable_flag)
2116                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2117
2118                 if (ret < 0)
2119                     return ret;
2120             } else {
2121                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2122             }
2123         } else {
2124             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2125             switch (lc->cu.part_mode) {
2126             case PART_2Nx2N:
2127                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2128                 break;
2129             case PART_2NxN:
2130                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2131                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2132                 break;
2133             case PART_Nx2N:
2134                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2135                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2136                 break;
2137             case PART_2NxnU:
2138                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2139                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2140                 break;
2141             case PART_2NxnD:
2142                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2143                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2144                 break;
2145             case PART_nLx2N:
2146                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2147                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2148                 break;
2149             case PART_nRx2N:
2150                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2151                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2152                 break;
2153             case PART_NxN:
2154                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2155                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2156                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2157                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2158                 break;
2159             }
2160         }
2161
2162         if (!pcm_flag) {
2163             int rqt_root_cbf = 1;
2164
2165             if (lc->cu.pred_mode != MODE_INTRA &&
2166                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2167                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2168             }
2169             if (rqt_root_cbf) {
2170                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2171                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2172                                          s->sps->max_transform_hierarchy_depth_inter;
2173                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2174                                          log2_cb_size,
2175                                          log2_cb_size, 0, 0, 0, 0);
2176                 if (ret < 0)
2177                     return ret;
2178             } else {
2179                 if (!s->sh.disable_deblocking_filter_flag)
2180                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2181             }
2182         }
2183     }
2184
2185     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2186         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2187
2188     x = y_cb * min_cb_width + x_cb;
2189     for (y = 0; y < length; y++) {
2190         memset(&s->qp_y_tab[x], lc->qp_y, length);
2191         x += min_cb_width;
2192     }
2193
2194     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2195
2196     return 0;
2197 }
2198
2199 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2200                                int log2_cb_size, int cb_depth)
2201 {
2202     HEVCLocalContext *lc = &s->HEVClc;
2203     const int cb_size    = 1 << log2_cb_size;
2204     int split_cu;
2205
2206     lc->ct.depth = cb_depth;
2207     if (x0 + cb_size <= s->sps->width  &&
2208         y0 + cb_size <= s->sps->height &&
2209         log2_cb_size > s->sps->log2_min_cb_size) {
2210         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2211     } else {
2212         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2213     }
2214     if (s->pps->cu_qp_delta_enabled_flag &&
2215         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2216         lc->tu.is_cu_qp_delta_coded = 0;
2217         lc->tu.cu_qp_delta          = 0;
2218     }
2219
2220     if (split_cu) {
2221         const int cb_size_split = cb_size >> 1;
2222         const int x1 = x0 + cb_size_split;
2223         const int y1 = y0 + cb_size_split;
2224
2225         log2_cb_size--;
2226         cb_depth++;
2227
2228 #define SUBDIVIDE(x, y)                                                \
2229 do {                                                                   \
2230     if (x < s->sps->width && y < s->sps->height) {                     \
2231         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2232         if (ret < 0)                                                   \
2233             return ret;                                                \
2234     }                                                                  \
2235 } while (0)
2236
2237         SUBDIVIDE(x0, y0);
2238         SUBDIVIDE(x1, y0);
2239         SUBDIVIDE(x0, y1);
2240         SUBDIVIDE(x1, y1);
2241     } else {
2242         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2243         if (ret < 0)
2244             return ret;
2245     }
2246
2247     return 0;
2248 }
2249
2250 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2251                                  int ctb_addr_ts)
2252 {
2253     HEVCLocalContext *lc  = &s->HEVClc;
2254     int ctb_size          = 1 << s->sps->log2_ctb_size;
2255     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2256     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2257
2258     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2259
2260     if (s->pps->entropy_coding_sync_enabled_flag) {
2261         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2262             lc->first_qp_group = 1;
2263         lc->end_of_tiles_x = s->sps->width;
2264     } else if (s->pps->tiles_enabled_flag) {
2265         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2266             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2267             lc->start_of_tiles_x = x_ctb;
2268             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2269             lc->first_qp_group   = 1;
2270         }
2271     } else {
2272         lc->end_of_tiles_x = s->sps->width;
2273     }
2274
2275     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2276
2277     lc->boundary_flags = 0;
2278     if (s->pps->tiles_enabled_flag) {
2279         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2280             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2281         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2282             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2283         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2284             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2285         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2286             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2287     } else {
2288         if (!ctb_addr_in_slice > 0)
2289             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2290         if (ctb_addr_in_slice < s->sps->ctb_width)
2291             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2292     }
2293
2294     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2295     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2296     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2297     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2298 }
2299
2300 static int hls_slice_data(HEVCContext *s)
2301 {
2302     int ctb_size    = 1 << s->sps->log2_ctb_size;
2303     int more_data   = 1;
2304     int x_ctb       = 0;
2305     int y_ctb       = 0;
2306     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2307     int ret;
2308
2309     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2310         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2311
2312         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2313         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2314         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2315
2316         ff_hevc_cabac_init(s, ctb_addr_ts);
2317
2318         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2319
2320         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2321         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2322         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2323
2324         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2325         if (ret < 0)
2326             return ret;
2327         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2328
2329         ctb_addr_ts++;
2330         ff_hevc_save_states(s, ctb_addr_ts);
2331         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2332     }
2333
2334     if (x_ctb + ctb_size >= s->sps->width &&
2335         y_ctb + ctb_size >= s->sps->height)
2336         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2337
2338     return ctb_addr_ts;
2339 }
2340
2341 /**
2342  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2343  * 0 if the unit should be skipped, 1 otherwise
2344  */
2345 static int hls_nal_unit(HEVCContext *s)
2346 {
2347     GetBitContext *gb = &s->HEVClc.gb;
2348     int nuh_layer_id;
2349
2350     if (get_bits1(gb) != 0)
2351         return AVERROR_INVALIDDATA;
2352
2353     s->nal_unit_type = get_bits(gb, 6);
2354
2355     nuh_layer_id   = get_bits(gb, 6);
2356     s->temporal_id = get_bits(gb, 3) - 1;
2357     if (s->temporal_id < 0)
2358         return AVERROR_INVALIDDATA;
2359
2360     av_log(s->avctx, AV_LOG_DEBUG,
2361            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2362            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2363
2364     return nuh_layer_id == 0;
2365 }
2366
2367 static void restore_tqb_pixels(HEVCContext *s)
2368 {
2369     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2370     int x, y, c_idx;
2371
2372     for (c_idx = 0; c_idx < 3; c_idx++) {
2373         ptrdiff_t stride = s->frame->linesize[c_idx];
2374         int hshift       = s->sps->hshift[c_idx];
2375         int vshift       = s->sps->vshift[c_idx];
2376         for (y = 0; y < s->sps->min_pu_height; y++) {
2377             for (x = 0; x < s->sps->min_pu_width; x++) {
2378                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2379                     int n;
2380                     int len      = min_pu_size >> hshift;
2381                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2382                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2383                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2384                         memcpy(dst, src, len);
2385                         src += stride;
2386                         dst += stride;
2387                     }
2388                 }
2389             }
2390         }
2391     }
2392 }
2393
2394 static int set_side_data(HEVCContext *s)
2395 {
2396     AVFrame *out = s->ref->frame;
2397
2398     if (s->sei_frame_packing_present &&
2399         s->frame_packing_arrangement_type >= 3 &&
2400         s->frame_packing_arrangement_type <= 5 &&
2401         s->content_interpretation_type > 0 &&
2402         s->content_interpretation_type < 3) {
2403         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2404         if (!stereo)
2405             return AVERROR(ENOMEM);
2406
2407         switch (s->frame_packing_arrangement_type) {
2408         case 3:
2409             if (s->quincunx_subsampling)
2410                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2411             else
2412                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2413             break;
2414         case 4:
2415             stereo->type = AV_STEREO3D_TOPBOTTOM;
2416             break;
2417         case 5:
2418             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2419             break;
2420         }
2421
2422         if (s->content_interpretation_type == 2)
2423             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2424     }
2425
2426     if (s->sei_display_orientation_present &&
2427         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2428         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2429         AVFrameSideData *rotation = av_frame_new_side_data(out,
2430                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2431                                                            sizeof(int32_t) * 9);
2432         if (!rotation)
2433             return AVERROR(ENOMEM);
2434
2435         av_display_rotation_set((int32_t *)rotation->data, angle);
2436         av_display_matrix_flip((int32_t *)rotation->data,
2437                                s->sei_hflip, s->sei_vflip);
2438     }
2439
2440     return 0;
2441 }
2442
2443 static int hevc_frame_start(HEVCContext *s)
2444 {
2445     HEVCLocalContext *lc = &s->HEVClc;
2446     int ret;
2447
2448     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2449     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2450     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2451     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2452
2453     lc->start_of_tiles_x = 0;
2454     s->is_decoded        = 0;
2455     s->first_nal_type    = s->nal_unit_type;
2456
2457     if (s->pps->tiles_enabled_flag)
2458         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2459
2460     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2461                               s->poc);
2462     if (ret < 0)
2463         goto fail;
2464
2465     ret = ff_hevc_frame_rps(s);
2466     if (ret < 0) {
2467         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2468         goto fail;
2469     }
2470
2471     s->ref->frame->key_frame = IS_IRAP(s);
2472
2473     ret = set_side_data(s);
2474     if (ret < 0)
2475         goto fail;
2476
2477     av_frame_unref(s->output_frame);
2478     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2479     if (ret < 0)
2480         goto fail;
2481
2482     ff_thread_finish_setup(s->avctx);
2483
2484     return 0;
2485
2486 fail:
2487     if (s->ref)
2488         ff_hevc_unref_frame(s, s->ref, ~0);
2489     s->ref = NULL;
2490     return ret;
2491 }
2492
2493 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2494 {
2495     HEVCLocalContext *lc = &s->HEVClc;
2496     GetBitContext *gb    = &lc->gb;
2497     int ctb_addr_ts, ret;
2498
2499     ret = init_get_bits8(gb, nal->data, nal->size);
2500     if (ret < 0)
2501         return ret;
2502
2503     ret = hls_nal_unit(s);
2504     if (ret < 0) {
2505         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2506                s->nal_unit_type);
2507         goto fail;
2508     } else if (!ret)
2509         return 0;
2510
2511     switch (s->nal_unit_type) {
2512     case NAL_VPS:
2513         ret = ff_hevc_decode_nal_vps(s);
2514         if (ret < 0)
2515             goto fail;
2516         break;
2517     case NAL_SPS:
2518         ret = ff_hevc_decode_nal_sps(s);
2519         if (ret < 0)
2520             goto fail;
2521         break;
2522     case NAL_PPS:
2523         ret = ff_hevc_decode_nal_pps(s);
2524         if (ret < 0)
2525             goto fail;
2526         break;
2527     case NAL_SEI_PREFIX:
2528     case NAL_SEI_SUFFIX:
2529         ret = ff_hevc_decode_nal_sei(s);
2530         if (ret < 0)
2531             goto fail;
2532         break;
2533     case NAL_TRAIL_R:
2534     case NAL_TRAIL_N:
2535     case NAL_TSA_N:
2536     case NAL_TSA_R:
2537     case NAL_STSA_N:
2538     case NAL_STSA_R:
2539     case NAL_BLA_W_LP:
2540     case NAL_BLA_W_RADL:
2541     case NAL_BLA_N_LP:
2542     case NAL_IDR_W_RADL:
2543     case NAL_IDR_N_LP:
2544     case NAL_CRA_NUT:
2545     case NAL_RADL_N:
2546     case NAL_RADL_R:
2547     case NAL_RASL_N:
2548     case NAL_RASL_R:
2549         ret = hls_slice_header(s);
2550         if (ret < 0)
2551             return ret;
2552
2553         if (s->max_ra == INT_MAX) {
2554             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2555                 s->max_ra = s->poc;
2556             } else {
2557                 if (IS_IDR(s))
2558                     s->max_ra = INT_MIN;
2559             }
2560         }
2561
2562         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2563             s->poc <= s->max_ra) {
2564             s->is_decoded = 0;
2565             break;
2566         } else {
2567             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2568                 s->max_ra = INT_MIN;
2569         }
2570
2571         if (s->sh.first_slice_in_pic_flag) {
2572             ret = hevc_frame_start(s);
2573             if (ret < 0)
2574                 return ret;
2575         } else if (!s->ref) {
2576             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2577             goto fail;
2578         }
2579
2580         if (s->nal_unit_type != s->first_nal_type) {
2581             av_log(s->avctx, AV_LOG_ERROR,
2582                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2583                    s->first_nal_type, s->nal_unit_type);
2584             return AVERROR_INVALIDDATA;
2585         }
2586
2587         if (!s->sh.dependent_slice_segment_flag &&
2588             s->sh.slice_type != I_SLICE) {
2589             ret = ff_hevc_slice_rpl(s);
2590             if (ret < 0) {
2591                 av_log(s->avctx, AV_LOG_WARNING,
2592                        "Error constructing the reference lists for the current slice.\n");
2593                 goto fail;
2594             }
2595         }
2596
2597         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2598             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2599             if (ret < 0)
2600                 goto fail;
2601         }
2602
2603         if (s->avctx->hwaccel) {
2604             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2605             if (ret < 0)
2606                 goto fail;
2607         } else {
2608             ctb_addr_ts = hls_slice_data(s);
2609             if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2610                 s->is_decoded = 1;
2611                 if ((s->pps->transquant_bypass_enable_flag ||
2612                      (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2613                     s->sps->sao_enabled)
2614                     restore_tqb_pixels(s);
2615             }
2616
2617             if (ctb_addr_ts < 0) {
2618                 ret = ctb_addr_ts;
2619                 goto fail;
2620             }
2621         }
2622         break;
2623     case NAL_EOS_NUT:
2624     case NAL_EOB_NUT:
2625         s->seq_decode = (s->seq_decode + 1) & 0xff;
2626         s->max_ra     = INT_MAX;
2627         break;
2628     case NAL_AUD:
2629     case NAL_FD_NUT:
2630         break;
2631     default:
2632         av_log(s->avctx, AV_LOG_INFO,
2633                "Skipping NAL unit %d\n", s->nal_unit_type);
2634     }
2635
2636     return 0;
2637 fail:
2638     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2639         return ret;
2640     return 0;
2641 }
2642
/* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
 * between these functions would be nice. */
/**
 * Locate the end of the NAL unit starting at src and remove the escape
 * bytes from its payload.
 *
 * The input is scanned for the first 00 00 0x sequence with x <= 3. When no
 * such sequence exists, nal->data and nal->raw_data point directly into src.
 * Otherwise the payload is copied into nal->rbsp_buffer with every
 * 00 00 03 sequence replaced by 00 00, stopping at the next start code, and
 * the buffer is followed by FF_INPUT_BUFFER_PADDING_SIZE zero bytes.
 *
 * @param src    start of the NAL unit
 * @param length number of bytes available at src
 * @param nal    receives data/size (unescaped payload) and
 *               raw_data/raw_size (the bytes as found in src)
 * @return the number of bytes consumed from src, or AVERROR(ENOMEM) if the
 *         unescape buffer could not be allocated
 */
static int extract_rbsp(const uint8_t *src, int length,
                        HEVCNAL *nal)
{
    int i, si, di;
    uint8_t *dst;

/* With src[i] == 0: leave the scan loop when a 00 00 0x (x <= 3) sequence
 * starts at i; when x is not 3 this is a start code, so the unit ends at i. */
#define STARTCODE_TEST                                                  \
        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
            if (src[i + 2] != 3) {                                      \
                /* startcode, so we must be past the end */             \
                length = i;                                             \
            }                                                           \
            break;                                                      \
        }
#if HAVE_FAST_UNALIGNED
/* Move i onto the first zero byte at or after i - 1. */
#define FIND_FIRST_ZERO                                                 \
        if (i > 0 && !src[i])                                           \
            i--;                                                        \
        while (src[i])                                                  \
            i++
#if HAVE_FAST_64BIT
    /* Word-at-a-time scan: words for which the bit test finds no candidate
     * zero byte are skipped. NOTE(review): the word reads and
     * FIND_FIRST_ZERO can touch bytes beyond length; presumably this relies
     * on padding after the input -- confirm. */
    for (i = 0; i + 1 < length; i += 9) {
        if (!((~AV_RN64A(src + i) &
               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
              0x8000800080008080ULL))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 7;
    }
#else
    for (i = 0; i + 1 < length; i += 5) {
        if (!((~AV_RN32A(src + i) &
               (AV_RN32A(src + i) - 0x01000101U)) &
              0x80008080U))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 3;
    }
#endif /* HAVE_FAST_64BIT */
#else
    /* Byte-wise scan checking every second byte; on a zero byte, step back
     * one position if the previous byte is zero as well. */
    for (i = 0; i + 1 < length; i += 2) {
        if (src[i])
            continue;
        if (i > 0 && src[i - 1] == 0)
            i--;
        STARTCODE_TEST;
    }
#endif /* HAVE_FAST_UNALIGNED */

    /* the scan reached the (possibly shortened) end without finding an
     * escape: use the input bytes in place */
    if (i >= length - 1) { // no escaped 0
        nal->data     =
        nal->raw_data = src;
        nal->size     =
        nal->raw_size = length;
        return length;
    }

    av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
                   length + FF_INPUT_BUFFER_PADDING_SIZE);
    if (!nal->rbsp_buffer)
        return AVERROR(ENOMEM);

    dst = nal->rbsp_buffer;

    /* everything before the first escape candidate is copied verbatim */
    memcpy(dst, src, i);
    si = di = i;
    while (si + 2 < length) {
        // remove escapes (very rare 1:2^22)
        if (src[si + 2] > 3) {
            /* no escape or start code can begin at si or si + 1: copy two
             * bytes here, the third is copied after the if/else chain */
            dst[di++] = src[si++];
            dst[di++] = src[si++];
        } else if (src[si] == 0 && src[si + 1] == 0) {
            if (src[si + 2] == 3) { // escape
                /* keep the two zero bytes, drop the 03 */
                dst[di++] = 0;
                dst[di++] = 0;
                si       += 3;

                continue;
            } else // next start code
                goto nsc;
        }

        dst[di++] = src[si++];
    }
    /* copy the remaining tail bytes */
    while (si < length)
        dst[di++] = src[si++];

nsc:
    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    nal->data = dst;
    nal->size = di;
    nal->raw_data = src;
    nal->raw_size = si;
    return si;
}
2743
2744 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2745 {
2746     int i, consumed, ret = 0;
2747
2748     s->ref = NULL;
2749     s->eos = 0;
2750
2751     /* split the input packet into NAL units, so we know the upper bound on the
2752      * number of slices in the frame */
2753     s->nb_nals = 0;
2754     while (length >= 4) {
2755         HEVCNAL *nal;
2756         int extract_length = 0;
2757
2758         if (s->is_nalff) {
2759             int i;
2760             for (i = 0; i < s->nal_length_size; i++)
2761                 extract_length = (extract_length << 8) | buf[i];
2762             buf    += s->nal_length_size;
2763             length -= s->nal_length_size;
2764
2765             if (extract_length > length) {
2766                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2767                 ret = AVERROR_INVALIDDATA;
2768                 goto fail;
2769             }
2770         } else {
2771             if (buf[2] == 0) {
2772                 length--;
2773                 buf++;
2774                 continue;
2775             }
2776             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2777                 ret = AVERROR_INVALIDDATA;
2778                 goto fail;
2779             }
2780
2781             buf           += 3;
2782             length        -= 3;
2783             extract_length = length;
2784         }
2785
2786         if (s->nals_allocated < s->nb_nals + 1) {
2787             int new_size = s->nals_allocated + 1;
2788             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2789             if (!tmp) {
2790                 ret = AVERROR(ENOMEM);
2791                 goto fail;
2792             }
2793             s->nals = tmp;
2794             memset(s->nals + s->nals_allocated, 0,
2795                    (new_size - s->nals_allocated) * sizeof(*tmp));
2796             s->nals_allocated = new_size;
2797         }
2798         nal = &s->nals[s->nb_nals++];
2799
2800         consumed = extract_rbsp(buf, extract_length, nal);
2801         if (consumed < 0) {
2802             ret = consumed;
2803             goto fail;
2804         }
2805
2806         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2807         if (ret < 0)
2808             goto fail;
2809         hls_nal_unit(s);
2810
2811         if (s->nal_unit_type == NAL_EOB_NUT ||
2812             s->nal_unit_type == NAL_EOS_NUT)
2813             s->eos = 1;
2814
2815         buf    += consumed;
2816         length -= consumed;
2817     }
2818
2819     /* parse the NAL units */
2820     for (i = 0; i < s->nb_nals; i++) {
2821         ret = decode_nal_unit(s, &s->nals[i]);
2822         if (ret < 0) {
2823             av_log(s->avctx, AV_LOG_WARNING,
2824                    "Error parsing NAL unit #%d.\n", i);
2825             goto fail;
2826         }
2827     }
2828
2829 fail:
2830     if (s->ref)
2831         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2832
2833     return ret;
2834 }
2835
/**
 * Log the 16 bytes of an MD5 digest as lowercase hexadecimal, two digits
 * per byte and without any separator or trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte      = md5;
    const uint8_t *const end = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2842
2843 static int verify_md5(HEVCContext *s, AVFrame *frame)
2844 {
2845     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2846     int pixel_shift;
2847     int i, j;
2848
2849     if (!desc)
2850         return AVERROR(EINVAL);
2851
2852     pixel_shift = desc->comp[0].depth_minus1 > 7;
2853
2854     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2855            s->poc);
2856
2857     /* the checksums are LE, so we have to byteswap for >8bpp formats
2858      * on BE arches */
2859 #if HAVE_BIGENDIAN
2860     if (pixel_shift && !s->checksum_buf) {
2861         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2862                        FFMAX3(frame->linesize[0], frame->linesize[1],
2863                               frame->linesize[2]));
2864         if (!s->checksum_buf)
2865             return AVERROR(ENOMEM);
2866     }
2867 #endif
2868
2869     for (i = 0; frame->data[i]; i++) {
2870         int width  = s->avctx->coded_width;
2871         int height = s->avctx->coded_height;
2872         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2873         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2874         uint8_t md5[16];
2875
2876         av_md5_init(s->md5_ctx);
2877         for (j = 0; j < h; j++) {
2878             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2879 #if HAVE_BIGENDIAN
2880             if (pixel_shift) {
2881                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2882                                     (const uint16_t *) src, w);
2883                 src = s->checksum_buf;
2884             }
2885 #endif
2886             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2887         }
2888         av_md5_final(s->md5_ctx, md5);
2889
2890         if (!memcmp(md5, s->md5[i], 16)) {
2891             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2892             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2893             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2894         } else {
2895             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2896             print_md5(s->avctx, AV_LOG_ERROR, md5);
2897             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2898             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2899             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2900             return AVERROR_INVALIDDATA;
2901         }
2902     }
2903
2904     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2905
2906     return 0;
2907 }
2908
2909 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2910                              AVPacket *avpkt)
2911 {
2912     int ret;
2913     HEVCContext *s = avctx->priv_data;
2914
2915     if (!avpkt->size) {
2916         ret = ff_hevc_output_frame(s, data, 1);
2917         if (ret < 0)
2918             return ret;
2919
2920         *got_output = ret;
2921         return 0;
2922     }
2923
2924     s->ref = NULL;
2925     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2926     if (ret < 0)
2927         return ret;
2928
2929     if (avctx->hwaccel) {
2930         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2931             av_log(avctx, AV_LOG_ERROR,
2932                    "hardware accelerator failed to decode picture\n");
2933     } else {
2934         /* verify the SEI checksum */
2935         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2936             s->is_md5) {
2937             ret = verify_md5(s, s->ref->frame);
2938             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2939                 ff_hevc_unref_frame(s, s->ref, ~0);
2940                 return ret;
2941             }
2942         }
2943     }
2944     s->is_md5 = 0;
2945
2946     if (s->is_decoded) {
2947         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2948         s->is_decoded = 0;
2949     }
2950
2951     if (s->output_frame->buf[0]) {
2952         av_frame_move_ref(data, s->output_frame);
2953         *got_output = 1;
2954     }
2955
2956     return avpkt->size;
2957 }
2958
2959 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2960 {
2961     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2962     if (ret < 0)
2963         return ret;
2964
2965     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2966     if (!dst->tab_mvf_buf)
2967         goto fail;
2968     dst->tab_mvf = src->tab_mvf;
2969
2970     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2971     if (!dst->rpl_tab_buf)
2972         goto fail;
2973     dst->rpl_tab = src->rpl_tab;
2974
2975     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2976     if (!dst->rpl_buf)
2977         goto fail;
2978
2979     dst->poc        = src->poc;
2980     dst->ctb_count  = src->ctb_count;
2981     dst->window     = src->window;
2982     dst->flags      = src->flags;
2983     dst->sequence   = src->sequence;
2984
2985     if (src->hwaccel_picture_private) {
2986         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2987         if (!dst->hwaccel_priv_buf)
2988             goto fail;
2989         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2990     }
2991
2992     return 0;
2993 fail:
2994     ff_hevc_unref_frame(s, dst, ~0);
2995     return AVERROR(ENOMEM);
2996 }
2997
2998 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2999 {
3000     HEVCContext       *s = avctx->priv_data;
3001     int i;
3002
3003     pic_arrays_free(s);
3004
3005     av_freep(&s->md5_ctx);
3006
3007     av_frame_free(&s->tmp_frame);
3008     av_frame_free(&s->output_frame);
3009
3010     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3011         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3012         av_frame_free(&s->DPB[i].frame);
3013     }
3014
3015     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3016         av_buffer_unref(&s->vps_list[i]);
3017     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3018         av_buffer_unref(&s->sps_list[i]);
3019     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3020         av_buffer_unref(&s->pps_list[i]);
3021
3022     for (i = 0; i < s->nals_allocated; i++)
3023         av_freep(&s->nals[i].rbsp_buffer);
3024     av_freep(&s->nals);
3025     s->nals_allocated = 0;
3026
3027     return 0;
3028 }
3029
3030 static av_cold int hevc_init_context(AVCodecContext *avctx)
3031 {
3032     HEVCContext *s = avctx->priv_data;
3033     int i;
3034
3035     s->avctx = avctx;
3036
3037     s->tmp_frame = av_frame_alloc();
3038     if (!s->tmp_frame)
3039         goto fail;
3040
3041     s->output_frame = av_frame_alloc();
3042     if (!s->output_frame)
3043         goto fail;
3044
3045     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3046         s->DPB[i].frame = av_frame_alloc();
3047         if (!s->DPB[i].frame)
3048             goto fail;
3049         s->DPB[i].tf.f = s->DPB[i].frame;
3050     }
3051
3052     s->max_ra = INT_MAX;
3053
3054     s->md5_ctx = av_md5_alloc();
3055     if (!s->md5_ctx)
3056         goto fail;
3057
3058     ff_bswapdsp_init(&s->bdsp);
3059
3060     s->context_initialized = 1;
3061
3062     return 0;
3063
3064 fail:
3065     hevc_decode_free(avctx);
3066     return AVERROR(ENOMEM);
3067 }
3068
3069 static int hevc_update_thread_context(AVCodecContext *dst,
3070                                       const AVCodecContext *src)
3071 {
3072     HEVCContext *s  = dst->priv_data;
3073     HEVCContext *s0 = src->priv_data;
3074     int i, ret;
3075
3076     if (!s->context_initialized) {
3077         ret = hevc_init_context(dst);
3078         if (ret < 0)
3079             return ret;
3080     }
3081
3082     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3083         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3084         if (s0->DPB[i].frame->buf[0]) {
3085             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3086             if (ret < 0)
3087                 return ret;
3088         }
3089     }
3090
3091     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3092         av_buffer_unref(&s->vps_list[i]);
3093         if (s0->vps_list[i]) {
3094             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3095             if (!s->vps_list[i])
3096                 return AVERROR(ENOMEM);
3097         }
3098     }
3099
3100     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3101         av_buffer_unref(&s->sps_list[i]);
3102         if (s0->sps_list[i]) {
3103             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3104             if (!s->sps_list[i])
3105                 return AVERROR(ENOMEM);
3106         }
3107     }
3108
3109     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3110         av_buffer_unref(&s->pps_list[i]);
3111         if (s0->pps_list[i]) {
3112             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3113             if (!s->pps_list[i])
3114                 return AVERROR(ENOMEM);
3115         }
3116     }
3117
3118     if (s->sps != s0->sps)
3119         ret = set_sps(s, s0->sps);
3120
3121     s->seq_decode = s0->seq_decode;
3122     s->seq_output = s0->seq_output;
3123     s->pocTid0    = s0->pocTid0;
3124     s->max_ra     = s0->max_ra;
3125
3126     s->is_nalff        = s0->is_nalff;
3127     s->nal_length_size = s0->nal_length_size;
3128
3129     if (s0->eos) {
3130         s->seq_decode = (s->seq_decode + 1) & 0xff;
3131         s->max_ra = INT_MAX;
3132     }
3133
3134     return 0;
3135 }
3136
3137 static int hevc_decode_extradata(HEVCContext *s)
3138 {
3139     AVCodecContext *avctx = s->avctx;
3140     GetByteContext gb;
3141     int ret, i;
3142
3143     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3144
3145     if (avctx->extradata_size > 3 &&
3146         (avctx->extradata[0] || avctx->extradata[1] ||
3147          avctx->extradata[2] > 1)) {
3148         /* It seems the extradata is encoded as hvcC format.
3149          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3150          * is finalized. When finalized, configurationVersion will be 1 and we
3151          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3152         int i, j, num_arrays, nal_len_size;
3153
3154         s->is_nalff = 1;
3155
3156         bytestream2_skip(&gb, 21);
3157         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3158         num_arrays   = bytestream2_get_byte(&gb);
3159
3160         /* nal units in the hvcC always have length coded with 2 bytes,
3161          * so put a fake nal_length_size = 2 while parsing them */
3162         s->nal_length_size = 2;
3163
3164         /* Decode nal units from hvcC. */
3165         for (i = 0; i < num_arrays; i++) {
3166             int type = bytestream2_get_byte(&gb) & 0x3f;
3167             int cnt  = bytestream2_get_be16(&gb);
3168
3169             for (j = 0; j < cnt; j++) {
3170                 // +2 for the nal size field
3171                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3172                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3173                     av_log(s->avctx, AV_LOG_ERROR,
3174                            "Invalid NAL unit size in extradata.\n");
3175                     return AVERROR_INVALIDDATA;
3176                 }
3177
3178                 ret = decode_nal_units(s, gb.buffer, nalsize);
3179                 if (ret < 0) {
3180                     av_log(avctx, AV_LOG_ERROR,
3181                            "Decoding nal unit %d %d from hvcC failed\n",
3182                            type, i);
3183                     return ret;
3184                 }
3185                 bytestream2_skip(&gb, nalsize);
3186             }
3187         }
3188
3189         /* Now store right nal length size, that will be used to parse
3190          * all other nals */
3191         s->nal_length_size = nal_len_size;
3192     } else {
3193         s->is_nalff = 0;
3194         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3195         if (ret < 0)
3196             return ret;
3197     }
3198
3199     /* export stream parameters from the first SPS */
3200     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3201         if (s->sps_list[i]) {
3202             const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data;
3203             export_stream_params(s->avctx, s, sps);
3204             break;
3205         }
3206     }
3207
3208     return 0;
3209 }
3210
3211 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3212 {
3213     HEVCContext *s = avctx->priv_data;
3214     int ret;
3215
3216     ff_init_cabac_states();
3217
3218     avctx->internal->allocate_progress = 1;
3219
3220     ret = hevc_init_context(avctx);
3221     if (ret < 0)
3222         return ret;
3223
3224     if (avctx->extradata_size > 0 && avctx->extradata) {
3225         ret = hevc_decode_extradata(s);
3226         if (ret < 0) {
3227             hevc_decode_free(avctx);
3228             return ret;
3229         }
3230     }
3231
3232     return 0;
3233 }
3234
3235 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3236 {
3237     HEVCContext *s = avctx->priv_data;
3238     int ret;
3239
3240     memset(s, 0, sizeof(*s));
3241
3242     ret = hevc_init_context(avctx);
3243     if (ret < 0)
3244         return ret;
3245
3246     return 0;
3247 }
3248
3249 static void hevc_decode_flush(AVCodecContext *avctx)
3250 {
3251     HEVCContext *s = avctx->priv_data;
3252     ff_hevc_flush_dpb(s);
3253     s->max_ra = INT_MAX;
3254 }
3255
/* Byte offset of member x within HEVCContext. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Option flags: a video option that is a decoding parameter. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3258
3259 static const AVProfile profiles[] = {
3260     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3261     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3262     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3263     { FF_PROFILE_UNKNOWN },
3264 };
3265
3266 static const AVOption options[] = {
3267     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3268         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3269     { NULL },
3270 };
3271
3272 static const AVClass hevc_decoder_class = {
3273     .class_name = "HEVC decoder",
3274     .item_name  = av_default_item_name,
3275     .option     = options,
3276     .version    = LIBAVUTIL_VERSION_INT,
3277 };
3278
3279 AVCodec ff_hevc_decoder = {
3280     .name                  = "hevc",
3281     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3282     .type                  = AVMEDIA_TYPE_VIDEO,
3283     .id                    = AV_CODEC_ID_HEVC,
3284     .priv_data_size        = sizeof(HEVCContext),
3285     .priv_class            = &hevc_decoder_class,
3286     .init                  = hevc_decode_init,
3287     .close                 = hevc_decode_free,
3288     .decode                = hevc_decode_frame,
3289     .flush                 = hevc_decode_flush,
3290     .update_thread_context = hevc_update_thread_context,
3291     .init_thread_copy      = hevc_init_thread_copy,
3292     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3293                              CODEC_CAP_FRAME_THREADS,
3294     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3295 };